mratsim · Vindaar · May 7, 2024 · Apr 21, 2024 · Apr 21, 2024 · May 7, 2024
diff --git a/src/arraymancer/tensor/shapeshifting.nim b/src/arraymancer/tensor/shapeshifting.nim
@@ -571,3 +571,183 @@ proc roll*[T](t: Tensor[T], shift: int, axis: Natural): Tensor[T] {.noinit.} =
       let result_idx = floorMod(n + shift, t.shape[axis])
       rolled_slices[result_idx] = t_slice
     result = concat(rolled_slices, axis)
+
+proc repeat_values*[T](t: Tensor[T], reps: int, axis = -1): Tensor[T] {.noinit.} =
+  ## Create a new tensor with each value repeated (the same amount of) `reps` times
+  ##
+  ## Inputs:
+  ##   - t: A tensor.
+  ##   - reps: The integer number of times that each value must be repeated.
+  ##   - axis: The axis over which values will be repeated. Negative values
+  ##           count back from the last axis. Defaults to the last axis (-1).
+  ##
+  ## Returns:
+  ##   - A new tensor containing the values of the input tensor repeated `reps`
+  ##     times over the selected axis.
+  ##
+  ## Notes:
+  ##   - All values are repeated (the same amount of) `reps` times along the
+  ##     selected axis. This makes the output shape the same as the input shape
+  ##     except at the selected axis, which is `reps` times greater.
+  ##   - There are alternative versions of this function which take a list
+  ##     of `reps` instead of a single `reps` value.
+  ##   - The equivalent numpy function is called `repeat`, while the
+  ##     equivalent Matlab function is called `repelem`. Different names
+  ##     were chosen here to avoid confusion with nim's `repeat` function
+  ##     which behaves like numpy's `tile`, not like this function.
+  ##
+  ## Examples:
+  ## ```nim
+  ## let t = arange(12).reshape(3, 4)
+  ## echo t.repeat_values(2)
+  ## # Tensor[system.int] of shape "[3, 8]" on backend "Cpu"
+  ## # |0      0     1     1     2     2     3     3|
+  ## # |4      4     5     5     6     6     7     7|
+  ## # |8      8     9     9    10    10    11    11|
+  ##
+  ## echo t.repeat_values(2, axis = 0)
+  ## # Tensor[system.int] of shape "[6, 4]" on backend "Cpu"
+  ## # |0      1     2     3|
+  ## # |0      1     2     3|
+  ## # |4      5     6     7|
+  ## # |4      5     6     7|
+  ## # |8      9    10    11|
+  ## # |8      9    10    11|
+  ## ```
+  let axis = if axis >= 0: axis else: t.shape.len + axis
+
+  when compileOption("boundChecks"):
+    doAssert axis >= 0 and axis < t.rank,
+      "repeat_values called with an axis (" & $axis &
+      ") that exceeds the input tensor rank (" & $t.rank & ")"
+
+  var target_shape = t.shape
+  target_shape[axis] *= reps
+
+  # `step` is the product of the dimensions that come after `axis`
+  var step = 1
+  for idx in countdown(t.shape.high, axis + 1):
+    step *= t.shape[idx]
+  result = newTensorUninit[T](t.size * reps)
+  for (idx, it) in t.enumerate():
+    let base = (step * reps) * (idx div step) + idx mod step
+    for n in 0 ..< reps: result[base + n * step] = it
+  return result.reshape(target_shape)
+
+proc repeat_values*[T](t: Tensor[T], reps: openArray[int]): Tensor[T] {.noinit.} =
+  ## Create a new rank-1 tensor with each value `t[i]` repeated `reps[i]` times
+  ##
+  ## Compared to the version of `repeat_values` that takes a single integer
+  ## `reps` value this version always returns a rank-1 tensor (regardless of
+  ## the input shape) and does not take an axis argument.
+  ##
+  ## Inputs:
+  ##   - t: A tensor.
+  ##   - reps: A sequence or array of non-negative integers indicating the
+  ##           number of times that each value must be repeated. It must have
+  ##           as many values as the input tensor.
+  ##
+  ## Returns:
+  ##   - A new rank-1 tensor containing the values of the input tensor repeated
+  ##     `reps` times.
+  ##
+  ## Notes:
+  ##   - If a rep value is 0, the corresponding item in the input tensor will
+  ##     be skipped from the output.
+  ##   - The equivalent numpy function is called `repeat`, while the
+  ##     equivalent Matlab function is called `repelem`. Different names
+  ##     were chosen here to avoid confusion with nim's `repeat` function
+  ##     which behaves like numpy's `tile`, not like this function.
+  ##
+  ## Example:
+  ## ```nim
+  ## let t = [3, 5, 2, 4].toTensor
+  ## echo t.repeat_values([1, 0, 3, 2])
+  ## # Tensor[system.int] of shape "[6]" on backend "Cpu"
+  ## #     3     2     2     2     4     4
+  ## ```
+  when compileOption("boundChecks"):
+    doAssert reps.len == t.len,
+      "repeat_values called with a reps list whose length (" & $reps.len &
+      ") does not match the input tensor size (" & $t.len & ")"
+    for r in reps: doAssert r >= 0, "repeat_values called with a negative reps value (" & $r & ")"
+
+  result = newTensorUninit[T](sum(reps))
+  var write_pos = 0
+  for (idx, it) in t.enumerate():
+    for n in 0 ..< reps[idx]: result[write_pos + n] = it
+    write_pos += reps[idx]
+  return result
+
+proc repeat_values*[T](t: Tensor[T], reps: Tensor[int]): Tensor[T] {.noinit, inline.} =
+  ## Create a new rank-1 tensor with each value `t[i]` repeated `reps[i]` times
+  ##
+  ## Overload of this function which takes a `Tensor[int]` instead of an
+  ## `openArray[int]`. The `reps` tensor is converted with `toSeq1D` and
+  ## forwarded to the `openArray[int]` version, so behavior is exactly the
+  ## same as that version.
+  t.repeat_values(reps.toSeq1D)
+
+proc tile*[T](t: Tensor[T], reps: varargs[int]): Tensor[T] =
+  ## Construct a new tensor by repeating the input tensor a number of times on one or more axes
+  ##
+  ## Inputs:
+  ##   - t: The tensor to repeat
+  ##   - reps: One or more integers indicating the number of times to repeat
+  ##           the tensor on each axis (starting with axis 0)
+  ##
+  ## Result:
+  ##   - A new tensor whose shape is `t.shape *. reps`
+  ##
+  ## Notes:
+  ##   - If a rep value is 1, the tensor is not repeated on that particular axis
+  ##   - If there are more rep values than the input tensor has axes, additional
+  ##     dimensions are prepended to the input tensor as needed. Note that this
+  ##     is similar to numpy's `tile` function behavior, but different to
+  ##     Matlab's `repmat` behavior, which appends missing dimensions instead
+  ##     of prepending them.
+  ##   - This function behavior is similar to nim's `sequtils.repeat`, in that
+  ##     it repeats the full tensor multiple times. If what you want is to
+  ##     repeat the _elements_ of the tensor multiple times, rather than the
+  ##     full tensor, use the `repeat_values` procedure instead.
+  ##
+  ## Examples:
+  ## ```nim
+  ## let x = arange(4).reshape(2, 2)
+  ##
+  ## # When the number of reps and tensor dimensions match, the output tensor
+  ## # shape is the `reps *. t.shape`
+  ## echo tile(x, 2, 3)
+  ## > Tensor[system.int] of shape "[4, 6]" on backend "Cpu"
+  ## > |0      1     0     1     0     1|
+  ## > |2      3     2     3     2     3|
+  ## > |0      1     0     1     0     1|
+  ## > |2      3     2     3     2     3|
+  ##
+  ## # If there are fewer reps than tensor dimensions, start
+  ## # repeating on the first axis (leaving alone axis with missing reps)
+  ## echo tile(x, 2)
+  ## > Tensor[system.int] of shape "[4, 2]" on backend "Cpu"
+  ## > |0      1|
+  ## > |2      3|
+  ## > |0      1|
+  ## > |2      3|
+  ##
+  ## # If there are more reps than tensor dimensions, prepend the missing
+  ## # dimensions before repeating
+  ## echo tile(x, 1, 2, 3)
+  ## > Tensor[system.int] of shape "[1, 4, 6]" on backend "Cpu"
+  ## >                 0
+  ## > |0      1     0     1     0     1|
+  ## > |2      3     2     3     2     3|
+  ## > |0      1     0     1     0     1|
+  ## > |2      3     2     3     2     3|
+  ## ```
+  result = t
+  # Prepend every missing leading axis first so that each `ax` below is valid
+  while result.rank < reps.len:
+    result = result.unsqueeze(0)
+  for ax in countdown(reps.high, 0):
+    let copies = repeat(result, reps[ax])
+    result = concat(copies, axis = ax)
+
diff --git a/tests/tensor/test_shapeshifting.nim b/tests/tensor/test_shapeshifting.nim
@@ -42,7 +42,7 @@
        let d = a.asContiguous(colMajor, force = true)
        # this test needs `toRawSeq` due to the changed layout. `toFlatSeq` provides the
        # same as for `c` above!
        check: d.toRawSeq == @[7, 8, 2, 4, 1, 0, 3, 6, 4, 1, 2, 3, 8, 6, 2, 6, 6, 0]


        # # Now test with a non contiguous tensor
@@ -305,5 +305,97 @@
           a_permuted_2 == a.permute(1, 2, 0)
           a_permuted_2 == a.moveaxis(1, 0).moveaxis(2, 1)
 
+    test "Repeat Values":
+      # Shared 2x3 input; the expected tensors below show it holds 0..5 row by row
+      let source = arange(6).reshape(2, 3)
+
+      block repeatColumns:
+        # Passing no axis must give the same result as naming the last axis
+        let want = [[0, 0, 1, 1, 2, 2],
+                    [3, 3, 4, 4, 5, 5]].toTensor
+        check source.repeat_values(2) == want
+        check source.repeat_values(2, axis = 1) == want
+
+      block repeatRows:
+        # Each row appears twice, back to back
+        let want = [[0, 1, 2],
+                    [0, 1, 2],
+                    [3, 4, 5],
+                    [3, 4, 5]].toTensor
+        check source.repeat_values(2, axis = 0) == want
+
+      block repeatLeadingAxis:
+        # Repeating a size-1 leading axis stacks two copies of the matrix
+        let want = [
+          [[0, 1, 2],
+           [3, 4, 5]],
+          [[0, 1, 2],
+           [3, 4, 5]]
+        ].toTensor
+        let lifted = source.unsqueeze(axis = 0)
+        check lifted.repeat_values(2, axis = 0) == want
+
+      block perElementCounts:
+        # A count of zero drops the element; both reps overloads must agree
+        let
+          values = [3, 5, 2, 4].toTensor
+          counts = [1, 0, 3, 2]
+          want = [3, 2, 2, 2, 4, 4].toTensor
+        check values.repeat_values(counts) == want
+        check values.repeat_values(counts.toTensor) == want
+
+    test "Tile":
+      # Shared 2x3 input; the expected tensors below show it holds 0..5 row by row
+      let source = arange(6).reshape(2, 3)
+
+      block tileFirstAxis:
+        # A single rep value only tiles along axis 0
+        let want = [
+          [0, 1, 2],
+          [3, 4, 5],
+          [0, 1, 2],
+          [3, 4, 5]
+        ].toTensor
+        check source.tile(2) == want
+
+      block tileEveryAxis:
+        # One rep per input axis: twice along rows, three times along columns
+        let want = [
+          [0, 1, 2, 0, 1, 2, 0, 1, 2],
+          [3, 4, 5, 3, 4, 5, 3, 4, 5],
+          [0, 1, 2, 0, 1, 2, 0, 1, 2],
+          [3, 4, 5, 3, 4, 5, 3, 4, 5]
+        ].toTensor
+        check source.tile(2, 3) == want
+
+      block tileExtraAxis:
+        # One more rep than the input has axes: the 4x9 tiled matrix is
+        # stacked four times along a new leading axis
+        let want = [
+          [[0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5],
+           [0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5]],
+          [[0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5],
+           [0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5]],
+          [[0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5],
+           [0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5]],
+          [[0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5],
+           [0, 1, 2, 0, 1, 2, 0, 1, 2],
+           [3, 4, 5, 3, 4, 5, 3, 4, 5]]
+        ].toTensor
+        check source.tile(4, 2, 3) == want
+
+      block tileMatchesRepeat:
+        # Tiling along a size-1 leading axis gives the same tensor as
+        # repeating the values of that axis
+        let lifted = source.unsqueeze(axis = 0)
+        check source.tile(2, 1, 1) == lifted.repeat_values(2, axis = 0)
+
 main()
 GC_fullCollect()