Add a (set of) repeat_values procedures

These procedures let you repeat the values of a Tensor multiple times. This functionality exists both in numpy (`repeat`) and in Matlab (`repelem`). A different name was chosen here to avoid confusion with nim's `repeat` function, which behaves differently (it repeats the whole input sequence, like numpy's `tile` or Matlab's `repmat` functions), and to make the name more self explanatory. There are two versions of this procedure (with multiple overloads): - One that repeats all values the same amount of times over a given axis. - One that repeats each value a different amount of times, but returns a rank-1 tensor. Note that the second one is implemented as 2 procs with different argument types (openArray[int] and Tensor[int]). I measured the performance using the timeit library. The results show that the performance is comparable to numpy's `repeat` function. In particular, a small example which takes numpy's `repeat` ~2-3 usec per iteration, takes ~4 usec in --d:release mode, and ~1-2 usec in --d:danger mode.
mratsim · Apr 21, 2024 · e34dc35 · e34dc35
1 parent 1448698
commit e34dc35
Show file tree

Hide file tree

Showing 2 changed files with 155 additions and 0 deletions.
diff --git a/src/arraymancer/tensor/shapeshifting.nim b/src/arraymancer/tensor/shapeshifting.nim
@@ -571,3 +571,119 @@ proc roll*[T](t: Tensor[T], shift: int, axis: Natural): Tensor[T] {.noinit.} =
       let result_idx = floorMod(n + shift, t.shape[axis])
       rolled_slices[result_idx] = t_slice
     result = concat(rolled_slices, axis)
+
+proc repeat_values*[T](t: Tensor[T], reps: int, axis = -1): Tensor[T] {.noinit.} =
+  ## Create a new tensor with each value repeated (the same amount of)`reps` times
+  ##
+  ## Inputs:
+  ##   - t: A tensor.
+  ##   - reps: The integer number of times that each value must be repeated.
+  ##   - axis: The axis over which values will be repeated. Defaults to the
+  ##           last axis.
+  ##
+  ## Returns:
+  ##   - A new tensor containing the values of the input tensor repeated `reps`
+  ##     times over the selected axis.
+  ##
+  ## Notes:
+  ##   - All values are repeated (the same amount of) `reps` times along the
+  ##     selected axis. This makes the output shape the same as the input shape
+  ##     except at the selected axis, which is `reps` times greater.
+  ##   - There are an alternative versions of this function which take a list
+  ##     of `reps` instead of a single `reps` value.
+  ##   - The equivalent numpy function is called `repeat`, while the
+  ##     equivalent Matlab function is called `repelem`. Different names
+  ##     where chosen here to avoid confusion with nim's `repeat` function
+  ##     which behaves like numpy's `tile`, not like this function.
+  ##
+  ## Examples:
+  ## ```nim
+  ## let t = arange(6).reshape(2, 3)
+  ## echo t.repeat_values(2)
+  ## # Tensor[system.int] of shape "[3, 8]" on backend "Cpu"
+  ## # |0      0     1     1     2     2     3     3|
+  ## # |4      4     5     5     6     6     7     7|
+  ## # |8      8     9     9    10    10    11    11|
+  ##
+  ## echo t.repeat_values(2, axis = 0)
+  ## # Tensor[system.int] of shape "[6, 4]" on backend "Cpu"
+  ## # |0      1     2     3|
+  ## # |0      1     2     3|
+  ## # |4      5     6     7|
+  ## # |4      5     6     7|
+  ## # |8      9    10    11|
+  ## # |8      9    10    11|
+  ## ```
+  let axis = if axis >= 0: axis else: t.shape.len + axis
+
+  when compileOption("boundChecks"):
+    doAssert axis < t.rank,
+      "repeat_values called with an axis (" & $axis &
+      ") that exceeds the input tensor rank (" & $t.rank & ")"
+
+  var target_shape = t.shape
+  target_shape[axis] *= reps
+
+  result = newTensorUninit[T](t.size * reps)
+  var step = 1
+  for idx in countdown(t.shape.high, axis + 1):
+    step *= t.shape[idx]
+  for (idx, it) in t.enumerate():
+    for n in countup(0, reps - 1):
+      let base = (step * reps) * (idx div step) + idx mod step
+      result[base + n * step] = it
+  return result.reshape(target_shape)
+
+proc repeat_values*[T](t: Tensor[T], reps: openArray[int]): Tensor[T] {.noinit.} =
+  ## Create a new rank-1 tensor with each value `t[i]` repeated `reps[i]` times
+  ##
+  ## Compared to the version of `repeat_values` that takes a single integer
+  ## `reps` value this version always returns a rank-1 tensor (regardless of  and does not take
+  ## the input shape) and does not take an axis argument.
+  ##
+  ## Inputs:
+  ##   - t: A tensor.
+  ##   - reps: A sequence or array of integers indicating the number of times
+  ##           that each value must be repeated. It must have as many values
+  ##           as the input tensor.
+  ##
+  ## Returns:
+  ##   - A new rank-1 tensor containing the values of the input tensor repeated
+  ##     `reps` times.
+  ##
+  ## Notes:
+  ##   - If a rep value is 0, the corresponding item in the input tensor will
+  ##     be skipped from the output.
+  ##   - The equivalent numpy function is called `repeat`, while the
+  ##     equivalent Matlab function is called `repelem`. Different names
+  ##     where chosen here to avoid confusion with nim's `repeat` function
+  ##     which behaves like numpy's `tile`, not like this function.
+  ##
+  ## Example:
+  ## ```nim
+  ## let t = [3, 5, 2, 4].toTensor
+  ## echo t.repeat_values([1, 0, 3, 2])
+  ## # Tensor[system.int] of shape "[6]" on backend "Cpu"
+  ## #     3     2     2     2     4     4
+  ## ```
+  when compileOption("boundChecks"):
+    doAssert reps.len == t.len,
+      "repeat_values called with a reps list whose length (" & $reps.len &
+      ") does not match the input tensor size (" & $t.len & ")"
+
+  result = newTensorUninit[T](sum(reps))
+  var base_pos = 0
+  for (idx, it) in t.enumerate():
+    for n in countup(0, reps[idx] - 1):
+      result[base_pos + n] = it
+    base_pos += reps[idx]
+  return result
+
+proc repeat_values*[T](t: Tensor[T], reps: Tensor[int]): Tensor[T] {.noinit, inline.} =
+  ## Create a new rank-1 tensor with each value `t[i]` repeated `reps[i]` times
+  ##
+  ## Overload of this function which takes a `Tensor[int]` instead of an
+  ## `openArray[int]`. Behavior is exactly the same as the `openArray[int]`
+  ## version.
+  ## ```
+  t.repeat_values(reps.toSeq1D)
diff --git a/tests/tensor/test_shapeshifting.nim b/tests/tensor/test_shapeshifting.nim
@@ -305,5 +305,44 @@ proc main() =
           a_permuted_2 == a.permute(1, 2, 0)
           a_permuted_2 == a.moveaxis(1, 0).moveaxis(2, 1)
 
+    test "Repeat Values":
+      let t = arange(6).reshape(2, 3)
+
+      block: # Repeat columns
+        let expected = [
+          [0, 0, 1, 1, 2, 2],
+          [3, 3, 4, 4, 5, 5]
+        ].toTensor
+        check: t.repeat_values(2) == expected
+        check: t.repeat_values(2, axis = 1) == expected
+
+      block: # Repeat rows
+        let expected = [
+          [0, 1, 2],
+          [0, 1, 2],
+          [3, 4, 5],
+          [3, 4, 5]
+        ].toTensor
+        check: t.repeat_values(2, axis = 0) == expected
+
+      block: # Repeat a higher dimension
+        let expected = [
+          [
+            [0, 1, 2],
+            [3, 4, 5],
+          ],
+          [
+            [0, 1, 2],
+            [3, 4, 5]
+          ]
+        ].toTensor
+        check: t.unsqueeze(axis = 0).repeat_values(2, axis = 0) == expected
+
+      block: # Repeat different times each value (including zero times)
+        let a = [3, 5, 2, 4].toTensor
+        let expected = [3, 2, 2, 2, 4, 4].toTensor
+        check: a.repeat_values([1, 0, 3, 2]) == expected
+        check: a.repeat_values([1, 0, 3, 2].toTensor) == expected
+
 main()
 GC_fullCollect()