Skip to content

Commit

Permalink
Merge pull request #669 from wrangleworks/length-wrangle
Browse files Browse the repository at this point in the history
Length wrangle + test
  • Loading branch information
ChrisWRWX authored Feb 24, 2025
2 parents 6c2735a + 41b05e7 commit 52c941b
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 1 deletion.
2 changes: 1 addition & 1 deletion tests/recipes/wrangles/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,7 @@ def test_extract_custom_ai_single_output(self):
model_id: 8e4ce4c6-9908-4f67
"""
)
assert df["results"][0] == '["1", "2", "3", "4", "5"]'
assert df["results"][0] == ["1", "2", "3", "4", "5"]

def test_extract_custom_ai_multiple_output(self):
"""
Expand Down
84 changes: 84 additions & 0 deletions tests/recipes/wrangles/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -2458,3 +2458,87 @@ def test_sample_where(self):
len(df) == 2 and
all([x < 0 for x in df["header2"]])
)

class TestSelectLength:
    """
    Tests for the select.length wrangle.

    Each test runs a one-step recipe that reads Col1 and writes
    the per-row length to a new column named "length".
    """
    def test_select_string_length(self):
        """
        Test select.length on strings
        """
        # The YAML nesting matters: input/output must be children
        # of select.length for the recipe to describe the wrangle.
        df = wrangles.recipe.run(
            """
            wrangles:
              - select.length:
                  input: Col1
                  output: length
            """,
            dataframe=pd.DataFrame({
                "Col1": ["One Two Three Four", "Five Six Seven Eight", "Nine Ten Eleven Twelve"]
            })
        )
        # Length of text is the number of characters, spaces included
        assert df["length"].to_list() == [18, 20, 22]

    def test_select_array_length(self):
        """
        Test select.length on arrays
        """
        df = wrangles.recipe.run(
            """
            wrangles:
              - select.length:
                  input: Col1
                  output: length
            """,
            dataframe=pd.DataFrame({
                "Col1": [[1,2,3,4], [5,6,7,8], [9,10,11,12]]
            })
        )
        # Length of a list is the number of elements
        assert df["length"].to_list() == [4, 4, 4]

    def test_select_dictionary_length(self):
        """
        Test select.length on dictionaries
        """
        df = wrangles.recipe.run(
            """
            wrangles:
              - select.length:
                  input: Col1
                  output: length
            """,
            dataframe=pd.DataFrame({
                "Col1": [{"a": 1, "b": 2, "c": 3}, {"d": 4, "e": 5}, {"f": 6}]
            })
        )
        # Length of a dictionary is the number of keys
        assert df["length"].to_list() == [3, 2, 1]

    def test_select_empty_length(self):
        """
        Test select.length on empty values
        """
        df = wrangles.recipe.run(
            """
            wrangles:
              - select.length:
                  input: Col1
                  output: length
            """,
            dataframe=pd.DataFrame({
                "Col1": ["", [], {}]
            })
        )
        # Empty string, list and dict all have zero length
        assert df["length"].to_list() == [0, 0, 0]

    def test_select_empty_dataframe(self):
        """
        Test select.length on an empty dataframe
        """
        df = wrangles.recipe.run(
            """
            wrangles:
              - select.length:
                  input: Col1
                  output: length
            """,
            dataframe=pd.DataFrame({"Col1": []})
        )
        # No rows in, no rows out - the wrangle must not fail
        assert len(df) == 0
36 changes: 36 additions & 0 deletions wrangles/recipe_wrangles/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,42 @@ def left(
return df


def length(
    df: _pd.DataFrame,
    input: _Union[str, list],
    output: _Union[str, list] = None,
):
    """
    type: object
    description: >-
      Calculate the lengths of data in a column.
      The length depends on the data type
      e.g. text will be the length of the text,
      lists will be the number of elements in the list.
    required:
      - input
    properties:
      input:
        type:
          - string
          - array
        description: Name of the input column(s).
      output:
        type:
          - string
          - array
        description: Name of the output column(s). If omitted, the input column(s) are overwritten.
    """
    # NOTE(review): the docstring above is a YAML schema rather than
    # free-form prose - presumably consumed by tooling, so keep it valid YAML.

    # If output is not specified, overwrite input columns in place
    if not output: output = input

    # If a string is provided, convert to a list for consistency
    if not isinstance(output, list): output = [output]
    if not isinstance(input, list): input = [input]

    # zip() stops at the shorter list, which would silently skip
    # columns on a mismatch - fail loudly instead.
    if len(input) != len(output):
        raise ValueError(
            "The lists for input and output must be the same length."
        )

    for input_col, output_col in zip(input, output):
        # len() is applied per value, so every value must be sized
        # (str, list, dict, ...). Unsized values such as None or
        # numbers raise TypeError.
        df[output_col] = [len(item) for item in df[input_col].values]

    return df


def list_element(
df: _pd.DataFrame,
input: _Union[str, list],
Expand Down

0 comments on commit 52c941b

Please sign in to comment.