Skip to content

Commit

Permalink
Merge pull request #655 from wrangleworks/Batch-Accordion-Variables
Browse files Browse the repository at this point in the history
Added variables to accordion, batch, and rename sub-recipes
  • Loading branch information
ChrisWRWX authored Jan 18, 2025
2 parents 51f665d + b5b274a commit f2449b6
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 4 deletions.
155 changes: 155 additions & 0 deletions tests/recipes/wrangles/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,6 +1359,55 @@ def test_rename_dict_where(self):
})
)

def test_rename_wrangles_variables(self):
    """
    Use wrangles to rename columns based on a variable.

    The recipe-level variable ``case`` is referenced inside the
    sub-recipe nested under ``rename``. The test passes only if the
    variable reaches that sub-recipe, so that both headers are
    converted to upper case.
    """
    # The YAML nesting is significant: the inner ``wrangles`` list must
    # sit under ``rename`` so that it is treated as the sub-recipe.
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 5
              values:
                header1: value1
                header2: value2
        wrangles:
          - rename:
              wrangles:
                - convert.case:
                    input: columns
                    case: ${case}
        """,
        variables={"case": "upper"}
    )
    assert df.columns.tolist() == ["HEADER1", "HEADER2"]

def test_rename_wrangles_variables_if(self):
    """
    Use wrangles to rename columns based on a variable with an if.

    The variable ``condition`` is supplied as True and is referenced by
    the ``if`` of the ``convert.case`` wrangle inside the ``rename``
    sub-recipe. The headers are expected to be upper-cased, which
    requires the condition to have been resolved inside the sub-recipe.
    """
    # ``variables`` is given explicitly on the rename wrangle in addition
    # to the recipe-level variables passed to ``run``.
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 5
              values:
                header1: value1
                header2: value2
        wrangles:
          - rename:
              variables:
                condition: ${condition}
              wrangles:
                - convert.case:
                    input: columns
                    case: upper
                    if: ${condition}
        """,
        variables={"condition": True}
    )
    assert df.columns.tolist() == ["HEADER1", "HEADER2"]


class TestSimilarity:
"""
Expand Down Expand Up @@ -3849,6 +3898,67 @@ def test_accordion_where(self):
)
assert df["list_column"][0] == ["A","B","C"] and df["list_column"][1] == ["e","f","g"]

def test_accordion_variables(self):
    """
    Test an accordion with a variable passed through.

    The recipe-level variable ``case`` is used by the ``convert.case``
    wrangle inside the accordion's sub-recipe. The row count must be
    unchanged and every element of the list must be upper-cased.
    """
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 5
              values:
                list_column:
                  - a
                  - b
                  - c
        wrangles:
          - accordion:
              input: list_column
              wrangles:
                - convert.case:
                    input: list_column
                    case: ${case}
        """,
        variables={"case": "upper"}
    )
    # Separate asserts so a failure reports which expectation broke.
    assert len(df) == 5
    assert df["list_column"][0] == ["A", "B", "C"]

def test_accordion_variables_if(self):
    """
    Test an accordion with a variable passed through to an if.

    The variable ``condition`` is supplied as True and referenced by
    the ``if`` of the ``convert.case`` wrangle inside the accordion's
    sub-recipe. The row count must be unchanged and the list elements
    upper-cased, showing that the conditional wrangle ran.
    """
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 5
              values:
                list_column:
                  - a
                  - b
                  - c
        wrangles:
          - accordion:
              input: list_column
              wrangles:
                - convert.case:
                    input: list_column
                    case: upper
                    if: ${condition}
        """,
        variables={"condition": True}
    )
    # Separate asserts so a failure reports which expectation broke.
    assert len(df) == 5
    assert df["list_column"][0] == ["A", "B", "C"]


class TestBatch:
"""
Expand Down Expand Up @@ -4213,6 +4323,51 @@ def test_batch_where(self):
)
assert df['output col'].to_list() == ["A","","C"]

def test_batch_variables(self):
    """
    Test batch wrangle with a variable passed through.

    The recipe-level variable ``case`` is used by the ``convert.case``
    wrangle inside the batch's sub-recipe. All 1000 generated rows are
    expected to be upper-cased.
    """
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 1000
              values:
                column: a
        wrangles:
          - batch:
              wrangles:
                - convert.case:
                    input: column
                    case: ${case}
        """,
        variables={"case": "upper"}
    )
    assert df['column'].tolist() == ["A"] * 1000

def test_batch_variable_if(self):
    """
    Test batch wrangle with a variable passed to an if.

    The variable ``condition`` is supplied as True and referenced by
    the ``if`` of the ``convert.case`` wrangle inside the batch's
    sub-recipe. All 1000 generated rows are expected to be upper-cased,
    showing that the conditional wrangle ran.
    """
    df = wrangles.recipe.run(
        """
        read:
          - test:
              rows: 1000
              values:
                column: a
        wrangles:
          - batch:
              wrangles:
                - convert.case:
                    input: column
                    case: upper
                    if: ${condition}
        """,
        variables={"condition": True}
    )
    assert df['column'].tolist() == ["A"] * 1000


class TestLookup:
"""
Expand Down
14 changes: 10 additions & 4 deletions wrangles/recipe_wrangles/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def accordion(
output: _Union[str, list] = None,
propagate: _Union[str, list] = None,
functions: _Union[_types.FunctionType, list] = [],
variables: dict = {}
) -> _pd.DataFrame:
"""
type: object
Expand Down Expand Up @@ -116,7 +117,8 @@ def accordion(
] + wrangles
},
dataframe=df_temp,
functions=functions
functions=functions,
variables=variables
)
except KeyError as e:
e.args = (f"Did you forget the column in the accordion input or propagate? - {e.args[0]}",)
Expand All @@ -129,7 +131,8 @@ def accordion(
{"rename": {x + ".list": x for x in output}}
]},
dataframe=df_temp,
functions=functions
functions=functions,
variables=variables
)
except KeyError as e:
e.args = (f"Did you forget the column in the accordion output? - {e.args[0]}",)
Expand Down Expand Up @@ -164,6 +167,7 @@ def batch(
df,
wrangles: list,
functions: _Union[_types.FunctionType, list] = [],
variables: dict = {},
batch_size: int = 1000,
threads: int = 1,
on_error: dict = None
Expand Down Expand Up @@ -222,7 +226,8 @@ def _batch_thread(df, wrangles, functions):
return _wrangles.recipe.run(
{"wrangles": wrangles},
dataframe=df,
functions=functions
functions=functions,
variables=variables
)
except Exception as err:
if on_error:
Expand Down Expand Up @@ -1235,7 +1240,8 @@ def rename(
dataframe=_pd.DataFrame({
"columns": input
}),
functions=kwargs.get("functions", {})
functions=kwargs.get("functions", {}),
variables=kwargs.get("variables", {})
)["columns"].tolist()
except:
raise RuntimeError("If using wrangles to rename, a column named 'columns' must be returned.")
Expand Down

0 comments on commit f2449b6

Please sign in to comment.