From d426148ae27410aa4fb10a4a9dc67647a058244f Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Wed, 2 Oct 2024 12:17:34 +0100 Subject: [PATCH] fix: Modernize expression_lib (#111) --- README.md | 22 +++++---- .../expression_lib/expression_lib/_typing.py | 13 +++++ .../expression_lib/expression_lib/_utils.py | 3 ++ .../expression_lib/date_util.py | 25 ++++++---- .../expression_lib/expression_lib/dist.py | 32 +++++++------ .../expression_lib/extension.py | 7 ++- .../expression_lib/expression_lib/language.py | 20 ++++---- .../expression_lib/expression_lib/panic.py | 16 ++++--- .../expression_lib/expression_lib/utils.py | 48 ------------------- 9 files changed, 87 insertions(+), 99 deletions(-) create mode 100644 example/derive_expression/expression_lib/expression_lib/_typing.py create mode 100644 example/derive_expression/expression_lib/expression_lib/_utils.py delete mode 100644 example/derive_expression/expression_lib/expression_lib/utils.py diff --git a/README.md b/README.md index a8b66c7..dbcd728 100644 --- a/README.md +++ b/README.md @@ -56,20 +56,24 @@ fn pig_latinnify(inputs: &[Series], kwargs: PigLatinKwargs) -> PolarsResult pl.Expr: - expr = parse_into_expr(expr) - return expr.register_plugin( - lib=lib, - symbol="pig_latinnify", +def pig_latinnify(expr: IntoExprColumn, capitalize: bool = False) -> pl.Expr: + return register_plugin_function( + plugin_path=LIB, + args=[expr], + function_name="pig_latinnify", is_elementwise=True, kwargs={"capitalize": capitalize}, ) diff --git a/example/derive_expression/expression_lib/expression_lib/_typing.py b/example/derive_expression/expression_lib/expression_lib/_typing.py new file mode 100644 index 0000000..7caf954 --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/_typing.py @@ -0,0 +1,13 @@ +from typing import TYPE_CHECKING, Union + +if TYPE_CHECKING: + import sys + + import polars as pl + + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + + IntoExprColumn: TypeAlias = Union[pl.Expr, str, pl.Series] diff --git a/example/derive_expression/expression_lib/expression_lib/_utils.py b/example/derive_expression/expression_lib/expression_lib/_utils.py new file mode 100644 index 0000000..4becf5c --- /dev/null +++ b/example/derive_expression/expression_lib/expression_lib/_utils.py @@ -0,0 +1,3 @@ +from pathlib import Path + +LIB = Path(__file__).parent diff --git a/example/derive_expression/expression_lib/expression_lib/date_util.py b/example/derive_expression/expression_lib/expression_lib/date_util.py index 928d7b8..b0c6741 100644 --- a/example/derive_expression/expression_lib/expression_lib/date_util.py +++ b/example/derive_expression/expression_lib/expression_lib/date_util.py @@ -1,15 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import polars as pl -from polars.type_aliases import IntoExpr from polars.plugins import register_plugin_function -from pathlib import Path -from expression_lib.utils import parse_into_expr +from expression_lib._utils import LIB +if TYPE_CHECKING: + from expression_lib._typing import IntoExprColumn -def is_leap_year(expr: IntoExpr) -> pl.Expr: - expr = parse_into_expr(expr) + +def is_leap_year(expr: IntoExprColumn) -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr], function_name="is_leap_year", is_elementwise=True, @@ -18,10 +22,11 @@ def is_leap_year(expr: IntoExpr) -> pl.Expr: # Note that this already exists in Polars. It is just for explanatory # purposes. -def change_time_zone(expr: IntoExpr, tz: str = "Europe/Amsterdam") -> pl.Expr: - expr = parse_into_expr(expr) +def change_time_zone(expr: IntoExprColumn, tz: str = "Europe/Amsterdam") -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr], - function_name="change_time_zone", is_elementwise=True, kwargs={"tz": tz} + function_name="change_time_zone", + is_elementwise=True, + kwargs={"tz": tz}, ) diff --git a/example/derive_expression/expression_lib/expression_lib/dist.py b/example/derive_expression/expression_lib/expression_lib/dist.py index e9274e6..bcd7fc5 100644 --- a/example/derive_expression/expression_lib/expression_lib/dist.py +++ b/example/derive_expression/expression_lib/expression_lib/dist.py @@ -1,25 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import polars as pl -from polars.type_aliases import IntoExpr from polars.plugins import register_plugin_function -from pathlib import Path +from expression_lib._utils import LIB + +if TYPE_CHECKING: + from expression_lib._typing import IntoExprColumn -from expression_lib.utils import parse_into_expr -def hamming_distance(expr: IntoExpr, other: IntoExpr) -> pl.Expr: - expr = parse_into_expr(expr) +def hamming_distance(expr: IntoExprColumn, other: IntoExprColumn) -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr, other], function_name="hamming_distance", is_elementwise=True, ) -def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr: - expr = parse_into_expr(expr) +def jaccard_similarity(expr: IntoExprColumn, other: IntoExprColumn) -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr, other], function_name="jaccard_similarity", is_elementwise=True, @@ -27,14 +30,13 @@ def jaccard_similarity(expr: IntoExpr, other: IntoExpr) -> pl.Expr: def haversine( - start_lat: IntoExpr, - start_long: IntoExpr, - end_lat: IntoExpr, - end_long: IntoExpr, + start_lat: IntoExprColumn, + start_long: IntoExprColumn, + end_lat: IntoExprColumn, + end_long: IntoExprColumn, ) -> pl.Expr: - start_lat = parse_into_expr(start_lat) return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[start_lat, start_long, end_lat, end_long], function_name="haversine", is_elementwise=True, diff --git a/example/derive_expression/expression_lib/expression_lib/extension.py b/example/derive_expression/expression_lib/expression_lib/extension.py index c286424..fbd9bc5 100644 --- a/example/derive_expression/expression_lib/expression_lib/extension.py +++ b/example/derive_expression/expression_lib/expression_lib/extension.py @@ -16,11 +16,14 @@ - static typing will not recognise your custom namespace. Errors such as `"Expr" has no attribute "dist" [attr-defined]`. """ + from __future__ import annotations -import polars as pl from typing import Any, Callable -from expression_lib import date_util, dist, language, utils, panic + +import polars as pl + +from expression_lib import date_util, dist, language, panic @pl.api.register_expr_namespace("language") diff --git a/example/derive_expression/expression_lib/expression_lib/language.py b/example/derive_expression/expression_lib/expression_lib/language.py index 0713f0d..ca46062 100644 --- a/example/derive_expression/expression_lib/expression_lib/language.py +++ b/example/derive_expression/expression_lib/expression_lib/language.py @@ -1,16 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import polars as pl -from polars.type_aliases import IntoExpr from polars.plugins import register_plugin_function -from pathlib import Path -from expression_lib.utils import parse_into_expr +from expression_lib._utils import LIB +if TYPE_CHECKING: + from expression_lib._typing import IntoExprColumn -def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: - expr = parse_into_expr(expr) +def pig_latinnify(expr: IntoExprColumn, capitalize: bool = False) -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr], function_name="pig_latinnify", is_elementwise=True, @@ -19,7 +22,7 @@ def pig_latinnify(expr: IntoExpr, capitalize: bool = False) -> pl.Expr: def append_args( - expr: IntoExpr, + expr: IntoExprColumn, float_arg: float, integer_arg: int, string_arg: str, @@ -28,9 +31,8 @@ def append_args( """ This example shows how arguments other than `Series` can be used. """ - expr = parse_into_expr(expr) return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr], kwargs={ "float_arg": float_arg, diff --git a/example/derive_expression/expression_lib/expression_lib/panic.py b/example/derive_expression/expression_lib/expression_lib/panic.py index 6338a76..1ef710c 100644 --- a/example/derive_expression/expression_lib/expression_lib/panic.py +++ b/example/derive_expression/expression_lib/expression_lib/panic.py @@ -1,15 +1,19 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import polars as pl -from polars.type_aliases import IntoExpr from polars.plugins import register_plugin_function -from expression_lib.utils import parse_into_expr -from pathlib import Path +from expression_lib._utils import LIB + +if TYPE_CHECKING: + from expression_lib._typing import IntoExprColumn -def panic(expr: IntoExpr) -> pl.Expr: - expr = parse_into_expr(expr) +def panic(expr: IntoExprColumn) -> pl.Expr: return register_plugin_function( - plugin_path=Path(__file__).parent, + plugin_path=LIB, args=[expr], function_name="panic", ) diff --git a/example/derive_expression/expression_lib/expression_lib/utils.py b/example/derive_expression/expression_lib/expression_lib/utils.py deleted file mode 100644 index 73f7a19..0000000 --- a/example/derive_expression/expression_lib/expression_lib/utils.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING - -import polars as pl - -if TYPE_CHECKING: - from polars.type_aliases import IntoExpr, PolarsDataType - - -def parse_into_expr( - expr: IntoExpr, - *, - str_as_lit: bool = False, - list_as_lit: bool = True, - dtype: PolarsDataType | None = None, -) -> pl.Expr: - """ - Parse a single input into an expression. - - Parameters - ---------- - expr - The input to be parsed as an expression. - str_as_lit - Interpret string input as a string literal. If set to `False` (default), - strings are parsed as column names. - list_as_lit - Interpret list input as a lit literal, If set to `False`, - lists are parsed as `Series` literals. - dtype - If the input is expected to resolve to a literal with a known dtype, pass - this to the `lit` constructor. - - Returns - ------- - polars.Expr - """ - if isinstance(expr, pl.Expr): - pass - elif isinstance(expr, str) and not str_as_lit: - expr = pl.col(expr) - elif isinstance(expr, list) and not list_as_lit: - expr = pl.lit(pl.Series(expr), dtype=dtype) - else: - expr = pl.lit(expr, dtype=dtype) - - return expr