diff --git a/splink/blocking.py b/splink/blocking.py
index 303a4c6622..aa52dd1b77 100644
--- a/splink/blocking.py
+++ b/splink/blocking.py
@@ -200,7 +200,7 @@ def _sql_gen_where_condition(link_type, unique_id_cols):
         source_dataset_col = unique_id_cols[0]
         where_condition = (
             f"where {id_expr_l} < {id_expr_r} "
-            f"and l.{source_dataset_col.name()} != r.{source_dataset_col.name()}"
+            f"and l.{source_dataset_col.name} != r.{source_dataset_col.name}"
         )
 
     return where_condition
diff --git a/splink/cluster_metrics.py b/splink/cluster_metrics.py
index d15977245a..b68308b7a6 100644
--- a/splink/cluster_metrics.py
+++ b/splink/cluster_metrics.py
@@ -24,7 +24,7 @@ def _size_density_sql(
     clusters_table = df_clustered.physical_name
 
     input_col = InputColumn(_unique_id_col)
-    unique_id_col_l = input_col.name_l()
+    unique_id_col_l = input_col.name_l
 
     sqls = []
     sql = f"""
diff --git a/splink/comparison.py b/splink/comparison.py
index 3e3d0f3986..b0254fcafd 100644
--- a/splink/comparison.py
+++ b/splink/comparison.py
@@ -210,14 +210,14 @@ def _columns_to_select_for_comparison_vector_values(self):
         output_cols = []
         for col in input_cols:
             if self._settings_obj._retain_matching_columns:
-                output_cols.extend(col.names_l_r())
+                output_cols.extend(col.names_l_r)
 
         output_cols.append(self._case_statement)
 
         for cl in self.comparison_levels:
             if cl._has_tf_adjustments:
                 col = cl._tf_adjustment_input_column
-                output_cols.extend(col.tf_name_l_r())
+                output_cols.extend(col.tf_name_l_r)
 
         return dedupe_preserving_order(output_cols)
 
@@ -230,7 +230,7 @@ def _columns_to_select_for_bayes_factor_parts(self):
         output_cols = []
         for col in input_cols:
             if self._settings_obj._retain_matching_columns:
-                output_cols.extend(col.names_l_r())
+                output_cols.extend(col.names_l_r)
 
         output_cols.append(self._gamma_column_name)
 
@@ -240,7 +240,7 @@ def _columns_to_select_for_bayes_factor_parts(self):
                 and self._settings_obj._retain_intermediate_calculation_columns
             ):
                 col = cl._tf_adjustment_input_column
-                output_cols.extend(col.tf_name_l_r())
+                output_cols.extend(col.tf_name_l_r)
 
         # Bayes factor case when statement
         sqls = [cl._bayes_factor_sql for cl in self.comparison_levels]
@@ -268,7 +268,7 @@ def _columns_to_select_for_predict(self):
         output_cols = []
         for col in input_cols:
             if self._settings_obj._retain_matching_columns:
-                output_cols.extend(col.names_l_r())
+                output_cols.extend(col.names_l_r)
 
         if (
             self._settings_obj._training_mode
@@ -282,7 +282,7 @@ def _columns_to_select_for_predict(self):
                 and self._settings_obj._retain_intermediate_calculation_columns
            ):
                 col = cl._tf_adjustment_input_column
-                output_cols.extend(col.tf_name_l_r())
+                output_cols.extend(col.tf_name_l_r)
 
         for _col in input_cols:
             if self._settings_obj._retain_intermediate_calculation_columns:
@@ -445,7 +445,7 @@ def _comparison_level_description_list(self):
     @property
     def _human_readable_description_succinct(self):
         input_cols = join_list_with_commas_final_and(
-            [c.name() for c in self._input_columns_used_by_case_statement]
+            [c.name for c in self._input_columns_used_by_case_statement]
         )
 
         comp_levels = self._comparison_level_description_list
@@ -463,7 +463,7 @@ def _human_readable_description_succinct(self):
     @property
     def human_readable_description(self):
         input_cols = join_list_with_commas_final_and(
-            [c.name() for c in self._input_columns_used_by_case_statement]
+            [c.name for c in self._input_columns_used_by_case_statement]
         )
 
         comp_levels = self._comparison_level_description_list
diff --git a/splink/comparison_level.py b/splink/comparison_level.py
index 9011be8d9e..8b44d9036f 100644
--- a/splink/comparison_level.py
+++ b/splink/comparison_level.py
@@ -202,7 +202,7 @@ def _tf_adjustment_input_column(self):
     def _tf_adjustment_input_column_name(self):
         input_column = self._tf_adjustment_input_column
         if input_column:
-            return input_column.unquote().name()
+            return input_column.unquote().name
 
     @property
     def _has_comparison(self):
@@ -465,11 +465,9 @@ def _columns_to_select_for_blocking(self):
         cols = self._input_columns_used_by_sql_condition
 
         for c in cols:
-            output_cols.extend(c.l_r_names_as_l_r())
+            output_cols.extend(c.l_r_names_as_l_r)
 
         if self._tf_adjustment_input_column:
-            output_cols.extend(
-                self._tf_adjustment_input_column.l_r_tf_names_as_l_r()
-            )
+            output_cols.extend(self._tf_adjustment_input_column.l_r_tf_names_as_l_r)
 
         return dedupe_preserving_order(output_cols)
@@ -577,12 +575,8 @@ def _tf_adjustment_sql(self):
         else:
             tf_adj_col = self._tf_adjustment_input_column
 
-            coalesce_l_r = (
-                f"coalesce({tf_adj_col.tf_name_l()}, {tf_adj_col.tf_name_r()})"
-            )
-            coalesce_r_l = (
-                f"coalesce({tf_adj_col.tf_name_r()}, {tf_adj_col.tf_name_l()})"
-            )
+            coalesce_l_r = f"coalesce({tf_adj_col.tf_name_l}, {tf_adj_col.tf_name_r})"
+            coalesce_r_l = f"coalesce({tf_adj_col.tf_name_r}, {tf_adj_col.tf_name_l})"
 
             tf_adjustment_exists = f"{coalesce_l_r} is not null"
             u_prob_exact_match = self._u_probability_corresponding_to_exact_match
@@ -730,7 +724,7 @@ def _human_readable_succinct(self):
     @property
     def human_readable_description(self):
         input_cols = join_list_with_commas_final_and(
-            [c.name() for c in self._input_columns_used_by_sql_condition]
+            [c.name for c in self._input_columns_used_by_sql_condition]
         )
         desc = (
             f"Comparison level: {self.label_for_charts} of {input_cols}\n"
diff --git a/splink/comparison_level_library.py b/splink/comparison_level_library.py
index d7807b99c4..6f1744fe09 100644
--- a/splink/comparison_level_library.py
+++ b/splink/comparison_level_library.py
@@ -98,7 +98,7 @@ def __init__(
             valid_string_pattern = valid_string_regex
 
         col = InputColumn(col_name, sql_dialect=self._sql_dialect)
-        col_name_l, col_name_r = col.name_l(), col.name_r()
+        col_name_l, col_name_r = col.name_l, col.name_r
 
         if invalid_dates_as_null:
             col_name_l = self._valid_date_function(col_name_l, valid_string_pattern)
@@ -231,7 +231,7 @@ def __init__(
         else:
             label_suffix = ""
 
-        col_name_l, col_name_r = col.name_l(), col.name_r()
+        col_name_l, col_name_r = col.name_l, col.name_r
 
         if set_to_lowercase:
             col_name_l = f"lower({col_name_l})"
@@ -395,7 +395,7 @@ def __init__(
         else:
             operator = "<="
 
-        col_name_l, col_name_r = col.name_l(), col.name_r()
+        col_name_l, col_name_r = col.name_l, col.name_r
 
         if set_to_lowercase:
             col_name_l = f"lower({col_name_l})"
@@ -938,8 +938,8 @@ def __init__(
 
         col_1 = InputColumn(col_name_1, sql_dialect=self._sql_dialect)
         col_2 = InputColumn(col_name_2, sql_dialect=self._sql_dialect)
-        col_1_l, col_1_r = col_1.name_l(), col_1.name_r()
-        col_2_l, col_2_r = col_2.name_l(), col_2.name_r()
+        col_1_l, col_1_r = col_1.name_l, col_1.name_r
+        col_2_l, col_2_r = col_2.name_l, col_2.name_r
 
         if set_to_lowercase:
             col_1_l = f"lower({col_1_l})"
@@ -1030,8 +1030,8 @@ def __init__(
 
         lat = InputColumn(lat_col, sql_dialect=self._sql_dialect)
         long = InputColumn(long_col, sql_dialect=self._sql_dialect)
-        lat_l, lat_r = lat.names_l_r()
-        long_l, long_r = long.names_l_r()
+        lat_l, lat_r = lat.names_l_r
+        long_l, long_r = long.names_l_r
 
         distance_km_sql = f"""
        {great_circle_distance_km_sql(lat_l, lat_r, long_l, long_r)} <= {km_threshold}
@@ -1108,11 +1108,11 @@ def __init__(
         """
         col = InputColumn(col_name, sql_dialect=self._sql_dialect)
-        s = f"""(abs({col.name_l()} - {col.name_r()})/
+        s = f"""(abs({col.name_l} - {col.name_r})/
         (case
-            when {col.name_r()} > {col.name_l()}
-            then {col.name_r()}
-            else {col.name_l()}
+            when {col.name_r} > {col.name_l}
+            then {col.name_r}
+            else {col.name_l}
         end)) < {percentage_distance_threshold}"""
 
@@ -1178,7 +1178,7 @@ def __init__(
         col = InputColumn(col_name, sql_dialect=self._sql_dialect)
 
         size_array_intersection = (
-            f"{self._size_array_intersect_function(col.name_l(), col.name_r())}"
+            f"{self._size_array_intersect_function(col.name_l, col.name_r)}"
         )
         sql = f"{size_array_intersection} >= {min_intersection}"
 
@@ -1359,7 +1359,7 @@ def __init__(
         """
         date = InputColumn(date_col, sql_dialect=self._sql_dialect)
-        date_l, date_r = date.names_l_r()
+        date_l, date_r = date.names_l_r
 
         datediff_sql = self._datediff_function(
             date_l,
diff --git a/splink/find_matches_to_new_records.py b/splink/find_matches_to_new_records.py
index 23bcd72820..ad6b452c08 100644
--- a/splink/find_matches_to_new_records.py
+++ b/splink/find_matches_to_new_records.py
@@ -11,7 +11,7 @@ def add_unique_id_and_source_dataset_cols_if_needed(
     linker: "Linker", new_records_df: "SplinkDataFrame"
 ):
     cols = new_records_df.columns
-    cols = [c.unquote().name() for c in cols]
+    cols = [c.unquote().name for c in cols]
 
     # Add source dataset column to new records if required and not exists
     sds_sel_sql = ""
@@ -21,7 +21,7 @@ def add_unique_id_and_source_dataset_cols_if_needed(
 
         # TODO: Shouldn't be necessary but the source dataset properties on settings
         # are currently broken
         sds_col = InputColumn(sds_col, linker._settings_obj)
-        sds_col = sds_col.unquote().name()
+        sds_col = sds_col.unquote().name
         if sds_col not in cols:
             sds_sel_sql = f", 'new_record' as {sds_col}"
@@ -29,7 +29,7 @@ def add_unique_id_and_source_dataset_cols_if_needed(
 
     uid_sel_sql = ""
     uid_col = linker._settings_obj._unique_id_column_name
     uid_col = InputColumn(uid_col, linker._settings_obj)
-    uid_col = uid_col.unquote().name()
+    uid_col = uid_col.unquote().name
     if uid_col not in cols:
         uid_sel_sql = f", 'no_id_provided' as {uid_col}"
diff --git a/splink/input_column.py b/splink/input_column.py
index 9a5f8423f5..8d94c4e245 100644
--- a/splink/input_column.py
+++ b/splink/input_column.py
@@ -168,74 +168,89 @@ def tf_prefix(self) -> str:
             "_tf_prefix", "term_frequency_adjustment_column_prefix"
         )
 
+    @property
     def name(self) -> str:
         return self.input_name_as_tree.sql(dialect=self._sql_dialect)
 
+    @property
     def name_l(self) -> str:
         return add_suffix(self.input_name_as_tree, suffix="_l").sql(
             dialect=self._sql_dialect
         )
 
+    @property
     def name_r(self) -> str:
         return add_suffix(self.input_name_as_tree, suffix="_r").sql(
             dialect=self._sql_dialect
         )
 
+    @property
     def names_l_r(self) -> list[str]:
-        return [self.name_l(), self.name_r()]
+        return [self.name_l, self.name_r]
 
+    @property
     def l_name_as_l(self) -> str:
         name_with_l_table = add_table(self.input_name_as_tree, "l").sql(
             dialect=self._sql_dialect
         )
-        return f"{name_with_l_table} as {self.name_l()}"
+        return f"{name_with_l_table} as {self.name_l}"
 
+    @property
     def r_name_as_r(self) -> str:
         name_with_r_table = add_table(self.input_name_as_tree, "r").sql(
             dialect=self._sql_dialect
         )
-        return f"{name_with_r_table} as {self.name_r()}"
+        return f"{name_with_r_table} as {self.name_r}"
 
+    @property
     def l_r_names_as_l_r(self) -> list[str]:
-        return [self.l_name_as_l(), self.r_name_as_r()]
+        return [self.l_name_as_l, self.r_name_as_r]
 
+    @property
     def bf_name(self) -> str:
         return add_prefix(self.input_name_as_tree, prefix=self.bf_prefix).sql(
             dialect=self._sql_dialect
         )
 
+    @property
     def tf_name(self) -> str:
         return add_prefix(self.input_name_as_tree, prefix=self.tf_prefix).sql(
             dialect=self._sql_dialect
         )
 
+    @property
     def tf_name_l(self) -> str:
         tree = add_prefix(self.input_name_as_tree, prefix=self.tf_prefix)
         return add_suffix(tree, suffix="_l").sql(dialect=self._sql_dialect)
 
+    @property
     def tf_name_r(self) -> str:
         tree = add_prefix(self.input_name_as_tree, prefix=self.tf_prefix)
         return add_suffix(tree, suffix="_r").sql(dialect=self._sql_dialect)
 
+    @property
     def tf_name_l_r(self) -> list[str]:
-        return [self.tf_name_l(), self.tf_name_r()]
+        return [self.tf_name_l, self.tf_name_r]
 
+    @property
     def l_tf_name_as_l(self) -> str:
         tree = add_prefix(self.input_name_as_tree, prefix=self.tf_prefix)
         tf_name_with_l_table = add_table(tree, tablename="l").sql(
             dialect=self._sql_dialect
         )
-        return f"{tf_name_with_l_table} as {self.tf_name_l()}"
+        return f"{tf_name_with_l_table} as {self.tf_name_l}"
 
+    @property
     def r_tf_name_as_r(self) -> str:
         tree = add_prefix(self.input_name_as_tree, prefix=self.tf_prefix)
         tf_name_with_r_table = add_table(tree, tablename="r").sql(
             dialect=self._sql_dialect
         )
-        return f"{tf_name_with_r_table} as {self.tf_name_r()}"
+        return f"{tf_name_with_r_table} as {self.tf_name_r}"
 
+    @property
     def l_r_tf_names_as_l_r(self) -> list[str]:
-        return [self.l_tf_name_as_l(), self.r_tf_name_as_r()]
+        return [self.l_tf_name_as_l, self.r_tf_name_as_r]
 
     def _quote_if_sql_keyword(self, name: str) -> str:
         if name not in {"group", "index"}:
diff --git a/splink/linker.py b/splink/linker.py
index f173e86a4f..c0da5d0336 100644
--- a/splink/linker.py
+++ b/splink/linker.py
@@ -260,7 +260,7 @@ def _input_columns(
         # sort it for consistent ordering, and give each frame's
         # columns as a tuple so we can hash it
         column_names_by_input_df = [
-            tuple(sorted([col.name() for col in input_df.columns]))
+            tuple(sorted([col.name for col in input_df.columns]))
             for input_df in input_dfs
         ]
         # check that the set of input columns is the same for each frame,
diff --git a/splink/lower_id_on_lhs.py b/splink/lower_id_on_lhs.py
index 6f4ad48c67..2e3d2d0e70 100644
--- a/splink/lower_id_on_lhs.py
+++ b/splink/lower_id_on_lhs.py
@@ -66,7 +66,7 @@ def lower_id_to_left_hand_side(
     """  # noqa
 
     cols = df.columns
-    cols = [c.unquote().name() for c in cols]
+    cols = [c.unquote().name for c in cols]
 
     l_cols = [c for c in cols if c.endswith("_l")]
     r_cols = [c for c in cols if c.endswith("_r")]
diff --git a/splink/missingness.py b/splink/missingness.py
index bd5711bd6c..c936c34046 100644
--- a/splink/missingness.py
+++ b/splink/missingness.py
@@ -8,8 +8,8 @@ def missingness_sqls(columns, input_tablename):
 
     selects = [
         col_template.format(
-            col_name_escaped=col.name(),
-            col_name=col.unquote().name(),
+            col_name_escaped=col.name,
+            col_name=col.unquote().name,
             input_tablename=input_tablename,
         )
         for col in columns
diff --git a/splink/profile_data.py b/splink/profile_data.py
index f09d6340f6..ea035a54c1 100644
--- a/splink/profile_data.py
+++ b/splink/profile_data.py
@@ -232,7 +232,7 @@ def profile_columns(linker, column_expressions=None, top_n=10, bottom_n=10):
     """
 
     if not column_expressions:
-        column_expressions = [col.name() for col in linker._input_columns]
+        column_expressions = [col.name for col in linker._input_columns]
 
     df_concat = linker._initialise_df_concat()
 
@@ -297,7 +297,6 @@ def profile_columns(linker, column_expressions=None, top_n=10, bottom_n=10):
             inner_charts.append(inner_chart)
 
     if inner_charts != []:
-
         outer_spec = deepcopy(_outer_chart_spec_freq)
         outer_spec["vconcat"] = inner_charts
 
diff --git a/splink/settings.py b/splink/settings.py
index f14b2d79c1..51d29476a5 100644
--- a/splink/settings.py
+++ b/splink/settings.py
@@ -131,10 +131,10 @@ def _get_additional_columns_to_retain(self):
 
             used_by_brs = [InputColumn(c) for c in used_by_brs]
 
-            used_by_brs = [c.unquote().name() for c in used_by_brs]
+            used_by_brs = [c.unquote().name for c in used_by_brs]
             already_used = self._columns_used_by_comparisons
             already_used = [InputColumn(c) for c in already_used]
-            already_used = [c.unquote().name() for c in already_used]
+            already_used = [c.unquote().name for c in already_used]
 
             new_cols = list(set(used_by_brs) - set(already_used))
             a_cols.extend(new_cols)
@@ -170,7 +170,7 @@ def _source_dataset_input_column(self):
     @property
     def _source_dataset_col(self):
         input_column = self._source_dataset_input_column
-        return (input_column, InputColumn(input_column, self).name())
+        return (input_column, InputColumn(input_column, self).name)
 
     @property
     def _unique_id_input_columns(self) -> list[InputColumn]:
@@ -214,7 +214,7 @@ def _columns_used_by_comparisons(self):
         cols_used.append(self._unique_id_column_name)
         for cc in self.comparisons:
             cols = cc._input_columns_used_by_case_statement
-            cols = [c.name() for c in cols]
+            cols = [c.name for c in cols]
 
             cols_used.extend(cols)
         return dedupe_preserving_order(cols_used)
@@ -224,14 +224,14 @@ def _columns_to_select_for_blocking(self):
         cols = []
 
         for uid_col in self._unique_id_input_columns:
-            cols.append(uid_col.l_name_as_l())
-            cols.append(uid_col.r_name_as_r())
+            cols.append(uid_col.l_name_as_l)
+            cols.append(uid_col.r_name_as_r)
 
         for cc in self.comparisons:
             cols.extend(cc._columns_to_select_for_blocking)
 
         for add_col in self._additional_columns_to_retain:
-            cols.extend(add_col.l_r_names_as_l_r())
+            cols.extend(add_col.l_r_names_as_l_r)
 
         return dedupe_preserving_order(cols)
@@ -240,14 +240,14 @@ def _columns_to_select_for_comparison_vector_values(self):
         cols = []
 
         for uid_col in self._unique_id_input_columns:
-            cols.append(uid_col.name_l())
-            cols.append(uid_col.name_r())
+            cols.append(uid_col.name_l)
+            cols.append(uid_col.name_r)
 
         for cc in self.comparisons:
             cols.extend(cc._columns_to_select_for_comparison_vector_values)
 
         for add_col in self._additional_columns_to_retain:
-            cols.extend(add_col.names_l_r())
+            cols.extend(add_col.names_l_r)
 
         if self._needs_matchkey_column:
             cols.append("match_key")
@@ -260,14 +260,14 @@ def _columns_to_select_for_bayes_factor_parts(self):
         cols = []
 
         for uid_col in self._unique_id_input_columns:
-            cols.append(uid_col.name_l())
-            cols.append(uid_col.name_r())
+            cols.append(uid_col.name_l)
+            cols.append(uid_col.name_r)
 
         for cc in self.comparisons:
             cols.extend(cc._columns_to_select_for_bayes_factor_parts)
 
         for add_col in self._additional_columns_to_retain:
-            cols.extend(add_col.names_l_r())
+            cols.extend(add_col.names_l_r)
 
         if self._needs_matchkey_column:
             cols.append("match_key")
@@ -280,14 +280,14 @@ def _columns_to_select_for_predict(self):
         cols = []
 
         for uid_col in self._unique_id_input_columns:
-            cols.append(uid_col.name_l())
-            cols.append(uid_col.name_r())
+            cols.append(uid_col.name_l)
+            cols.append(uid_col.name_r)
 
         for cc in self.comparisons:
             cols.extend(cc._columns_to_select_for_predict)
 
         for add_col in self._additional_columns_to_retain:
-            cols.extend(add_col.names_l_r())
+            cols.extend(add_col.names_l_r)
 
         if self._needs_matchkey_column:
             cols.append("match_key")
diff --git a/splink/splink_comparison_viewer.py b/splink/splink_comparison_viewer.py
index f46435611e..d6ec3ef496 100644
--- a/splink/splink_comparison_viewer.py
+++ b/splink/splink_comparison_viewer.py
@@ -18,8 +18,8 @@ def row_examples(linker: Linker, example_rows_per_category=2):
     sqls = []
 
     uid_cols = linker._settings_obj._unique_id_input_columns
-    uid_cols_l = [uid_col.name_l() for uid_col in uid_cols]
-    uid_cols_r = [uid_col.name_r() for uid_col in uid_cols]
+    uid_cols_l = [uid_col.name_l for uid_col in uid_cols]
+    uid_cols_r = [uid_col.name_r for uid_col in uid_cols]
     uid_cols = uid_cols_l + uid_cols_r
     uid_expr = " || '-' ||".join(uid_cols)
 
diff --git a/splink/splink_dataframe.py b/splink/splink_dataframe.py
index a561cd01be..5721d5f8e2 100644
--- a/splink/splink_dataframe.py
+++ b/splink/splink_dataframe.py
@@ -33,7 +33,7 @@ def columns(self):
     @property
     def columns_escaped(self):
         cols = self.columns
-        return [c.name() for c in cols]
+        return [c.name for c in cols]
 
     def validate():
         pass
diff --git a/splink/term_frequencies.py b/splink/term_frequencies.py
index 38e0807ff2..dc0dd84d3d 100644
--- a/splink/term_frequencies.py
+++ b/splink/term_frequencies.py
@@ -31,13 +31,13 @@ def colname_to_tf_tablename(input_column: InputColumn):
 def term_frequencies_for_single_column_sql(
     input_column: InputColumn, table_name="__splink__df_concat"
 ):
-    col_name = input_column.name()
+    col_name = input_column.name
 
     sql = f"""
     select
     {col_name}, cast(count(*) as float8) / (select
         count({col_name}) as total from {table_name})
-        as {input_column.tf_name()}
+        as {input_column.tf_name}
     from {table_name}
     where {col_name} is not null
     group by {col_name}
@@ -56,7 +56,7 @@ def _join_tf_to_input_df_sql(linker: Linker):
         tbl = colname_to_tf_tablename(col)
         if tbl in linker._intermediate_table_cache:
             tbl = linker._intermediate_table_cache[tbl].physical_name
-        tf_col = col.tf_name()
+        tf_col = col.tf_name
         select_cols.append(f"{tbl}.{tf_col}")
 
     select_cols.insert(0, "__splink__df_concat.*")
@@ -69,11 +69,11 @@ def _join_tf_to_input_df_sql(linker: Linker):
         tbl = colname_to_tf_tablename(col)
         if tbl in linker._intermediate_table_cache:
             tbl = linker._intermediate_table_cache[tbl].physical_name
-        sql = templ.format(tbl=tbl, col=col.name())
+        sql = templ.format(tbl=tbl, col=col.name)
         left_joins.append(sql)
 
     # left_joins = [
-    #     templ.format(tbl=colname_to_tf_tablename(col), col=col.name())
+    #     templ.format(tbl=colname_to_tf_tablename(col), col=col.name)
     #     for col in tf_cols
     # ]
     left_joins = " ".join(left_joins)
@@ -90,8 +90,8 @@ def _join_tf_to_input_df_sql(linker: Linker):
 def term_frequencies_from_concat_with_tf(input_column):
     sql = f"""
     select
-    distinct {input_column.name()},
-    {input_column.tf_name()}
+    distinct {input_column.name},
+    {input_column.tf_name}
     from __splink__df_concat_with_tf
     """
 
diff --git a/splink/unique_id_concat.py b/splink/unique_id_concat.py
index 6b74c9299b..f5b7cd9bc8 100644
--- a/splink/unique_id_concat.py
+++ b/splink/unique_id_concat.py
@@ -11,7 +11,7 @@ def _composite_unique_id_from_nodes_sql(unique_id_cols, table_prefix=None):
     else:
         table_prefix = ""
 
-    cols = [f"{table_prefix}{c.name()}" for c in unique_id_cols]
+    cols = [f"{table_prefix}{c.name}" for c in unique_id_cols]
 
     return f" || '{CONCAT_SEPARATOR}' || ".join(cols)
 
@@ -28,10 +28,10 @@ def _composite_unique_id_from_edges_sql(unique_id_cols, l_or_r, table_prefix=Non
         table_prefix = ""
 
     if l_or_r == "l":
-        cols = [f"{table_prefix}{c.name_l()}" for c in unique_id_cols]
+        cols = [f"{table_prefix}{c.name_l}" for c in unique_id_cols]
     if l_or_r == "r":
-        cols = [f"{table_prefix}{c.name_r()}" for c in unique_id_cols]
+        cols = [f"{table_prefix}{c.name_r}" for c in unique_id_cols]
     if l_or_r is None:
-        cols = [f"{table_prefix}{c.name()}" for c in unique_id_cols]
+        cols = [f"{table_prefix}{c.name}" for c in unique_id_cols]
 
     return f" || '{CONCAT_SEPARATOR}' || ".join(cols)
diff --git a/splink/waterfall_chart.py b/splink/waterfall_chart.py
index e52822e214..03f1325d9b 100644
--- a/splink/waterfall_chart.py
+++ b/splink/waterfall_chart.py
@@ -60,8 +60,8 @@ def _comparison_records(record_as_dict, comparison: Comparison):
         waterfall_record["u_probability"] = cl.u_probability
         waterfall_record["bayes_factor_description"] = cl._bayes_factor_description
         input_cols_used = c._input_columns_used_by_case_statement
-        input_cols_l = [ic.unquote().name_l() for ic in input_cols_used]
-        input_cols_r = [ic.unquote().name_r() for ic in input_cols_used]
+        input_cols_l = [ic.unquote().name_l for ic in input_cols_used]
+        input_cols_r = [ic.unquote().name_r for ic in input_cols_used]
         waterfall_record["value_l"] = ", ".join(
             [str(record_as_dict[n]) for n in input_cols_l]
         )
@@ -78,10 +78,10 @@ def _comparison_records(record_as_dict, comparison: Comparison):
 
         if cl._tf_adjustment_input_column is not None:
             waterfall_record_2["value_l"] = str(
-                record_as_dict[cl._tf_adjustment_input_column.unquote().name_l()]
+                record_as_dict[cl._tf_adjustment_input_column.unquote().name_l]
             )
             waterfall_record_2["value_r"] = str(
-                record_as_dict[cl._tf_adjustment_input_column.unquote().name_r()]
+                record_as_dict[cl._tf_adjustment_input_column.unquote().name_r]
             )
         else:
             waterfall_record_2["value_l"] = ""
diff --git a/tests/test_input_column.py b/tests/test_input_column.py
index aed6f0f2f8..8a1dd794bd 100644
--- a/tests/test_input_column.py
+++ b/tests/test_input_column.py
@@ -3,33 +3,33 @@
 
 def test_input_column():
     c = InputColumn("my_col")
-    assert c.name() == '"my_col"'
-    assert c.unquote().name() == "my_col"
+    assert c.name == '"my_col"'
+    assert c.unquote().name == "my_col"
 
-    assert c.name_l() == '"my_col_l"'
-    assert c.tf_name_l() == '"tf_my_col_l"'
-    assert c.unquote().quote().l_tf_name_as_l() == '"l"."tf_my_col" as "tf_my_col_l"'
-    assert c.unquote().l_tf_name_as_l() == '"l".tf_my_col as tf_my_col_l'
+    assert c.name_l == '"my_col_l"'
+    assert c.tf_name_l == '"tf_my_col_l"'
+    assert c.unquote().quote().l_tf_name_as_l == '"l"."tf_my_col" as "tf_my_col_l"'
+    assert c.unquote().l_tf_name_as_l == '"l".tf_my_col as tf_my_col_l'
 
     c = InputColumn("SUR name")
-    assert c.name() == '"SUR name"'
-    assert c.name_r() == '"SUR name_r"'
-    assert c.r_name_as_r() == '"r"."SUR name" as "SUR name_r"'
+    assert c.name == '"SUR name"'
+    assert c.name_r == '"SUR name_r"'
+    assert c.r_name_as_r == '"r"."SUR name" as "SUR name_r"'
 
     c = InputColumn("col['lat']")
 
     name = """
     "col"['lat']
     """.strip()
-    assert c.name() == name
+    assert c.name == name
 
     l_tf_name_as_l = """
     "l"."tf_col"['lat'] as "tf_col_l"['lat']
     """.strip()
-    assert c.l_tf_name_as_l() == l_tf_name_as_l
+    assert c.l_tf_name_as_l == l_tf_name_as_l
 
-    assert c.unquote().name() == "col['lat']"
-    assert c.unquote().quote().name() == name
+    assert c.unquote().name == "col['lat']"
+    assert c.unquote().quote().name == name
 
     c = InputColumn("first name", sql_dialect="spark")
-    assert c.name() == "`first name`"
+    assert c.name == "`first name`"
diff --git a/tests/test_sql_transform.py b/tests/test_sql_transform.py
index fb3f80e885..1fc67ceb74 100644
--- a/tests/test_sql_transform.py
+++ b/tests/test_sql_transform.py
@@ -92,36 +92,36 @@ def test_set_numeric_as_double():
 def test_add_pref_and_suffix():
     dull = InputColumn("dull")
     dull_l_r = ['"l"."dull" as "dull_l"', '"r"."dull" as "dull_r"']
-    assert dull.l_r_names_as_l_r() == dull_l_r
+    assert dull.l_r_names_as_l_r == dull_l_r
 
-    assert dull.bf_name() == '"bf_dull"'
-    assert dull.tf_name_l() == '"tf_dull_l"'
+    assert dull.bf_name == '"bf_dull"'
+    assert dull.tf_name_l == '"tf_dull_l"'
 
     tf_dull_l_r = ['"l"."tf_dull" as "tf_dull_l"', '"r"."tf_dull" as "tf_dull_r"']
-    assert dull.l_r_tf_names_as_l_r() == tf_dull_l_r
+    assert dull.l_r_tf_names_as_l_r == tf_dull_l_r
 
     ll = InputColumn("lat['long']")
-    assert ll.name_l() == "\"lat_l\"['long']"
+    assert ll.name_l == "\"lat_l\"['long']"
 
     ll_tf_l_r = [
         '"l"."tf_lat"[\'long\'] as "tf_lat_l"[\'long\']',
         '"r"."tf_lat"[\'long\'] as "tf_lat_r"[\'long\']',
     ]
-    assert ll.l_r_tf_names_as_l_r() == ll_tf_l_r
+    assert ll.l_r_tf_names_as_l_r == ll_tf_l_r
 
     group = InputColumn("cluster")
-    assert group.name_l() == '"cluster_l"'
-    assert group.bf_name() == '"bf_cluster"'
+    assert group.name_l == '"cluster_l"'
+    assert group.bf_name == '"bf_cluster"'
 
     group_l_r_names = ['"l"."cluster" as "cluster_l"', '"r"."cluster" as "cluster_r"']
-    assert group.l_r_names_as_l_r() == group_l_r_names
+    assert group.l_r_names_as_l_r == group_l_r_names
 
     group_tf_l_r = [
         '"l"."tf_cluster" as "tf_cluster_l"',
         '"r"."tf_cluster" as "tf_cluster_r"',
     ]
-    assert group.l_r_tf_names_as_l_r() == group_tf_l_r
+    assert group.l_r_tf_names_as_l_r == group_tf_l_r
 
     cols = ["unique_id", "SUR name", "cluster"]
     out_cols = ['"unique_id"', '"SUR name"', '"cluster"']
     cols_class = [InputColumn(c) for c in cols]
-    assert [c.name() for c in cols_class] == out_cols
+    assert [c.name for c in cols_class] == out_cols