From 47b067776924b3e90f36d40acd75a9c75efe0f55 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 21 Sep 2024 20:07:29 -0700 Subject: [PATCH] Remove python symmetrize from the SG graph creation --- .../cugraph/structure/graph_classes.py | 4 + .../simpleDistributedGraph.py | 8 ++ .../graph_implementation/simpleGraph.py | 73 +++++++++++++++++-- .../cugraph/cugraph/structure/symmetrize.py | 16 +++- 4 files changed, 92 insertions(+), 9 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_classes.py b/python/cugraph/cugraph/structure/graph_classes.py index e90c0576f55..82a4be5976c 100644 --- a/python/cugraph/cugraph/structure/graph_classes.py +++ b/python/cugraph/cugraph/structure/graph_classes.py @@ -174,6 +174,10 @@ def from_cudf_edgelist( This parameter is deprecated and will be removed. + symmetrize: bool, optional (default=True) + If True, symmetrize the edge list for an undirected graph. Setting + this flag to True for a directed graph returns an error. + Examples -------- >>> df = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 7f3f7e83e59..5fa02e738ce 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -98,6 +98,7 @@ def _make_plc_graph( edge_id_type, edge_type_id, drop_multi_edges, + symmetrize ): weights = None edge_ids = None @@ -151,6 +152,7 @@ def _make_plc_graph( else ([cudf.Series(dtype=edge_type_id)] if edge_type_id else None), num_arrays=num_arrays, store_transposed=store_transposed, + symmetrize=symmetrize, do_expensive_check=False, drop_multi_edges=drop_multi_edges, ) @@ -183,6 +185,11 @@ def __from_edgelist( destination ].dtype not in [np.int32, np.int64]: raise ValueError("set renumber to True for non integer columns ids") + + if (self.properties.directed and symmetrize): + raise ValueError( + "The edgelist can only be symmetrized for undirected graphs." + ) s_col = source d_col = destination @@ -370,6 +377,7 @@ def __from_edgelist( self.edge_id_type, self.edge_type_id_type, not self.properties.multi_edge, + not self.properties.directed ) for w, edata in persisted_keys_d.items() } diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index bc5cca67c2e..87be03bebc5 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -11,9 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure import graph_primtypes_wrapper +#from cugraph.structure import graph_primtypes_wrapper ********* from cugraph.structure.replicate_edgelist import replicate_cudf_dataframe -from cugraph.structure.symmetrize import symmetrize +from cugraph.structure.symmetrize import symmetrize as symmetrize_ from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils import cudf @@ -134,6 +134,7 @@ def __from_edgelist( renumber=True, legacy_renum_only=False, store_transposed=False, + symmetrize=False ): if legacy_renum_only: warning_msg = ( @@ -142,6 +143,11 @@ def __from_edgelist( warnings.warn( warning_msg, ) + + if (self.properties.directed and symmetrize): + raise ValueError( + "The edgelist can only be symmetrized for undirected graphs." + ) # Verify column names present in input DataFrame s_col = source @@ -268,8 +274,9 @@ def __from_edgelist( # otherwise the inital dataframe will be returned. Duplicated edges # will be dropped unless the graph is a MultiGraph(Not Implemented yet) # TODO: Update Symmetrize to work on Graph and/or DataFrame + """ if edge_attr is not None: - source_col, dest_col, value_col = symmetrize( + source_col, dest_col, value_col = symmetrize_( elist, source, destination, @@ -285,7 +292,7 @@ def __from_edgelist( value_col = value_dict else: value_col = None - source_col, dest_col = symmetrize( + source_col, dest_col = symmetrize_( elist, source, destination, @@ -304,6 +311,43 @@ def __from_edgelist( self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, value_col) + print("original edgelist = ", len(elist[source]), " symmetrize edgelist = ", len(source_col)) + print("value_col = \n", value_col) + """ + + #self.edgelist = simpleGraphImpl.EdgeList(elist[source], elist[destination], elist[weight]) + print("\nelist = \n", elist.head()) + + #""" + if edge_attr is not None: + value_col = { + self.edgeWeightCol: elist[weight] if weight in edge_attr else None, + self.edgeIdCol: elist[edge_id] if edge_id in edge_attr else None, + self.edgeTypeCol: elist[edge_type] + if edge_type in edge_attr else None, + } + + print("value_col = \n", value_col) + else: + value_col = None + + # unsymmetrize edgelist + # FIXME: if the user calls self.edgelist after creating the graph, returns the symmetrized + # edgelist if the graph is undirected or symmetrize = True (decompress) + self.edgelist = simpleGraphImpl.EdgeList(elist[source], elist[destination], value_col) + + #print("value_col_df = \n", elist[weight]) + #""" + + + + + + + + + + if self.batch_enabled: self._replicate_edgelist() @@ -312,6 +356,7 @@ def __from_edgelist( store_transposed=store_transposed, renumber=renumber, drop_multi_edges=not self.properties.multi_edge, + symmetrize=not self.properties.directed ) def to_pandas_edgelist( @@ -428,7 +473,7 @@ def view_edge_list(self): then containing the weight value for each edge """ if self.edgelist is None: - src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) + src, dst, weights = (None, None, None)#graph_primtypes_wrapper.view_edge_list(self) **** self.edgelist = self.EdgeList(src, dst, weights) srcCol = self.source_columns @@ -555,7 +600,10 @@ def __from_adjlist( if value_col is not None: self.properties.weighted = True self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, renumber=renumber + value_col=value_col, + store_transposed=store_transposed, + renumber=renumber, + symmetrize=not self.properties.directed ) if self.batch_enabled: @@ -596,7 +644,7 @@ def view_adj_list(self): self.transposedadjlist.weights, ) else: - off, ind, vals = graph_primtypes_wrapper.view_adj_list(self) + off, ind, vals = (None, None, None)#graph_primtypes_wrapper.view_adj_list(self) **** self.adjlist = self.AdjList(off, ind, vals) if self.batch_enabled: @@ -643,7 +691,7 @@ def view_transposed_adj_list(self): off, ind, vals, - ) = graph_primtypes_wrapper.view_transposed_adj_list(self) + ) = (None, None, None)#graph_primtypes_wrapper.view_transposed_adj_list(self) ******* self.transposedadjlist = self.transposedAdjList(off, ind, vals) if self.batch_enabled: @@ -1146,6 +1194,7 @@ def _make_plc_graph( store_transposed: bool = False, renumber: bool = True, drop_multi_edges: bool = False, + symmetrize: bool = False ): """ Parameters @@ -1164,6 +1213,8 @@ def _make_plc_graph( int32 or int64 type. drop_multi_edges: bool (default=False) Whether to drop multi edges + symmetrize: bool (default=False) + Whether to symmetrize """ if value_col is None: @@ -1214,7 +1265,12 @@ def _make_plc_graph( "This may cause extra memory usage. Consider passing" " a int64 list of edge ids instead." ) + df = cudf.DataFrame() + df["srcs"] = src_or_offset_array + df["dsts"] = dst_or_index_array + df["wgts"] = weight_col + print("df = \n", df) self._plc_graph = SGGraph( resource_handle=ResourceHandle(), graph_properties=graph_props, @@ -1228,6 +1284,7 @@ def _make_plc_graph( do_expensive_check=True, input_array_format=input_array_format, drop_multi_edges=drop_multi_edges, + symmetrize=symmetrize ) def to_directed(self, DiG, store_transposed=False): diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index 6c7bfac4e70..a8369598515 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -251,12 +251,25 @@ def symmetrize( >>> df['values'] = cudf.Series(M['2']) >>> src, dst, val = symmetrize(df, 'sources', 'destinations', 'values', multi=True) """ + print("multi = \n", multi) + return ( + input_df[source_col_name], + input_df[dest_col_name], + input_df[value_col_name], + ) + + + + """ warnings.warn( "This method is deprecated and will no longer be supported. The symmetrization " - "of the edges are only supported by creating an undirected graph", + "of the edges are only supported by setting the 'symmetrize' flag to 'True'", FutureWarning, ) + """ + + """ # FIXME: Redundant check that should be done at the graph creation if "edge_id" in input_df.columns and symmetrize: @@ -300,6 +313,7 @@ def symmetrize( ) return output_df[source_col_name], output_df[dest_col_name] + """ def _add_reverse_edges(df, src_name, dst_name, weight_name):