From e93585765ab96c40c747795ef2d6974cfb37cc2c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 31 Oct 2024 19:30:26 -0700 Subject: [PATCH 01/13] optimize ktruss --- cpp/src/community/k_truss_impl.cuh | 590 +++++++++++++++++++++++++++-- 1 file changed, 555 insertions(+), 35 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index e052a892917..6ba9d6ef119 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -17,6 +17,7 @@ #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" #include "prims/transform_e.cuh" @@ -40,9 +41,124 @@ #include #include #include +#include +using namespace std::chrono; namespace cugraph { +template +struct extract_weak_edges { + edge_t k{}; + __device__ thrust::optional> operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const + { + return count < k - 2 + ? thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; + } +}; + + +template +struct extract_edges { // FIXME: ******************************Remove this functor. For testing purposes only******************* + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const + { + return thrust::make_tuple(src, dst, count); + } +}; + + +template +struct extract_edges_ { // FIXME: ******************************Remove this functor. For testing purposes only******************* + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + { + return thrust::make_tuple(src, dst); + } +}; + + + +template +struct extract_masked_edges { // FIXME: ******************************Remove this functor. For testing purposes only******************* + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto mask) const + { + return mask == 0 + ? thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; + } +}; + + +template +struct generate_p_q { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + return thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); + } +}; + +template +struct generate_p_r_or_q_r_from_p_q { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + EdgeIterator edgelist_first{}; + EdgeIterator weak_edgelist_first{}; + EdgeIterator edgelist_last{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto edge = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); + + if constexpr (generate_p_r) { + edge = thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); + } + + // Check in the valid edge range + auto itr_pair = thrust::lower_bound( + thrust::seq, edgelist_first, weak_edgelist_first, edge); + + if ((itr_pair == weak_edgelist_first) || *itr_pair != edge) { // FIXME: Do binary search instead + // Search in the weak edge partition. + itr_pair = thrust::lower_bound( + thrust::seq, weak_edgelist_first, edgelist_last, edge); + + //auto idx_ = thrust::distance(weak_edgelist_first, itr_pair); // FIXME: Only for debugging purposes + + + if ((itr_pair == edgelist_last) || *itr_pair != edge) { // FIXME: Do binary search instead + edge = thrust::make_tuple(thrust::get<1>(edge), thrust::get<0>(edge)); // Edge must be in the other direction + } + } + + + return edge; + } +}; + namespace { template @@ -116,11 +232,15 @@ k_truss(raft::handle_t const& handle, std::optional> modified_graph{std::nullopt}; std::optional> modified_graph_view{std::nullopt}; + std::optional> undirected_graph_view{std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; std::optional> wgts{std::nullopt}; + // Ideally, leverage the undirected graph derived from k-core + undirected_graph_view = graph_view; + if (graph_view.count_self_loops(handle) > edge_t{0}) { auto [srcs, dsts] = extract_transform_e(handle, graph_view, @@ -148,13 +268,13 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + false); modified_graph_view = (*modified_graph).view(); } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - + //#if 0 { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -197,10 +317,12 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + false); modified_graph_view = (*modified_graph).view(); + //undirected_graph_view = (*modified_graph).view(); + if (renumber_map) { // collapse renumber_map unrenumber_int_vertices(handle, (*tmp_renumber_map).data(), @@ -211,6 +333,7 @@ k_truss(raft::handle_t const& handle, renumber_map = std::move(tmp_renumber_map); } + //#endif // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -277,7 +400,10 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); + // If renumber is set to True, cur_graph_view and graph_view don't have the same + // renumbering scheme. Will need to renumber before performing certain operations on + // graph_view like nbr_intersection. + false); modified_graph_view = (*modified_graph).view(); if (renumber_map) { // collapse renumber_map @@ -293,63 +419,457 @@ k_truss(raft::handle_t const& handle, // 4. Compute triangle count using nbr_intersection and unroll weak edges { + + auto cur_undirected_graph_view = undirected_graph_view ? *undirected_graph_view : graph_view; auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); + + // Extract all undirected edges + auto [edgelist_srcs, edgelist_dsts, edgelist_cnts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + // FIXME: Replace by lambda function + extract_edges{}); + + // sort the edges by keys where keys are triangle_counts + auto edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + auto edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); + + // Symmetrize the DODG graph + + std::optional> tmp_renumber_map{std::nullopt}; + + cugraph::graph_t cur_graph(handle); + cur_graph = std::move(*modified_graph); + + std::optional< + cugraph::edge_property_t, + weight_t>> + sg_edge_weights{std::nullopt}; + + std::tie(*modified_graph, std::ignore, std::ignore) = + cugraph::symmetrize_graph( + handle, + std::move(cur_graph), + std::move(sg_edge_weights), + //std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + /* + tmp_renumber_map ? std::optional>(std::move(*tmp_renumber_map)) + : std::nullopt, + */ + false); + + cur_undirected_graph_view = (*modified_graph).view(); + + // Sort once + thrust::sort_by_key( + handle.get_thrust_policy(), + edgelist_first, + edgelist_last, + edgelist_cnts.begin() + ); + + // FIXME: edgelist_cnts - rename to num_triangles + auto edge_triangle_count_pair_first = + thrust::make_zip_iterator(edgelist_first, edgelist_cnts.begin()); + edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::edge_bucket_t edges_to_mask(handle); + cugraph::edge_property_t edge_mask_undirected_graph(handle, cur_undirected_graph_view); + cugraph::fill_edge_property(handle, cur_undirected_graph_view, edge_mask_undirected_graph.mutable_view(), bool{true}); - while (true) { - // FIXME: This approach is very expensive when invalidating only few edges per iteration - // and should be address. - auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); + size_t prev_chunk_size = 0; - // Mask all the edges that have k - 2 count + auto iteration = -1; - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + std::chrono::seconds s (0); + //std::chrono::duration k_truss_ms = duration_cast (s); + + std::chrono::duration k_truss_ms = duration_cast (s); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto start = high_resolution_clock::now(); - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count >= k - 2; - }, - edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(edge_mask.view()); + vertex_t number_edges = 0; + while (true) { + iteration += 1; + + auto prev_number_of_edges = cur_undirected_graph_view.compute_number_of_edges(handle); + + if (iteration == 2) { + //break; + } + + auto weak_edge_triangle_count_first = + thrust::stable_partition(handle.get_thrust_policy(), + edge_triangle_count_pair_first, + edge_triangle_count_pair_first + edgelist_srcs.size(), + [k] __device__(auto e) { + auto num_triangles = thrust::get<1>(e); + return num_triangles >= k - 2; + }); + + auto num_weak_edges = static_cast( + thrust::distance(weak_edge_triangle_count_first, + edge_triangle_count_pair_first + edgelist_srcs.size())); + + auto num_valid_edges = edgelist_srcs.size() - num_weak_edges; + + auto weak_edgelist_first = edgelist_first + num_valid_edges; + auto weak_edgelist_last = edgelist_first + edgelist_srcs.size(); + + // Once identifying the weak edges, perform nbr_intersection on the weak edges. + + auto [intersection_offsets, intersection_indices] = \ + per_v_pair_dst_nbr_intersection( + handle, + cur_undirected_graph_view, + weak_edgelist_first, + weak_edgelist_last, + false); + + // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) + // To avoid overcompensation for (q, r) edges, check whether None of the other edges were part of the (p, q) edges. + // To avoid overcompensation for (p, r) edges, check whether NOne of the other edges were part of the (p, q) and (q, r) edges. + + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(edgelist_srcs.data() + num_valid_edges, + num_weak_edges), + raft::device_span(edgelist_dsts.data() + num_valid_edges, + num_weak_edges) + }); + + // From nbr_intersection on the undirected graph, we know the endpoints (vertices) of the triangles however + // we don't know the edges directions. Since edges of the DODG are directed, we can easily recover the + // direction of the edges with a binary search + + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(edgelist_srcs.data() + num_valid_edges, + num_weak_edges), + raft::device_span(edgelist_dsts.data() + num_valid_edges, + num_weak_edges), + edgelist_first, + edgelist_first + num_valid_edges, + edgelist_last}); + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(edgelist_srcs.data() + num_valid_edges, + num_weak_edges), + raft::device_span(edgelist_dsts.data() + num_valid_edges, + num_weak_edges), + edgelist_first, + edgelist_first + num_valid_edges, + edgelist_last}); + + auto vertex_pair_buffer_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q).begin(), std::get<1>(vertex_pair_buffer_p_q).begin()); + auto vertex_pair_buffer_p_r_edge_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).begin(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).begin()); + auto vertex_pair_buffer_q_r_edge_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).begin()); + + + auto triangles_first = thrust::make_zip_iterator(vertex_pair_buffer_p_q_first, vertex_pair_buffer_p_r_edge_p_q_first, vertex_pair_buffer_q_r_edge_p_q_first); // FIXME: not really a triangle but two edges of a triangle so rename + auto num_triangles = intersection_indices.size(); + + auto vertex_pair_buffer_p_q_= + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + auto vertex_pair_buffer_p_r_edge_p_q_ = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + auto vertex_pair_buffer_q_r_edge_p_q_ = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + auto vertex_pair_buffer_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_).begin(), std::get<1>(vertex_pair_buffer_p_q_).begin()); + auto vertex_pair_buffer_p_r_edge_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r_edge_p_q_).begin(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q_).begin()); + auto vertex_pair_buffer_q_r_edge_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_q_).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q_).begin()); + auto triangles_first_ = thrust::make_zip_iterator(vertex_pair_buffer_p_q_first_, vertex_pair_buffer_p_r_edge_p_q_first_, vertex_pair_buffer_q_r_edge_p_q_first_); // FIXME: not really a triangle but two edges of a triangle so rename + + // Reorder edges' position in the triangle + thrust::transform( + handle.get_thrust_policy(), + triangles_first, + triangles_first + num_triangles, + triangles_first_, + [] __device__(auto triangle) { + auto edge_p_q = thrust::get<0>(triangle); + auto edge_p_r = thrust::get<1>(triangle); + auto edge_q_r = thrust::get<2>(triangle); + if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { + if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { + return thrust::tie(edge_p_r, edge_q_r, edge_p_q); + } else { + return thrust::tie(edge_p_r, edge_p_q, edge_q_r); + } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + return thrust::tie(edge_p_q, edge_q_r, edge_p_r); + + } else { // Only for debugging purposes. Remove after. + printf("\ninvalid combination\n"); + } + }); + + + thrust::sort( + handle.get_thrust_policy(), + triangles_first_, + triangles_first_ + num_triangles); + + auto unique_triangle_end = thrust::unique( + handle.get_thrust_policy(), + triangles_first_, + triangles_first_ + num_triangles); + + auto num_unique_triangles = thrust::distance(triangles_first_, unique_triangle_end); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_, num_unique_triangles, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q_, num_unique_triangles, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q_, num_unique_triangles, handle.get_stream()); + + + resize_dataframe_buffer(vertex_pair_buffer_p_q_, 3 * num_unique_triangles, handle.get_stream()); + + // Copy p_r edges + thrust::copy( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q_), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q_), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_) + num_unique_triangles); + + thrust::copy( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q_), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q_), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_) + (2*num_unique_triangles)); + + thrust::sort( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_)); + + auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + + // Update the triangle count of edges + + auto weak_srcs = edgelist_srcs.begin() + num_valid_edges; + auto weak_dsts = edgelist_dsts.begin() + num_valid_edges; + + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(unique_pair_count), + [ + vertex_pair_buffer_unique = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count = decrease_count.begin(), + edgelist_cnts = edgelist_cnts.begin(), + edgelist_first, + weak_edgelist_first = edgelist_first + num_valid_edges, // FIXME: No need to assign. simply pass weak_edgelist_first + edgelist_last, + num_valid_edges + ] __device__(auto i) { + + // Check in the valid edge range + // FIXMEEE: thrust::find + auto itr_pair = thrust::lower_bound( + thrust::seq, edgelist_first, weak_edgelist_first, vertex_pair_buffer_unique[i]); + + auto idx = thrust::distance(edgelist_first, itr_pair); + #if 0 + if ((itr_pair == weak_edgelist_first) || *itr_pair != *(vertex_pair_buffer_unique + i)) { // FIXME: Do binary search instead ********************** + //if ((itr_pair == weak_edgelist_first)) { // FIXME: Do binary search instead ********************** + // Search in the weak edge partition. FIXME: Future optimization might + // be to simply ommit updating weak edges and just discard them per + // iteration for performance reasons. + + + // FIXMEEE: thrust::find + itr_pair = thrust::lower_bound( + thrust::seq, weak_edgelist_first, edgelist_last, vertex_pair_buffer_unique[i]); + + idx = num_valid_edges + thrust::distance(weak_edgelist_first, itr_pair); + + } + #endif + + if ((itr_pair != weak_edgelist_first) && *itr_pair == *(vertex_pair_buffer_unique + i)) { + // Update counts of valid edges only since weak edges will be deleted anyways + edgelist_cnts[idx] -= decrease_count[i]; + } + + } + + ); + + edges_to_mask.clear(); + edges_to_mask.insert(edgelist_srcs.begin() + num_valid_edges, + edgelist_srcs.end(), + edgelist_dsts.begin() + num_valid_edges); + + cugraph::transform_e( + handle, + cur_undirected_graph_view, + edges_to_mask, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [iteration] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return false; + }, + edge_mask_undirected_graph.mutable_view(), + false); + + edges_to_mask.clear(); + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, edgelist_srcs.begin() + num_valid_edges), + thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end()) + ); + + edges_to_mask.insert(edgelist_dsts.begin() + num_valid_edges, + edgelist_dsts.end(), + edgelist_srcs.begin() + num_valid_edges); + + cugraph::transform_e( + handle, + cur_undirected_graph_view, + edges_to_mask, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [iteration] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return false; + }, + edge_mask_undirected_graph.mutable_view(), + false); + + cur_undirected_graph_view.attach_edge_mask(edge_mask_undirected_graph.view()); + + // Need to unsort - FIXME: NO need to re-sort since those edges will be removed anyway + /* + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_srcs.begin() + num_valid_edges, edgelist_dsts.begin() + num_valid_edges), + thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()) + ); + */ + + + auto [edgelist_srcs__, edgelist_dsts__, edgelist_cnts__] = + extract_transform_e(handle, + cur_undirected_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + // FIXME: Replace by lambda function + extract_edges{}); + + edgelist_srcs.resize(num_valid_edges, handle.get_stream()); + edgelist_dsts.resize(num_valid_edges, handle.get_stream()); + edgelist_cnts.resize(num_valid_edges, handle.get_stream()); + + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); + edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); + + + number_edges = cur_undirected_graph_view.compute_number_of_edges(handle); + if (prev_number_of_edges == cur_undirected_graph_view.compute_number_of_edges(handle)) { break; } - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - rmm::device_uvector edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::optional> edgelist_wgts{std::nullopt}; + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto stop = high_resolution_clock::now(); + k_truss_ms = duration_cast(stop - start); + + std::cout << "k_truss took " << k_truss_ms.count() / 1000 << " milliseconds" << std::endl; + std::cout << "The number of edges = " << number_edges << " and the num_iteration = " << iteration << std::endl; + std::optional> edgelist_wgts{std::nullopt}; + + //#if 0 std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, - cur_graph_view, - edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, + cur_undirected_graph_view, + std::optional>{std::nullopt}, + //edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, // support edgeweights std::optional>{std::nullopt}, std::optional>{std::nullopt}, + std::optional>{std::nullopt} + /* std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size()))); - + raft::device_span((*renumber_map).data(), (*renumber_map).size())) // Update renumbering if it exist. + */ + ); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } From e61a580d8d3c9ac6d25d5e4cf59ee0932cf67945 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 31 Oct 2024 20:09:14 -0700 Subject: [PATCH 02/13] benchmark k-truss --- cpp/src/community/k_truss_impl.cuh | 82 ++++++++++-------------------- 1 file changed, 27 insertions(+), 55 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 6ba9d6ef119..04e09999da1 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -138,18 +138,15 @@ struct generate_p_r_or_q_r_from_p_q { } // Check in the valid edge range - auto itr_pair = thrust::lower_bound( + auto has_edge = thrust::binary_search( thrust::seq, edgelist_first, weak_edgelist_first, edge); - if ((itr_pair == weak_edgelist_first) || *itr_pair != edge) { // FIXME: Do binary search instead + if (!has_edge) { // FIXME: Do binary search instead // Search in the weak edge partition. - itr_pair = thrust::lower_bound( + has_edge = thrust::binary_search( thrust::seq, weak_edgelist_first, edgelist_last, edge); - - //auto idx_ = thrust::distance(weak_edgelist_first, itr_pair); // FIXME: Only for debugging purposes - - if ((itr_pair == edgelist_last) || *itr_pair != edge) { // FIXME: Do binary search instead + if (!has_edge) { // FIXME: Do binary search instead edge = thrust::make_tuple(thrust::get<1>(edge), thrust::get<0>(edge)); // Edge must be in the other direction } } @@ -451,6 +448,10 @@ k_truss(raft::handle_t const& handle, cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; + + edge_weight_view = + edge_weight ? std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; std::tie(*modified_graph, std::ignore, std::ignore) = cugraph::symmetrize_graph( @@ -479,10 +480,6 @@ k_truss(raft::handle_t const& handle, auto edge_triangle_count_pair_first = thrust::make_zip_iterator(edgelist_first, edgelist_cnts.begin()); - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - cugraph::edge_bucket_t edges_to_mask(handle); cugraph::edge_property_t edge_mask_undirected_graph(handle, cur_undirected_graph_view); cugraph::fill_edge_property(handle, cur_undirected_graph_view, edge_mask_undirected_graph.mutable_view(), bool{true}); @@ -495,6 +492,7 @@ k_truss(raft::handle_t const& handle, //std::chrono::duration k_truss_ms = duration_cast (s); std::chrono::duration k_truss_ms = duration_cast (s); + std::chrono::duration intersection_ms = duration_cast (s); RAFT_CUDA_TRY(cudaDeviceSynchronize()); auto start = high_resolution_clock::now(); @@ -527,7 +525,9 @@ k_truss(raft::handle_t const& handle, auto weak_edgelist_last = edgelist_first + edgelist_srcs.size(); // Once identifying the weak edges, perform nbr_intersection on the weak edges. - + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto intersection_start = high_resolution_clock::now(); + auto [intersection_offsets, intersection_indices] = \ per_v_pair_dst_nbr_intersection( handle, @@ -535,6 +535,14 @@ k_truss(raft::handle_t const& handle, weak_edgelist_first, weak_edgelist_last, false); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + auto intersection_stop = high_resolution_clock::now(); + + intersection_ms += duration_cast(intersection_stop - intersection_start); + + + // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) // To avoid overcompensation for (q, r) edges, check whether None of the other edges were part of the (p, q) edges. @@ -654,7 +662,6 @@ k_truss(raft::handle_t const& handle, } }); - thrust::sort( handle.get_thrust_policy(), triangles_first_, @@ -723,7 +730,7 @@ k_truss(raft::handle_t const& handle, decrease_count = decrease_count.begin(), edgelist_cnts = edgelist_cnts.begin(), edgelist_first, - weak_edgelist_first = edgelist_first + num_valid_edges, // FIXME: No need to assign. simply pass weak_edgelist_first + weak_edgelist_first = edgelist_first + num_valid_edges, edgelist_last, num_valid_edges ] __device__(auto i) { @@ -733,26 +740,9 @@ k_truss(raft::handle_t const& handle, auto itr_pair = thrust::lower_bound( thrust::seq, edgelist_first, weak_edgelist_first, vertex_pair_buffer_unique[i]); - auto idx = thrust::distance(edgelist_first, itr_pair); - #if 0 - if ((itr_pair == weak_edgelist_first) || *itr_pair != *(vertex_pair_buffer_unique + i)) { // FIXME: Do binary search instead ********************** - //if ((itr_pair == weak_edgelist_first)) { // FIXME: Do binary search instead ********************** - // Search in the weak edge partition. FIXME: Future optimization might - // be to simply ommit updating weak edges and just discard them per - // iteration for performance reasons. - - - // FIXMEEE: thrust::find - itr_pair = thrust::lower_bound( - thrust::seq, weak_edgelist_first, edgelist_last, vertex_pair_buffer_unique[i]); - - idx = num_valid_edges + thrust::distance(weak_edgelist_first, itr_pair); - - } - #endif - + // Update counts of valid edges only since weak edges will be deleted anyways if ((itr_pair != weak_edgelist_first) && *itr_pair == *(vertex_pair_buffer_unique + i)) { - // Update counts of valid edges only since weak edges will be deleted anyways + auto idx = thrust::distance(edgelist_first, itr_pair); edgelist_cnts[idx] -= decrease_count[i]; } @@ -806,25 +796,6 @@ k_truss(raft::handle_t const& handle, cur_undirected_graph_view.attach_edge_mask(edge_mask_undirected_graph.view()); - // Need to unsort - FIXME: NO need to re-sort since those edges will be removed anyway - /* - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin() + num_valid_edges, edgelist_dsts.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()) - ); - */ - - - auto [edgelist_srcs__, edgelist_dsts__, edgelist_cnts__] = - extract_transform_e(handle, - cur_undirected_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - // FIXME: Replace by lambda function - extract_edges{}); - edgelist_srcs.resize(num_valid_edges, handle.get_stream()); edgelist_dsts.resize(num_valid_edges, handle.get_stream()); edgelist_cnts.resize(num_valid_edges, handle.get_stream()); @@ -833,7 +804,7 @@ k_truss(raft::handle_t const& handle, edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); - number_edges = cur_undirected_graph_view.compute_number_of_edges(handle); + //number_edges = cur_undirected_graph_view.compute_number_of_edges(handle); if (prev_number_of_edges == cur_undirected_graph_view.compute_number_of_edges(handle)) { break; } } @@ -842,8 +813,9 @@ k_truss(raft::handle_t const& handle, auto stop = high_resolution_clock::now(); k_truss_ms = duration_cast(stop - start); - std::cout << "k_truss took " << k_truss_ms.count() / 1000 << " milliseconds" << std::endl; - std::cout << "The number of edges = " << number_edges << " and the num_iteration = " << iteration << std::endl; + std::cout << "k_truss took " << k_truss_ms.count() / 1000 << " milliseconds" << std::endl; + std::cout << "intersection took " << intersection_ms.count()/1000 << " milliseconds" << std::endl; + std::cout << "The number of edges = " << cur_undirected_graph_view.compute_number_of_edges(handle) << " and the num_iteration = " << iteration << std::endl; std::optional> edgelist_wgts{std::nullopt}; From 0c83b5442cdc1b546374a86feacce1e6513de46f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 31 Oct 2024 21:15:20 -0700 Subject: [PATCH 03/13] add benchmark print --- cpp/src/community/k_truss_impl.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 04e09999da1..4dacaffd796 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -815,6 +815,7 @@ k_truss(raft::handle_t const& handle, std::cout << "k_truss took " << k_truss_ms.count() / 1000 << " milliseconds" << std::endl; std::cout << "intersection took " << intersection_ms.count()/1000 << " milliseconds" << std::endl; + std::cout << "percentage during intersection = " << ((intersection_ms.count()/1000) / (k_truss_ms.count() / 1000)) * 100 << std::endl; std::cout << "The number of edges = " << cur_undirected_graph_view.compute_number_of_edges(handle) << " and the num_iteration = " << iteration << std::endl; std::optional> edgelist_wgts{std::nullopt}; From 2e0ef1a0873a642f9a91ad156ead6c29b335b30c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 5 Nov 2024 09:48:36 -0800 Subject: [PATCH 04/13] add weights support --- cpp/src/community/k_truss_impl.cuh | 447 ++++++++++++++--------------- 1 file changed, 211 insertions(+), 236 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 4dacaffd796..fcd56a1b36c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -41,8 +41,6 @@ #include #include #include -#include -using namespace std::chrono; namespace cugraph { @@ -95,6 +93,74 @@ struct extract_masked_edges { // FIXME: ******************************Remove th }; +template +struct extract_triangles_from_weak_edges { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; + EdgeIterator edgelist_first{}; + EdgeIterator weak_edgelist_first{}; + EdgeIterator edgelist_last{}; + + + __device__ thrust::tuple + operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + auto edge_p_q = thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); + + // Extract (p, r) edges + auto edge_p_r = thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); + + // check for edge existance in the DODG edgelist- FIXME: Create a function + // Check in the valid edge range + auto has_edge = thrust::binary_search( + thrust::seq, edgelist_first, weak_edgelist_first, edge_p_r); + + if (!has_edge) { // FIXME: Do binary search instead + // Search in the weak edge partition. + has_edge = thrust::binary_search( + thrust::seq, weak_edgelist_first, edgelist_last, edge_p_r); + + if (!has_edge) { + // Edge must be in the other direction + edge_p_r = thrust::make_tuple(thrust::get<1>(edge_p_r), thrust::get<0>(edge_p_r)); + } + } + + // Extract (q, r) edges + auto edge_q_r = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); + + + // check for edge existance in the DODG edgelist- FIXME: Can be a function + // Check in the valid edge range + has_edge = thrust::binary_search( + thrust::seq, edgelist_first, weak_edgelist_first, edge_q_r); + + if (!has_edge) { // FIXME: Do binary search instead + // Search in the weak edge partition. + has_edge = thrust::binary_search( + thrust::seq, weak_edgelist_first, edgelist_last, edge_q_r); + + if (!has_edge) { + // Edge must be in the other direction + edge_q_r = thrust::make_tuple(thrust::get<1>(edge_q_r), thrust::get<0>(edge_q_r)); + } + } + + return thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r)); + } +}; + + template struct generate_p_q { size_t chunk_start{}; @@ -265,13 +331,12 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - //#if 0 { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -314,12 +379,10 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); - //undirected_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map unrenumber_int_vertices(handle, (*tmp_renumber_map).data(), @@ -330,7 +393,6 @@ k_truss(raft::handle_t const& handle, renumber_map = std::move(tmp_renumber_map); } - //#endif // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -397,10 +459,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - // If renumber is set to True, cur_graph_view and graph_view don't have the same - // renumbering scheme. Will need to renumber before performing certain operations on - // graph_view like nbr_intersection. - false); + true); modified_graph_view = (*modified_graph).view(); if (renumber_map) { // collapse renumber_map @@ -417,13 +476,15 @@ k_truss(raft::handle_t const& handle, { - auto cur_undirected_graph_view = undirected_graph_view ? *undirected_graph_view : graph_view; auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view, false); - // Extract all undirected edges + // Extract all directed edges with their count + // Note. Maintaining this data-structure is not that expensive after applying + // k-core and DODG however, it would be more efficient to maintain and operate on a + // graph_view intead. auto [edgelist_srcs, edgelist_dsts, edgelist_cnts] = extract_transform_e(handle, cur_graph_view, @@ -433,7 +494,6 @@ k_truss(raft::handle_t const& handle, // FIXME: Replace by lambda function extract_edges{}); - // sort the edges by keys where keys are triangle_counts auto edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); @@ -444,67 +504,43 @@ k_truss(raft::handle_t const& handle, cugraph::graph_t cur_graph(handle); cur_graph = std::move(*modified_graph); - std::optional< - cugraph::edge_property_t, - weight_t>> - sg_edge_weights{std::nullopt}; - - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - - std::tie(*modified_graph, std::ignore, std::ignore) = + std::tie(*modified_graph, edge_weight, tmp_renumber_map) = cugraph::symmetrize_graph( handle, std::move(cur_graph), - std::move(sg_edge_weights), - //std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - /* - tmp_renumber_map ? std::optional>(std::move(*tmp_renumber_map)) - : std::nullopt, - */ + std::move(edge_weight), + std::move(renumber_map), false); - cur_undirected_graph_view = (*modified_graph).view(); + edge_weight_view = + edge_weight ? std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; + renumber_map = std::move(tmp_renumber_map); + + // Leverage the undirected graph view to find triangles + cur_graph_view = (*modified_graph).view(); - // Sort once + // sort the edges by keys once where keys are triangle_counts thrust::sort_by_key( handle.get_thrust_policy(), edgelist_first, edgelist_last, - edgelist_cnts.begin() + edgelist_cnts.begin() // FIXME: edgelist_cnts - rename to num_triangles ); - // FIXME: edgelist_cnts - rename to num_triangles auto edge_triangle_count_pair_first = thrust::make_zip_iterator(edgelist_first, edgelist_cnts.begin()); cugraph::edge_bucket_t edges_to_mask(handle); - cugraph::edge_property_t edge_mask_undirected_graph(handle, cur_undirected_graph_view); - cugraph::fill_edge_property(handle, cur_undirected_graph_view, edge_mask_undirected_graph.mutable_view(), bool{true}); + cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); + cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - size_t prev_chunk_size = 0; + size_t prev_chunk_size = 0; // FIXME: Add support for chunking - auto iteration = -1; - - std::chrono::seconds s (0); - //std::chrono::duration k_truss_ms = duration_cast (s); - - std::chrono::duration k_truss_ms = duration_cast (s); - std::chrono::duration intersection_ms = duration_cast (s); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start = high_resolution_clock::now(); - - vertex_t number_edges = 0; while (true) { - iteration += 1; - - auto prev_number_of_edges = cur_undirected_graph_view.compute_number_of_edges(handle); - if (iteration == 2) { - //break; - } + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); auto weak_edge_triangle_count_first = thrust::stable_partition(handle.get_thrust_policy(), @@ -517,46 +553,36 @@ k_truss(raft::handle_t const& handle, auto num_weak_edges = static_cast( thrust::distance(weak_edge_triangle_count_first, - edge_triangle_count_pair_first + edgelist_srcs.size())); + edge_triangle_count_pair_first + edgelist_srcs.size())); auto num_valid_edges = edgelist_srcs.size() - num_weak_edges; auto weak_edgelist_first = edgelist_first + num_valid_edges; auto weak_edgelist_last = edgelist_first + edgelist_srcs.size(); - - // Once identifying the weak edges, perform nbr_intersection on the weak edges. - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto intersection_start = high_resolution_clock::now(); + // Perform nbr_intersection of the weak edges leveraging the undirected + // graph view auto [intersection_offsets, intersection_indices] = \ per_v_pair_dst_nbr_intersection( handle, - cur_undirected_graph_view, + cur_graph_view, weak_edgelist_first, weak_edgelist_last, false); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto intersection_stop = high_resolution_clock::now(); - - intersection_ms += duration_cast(intersection_stop - intersection_start); - - - // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) - // To avoid overcompensation for (q, r) edges, check whether None of the other edges were part of the (p, q) edges. - // To avoid overcompensation for (p, r) edges, check whether NOne of the other edges were part of the (p, q) and (q, r) edges. - - auto vertex_pair_buffer_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - + // 'triangles_from_weak_edges' contains the triplet pair as follow (p, q, p, r, q, r) + auto triangles_from_weak_edges = + allocate_dataframe_buffer>( + intersection_indices.size(), + handle.get_stream()); + + // Extract triangle from weak edges thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + extract_triangles_from_weak_edges{ prev_chunk_size, raft::device_span(intersection_offsets.data(), intersection_offsets.size()), @@ -564,144 +590,114 @@ k_truss(raft::handle_t const& handle, intersection_indices.size()), raft::device_span(edgelist_srcs.data() + num_valid_edges, num_weak_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_weak_edges) - }); - - // From nbr_intersection on the undirected graph, we know the endpoints (vertices) of the triangles however - // we don't know the edges directions. Since edges of the DODG are directed, we can easily recover the - // direction of the edges with a binary search - - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, - num_weak_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_weak_edges), - edgelist_first, - edgelist_first + num_valid_edges, - edgelist_last}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, - num_weak_edges), raft::device_span(edgelist_dsts.data() + num_valid_edges, num_weak_edges), edgelist_first, edgelist_first + num_valid_edges, - edgelist_last}); + edgelist_last + } + ); - auto vertex_pair_buffer_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q).begin(), std::get<1>(vertex_pair_buffer_p_q).begin()); - auto vertex_pair_buffer_p_r_edge_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).begin(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).begin()); - auto vertex_pair_buffer_q_r_edge_p_q_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).begin()); - - - auto triangles_first = thrust::make_zip_iterator(vertex_pair_buffer_p_q_first, vertex_pair_buffer_p_r_edge_p_q_first, vertex_pair_buffer_q_r_edge_p_q_first); // FIXME: not really a triangle but two edges of a triangle so rename - auto num_triangles = intersection_indices.size(); - - auto vertex_pair_buffer_p_q_= - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - auto vertex_pair_buffer_p_r_edge_p_q_ = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - auto vertex_pair_buffer_q_r_edge_p_q_ = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - - auto vertex_pair_buffer_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_).begin(), std::get<1>(vertex_pair_buffer_p_q_).begin()); - auto vertex_pair_buffer_p_r_edge_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r_edge_p_q_).begin(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q_).begin()); - auto vertex_pair_buffer_q_r_edge_p_q_first_ = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_q_).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q_).begin()); - auto triangles_first_ = thrust::make_zip_iterator(vertex_pair_buffer_p_q_first_, vertex_pair_buffer_p_r_edge_p_q_first_, vertex_pair_buffer_q_r_edge_p_q_first_); // FIXME: not really a triangle but two edges of a triangle so rename - - // Reorder edges' position in the triangle - thrust::transform( - handle.get_thrust_policy(), - triangles_first, - triangles_first + num_triangles, - triangles_first_, - [] __device__(auto triangle) { - auto edge_p_q = thrust::get<0>(triangle); - auto edge_p_r = thrust::get<1>(triangle); - auto edge_q_r = thrust::get<2>(triangle); - if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { + // Reorder each triangle's edges to match the unique order (p, q), (q, r) and (p, r) + thrust::transform( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + get_dataframe_buffer_begin(triangles_from_weak_edges), + [] __device__(auto triangle) { + auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); + auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); + auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); + + if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { - return thrust::tie(edge_p_r, edge_q_r, edge_p_q); + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q) + ); + } else { - return thrust::tie(edge_p_r, edge_p_q, edge_q_r); + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) + ); } } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - return thrust::tie(edge_p_q, edge_q_r, edge_p_r); + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) + ); - } else { // Only for debugging purposes. Remove after. - printf("\ninvalid combination\n"); } - }); - + return triangle; + } + ); + + // Sort and remove duplicated triangles which will lead to overcompensation thrust::sort( handle.get_thrust_policy(), - triangles_first_, - triangles_first_ + num_triangles); - + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + auto unique_triangle_end = thrust::unique( handle.get_thrust_policy(), - triangles_first_, - triangles_first_ + num_triangles); - - auto num_unique_triangles = thrust::distance(triangles_first_, unique_triangle_end); + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); - resize_dataframe_buffer(vertex_pair_buffer_p_q_, num_unique_triangles, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q_, num_unique_triangles, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q_, num_unique_triangles, handle.get_stream()); + auto num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_, 3 * num_unique_triangles, handle.get_stream()); - - // Copy p_r edges - thrust::copy( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q_), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q_), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_) + num_unique_triangles); - - thrust::copy( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q_), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q_), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_) + (2*num_unique_triangles)); + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + auto edgelist_to_update_count = + allocate_dataframe_buffer>(3* num_unique_triangles, + handle.get_stream()); + // Flatten the triangles into an edgelist + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [ + num_unique_triangles, + triangles_from_weak_edges = get_dataframe_buffer_begin(triangles_from_weak_edges) + ] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); + + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<2>(triangle)); + dst = *(thrust::get<3>(triangle)); + } + + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<4>(triangle)); + dst = *(thrust::get<5>(triangle)); + } + + return thrust::make_tuple(src, dst); + } + ); + thrust::sort( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_)); + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_)); + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( unique_pair_count, handle.get_stream()); @@ -709,19 +705,14 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), thrust::make_constant_iterator(size_t{1}), get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - - // Update the triangle count of edges - - auto weak_srcs = edgelist_srcs.begin() + num_valid_edges; - auto weak_dsts = edgelist_dsts.begin() + num_valid_edges; - + // Update the triangle count of edges in the DODG edgelist thrust::for_each(handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(unique_pair_count), @@ -734,9 +725,7 @@ k_truss(raft::handle_t const& handle, edgelist_last, num_valid_edges ] __device__(auto i) { - // Check in the valid edge range - // FIXMEEE: thrust::find auto itr_pair = thrust::lower_bound( thrust::seq, edgelist_first, weak_edgelist_first, vertex_pair_buffer_unique[i]); @@ -745,9 +734,7 @@ k_truss(raft::handle_t const& handle, auto idx = thrust::distance(edgelist_first, itr_pair); edgelist_cnts[idx] -= decrease_count[i]; } - } - ); edges_to_mask.clear(); @@ -755,18 +742,19 @@ k_truss(raft::handle_t const& handle, edgelist_srcs.end(), edgelist_dsts.begin() + num_valid_edges); + // Remove weak edges in both direction from the undirected graph view cugraph::transform_e( handle, - cur_undirected_graph_view, + cur_graph_view, edges_to_mask, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [iteration] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { return false; }, - edge_mask_undirected_graph.mutable_view(), + weak_edges_mask.mutable_view(), false); edges_to_mask.clear(); @@ -782,19 +770,17 @@ k_truss(raft::handle_t const& handle, cugraph::transform_e( handle, - cur_undirected_graph_view, + cur_graph_view, edges_to_mask, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [iteration] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { return false; }, - edge_mask_undirected_graph.mutable_view(), + weak_edges_mask.mutable_view(), false); - - cur_undirected_graph_view.attach_edge_mask(edge_mask_undirected_graph.view()); edgelist_srcs.resize(num_valid_edges, handle.get_stream()); edgelist_dsts.resize(num_valid_edges, handle.get_stream()); @@ -803,45 +789,34 @@ k_truss(raft::handle_t const& handle, edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); - - //number_edges = cur_undirected_graph_view.compute_number_of_edges(handle); - if (prev_number_of_edges == cur_undirected_graph_view.compute_number_of_edges(handle)) { break; } + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto stop = high_resolution_clock::now(); - k_truss_ms = duration_cast(stop - start); - - std::cout << "k_truss took " << k_truss_ms.count() / 1000 << " milliseconds" << std::endl; - std::cout << "intersection took " << intersection_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "percentage during intersection = " << ((intersection_ms.count()/1000) / (k_truss_ms.count() / 1000)) * 100 << std::endl; - std::cout << "The number of edges = " << cur_undirected_graph_view.compute_number_of_edges(handle) << " and the num_iteration = " << iteration << std::endl; - std::optional> edgelist_wgts{std::nullopt}; //#if 0 std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, - cur_undirected_graph_view, - std::optional>{std::nullopt}, - //edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, // support edgeweights + cur_graph_view, + //std::optional>{std::nullopt}, + edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, // support edgeweights std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::optional>{std::nullopt} - /* + //std::optional>{std::nullopt} std::make_optional( raft::device_span((*renumber_map).data(), (*renumber_map).size())) // Update renumbering if it exist. - */ ); - + + /* std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); + */ return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); From 1259d415501f0a65e8d0e5d10ce22c2b651e2636 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 05:32:15 -0800 Subject: [PATCH 05/13] update SG implementation of K-Truss and add MG --- cpp/src/community/k_truss_impl.cuh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index fcd56a1b36c..0d00d89315e 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -624,14 +624,14 @@ k_truss(raft::handle_t const& handle, thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) ); } - } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) - ); - - } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) + ); + + } return triangle; } ); From 7f48a9187b53e004f41d8b3849bb966eaa227e7a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 22:01:18 -0800 Subject: [PATCH 06/13] add function to reorder the edges based on the DODG --- cpp/src/community/k_truss_impl.cuh | 1265 +++++++++++++++++++++++----- 1 file changed, 1042 insertions(+), 223 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 0d00d89315e..e7a6d51c3b1 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -41,16 +41,140 @@ #include #include #include +#include +using namespace std::chrono; namespace cugraph { +//template +template +void order_edge_based_on_dodg( + raft::handle_t const& handle, + GraphViewType const& graph_view, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts) + +{ + // FIXME: Use global comm for debugging purposes + // then replace it by minor comm once the accuracy is verified + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; + + + rmm::device_uvector cp_edgelist_srcs(edgelist_srcs.size(), handle.get_stream()); + rmm::device_uvector cp_edgelist_dsts(edgelist_srcs.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_srcs.begin()), + thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_dsts.end()), + thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin())); + + auto d_tx_counts = cugraph::groupby_and_count( + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), + [func]__device__(auto val) { + return func(val); + }, + comm_size, + std::numeric_limits::max(), + handle.get_stream()); + + + std::vector h_tx_counts(d_tx_counts.size()); + + raft::update_host(h_tx_counts.data(), + d_tx_counts.data(), + d_tx_counts.size(), + handle.get_stream()); + + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); + std::vector rx_counts{}; + + std::tie(srcs, rx_counts) = + shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + std::tie(dsts, std::ignore) = + shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + edge_exists = graph_view.has_edge( + handle, + raft::device_span(srcs.data(), srcs.size()), + raft::device_span(dsts.data(), dsts.size()) + ); + + std::tie(edge_exists, std::ignore) = + shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); + + + thrust::sort_by_key( + handle.get_thrust_policy(), + thrust::make_zip_iterator( + cp_edgelist_srcs.begin(), + cp_edgelist_dsts.begin()), + thrust::make_zip_iterator( + cp_edgelist_srcs.end(), + cp_edgelist_dsts.end()), + edge_exists.begin()); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(edgelist_srcs.size()), + edge_exists.begin(), + [ + //num_unique_triangles, + edge_exists = edge_exists.data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + cp_edgelist_first = thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), + cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()), + ] __device__(auto idx) { + auto src = thrust::get<0>(edgelist_first[idx]); + auto dst = thrust::get<1>(edgelist_first[idx]); + + auto itr_pair = thrust::lower_bound( + thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); + + return edge_exists[idx_pair]; + } + ); + +} + template struct extract_weak_edges { edge_t k{}; __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - return count < k - 2 + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + //auto count_ = thrust::get<0>(count); + return ((count < k - 2) && (count > 0)) + //return count < k - 2 // FIXME: might be faster to skip edges with count = 0 ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } @@ -93,21 +217,18 @@ struct extract_masked_edges { // FIXME: ******************************Remove th }; -template +template struct extract_triangles_from_weak_edges { size_t chunk_start{}; raft::device_span intersection_offsets{}; raft::device_span intersection_indices{}; raft::device_span weak_srcs{}; raft::device_span weak_dsts{}; - EdgeIterator edgelist_first{}; - EdgeIterator weak_edgelist_first{}; - EdgeIterator edgelist_last{}; - __device__ thrust::tuple operator()(edge_t i) const { + auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); @@ -117,42 +238,9 @@ struct extract_triangles_from_weak_edges { // Extract (p, r) edges auto edge_p_r = thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); - // check for edge existance in the DODG edgelist- FIXME: Create a function - // Check in the valid edge range - auto has_edge = thrust::binary_search( - thrust::seq, edgelist_first, weak_edgelist_first, edge_p_r); - - if (!has_edge) { // FIXME: Do binary search instead - // Search in the weak edge partition. - has_edge = thrust::binary_search( - thrust::seq, weak_edgelist_first, edgelist_last, edge_p_r); - - if (!has_edge) { - // Edge must be in the other direction - edge_p_r = thrust::make_tuple(thrust::get<1>(edge_p_r), thrust::get<0>(edge_p_r)); - } - } - // Extract (q, r) edges auto edge_q_r = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - - // check for edge existance in the DODG edgelist- FIXME: Can be a function - // Check in the valid edge range - has_edge = thrust::binary_search( - thrust::seq, edgelist_first, weak_edgelist_first, edge_q_r); - - if (!has_edge) { // FIXME: Do binary search instead - // Search in the weak edge partition. - has_edge = thrust::binary_search( - thrust::seq, weak_edgelist_first, edgelist_last, edge_q_r); - - if (!has_edge) { - // Edge must be in the other direction - edge_q_r = thrust::make_tuple(thrust::get<1>(edge_q_r), thrust::get<0>(edge_q_r)); - } - } - return thrust::make_tuple( thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), @@ -301,6 +389,10 @@ k_truss(raft::handle_t const& handle, edge_weight{std::nullopt}; std::optional> wgts{std::nullopt}; + cugraph::edge_bucket_t edgelist_dodg(handle); + + cugraph::edge_property_t, bool> dodg_mask(handle, graph_view); + // Ideally, leverage the undirected graph derived from k-core undirected_graph_view = graph_view; @@ -337,6 +429,7 @@ k_truss(raft::handle_t const& handle, } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -397,6 +490,7 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { + auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -438,38 +532,29 @@ k_truss(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); } - if constexpr (multi_gpu) { - std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(srcs), std::move(dsts), std::move(wgts), std::nullopt, std::nullopt); - } - - std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(srcs), - std::move(dsts), - std::move(wgts), - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); + cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); - modified_graph_view = (*modified_graph).view(); - if (renumber_map) { // collapse renumber_map - unrenumber_int_vertices(handle, - (*tmp_renumber_map).data(), - (*tmp_renumber_map).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts); - } - renumber_map = std::move(tmp_renumber_map); + // Masking edges not part of the DODG + edgelist_dodg.insert(srcs.begin(), + srcs.end(), + dsts.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_dodg, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return true; + }, + dodg_mask.mutable_view(), + false); + + edgelist_dodg.clear(); } // 4. Compute triangle count using nbr_intersection and unroll weak edges @@ -478,90 +563,48 @@ k_truss(raft::handle_t const& handle, auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view, false); - - // Extract all directed edges with their count - // Note. Maintaining this data-structure is not that expensive after applying - // k-core and DODG however, it would be more efficient to maintain and operate on a - // graph_view intead. - auto [edgelist_srcs, edgelist_dsts, edgelist_cnts] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - // FIXME: Replace by lambda function - extract_edges{}); - - auto edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); - auto edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); - - // Symmetrize the DODG graph - - std::optional> tmp_renumber_map{std::nullopt}; - - cugraph::graph_t cur_graph(handle); - cur_graph = std::move(*modified_graph); - - std::tie(*modified_graph, edge_weight, tmp_renumber_map) = - cugraph::symmetrize_graph( - handle, - std::move(cur_graph), - std::move(edge_weight), - std::move(renumber_map), - false); - - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - renumber_map = std::move(tmp_renumber_map); - - // Leverage the undirected graph view to find triangles - cur_graph_view = (*modified_graph).view(); - - // sort the edges by keys once where keys are triangle_counts - thrust::sort_by_key( - handle.get_thrust_policy(), - edgelist_first, - edgelist_last, - edgelist_cnts.begin() // FIXME: edgelist_cnts - rename to num_triangles - ); - - auto edge_triangle_count_pair_first = - thrust::make_zip_iterator(edgelist_first, edgelist_cnts.begin()); - - cugraph::edge_bucket_t edges_to_mask(handle); cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + // Attach mask + cur_graph_view.attach_edge_mask(dodg_mask.view()); - size_t prev_chunk_size = 0; // FIXME: Add support for chunking + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view, false); - while (true) { + cugraph::edge_bucket_t edgelist_weak(handle); + cugraph::edge_bucket_t edges_to_decrement_count(handle); - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + size_t prev_chunk_size = 0; // FIXME: Add support for chunking - auto weak_edge_triangle_count_first = - thrust::stable_partition(handle.get_thrust_policy(), - edge_triangle_count_pair_first, - edge_triangle_count_pair_first + edgelist_srcs.size(), - [k] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles >= k - 2; - }); - auto num_weak_edges = static_cast( - thrust::distance(weak_edge_triangle_count_first, - edge_triangle_count_pair_first + edgelist_srcs.size())); + while (true) { + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); - auto num_valid_edges = edgelist_srcs.size() - num_weak_edges; + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + //view_concat(edge_triangle_counts.view(), weak_edges_mask.view()), + // FIXME: Replace by lambda function + extract_weak_edges{k}); + + auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); - auto weak_edgelist_first = edgelist_first + num_valid_edges; - auto weak_edgelist_last = edgelist_first + edgelist_srcs.size(); - // Perform nbr_intersection of the weak edges leveraging the undirected + // Perform nbr_intersection of the weak edges from the undirected // graph view + cur_graph_view.clear_edge_mask(); + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = \ per_v_pair_dst_nbr_intersection( handle, @@ -571,34 +614,530 @@ k_truss(raft::handle_t const& handle, false); // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) - // 'triangles_from_weak_edges' contains the triplet pair as follow (p, q, p, r, q, r) + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + auto triangles_from_weak_edges = allocate_dataframe_buffer>( intersection_indices.size(), handle.get_stream()); - // Extract triangle from weak edges + // Form (p, q) edges + // Extract triangle from weak thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges), - extract_triangles_from_weak_edges{ + extract_triangles_from_weak_edges{ prev_chunk_size, raft::device_span(intersection_offsets.data(), intersection_offsets.size()), raft::device_span(intersection_indices.data(), intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, - num_weak_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_weak_edges), - edgelist_first, - edgelist_first + num_valid_edges, - edgelist_last + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + } + ); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + rmm::device_uvector edge_exists(0, handle.get_stream()); + + // Handling (p, r) edges + + if constexpr (multi_gpu) { + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (p, q) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span( + std::get<2>(triangles_from_weak_edges).data(), + std::get<2>(triangles_from_weak_edges).size()), + raft::device_span( + std::get<3>(triangles_from_weak_edges).data(), + std::get<3>(triangles_from_weak_edges).size()) + raft::device_span( + edge_exists.data(), + edge_exists.size()) + ); + + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto my_rank = handle.get_comms().get_rank(); + + + if (my_rank == 1) { + printf("****initial****\n\n"); + raft::print_device_vector("triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + + + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; + + + rmm::device_uvector cp_edge_srcs_from_triangle(intersection_indices.size(), handle.get_stream()); + rmm::device_uvector cp_edge_dsts_from_triangle(intersection_indices.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).begin(), std::get<3>(triangles_from_weak_edges).begin()), + thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).end(), std::get<3>(triangles_from_weak_edges).end()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin())); + + //std::cout << "part 0" << std::endl; + auto d_tx_counts = cugraph::groupby_and_count( + //thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).begin(), std::get<3>(triangles_from_weak_edges).begin()), + //thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).end(), std::get<3>(triangles_from_weak_edges).end()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.end(), cp_edge_dsts_from_triangle.end()), + [func, comm_size]__device__(auto val) { + //[func, min_comm_size]__device__(auto val) { + //printf("\nval = %d\n", val); + return func(val); //% major_comm_size; + //return func(val) % minor_comm_size; + }, + //major_comm_size, + //minor_comm_size, + comm_size, + std::numeric_limits::max(), + handle.get_stream()); + + //std::cout << "part 1" << std::endl; + + //printf("\ncount size = %d\n", d_tx_counts.size()); + raft::print_device_vector("d_tx_counts", d_tx_counts.data(), d_tx_counts.size(), std::cout); + + std::vector h_tx_counts(d_tx_counts.size()); + + raft::update_host(h_tx_counts.data(), + d_tx_counts.data(), + d_tx_counts.size(), + handle.get_stream()); + + + + + + raft::print_host_vector("h_tx_counts", h_tx_counts.data(), h_tx_counts.size(), std::cout); + + + + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); + std::vector rx_counts{}; + + std::cout << "part 2" << std::endl; + std::cout << "h_tx_counts size = " << h_tx_counts.size() << " copy egdes size " << cp_edge_srcs_from_triangle.size() << " comm size = " << comm_size << std::endl; + + std::tie(srcs, rx_counts) = + shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + std::tie(dsts, std::ignore) = + shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + std::cout << "part 3" << std::endl; + /* + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + */ + + + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(srcs.data(), srcs.size()), + raft::device_span(dsts.data(), dsts.size()) + ); + + std::cout << "part 4" << std::endl; + + std::tie(edge_exists, std::ignore) = + shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); + + std::cout << "part 5" << std::endl; + thrust::sort_by_key( + handle.get_thrust_policy(), + thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.begin(), + cp_edge_dsts_from_triangle.begin()), + thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.end(), + cp_edge_dsts_from_triangle.end()), + edge_exists.begin() // FIXME: edgelist_cnts - rename to num_triangles + ); + std::cout << "part 6" << std::endl; + + + // Update count + /* + if (my_rank == 1) { + printf("\nedge existance before updating\n"); + raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); + } + */ + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + edge_exists.begin(), + [ + //num_unique_triangles, + edge_exists = edge_exists.data(), + edge_p_r_first = thrust::make_zip_iterator( + std::get<2>(triangles_from_weak_edges).begin(), + std::get<3>(triangles_from_weak_edges).begin()), + cp_edge_pr_first = thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.begin(), + cp_edge_dsts_from_triangle.begin()), + cp_edge_pr_last = thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.end(), + cp_edge_dsts_from_triangle.end()), + rank = my_rank + ] __device__(auto idx) { + auto src = thrust::get<0>(edge_p_r_first[idx]); + auto dst = thrust::get<1>(edge_p_r_first[idx]); + + auto itr_pair = thrust::lower_bound( + thrust::seq, cp_edge_pr_first, cp_edge_pr_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(cp_edge_pr_first, itr_pair); + if (rank == 1) { + } + + return edge_exists[idx_pair]; + } + ); + + std::cout << "part 7" << std::endl; + + /* + if (my_rank == 1) { + printf("\nedge existance after updating\n"); + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); + } + */ + + } else { + + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(std::get<2>(triangles_from_weak_edges).data(), intersection_indices.size()), + raft::device_span(std::get<3>(triangles_from_weak_edges).data(), intersection_indices.size()) + ); + + } + //printf("****************p, r*********************\n\n"); + //raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); + + //raft::print_device_vector("edge_exist_1", edge_exists.data(), edge_exists.size(), std::cout); + + // From nbr_intersection on the undirected graph, we know the endpoints (vertices) of the triangles however + // we don't know the edges directions. Since edges of the DODG are directed, we can easily recover the + // direction of the edges with a binary search + + // Match DODG edges + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + thrust::make_zip_iterator( + std::get<2>(triangles_from_weak_edges).begin(), + std::get<3>(triangles_from_weak_edges).begin()), + [ + //num_unique_triangles, + edge_exists = edge_exists.data(), + edge_p_r = thrust::make_zip_iterator( + std::get<2>(triangles_from_weak_edges).begin(), + std::get<3>(triangles_from_weak_edges).begin()) + ] __device__(auto idx) { + auto src = thrust::get<0>(edge_p_r[idx]); + auto dst = thrust::get<1>(edge_p_r[idx]); + + return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); } ); - // Reorder each triangle's edges to match the unique order (p, q), (q, r) and (p, r) + + + + + // Handling (q, r) edges + + //rmm::device_uvector edge_exists_(0, handle.get_stream()); + + + if constexpr (multi_gpu) { + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto my_rank = handle.get_comms().get_rank(); + + + + if (my_rank == 1) { + printf("\nAfter re-ordering (p, r) edges, \n"); + raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + + + + + + + + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; + + rmm::device_uvector cp_edge_srcs_from_triangle(intersection_indices.size(), handle.get_stream()); + rmm::device_uvector cp_edge_dsts_from_triangle(intersection_indices.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).begin(), std::get<5>(triangles_from_weak_edges).begin()), + thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).end(), std::get<5>(triangles_from_weak_edges).end()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin())); + + auto d_tx_counts = cugraph::groupby_and_count( + //thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).begin(), std::get<5>(triangles_from_weak_edges).begin()), + //thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).end(), std::get<5>(triangles_from_weak_edges).end()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin()), + thrust::make_zip_iterator(cp_edge_srcs_from_triangle.end(), cp_edge_dsts_from_triangle.end()), + [func, comm_size]__device__(auto val) { + //[func, min_comm_size]__device__(auto val) { + //printf("\nval = %d\n", val); + return func(val); //% major_comm_size; + //return func(val) % minor_comm_size; + }, + //major_comm_size, + //minor_comm_size, + comm_size, + std::numeric_limits::max(), + handle.get_stream()); + + if (my_rank == 1) { + printf("\nAfter groupby (q, r)\n"); + raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + + + + + //printf("\ncount size = %d\n", d_tx_counts.size()); + //raft::print_device_vector("d_tx_counts", d_tx_counts.data(), d_tx_counts.size(), std::cout); + + std::vector h_tx_counts(d_tx_counts.size()); + + raft::update_host(h_tx_counts.data(), + d_tx_counts.data(), + d_tx_counts.size(), + handle.get_stream()); + + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); + std::vector rx_counts{}; + + std::vector rx_counts_{}; + + std::tie(srcs, rx_counts) = + shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + std::tie(dsts, std::ignore) = + shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); + + + //raft::print_host_vector("rx_counts", rx_counts.data(), rx_counts.size(), std::cout); + //raft::print_host_vector("rx_counts_", rx_counts_.data(), rx_counts_.size(), std::cout); + /* + if (my_rank == 1) { + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + } + */ + + + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(srcs.data(), srcs.size()), + raft::device_span(dsts.data(), dsts.size()) + ); + + /* + if (my_rank == 1) { + raft::print_device_vector("*edge_exists", edge_exists.data(), edge_exists.size(), std::cout); + } + */ + std::tie(edge_exists, std::ignore) = + shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); + + + + + thrust::sort_by_key( + handle.get_thrust_policy(), + thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.begin(), + cp_edge_dsts_from_triangle.begin()), + thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.end(), + cp_edge_dsts_from_triangle.end()), + edge_exists.begin() // FIXME: edgelist_cnts - rename to num_triangles + ); + + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + edge_exists.begin(), + [ + //num_unique_triangles, + edge_exists = edge_exists.data(), + edge_p_r_first = thrust::make_zip_iterator( + std::get<4>(triangles_from_weak_edges).begin(), + std::get<5>(triangles_from_weak_edges).begin()), + cp_edge_pr_first = thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.begin(), + cp_edge_dsts_from_triangle.begin()), + cp_edge_pr_last = thrust::make_zip_iterator( + cp_edge_srcs_from_triangle.end(), + cp_edge_dsts_from_triangle.end()), + rank = my_rank + ] __device__(auto idx) { + auto src = thrust::get<0>(edge_p_r_first[idx]); + auto dst = thrust::get<1>(edge_p_r_first[idx]); + + auto itr_pair = thrust::lower_bound( + thrust::seq, cp_edge_pr_first, cp_edge_pr_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(cp_edge_pr_first, itr_pair); + if (rank == 1) { + } + + return edge_exists[idx_pair]; + } + ); + + /* + if (my_rank == 1) { + printf("****************q, r*********************\n\n"); + raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); + } + */ + + + } else { + + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(std::get<4>(triangles_from_weak_edges).data(), intersection_indices.size()), + raft::device_span(std::get<5>(triangles_from_weak_edges).data(), intersection_indices.size()) + ); + } + /* + if (my_rank == 1) { + printf("****************q, r*********************\n\n"); + raft::print_device_vector("edge_exists_", edge_exists_.data(), edge_exists_.size(), std::cout); + } + */ + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + thrust::make_zip_iterator( + std::get<4>(triangles_from_weak_edges).begin(), + std::get<5>(triangles_from_weak_edges).begin()), + [ + //num_unique_triangles, + edge_exists = edge_exists.data(), + edge_q_r = thrust::make_zip_iterator( + std::get<4>(triangles_from_weak_edges).begin(), + std::get<5>(triangles_from_weak_edges).begin()) + ] __device__(auto idx) { + auto src = thrust::get<0>(edge_q_r[idx]); + auto dst = thrust::get<1>(edge_q_r[idx]); + + return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); + } + ); + + + auto my_rank = handle.get_comms().get_rank(); + if (my_rank == 1) { + printf("\nBefore reordering the triangles and after matching DODG edges\n"); + raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + + + // re-order triangles thrust::transform( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), @@ -624,24 +1163,37 @@ k_truss(raft::handle_t const& handle, thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) ); } - } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) - ); - - } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) + ); + + } else { // Only for debugging purposes. Remove after. + printf("\ninvalid combination\n"); + } + return triangle; } - ); + ); - // Sort and remove duplicated triangles which will lead to overcompensation thrust::sort( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges)); - + + //auto my_rank = handle.get_comms().get_rank(); + if (my_rank == 1) { + printf("\nAfter re-ordering and sorting triangles\n"); + raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + auto unique_triangle_end = thrust::unique( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), @@ -650,11 +1202,170 @@ k_truss(raft::handle_t const& handle, auto num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - + + //auto my_rank = handle.get_comms().get_rank(); + if (my_rank == 1) { + printf("\nAfter reducing triangles\n"); + raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); + raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); + } + + // If multi_gpu, shuffle and reduce once more + + if constexpr (multi_gpu) { + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto my_rank = handle.get_comms().get_rank(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); // FIXME: Make it global variable + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); // FIXME: Create an optional parameter instead that can be populated if running MG + + //rmm::device_uvector<> vertex_gpu_ids(multi_gpu ? vertices.size() : 0, stream); + //rmm::device_uvector vertex_pos(multi_gpu ? vertices.size() : 0, stream); + + auto x_df = + allocate_dataframe_buffer>( + 0, + handle.get_stream()); + + + /* + std::tie(std::ignore, x_df, std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.get_comms(), + //intersection_indices.begin(), + //intersection_indices.end(), + + thrust::make_zip_iterator( + std::get<0>(triangles_from_weak_edges).begin(), + std::get<1>(triangles_from_weak_edges).begin()), + thrust::make_zip_iterator( + std::get<0>(triangles_from_weak_edges).end(), + std::get<1>(triangles_from_weak_edges).end()), + + get_dataframe_buffer_begin(triangles_from_weak_edges), + //intersection_indices.begin(), + //local_counts.begin(), + cugraph::detail::compute_gpu_id_from_int_vertex_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + major_comm_size, + minor_comm_size}, + handle.get_stream()); + */ + + + rmm::device_uvector v1(0, handle.get_stream()); + rmm::device_uvector v2(0, handle.get_stream()); + rmm::device_uvector v3(0, handle.get_stream()); + //std::tie(std::ignore, x_df, std::ignore) = + //std::tie(x_df, std::ignore) = + + + + std::tie(triangles_from_weak_edges, std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, + + handle.get_stream() + ); + + + if (my_rank == 1) { + auto my_rank = handle.get_comms().get_rank(); + printf("\nAfter shufling triangles\n"); + raft::print_device_vector("fin_triangle_srcs", std::get<0>(x_df).data(), std::get<0>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts", std::get<1>(x_df).data(), std::get<1>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_srcs_pr", std::get<2>(x_df).data(), std::get<2>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts_pr", std::get<3>(x_df).data(), std::get<3>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_srcs_qr", std::get<4>(x_df).data(), std::get<4>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts_qr", std::get<5>(x_df).data(), std::get<5>(x_df).size(), std::cout); + } + + + unique_triangle_end = thrust::unique( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + + + if (my_rank == 1) { + auto my_rank = handle.get_comms().get_rank(); + printf("\nAfter reducing triangles once more\n"); + raft::print_device_vector("fin_triangle_srcs", std::get<0>(x_df).data(), std::get<0>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts", std::get<1>(x_df).data(), std::get<1>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_srcs_pr", std::get<2>(x_df).data(), std::get<2>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts_pr", std::get<3>(x_df).data(), std::get<3>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_srcs_qr", std::get<4>(x_df).data(), std::get<4>(x_df).size(), std::cout); + raft::print_device_vector("fin_triangle_dsts_qr", std::get<5>(x_df).data(), std::get<5>(x_df).size(), std::cout); + } + + // Sort and Reduce once more + + /* working + std::forward_as_tuple( + std::tie(v1, v2, v3), std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(triangles_from_weak_edges).begin(), + std::get<1>(triangles_from_weak_edges).begin(), + std::get<3>(triangles_from_weak_edges).begin())), + + thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(triangles_from_weak_edges).end(), + std::get<1>(triangles_from_weak_edges).end(), + std::get<3>(triangles_from_weak_edges).end())), + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, + + handle.get_stream() + ); + */ + + + + } + auto edgelist_to_update_count = allocate_dataframe_buffer>(3* num_unique_triangles, - handle.get_stream()); - // Flatten the triangles into an edgelist + handle.get_stream()); + + thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(0), @@ -699,6 +1410,68 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(edgelist_to_update_count), get_dataframe_buffer_end(edgelist_to_update_count)); + + // If multi-GPU, shuffle and reduce + if constexpr (multi_gpu) { + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); // FIXME: Make it global variable + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); // FIXME: Create an optional parameter instead that can be populated if running MG + + auto my_rank = handle.get_comms().get_rank(); + + std::tie(edgelist_to_update_count, std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, + + handle.get_stream() + ); + + if (my_rank == 1) { + printf("\nbefore sorting\n"); + raft::print_device_vector("edgelist_to_update_count_srcs", std::get<0>(edgelist_to_update_count).data(), std::get<0>(edgelist_to_update_count).size(), std::cout); + raft::print_device_vector("edgelist_to_update_count_dsts", std::get<1>(edgelist_to_update_count).data(), std::get<1>(edgelist_to_update_count).size(), std::cout); + } + } + + thrust::sort( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + + if (my_rank == 1) { + printf("\nafter sorting\n"); + raft::print_device_vector("edgelist_to_update_count_srcs", std::get<0>(edgelist_to_update_count).data(), std::get<0>(edgelist_to_update_count).size(), std::cout); + raft::print_device_vector("edgelist_to_update_count_dsts", std::get<1>(edgelist_to_update_count).data(), std::get<1>(edgelist_to_update_count).size(), std::cout); + } + + unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( unique_pair_count, handle.get_stream()); @@ -711,42 +1484,74 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - - // Update the triangle count of edges in the DODG edgelist - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(unique_pair_count), - [ - vertex_pair_buffer_unique = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - decrease_count = decrease_count.begin(), - edgelist_cnts = edgelist_cnts.begin(), - edgelist_first, - weak_edgelist_first = edgelist_first + num_valid_edges, - edgelist_last, - num_valid_edges - ] __device__(auto i) { - // Check in the valid edge range - auto itr_pair = thrust::lower_bound( - thrust::seq, edgelist_first, weak_edgelist_first, vertex_pair_buffer_unique[i]); - - // Update counts of valid edges only since weak edges will be deleted anyways - if ((itr_pair != weak_edgelist_first) && *itr_pair == *(vertex_pair_buffer_unique + i)) { - auto idx = thrust::distance(edgelist_first, itr_pair); - edgelist_cnts[idx] -= decrease_count[i]; - } - } - ); - - edges_to_mask.clear(); - edges_to_mask.insert(edgelist_srcs.begin() + num_valid_edges, - edgelist_srcs.end(), - edgelist_dsts.begin() + num_valid_edges); + + if (my_rank == 1) { + printf("\nafter reducing and count\n"); + raft::print_device_vector("vertex_pair_buffer_unique_srcs", std::get<0>(vertex_pair_buffer_unique).data(), std::get<0>(vertex_pair_buffer_unique).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_unique_dsts", std::get<1>(vertex_pair_buffer_unique).data(), std::get<1>(vertex_pair_buffer_unique).size(), std::cout); + raft::print_device_vector("decrease_count", decrease_count.data(), decrease_count.size(), std::cout); + } + - // Remove weak edges in both direction from the undirected graph view + // Update count of weak edges + + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [ + edge_buffer_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span(decrease_count.data(), decrease_count.size()) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + + auto itr_pair = thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + + count -= decrease_count[idx_pair]; + + return count; + + }, + edge_triangle_counts.mutable_view(), + false); // FIXME: set expensive check to False + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()) + ); + + edgelist_weak.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin()); + + // Get undirected graph view + + cur_graph_view.clear_edge_mask(); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); // FIXME: rename 'prev_number_of_edges' to 'cur_number_of_edges' ? + cugraph::transform_e( handle, cur_graph_view, - edges_to_mask, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), @@ -757,21 +1562,21 @@ k_truss(raft::handle_t const& handle, weak_edges_mask.mutable_view(), false); - edges_to_mask.clear(); + edgelist_weak.clear(); thrust::sort( handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, edgelist_srcs.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end()) + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end()) ); - edges_to_mask.insert(edgelist_dsts.begin() + num_valid_edges, - edgelist_dsts.end(), - edgelist_srcs.begin() + num_valid_edges); + edgelist_weak.insert(weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + weak_edgelist_srcs.begin()); cugraph::transform_e( handle, cur_graph_view, - edges_to_mask, + edgelist_weak, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), @@ -781,42 +1586,56 @@ k_truss(raft::handle_t const& handle, }, weak_edges_mask.mutable_view(), false); - - edgelist_srcs.resize(num_valid_edges, handle.get_stream()); - edgelist_dsts.resize(num_valid_edges, handle.get_stream()); - edgelist_cnts.resize(num_valid_edges, handle.get_stream()); - - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); - edgelist_last = thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } - + } + + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + //cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + true); + + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; + - //#if 0 std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, cur_graph_view, - //std::optional>{std::nullopt}, - edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, // support edgeweights + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - //std::optional>{std::nullopt} - std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size())) // Update renumbering if it exist. + renumber_map + ? std::make_optional( + raft::device_span((*renumber_map).data(), (*renumber_map).size())): // Update renumbering if it exist. + std::nullopt ); - - /* + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); - */ + + std::cout << "strong edgelist srcs = " << edgelist_srcs.size() << std::endl; return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); From 9ad6bfda2800589067929467fe420b08ace3b85e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 08:32:25 -0800 Subject: [PATCH 07/13] remove debug print statement --- cpp/src/community/k_truss_impl.cuh | 864 ++++------------------------- 1 file changed, 97 insertions(+), 767 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index e7a6d51c3b1..84aebc00bb1 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -46,13 +46,13 @@ using namespace std::chrono; namespace cugraph { -//template -template +template void order_edge_based_on_dodg( raft::handle_t const& handle, - GraphViewType const& graph_view, + graph_view_t & graph_view, raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts) + raft::device_span edgelist_dsts + ) { // FIXME: Use global comm for debugging purposes @@ -114,21 +114,26 @@ void order_edge_based_on_dodg( std::vector rx_counts{}; std::tie(srcs, rx_counts) = - shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); + shuffle_values(handle.get_comms(), edgelist_srcs.begin(), h_tx_counts, handle.get_stream()); std::tie(dsts, std::ignore) = - shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); + shuffle_values(handle.get_comms(), edgelist_dsts.begin(), h_tx_counts, handle.get_stream()); + + //rmm::device_uvector edge_exists(0, handle.get_stream()); + - edge_exists = graph_view.has_edge( + auto edge_exists = graph_view.has_edge( handle, raft::device_span(srcs.data(), srcs.size()), raft::device_span(dsts.data(), dsts.size()) ); + // Send the result back std::tie(edge_exists, std::ignore) = shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); - + // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where group, + // hoever it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. thrust::sort_by_key( handle.get_thrust_policy(), thrust::make_zip_iterator( @@ -145,11 +150,10 @@ void order_edge_based_on_dodg( thrust::make_counting_iterator(edgelist_srcs.size()), edge_exists.begin(), [ - //num_unique_triangles, edge_exists = edge_exists.data(), edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), cp_edgelist_first = thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), - cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()), + cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()) ] __device__(auto idx) { auto src = thrust::get<0>(edgelist_first[idx]); auto dst = thrust::get<1>(edgelist_first[idx]); @@ -162,6 +166,23 @@ void order_edge_based_on_dodg( return edge_exists[idx_pair]; } ); + + // Match DODG edges + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(edgelist_srcs.size()), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + [ + edge_exists = edge_exists.data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()) + ] __device__(auto idx) { + auto src = thrust::get<0>(edgelist_first[idx]); + auto dst = thrust::get<1>(edgelist_first[idx]); + + return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); + } + ); } @@ -171,52 +192,13 @@ struct extract_weak_edges { __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - //auto count_ = thrust::get<0>(count); + return ((count < k - 2) && (count > 0)) - //return count < k - 2 // FIXME: might be faster to skip edges with count = 0 ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } }; - -template -struct extract_edges { // FIXME: ******************************Remove this functor. For testing purposes only******************* - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const - { - return thrust::make_tuple(src, dst, count); - } -}; - - -template -struct extract_edges_ { // FIXME: ******************************Remove this functor. For testing purposes only******************* - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const - { - return thrust::make_tuple(src, dst); - } -}; - - - -template -struct extract_masked_edges { // FIXME: ******************************Remove this functor. For testing purposes only******************* - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto mask) const - { - return mask == 0 - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; - } -}; - - template struct extract_triangles_from_weak_edges { size_t chunk_start{}; @@ -248,68 +230,6 @@ struct extract_triangles_from_weak_edges { } }; - -template -struct generate_p_q { - size_t chunk_start{}; - raft::device_span intersection_offsets{}; - raft::device_span intersection_indices{}; - raft::device_span weak_srcs{}; - raft::device_span weak_dsts{}; - - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::upper_bound( - thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - - return thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); - } -}; - -template -struct generate_p_r_or_q_r_from_p_q { - size_t chunk_start{}; - raft::device_span intersection_offsets{}; - raft::device_span intersection_indices{}; - raft::device_span weak_srcs{}; - raft::device_span weak_dsts{}; - EdgeIterator edgelist_first{}; - EdgeIterator weak_edgelist_first{}; - EdgeIterator edgelist_last{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::upper_bound( - thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - - auto edge = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - - if constexpr (generate_p_r) { - edge = thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); - } - - // Check in the valid edge range - auto has_edge = thrust::binary_search( - thrust::seq, edgelist_first, weak_edgelist_first, edge); - - if (!has_edge) { // FIXME: Do binary search instead - // Search in the weak edge partition. - has_edge = thrust::binary_search( - thrust::seq, weak_edgelist_first, edgelist_last, edge); - - if (!has_edge) { // FIXME: Do binary search instead - edge = thrust::make_tuple(thrust::get<1>(edge), thrust::get<0>(edge)); // Edge must be in the other direction - } - } - - - return edge; - } -}; - namespace { template @@ -590,21 +510,20 @@ k_truss(raft::handle_t const& handle, edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), - //view_concat(edge_triangle_counts.view(), weak_edges_mask.view()), - // FIXME: Replace by lambda function extract_weak_edges{k}); - auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - auto weak_edgelist_last = thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); - + auto weak_edgelist_first = thrust::make_zip_iterator( + weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = thrust::make_zip_iterator( + weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); // Perform nbr_intersection of the weak edges from the undirected // graph view cur_graph_view.clear_edge_mask(); + // Attach the weak edge mask cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - auto [intersection_offsets, intersection_indices] = \ per_v_pair_dst_nbr_intersection( handle, @@ -646,12 +565,11 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector edge_exists(0, handle.get_stream()); // Handling (p, r) edges - if constexpr (multi_gpu) { // (p, q) edges are owned by the current GPU while (p, r) and (q, r) // can be owned by different GPUs - // Ordering (p, q) edges based on the DODG - order_edge_based_on_dodg( + // Ordering (p, r) edges based on the DODG + order_edge_based_on_dodg( handle, cur_graph_view, raft::device_span( @@ -660,484 +578,48 @@ k_truss(raft::handle_t const& handle, raft::device_span( std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size()) - raft::device_span( - edge_exists.data(), - edge_exists.size()) - ); - - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto my_rank = handle.get_comms().get_rank(); - - - if (my_rank == 1) { - printf("****initial****\n\n"); - raft::print_device_vector("triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } - - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; - - - rmm::device_uvector cp_edge_srcs_from_triangle(intersection_indices.size(), handle.get_stream()); - rmm::device_uvector cp_edge_dsts_from_triangle(intersection_indices.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).begin(), std::get<3>(triangles_from_weak_edges).begin()), - thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).end(), std::get<3>(triangles_from_weak_edges).end()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin())); - - //std::cout << "part 0" << std::endl; - auto d_tx_counts = cugraph::groupby_and_count( - //thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).begin(), std::get<3>(triangles_from_weak_edges).begin()), - //thrust::make_zip_iterator(std::get<2>(triangles_from_weak_edges).end(), std::get<3>(triangles_from_weak_edges).end()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.end(), cp_edge_dsts_from_triangle.end()), - [func, comm_size]__device__(auto val) { - //[func, min_comm_size]__device__(auto val) { - //printf("\nval = %d\n", val); - return func(val); //% major_comm_size; - //return func(val) % minor_comm_size; - }, - //major_comm_size, - //minor_comm_size, - comm_size, - std::numeric_limits::max(), - handle.get_stream()); - - //std::cout << "part 1" << std::endl; - - //printf("\ncount size = %d\n", d_tx_counts.size()); - raft::print_device_vector("d_tx_counts", d_tx_counts.data(), d_tx_counts.size(), std::cout); - - std::vector h_tx_counts(d_tx_counts.size()); - - raft::update_host(h_tx_counts.data(), - d_tx_counts.data(), - d_tx_counts.size(), - handle.get_stream()); - - - - - - raft::print_host_vector("h_tx_counts", h_tx_counts.data(), h_tx_counts.size(), std::cout); - - - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - std::vector rx_counts{}; - - std::cout << "part 2" << std::endl; - std::cout << "h_tx_counts size = " << h_tx_counts.size() << " copy egdes size " << cp_edge_srcs_from_triangle.size() << " comm size = " << comm_size << std::endl; - - std::tie(srcs, rx_counts) = - shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); - - std::tie(dsts, std::ignore) = - shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); - - std::cout << "part 3" << std::endl; - /* - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - */ - - - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span(srcs.data(), srcs.size()), - raft::device_span(dsts.data(), dsts.size()) - ); - - std::cout << "part 4" << std::endl; - - std::tie(edge_exists, std::ignore) = - shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); - - std::cout << "part 5" << std::endl; - thrust::sort_by_key( - handle.get_thrust_policy(), - thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.begin(), - cp_edge_dsts_from_triangle.begin()), - thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.end(), - cp_edge_dsts_from_triangle.end()), - edge_exists.begin() // FIXME: edgelist_cnts - rename to num_triangles - ); - std::cout << "part 6" << std::endl; - - - // Update count - /* - if (my_rank == 1) { - printf("\nedge existance before updating\n"); - raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); - } - */ - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - edge_exists.begin(), - [ - //num_unique_triangles, - edge_exists = edge_exists.data(), - edge_p_r_first = thrust::make_zip_iterator( - std::get<2>(triangles_from_weak_edges).begin(), - std::get<3>(triangles_from_weak_edges).begin()), - cp_edge_pr_first = thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.begin(), - cp_edge_dsts_from_triangle.begin()), - cp_edge_pr_last = thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.end(), - cp_edge_dsts_from_triangle.end()), - rank = my_rank - ] __device__(auto idx) { - auto src = thrust::get<0>(edge_p_r_first[idx]); - auto dst = thrust::get<1>(edge_p_r_first[idx]); - - auto itr_pair = thrust::lower_bound( - thrust::seq, cp_edge_pr_first, cp_edge_pr_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(cp_edge_pr_first, itr_pair); - if (rank == 1) { - } - - return edge_exists[idx_pair]; - } ); - std::cout << "part 7" << std::endl; - - /* - if (my_rank == 1) { - printf("\nedge existance after updating\n"); - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); - } - */ - } else { - edge_exists = cur_graph_view.has_edge( handle, - raft::device_span(std::get<2>(triangles_from_weak_edges).data(), intersection_indices.size()), - raft::device_span(std::get<3>(triangles_from_weak_edges).data(), intersection_indices.size()) + raft::device_span( + std::get<2>(triangles_from_weak_edges).data(), intersection_indices.size()), + raft::device_span( + std::get<3>(triangles_from_weak_edges).data(), intersection_indices.size()) ); - } - //printf("****************p, r*********************\n\n"); - //raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); - - //raft::print_device_vector("edge_exist_1", edge_exists.data(), edge_exists.size(), std::cout); - - // From nbr_intersection on the undirected graph, we know the endpoints (vertices) of the triangles however - // we don't know the edges directions. Since edges of the DODG are directed, we can easily recover the - // direction of the edges with a binary search - - // Match DODG edges - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - thrust::make_zip_iterator( - std::get<2>(triangles_from_weak_edges).begin(), - std::get<3>(triangles_from_weak_edges).begin()), - [ - //num_unique_triangles, - edge_exists = edge_exists.data(), - edge_p_r = thrust::make_zip_iterator( - std::get<2>(triangles_from_weak_edges).begin(), - std::get<3>(triangles_from_weak_edges).begin()) - ] __device__(auto idx) { - auto src = thrust::get<0>(edge_p_r[idx]); - auto dst = thrust::get<1>(edge_p_r[idx]); - - return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); - } - ); - - - - // Handling (q, r) edges - //rmm::device_uvector edge_exists_(0, handle.get_stream()); - - if constexpr (multi_gpu) { - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto my_rank = handle.get_comms().get_rank(); - - - - if (my_rank == 1) { - printf("\nAfter re-ordering (p, r) edges, \n"); - raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } - - - - - - - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; - - rmm::device_uvector cp_edge_srcs_from_triangle(intersection_indices.size(), handle.get_stream()); - rmm::device_uvector cp_edge_dsts_from_triangle(intersection_indices.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).begin(), std::get<5>(triangles_from_weak_edges).begin()), - thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).end(), std::get<5>(triangles_from_weak_edges).end()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin())); - - auto d_tx_counts = cugraph::groupby_and_count( - //thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).begin(), std::get<5>(triangles_from_weak_edges).begin()), - //thrust::make_zip_iterator(std::get<4>(triangles_from_weak_edges).end(), std::get<5>(triangles_from_weak_edges).end()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.begin(), cp_edge_dsts_from_triangle.begin()), - thrust::make_zip_iterator(cp_edge_srcs_from_triangle.end(), cp_edge_dsts_from_triangle.end()), - [func, comm_size]__device__(auto val) { - //[func, min_comm_size]__device__(auto val) { - //printf("\nval = %d\n", val); - return func(val); //% major_comm_size; - //return func(val) % minor_comm_size; - }, - //major_comm_size, - //minor_comm_size, - comm_size, - std::numeric_limits::max(), - handle.get_stream()); - - if (my_rank == 1) { - printf("\nAfter groupby (q, r)\n"); - raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } - - - - - //printf("\ncount size = %d\n", d_tx_counts.size()); - //raft::print_device_vector("d_tx_counts", d_tx_counts.data(), d_tx_counts.size(), std::cout); - - std::vector h_tx_counts(d_tx_counts.size()); - - raft::update_host(h_tx_counts.data(), - d_tx_counts.data(), - d_tx_counts.size(), - handle.get_stream()); - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - std::vector rx_counts{}; - - std::vector rx_counts_{}; - - std::tie(srcs, rx_counts) = - shuffle_values(handle.get_comms(), cp_edge_srcs_from_triangle.begin(), h_tx_counts, handle.get_stream()); - - std::tie(dsts, std::ignore) = - shuffle_values(handle.get_comms(), cp_edge_dsts_from_triangle.begin(), h_tx_counts, handle.get_stream()); - - - //raft::print_host_vector("rx_counts", rx_counts.data(), rx_counts.size(), std::cout); - //raft::print_host_vector("rx_counts_", rx_counts_.data(), rx_counts_.size(), std::cout); - /* - if (my_rank == 1) { - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - } - */ - - - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span(srcs.data(), srcs.size()), - raft::device_span(dsts.data(), dsts.size()) - ); - - /* - if (my_rank == 1) { - raft::print_device_vector("*edge_exists", edge_exists.data(), edge_exists.size(), std::cout); - } - */ - std::tie(edge_exists, std::ignore) = - shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); - - - - - thrust::sort_by_key( - handle.get_thrust_policy(), - thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.begin(), - cp_edge_dsts_from_triangle.begin()), - thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.end(), - cp_edge_dsts_from_triangle.end()), - edge_exists.begin() // FIXME: edgelist_cnts - rename to num_triangles + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (q, r) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span( + std::get<4>(triangles_from_weak_edges).data(), + std::get<4>(triangles_from_weak_edges).size()), + raft::device_span( + std::get<5>(triangles_from_weak_edges).data(), + std::get<5>(triangles_from_weak_edges).size()) ); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - edge_exists.begin(), - [ - //num_unique_triangles, - edge_exists = edge_exists.data(), - edge_p_r_first = thrust::make_zip_iterator( - std::get<4>(triangles_from_weak_edges).begin(), - std::get<5>(triangles_from_weak_edges).begin()), - cp_edge_pr_first = thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.begin(), - cp_edge_dsts_from_triangle.begin()), - cp_edge_pr_last = thrust::make_zip_iterator( - cp_edge_srcs_from_triangle.end(), - cp_edge_dsts_from_triangle.end()), - rank = my_rank - ] __device__(auto idx) { - auto src = thrust::get<0>(edge_p_r_first[idx]); - auto dst = thrust::get<1>(edge_p_r_first[idx]); - - auto itr_pair = thrust::lower_bound( - thrust::seq, cp_edge_pr_first, cp_edge_pr_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(cp_edge_pr_first, itr_pair); - if (rank == 1) { - } - - return edge_exists[idx_pair]; - } - ); - - /* - if (my_rank == 1) { - printf("****************q, r*********************\n\n"); - raft::print_device_vector("edge_exists", edge_exists.data(), edge_exists.size(), std::cout); - } - */ - - } else { - edge_exists = cur_graph_view.has_edge( handle, - raft::device_span(std::get<4>(triangles_from_weak_edges).data(), intersection_indices.size()), - raft::device_span(std::get<5>(triangles_from_weak_edges).data(), intersection_indices.size()) + raft::device_span( + std::get<4>(triangles_from_weak_edges).data(), intersection_indices.size()), + raft::device_span( + std::get<5>(triangles_from_weak_edges).data(), intersection_indices.size()) ); } - /* - if (my_rank == 1) { - printf("****************q, r*********************\n\n"); - raft::print_device_vector("edge_exists_", edge_exists_.data(), edge_exists_.size(), std::cout); - } - */ - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - thrust::make_zip_iterator( - std::get<4>(triangles_from_weak_edges).begin(), - std::get<5>(triangles_from_weak_edges).begin()), - [ - //num_unique_triangles, - edge_exists = edge_exists.data(), - edge_q_r = thrust::make_zip_iterator( - std::get<4>(triangles_from_weak_edges).begin(), - std::get<5>(triangles_from_weak_edges).begin()) - ] __device__(auto idx) { - auto src = thrust::get<0>(edge_q_r[idx]); - auto dst = thrust::get<1>(edge_q_r[idx]); - - return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); - } - ); - - - auto my_rank = handle.get_comms().get_rank(); - if (my_rank == 1) { - printf("\nBefore reordering the triangles and after matching DODG edges\n"); - raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } - // re-order triangles + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) thrust::transform( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), @@ -1170,8 +652,6 @@ k_truss(raft::handle_t const& handle, thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) ); - } else { // Only for debugging purposes. Remove after. - printf("\ninvalid combination\n"); } return triangle; @@ -1182,17 +662,6 @@ k_truss(raft::handle_t const& handle, handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges)); - - //auto my_rank = handle.get_comms().get_rank(); - if (my_rank == 1) { - printf("\nAfter re-ordering and sorting triangles\n"); - raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } auto unique_triangle_end = thrust::unique( handle.get_thrust_policy(), @@ -1203,19 +672,6 @@ k_truss(raft::handle_t const& handle, resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - //auto my_rank = handle.get_comms().get_rank(); - if (my_rank == 1) { - printf("\nAfter reducing triangles\n"); - raft::print_device_vector("*triangle_srcs", std::get<0>(triangles_from_weak_edges).data(), std::get<0>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts", std::get<1>(triangles_from_weak_edges).data(), std::get<1>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_pr", std::get<2>(triangles_from_weak_edges).data(), std::get<2>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_pr", std::get<3>(triangles_from_weak_edges).data(), std::get<3>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_srcs_qr", std::get<4>(triangles_from_weak_edges).data(), std::get<4>(triangles_from_weak_edges).size(), std::cout); - raft::print_device_vector("*triangle_dsts_qr", std::get<5>(triangles_from_weak_edges).data(), std::get<5>(triangles_from_weak_edges).size(), std::cout); - } - - // If multi_gpu, shuffle and reduce once more - if constexpr (multi_gpu) { auto& comm = handle.get_comms(); @@ -1224,140 +680,43 @@ k_truss(raft::handle_t const& handle, auto const major_comm_size = major_comm.get_size(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - - auto my_rank = handle.get_comms().get_rank(); - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); // FIXME: Make it global variable + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), vertex_partition_range_lasts.data(), vertex_partition_range_lasts.size(), - handle.get_stream()); // FIXME: Create an optional parameter instead that can be populated if running MG - - //rmm::device_uvector<> vertex_gpu_ids(multi_gpu ? vertices.size() : 0, stream); - //rmm::device_uvector vertex_pos(multi_gpu ? vertices.size() : 0, stream); - - auto x_df = - allocate_dataframe_buffer>( - 0, - handle.get_stream()); - + handle.get_stream()); - /* - std::tie(std::ignore, x_df, std::ignore) = - groupby_gpu_id_and_shuffle_kv_pairs( - handle.get_comms(), - //intersection_indices.begin(), - //intersection_indices.end(), - - thrust::make_zip_iterator( - std::get<0>(triangles_from_weak_edges).begin(), - std::get<1>(triangles_from_weak_edges).begin()), - thrust::make_zip_iterator( - std::get<0>(triangles_from_weak_edges).end(), - std::get<1>(triangles_from_weak_edges).end()), - - get_dataframe_buffer_begin(triangles_from_weak_edges), - //intersection_indices.begin(), - //local_counts.begin(), - cugraph::detail::compute_gpu_id_from_int_vertex_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - major_comm_size, - minor_comm_size}, - handle.get_stream()); - */ - - - rmm::device_uvector v1(0, handle.get_stream()); - rmm::device_uvector v2(0, handle.get_stream()); - rmm::device_uvector v3(0, handle.get_stream()); - //std::tie(std::ignore, x_df, std::ignore) = - //std::tie(x_df, std::ignore) = - - - - std::tie(triangles_from_weak_edges, std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), + // FIXME: put the redundant code above in a function + std::tie(triangles_from_weak_edges, std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - - - if (my_rank == 1) { - auto my_rank = handle.get_comms().get_rank(); - printf("\nAfter shufling triangles\n"); - raft::print_device_vector("fin_triangle_srcs", std::get<0>(x_df).data(), std::get<0>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts", std::get<1>(x_df).data(), std::get<1>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_srcs_pr", std::get<2>(x_df).data(), std::get<2>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts_pr", std::get<3>(x_df).data(), std::get<3>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_srcs_qr", std::get<4>(x_df).data(), std::get<4>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts_qr", std::get<5>(x_df).data(), std::get<5>(x_df).size(), std::cout); - } - - - unique_triangle_end = thrust::unique( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - - if (my_rank == 1) { - auto my_rank = handle.get_comms().get_rank(); - printf("\nAfter reducing triangles once more\n"); - raft::print_device_vector("fin_triangle_srcs", std::get<0>(x_df).data(), std::get<0>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts", std::get<1>(x_df).data(), std::get<1>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_srcs_pr", std::get<2>(x_df).data(), std::get<2>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts_pr", std::get<3>(x_df).data(), std::get<3>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_srcs_qr", std::get<4>(x_df).data(), std::get<4>(x_df).size(), std::cout); - raft::print_device_vector("fin_triangle_dsts_qr", std::get<5>(x_df).data(), std::get<5>(x_df).size(), std::cout); - } + handle.get_stream() + ); - // Sort and Reduce once more + unique_triangle_end = thrust::unique( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); - /* working - std::forward_as_tuple( - std::tie(v1, v2, v3), std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - - thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(triangles_from_weak_edges).begin(), - std::get<1>(triangles_from_weak_edges).begin(), - std::get<3>(triangles_from_weak_edges).begin())), - - thrust::make_zip_iterator(thrust::make_tuple(std::get<0>(triangles_from_weak_edges).end(), - std::get<1>(triangles_from_weak_edges).end(), - std::get<3>(triangles_from_weak_edges).end())), - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - */ - - + num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); } @@ -1365,7 +724,7 @@ k_truss(raft::handle_t const& handle, allocate_dataframe_buffer>(3* num_unique_triangles, handle.get_stream()); - + // Flatten the triangle to a list of egdes. thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(0), @@ -1378,7 +737,6 @@ k_truss(raft::handle_t const& handle, auto idx_triangle = idx % num_unique_triangles; auto idx_vertex_in_triangle = idx / num_unique_triangles; auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); - vertex_t src; vertex_t dst; @@ -1406,11 +764,11 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(edgelist_to_update_count), get_dataframe_buffer_end(edgelist_to_update_count)); - auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); + auto unique_pair_count = thrust::unique_count( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); - // If multi-GPU, shuffle and reduce if constexpr (multi_gpu) { @@ -1421,14 +779,14 @@ k_truss(raft::handle_t const& handle, auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); // FIXME: Make it global variable + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), handle.get_stream()); raft::update_device(d_vertex_partition_range_lasts.data(), vertex_partition_range_lasts.data(), vertex_partition_range_lasts.size(), - handle.get_stream()); // FIXME: Create an optional parameter instead that can be populated if running MG + handle.get_stream()); auto my_rank = handle.get_comms().get_rank(); @@ -1448,12 +806,6 @@ k_truss(raft::handle_t const& handle, handle.get_stream() ); - - if (my_rank == 1) { - printf("\nbefore sorting\n"); - raft::print_device_vector("edgelist_to_update_count_srcs", std::get<0>(edgelist_to_update_count).data(), std::get<0>(edgelist_to_update_count).size(), std::cout); - raft::print_device_vector("edgelist_to_update_count_dsts", std::get<1>(edgelist_to_update_count).data(), std::get<1>(edgelist_to_update_count).size(), std::cout); - } } thrust::sort( @@ -1461,13 +813,6 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(edgelist_to_update_count), get_dataframe_buffer_end(edgelist_to_update_count)); - - if (my_rank == 1) { - printf("\nafter sorting\n"); - raft::print_device_vector("edgelist_to_update_count_srcs", std::get<0>(edgelist_to_update_count).data(), std::get<0>(edgelist_to_update_count).size(), std::cout); - raft::print_device_vector("edgelist_to_update_count_dsts", std::get<1>(edgelist_to_update_count).data(), std::get<1>(edgelist_to_update_count).size(), std::cout); - } - unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), get_dataframe_buffer_begin(edgelist_to_update_count), get_dataframe_buffer_end(edgelist_to_update_count)); @@ -1484,17 +829,8 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - - if (my_rank == 1) { - printf("\nafter reducing and count\n"); - raft::print_device_vector("vertex_pair_buffer_unique_srcs", std::get<0>(vertex_pair_buffer_unique).data(), std::get<0>(vertex_pair_buffer_unique).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_unique_dsts", std::get<1>(vertex_pair_buffer_unique).data(), std::get<1>(vertex_pair_buffer_unique).size(), std::cout); - raft::print_device_vector("decrease_count", decrease_count.data(), decrease_count.size(), std::cout); - } - // Update count of weak edges - edges_to_decrement_count.clear(); edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), @@ -1526,7 +862,7 @@ k_truss(raft::handle_t const& handle, }, edge_triangle_counts.mutable_view(), - false); // FIXME: set expensive check to False + false); edgelist_weak.clear(); @@ -1541,12 +877,10 @@ k_truss(raft::handle_t const& handle, weak_edgelist_dsts.begin()); // Get undirected graph view - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); // FIXME: rename 'prev_number_of_edges' to 'cur_number_of_edges' ? + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); cugraph::transform_e( handle, @@ -1593,7 +927,6 @@ k_truss(raft::handle_t const& handle, } - cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); @@ -1603,7 +936,6 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), - //cugraph::edge_dummy_property_t{}.view(), [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { return count == 0 ? false : true; }, @@ -1624,7 +956,7 @@ k_truss(raft::handle_t const& handle, std::optional>{std::nullopt}, renumber_map ? std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size())): // Update renumbering if it exist. + raft::device_span((*renumber_map).data(), (*renumber_map).size())): std::nullopt ); @@ -1634,8 +966,6 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_dsts), std::move(edgelist_wgts), false); - - std::cout << "strong edgelist srcs = " << edgelist_srcs.size() << std::endl; return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); From b515cd9bdc28990f4b673a581327cc0042c680b6 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 08:33:22 -0800 Subject: [PATCH 08/13] fix style --- cpp/src/community/k_truss_impl.cuh | 1067 +++++++++++++--------------- 1 file changed, 506 insertions(+), 561 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 84aebc00bb1..77c858a2020 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -17,9 +17,9 @@ #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" -#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -41,18 +41,17 @@ #include #include #include + #include using namespace std::chrono; namespace cugraph { template -void order_edge_based_on_dodg( - raft::handle_t const& handle, - graph_view_t & graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts - ) +void order_edge_based_on_dodg(raft::handle_t const& handle, + graph_view_t& graph_view, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts) { // FIXME: Use global comm for debugging purposes @@ -67,123 +66,105 @@ void order_edge_based_on_dodg( auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); + handle.get_stream()); raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; rmm::device_uvector cp_edgelist_srcs(edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector cp_edgelist_dsts(edgelist_srcs.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_srcs.begin()), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_dsts.end()), - thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin())); + thrust::copy(handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_srcs.begin()), + thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_dsts.end()), + thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin())); auto d_tx_counts = cugraph::groupby_and_count( thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), - [func]__device__(auto val) { - return func(val); - }, - comm_size, - std::numeric_limits::max(), - handle.get_stream()); - + [func] __device__(auto val) { return func(val); }, + comm_size, + std::numeric_limits::max(), + handle.get_stream()); std::vector h_tx_counts(d_tx_counts.size()); - raft::update_host(h_tx_counts.data(), - d_tx_counts.data(), - d_tx_counts.size(), - handle.get_stream()); - + raft::update_host( + h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + rmm::device_uvector srcs(0, handle.get_stream()); rmm::device_uvector dsts(0, handle.get_stream()); std::vector rx_counts{}; std::tie(srcs, rx_counts) = shuffle_values(handle.get_comms(), edgelist_srcs.begin(), h_tx_counts, handle.get_stream()); - + std::tie(dsts, std::ignore) = shuffle_values(handle.get_comms(), edgelist_dsts.begin(), h_tx_counts, handle.get_stream()); - - //rmm::device_uvector edge_exists(0, handle.get_stream()); + // rmm::device_uvector edge_exists(0, handle.get_stream()); + + auto edge_exists = + graph_view.has_edge(handle, + raft::device_span(srcs.data(), srcs.size()), + raft::device_span(dsts.data(), dsts.size())); - auto edge_exists = graph_view.has_edge( - handle, - raft::device_span(srcs.data(), srcs.size()), - raft::device_span(dsts.data(), dsts.size()) - ); - // Send the result back std::tie(edge_exists, std::ignore) = - shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); - + shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); + // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where group, // hoever it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. - thrust::sort_by_key( - handle.get_thrust_policy(), - thrust::make_zip_iterator( - cp_edgelist_srcs.begin(), - cp_edgelist_dsts.begin()), - thrust::make_zip_iterator( - cp_edgelist_srcs.end(), - cp_edgelist_dsts.end()), - edge_exists.begin()); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(edgelist_srcs.size()), - edge_exists.begin(), - [ - edge_exists = edge_exists.data(), - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - cp_edgelist_first = thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), - cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()) - ] __device__(auto idx) { - auto src = thrust::get<0>(edgelist_first[idx]); - auto dst = thrust::get<1>(edgelist_first[idx]); - - auto itr_pair = thrust::lower_bound( - thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); - - return edge_exists[idx_pair]; - } - ); - - // Match DODG edges + thrust::sort_by_key(handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), + thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()), + edge_exists.begin()); + thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(edgelist_srcs.size()), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - [ - edge_exists = edge_exists.data(), - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()) - ] __device__(auto idx) { + edge_exists.begin(), + [edge_exists = edge_exists.data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + cp_edgelist_first = + thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), + cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), + cp_edgelist_dsts.end())] __device__(auto idx) { auto src = thrust::get<0>(edgelist_first[idx]); auto dst = thrust::get<1>(edgelist_first[idx]); - return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); - } - ); + auto itr_pair = thrust::lower_bound( + thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); + return edge_exists[idx_pair]; + }); + + // Match DODG edges + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(edgelist_srcs.size()), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + [edge_exists = edge_exists.data(), + edgelist_first = thrust::make_zip_iterator( + edgelist_srcs.begin(), edgelist_dsts.begin())] __device__(auto idx) { + auto src = thrust::get<0>(edgelist_first[idx]); + auto dst = thrust::get<1>(edgelist_first[idx]); + + return edge_exists[idx] ? thrust::make_tuple(src, dst) + : thrust::make_tuple(dst, src); + }); } template @@ -192,8 +173,7 @@ struct extract_weak_edges { __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - - return ((count < k - 2) && (count > 0)) + return ((count < k - 2) && (count > 0)) ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } @@ -207,10 +187,9 @@ struct extract_triangles_from_weak_edges { raft::device_span weak_srcs{}; raft::device_span weak_dsts{}; - __device__ thrust::tuple - operator()(edge_t i) const + __device__ thrust::tuple operator()( + edge_t i) const { - auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); @@ -223,10 +202,12 @@ struct extract_triangles_from_weak_edges { // Extract (q, r) edges auto edge_q_r = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - return thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r)); + return thrust::make_tuple(thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r)); } }; @@ -303,7 +284,8 @@ k_truss(raft::handle_t const& handle, std::optional> modified_graph{std::nullopt}; std::optional> modified_graph_view{std::nullopt}; - std::optional> undirected_graph_view{std::nullopt}; + std::optional> undirected_graph_view{ + std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; @@ -311,7 +293,8 @@ k_truss(raft::handle_t const& handle, cugraph::edge_bucket_t edgelist_dodg(handle); - cugraph::edge_property_t, bool> dodg_mask(handle, graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, graph_view); // Ideally, leverage the undirected graph derived from k-core undirected_graph_view = graph_view; @@ -410,7 +393,6 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -452,501 +434,466 @@ k_truss(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); } - cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); // Masking edges not part of the DODG - edgelist_dodg.insert(srcs.begin(), - srcs.end(), - dsts.begin()); - + edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); + cugraph::transform_e( - handle, - cur_graph_view, - edgelist_dodg, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return true; - }, - dodg_mask.mutable_view(), - false); - + handle, + cur_graph_view, + edgelist_dodg, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return true; + }, + dodg_mask.mutable_view(), + false); + edgelist_dodg.clear(); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); + cugraph::edge_property_t weak_edges_mask(handle, + cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); - + // Attach mask cur_graph_view.attach_edge_mask(dodg_mask.view()); auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view, false); + edge_triangle_count(handle, cur_graph_view, false); cugraph::edge_bucket_t edgelist_weak(handle); cugraph::edge_bucket_t edges_to_decrement_count(handle); - size_t prev_chunk_size = 0; // FIXME: Add support for chunking - + size_t prev_chunk_size = 0; // FIXME: Add support for chunking while (true) { - - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(dodg_mask.view()); - - // Extract weak edges - auto [weak_edgelist_srcs, weak_edgelist_dsts] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - extract_weak_edges{k}); - - auto weak_edgelist_first = thrust::make_zip_iterator( - weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - auto weak_edgelist_last = thrust::make_zip_iterator( - weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); - - // Perform nbr_intersection of the weak edges from the undirected - // graph view - cur_graph_view.clear_edge_mask(); - - // Attach the weak edge mask - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto [intersection_offsets, intersection_indices] = \ - per_v_pair_dst_nbr_intersection( - handle, - cur_graph_view, - weak_edgelist_first, - weak_edgelist_last, - false); - - // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - - auto triangles_from_weak_edges = - allocate_dataframe_buffer>( - intersection_indices.size(), - handle.get_stream()); - - // Form (p, q) edges - // Extract triangle from weak - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - extract_triangles_from_weak_edges{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, false); + + // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + + auto triangles_from_weak_edges = allocate_dataframe_buffer< + thrust::tuple>( + intersection_indices.size(), handle.get_stream()); + + // Form (p, q) edges + // Extract triangle from weak + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + extract_triangles_from_weak_edges{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + rmm::device_uvector edge_exists(0, handle.get_stream()); + + // Handling (p, r) edges + if constexpr (multi_gpu) { + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (p, r) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span(std::get<2>(triangles_from_weak_edges).data(), + std::get<2>(triangles_from_weak_edges).size()), + raft::device_span(std::get<3>(triangles_from_weak_edges).data(), + std::get<3>(triangles_from_weak_edges).size())); + + } else { + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(std::get<2>(triangles_from_weak_edges).data(), + intersection_indices.size()), + raft::device_span(std::get<3>(triangles_from_weak_edges).data(), + intersection_indices.size())); + } + + // Handling (q, r) edges + + if constexpr (multi_gpu) { + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (q, r) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span(std::get<4>(triangles_from_weak_edges).data(), + std::get<4>(triangles_from_weak_edges).size()), + raft::device_span(std::get<5>(triangles_from_weak_edges).data(), + std::get<5>(triangles_from_weak_edges).size())); + + } else { + edge_exists = cur_graph_view.has_edge( + handle, + raft::device_span(std::get<4>(triangles_from_weak_edges).data(), + intersection_indices.size()), + raft::device_span(std::get<5>(triangles_from_weak_edges).data(), + intersection_indices.size())); + } + + // re-order triangles + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + thrust::transform( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + get_dataframe_buffer_begin(triangles_from_weak_edges), + [] __device__(auto triangle) { + auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); + auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); + auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); + + if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { + if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q)); + + } else { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r)); + } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r)); } - ); - - cur_graph_view.clear_edge_mask(); - // Check for edge existance on the directed graph view - cur_graph_view.attach_edge_mask(dodg_mask.view()); - - rmm::device_uvector edge_exists(0, handle.get_stream()); - - // Handling (p, r) edges - if constexpr (multi_gpu) { - // (p, q) edges are owned by the current GPU while (p, r) and (q, r) - // can be owned by different GPUs - // Ordering (p, r) edges based on the DODG - order_edge_based_on_dodg( - handle, - cur_graph_view, - raft::device_span( - std::get<2>(triangles_from_weak_edges).data(), - std::get<2>(triangles_from_weak_edges).size()), - raft::device_span( - std::get<3>(triangles_from_weak_edges).data(), - std::get<3>(triangles_from_weak_edges).size()) - ); - - } else { - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span( - std::get<2>(triangles_from_weak_edges).data(), intersection_indices.size()), - raft::device_span( - std::get<3>(triangles_from_weak_edges).data(), intersection_indices.size()) - ); - } - - // Handling (q, r) edges - - if constexpr (multi_gpu) { - // (p, q) edges are owned by the current GPU while (p, r) and (q, r) - // can be owned by different GPUs - // Ordering (q, r) edges based on the DODG - order_edge_based_on_dodg( - handle, - cur_graph_view, - raft::device_span( - std::get<4>(triangles_from_weak_edges).data(), - std::get<4>(triangles_from_weak_edges).size()), - raft::device_span( - std::get<5>(triangles_from_weak_edges).data(), - std::get<5>(triangles_from_weak_edges).size()) - ); - - } else { - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span( - std::get<4>(triangles_from_weak_edges).data(), intersection_indices.size()), - raft::device_span( - std::get<5>(triangles_from_weak_edges).data(), intersection_indices.size()) - ); - } - - // re-order triangles - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - thrust::transform( - handle.get_thrust_policy(), + + return triangle; + }); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto unique_triangle_end = + thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // FIXME: put the redundant code above in a function + std::tie(triangles_from_weak_edges, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges), - get_dataframe_buffer_begin(triangles_from_weak_edges), - [] __device__(auto triangle) { - auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); - auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); - auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); - - if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { - if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q) - ); - - } else { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) - ); - } - } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) - ); - - } - - return triangle; - } - ); - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto unique_triangle_end = thrust::unique( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - if constexpr (multi_gpu) { - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - // FIXME: put the redundant code above in a function - std::tie(triangles_from_weak_edges, std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - - unique_triangle_end = thrust::unique( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - num_unique_triangles = thrust::distance( - get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - } - - auto edgelist_to_update_count = - allocate_dataframe_buffer>(3* num_unique_triangles, - handle.get_stream()); - - // Flatten the triangle to a list of egdes. - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), - get_dataframe_buffer_begin(edgelist_to_update_count), - [ - num_unique_triangles, - triangles_from_weak_edges = get_dataframe_buffer_begin(triangles_from_weak_edges) - ] __device__(auto idx) { - auto idx_triangle = idx % num_unique_triangles; - auto idx_vertex_in_triangle = idx / num_unique_triangles; - auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); - vertex_t src; - vertex_t dst; - - if (idx_vertex_in_triangle == 0) { - src = *(thrust::get<0>(triangle)); - dst = *(thrust::get<1>(triangle)); - } + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, - if (idx_vertex_in_triangle == 1) { - src = *(thrust::get<2>(triangle)); - dst = *(thrust::get<3>(triangle)); - } + handle.get_stream()); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + resize_dataframe_buffer( + triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // Flatten the triangle to a list of egdes. + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_from_weak_edges = + get_dataframe_buffer_begin(triangles_from_weak_edges)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } - if (idx_vertex_in_triangle == 2) { - src = *(thrust::get<4>(triangle)); - dst = *(thrust::get<5>(triangle)); - } - - return thrust::make_tuple(src, dst); + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<2>(triangle)); + dst = *(thrust::get<3>(triangle)); } - ); - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto unique_pair_count = thrust::unique_count( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - // If multi-GPU, shuffle and reduce - if constexpr (multi_gpu) { - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto my_rank = handle.get_comms().get_rank(); - - std::tie(edgelist_to_update_count, std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - } - - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - - rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - - thrust::reduce_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - thrust::make_constant_iterator(size_t{1}), - get_dataframe_buffer_begin(vertex_pair_buffer_unique), - decrease_count.begin(), - thrust::equal_to>{}); - - // Update count of weak edges - edges_to_decrement_count.clear(); - - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), - std::get<0>(vertex_pair_buffer_unique).end(), - std::get<1>(vertex_pair_buffer_unique).begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edges_to_decrement_count, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [ - edge_buffer_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<1>(vertex_pair_buffer_unique).begin()), - edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).end()), - decrease_count = raft::device_span(decrease_count.data(), decrease_count.size()) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - - auto itr_pair = thrust::lower_bound( - thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); - - count -= decrease_count[idx_pair]; - - return count; + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<4>(triangle)); + dst = *(thrust::get<5>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + // If multi-GPU, shuffle and reduce + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto my_rank = handle.get_comms().get_rank(); + + std::tie(edgelist_to_update_count, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, - edge_triangle_counts.mutable_view(), - false); - - edgelist_weak.clear(); - - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()) - ); - - edgelist_weak.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); - - // Get undirected graph view - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return false; - }, - weak_edges_mask.mutable_view(), - false); - - edgelist_weak.clear(); - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), - thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end()) - ); - - edgelist_weak.insert(weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - weak_edgelist_srcs.begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return false; - }, - weak_edges_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } - + + handle.get_stream()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + edge_t count) { + auto itr_pair = thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + false); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); + + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); + + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + false); + + edgelist_weak.clear(); + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - + cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); - + cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count == 0 ? false : true; - }, - dodg_mask.mutable_view(), - true); - + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + true); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -954,19 +901,17 @@ k_truss(raft::handle_t const& handle, edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - renumber_map - ? std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size())): - std::nullopt - ); - + renumber_map ? std::make_optional(raft::device_span((*renumber_map).data(), + (*renumber_map).size())) + : std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } From 3dfae34efedf66d2a8bd5005e062cf2abdfc5a75 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 16:36:00 -0800 Subject: [PATCH 09/13] add sync call and fix typos --- cpp/src/community/k_truss_impl.cuh | 1145 +++++++++++++++------------- 1 file changed, 620 insertions(+), 525 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 77c858a2020..540ab678437 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -17,9 +17,9 @@ #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" -#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -41,130 +41,200 @@ #include #include #include - #include using namespace std::chrono; namespace cugraph { - template -void order_edge_based_on_dodg(raft::handle_t const& handle, - graph_view_t& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts) +void order_edge_based_on_dodg( + raft::handle_t const& handle, + graph_view_t & graph_view, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts + ) { - // FIXME: Use global comm for debugging purposes - // then replace it by minor comm once the accuracy is verified - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; - - rmm::device_uvector cp_edgelist_srcs(edgelist_srcs.size(), handle.get_stream()); - rmm::device_uvector cp_edgelist_dsts(edgelist_srcs.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_srcs.begin()), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_dsts.end()), - thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin())); - - auto d_tx_counts = cugraph::groupby_and_count( - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), - [func] __device__(auto val) { return func(val); }, + std::vector rx_counts{}; + std::optional> srcs{std::nullopt}; + std::optional> dsts{std::nullopt}; + + std::optional> cp_edgelist_srcs{std::nullopt}; + std::optional> cp_edgelist_dsts{std::nullopt}; + + + // FIXME: Minor comm is not working for all cases so I believe some edges a beyong + // the partitioning range + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; + + + rmm::device_uvector tmp_srcs(edgelist_srcs.size(), handle.get_stream()); + rmm::device_uvector tmp_dsts(edgelist_srcs.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), + thrust::make_zip_iterator(tmp_srcs.begin(), tmp_dsts.begin())); + + cp_edgelist_srcs = std::move(tmp_srcs); + cp_edgelist_dsts = std::move(tmp_dsts); + + auto d_tx_counts = cugraph::groupby_and_count( + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), + [func]__device__(auto val) { + return func(val); + }, comm_size, std::numeric_limits::max(), handle.get_stream()); - std::vector h_tx_counts(d_tx_counts.size()); - - raft::update_host( - h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - std::vector rx_counts{}; - - std::tie(srcs, rx_counts) = - shuffle_values(handle.get_comms(), edgelist_srcs.begin(), h_tx_counts, handle.get_stream()); - - std::tie(dsts, std::ignore) = - shuffle_values(handle.get_comms(), edgelist_dsts.begin(), h_tx_counts, handle.get_stream()); - - // rmm::device_uvector edge_exists(0, handle.get_stream()); - auto edge_exists = - graph_view.has_edge(handle, - raft::device_span(srcs.data(), srcs.size()), - raft::device_span(dsts.data(), dsts.size())); + std::vector h_tx_counts(d_tx_counts.size()); - // Send the result back - std::tie(edge_exists, std::ignore) = - shuffle_values(handle.get_comms(), edge_exists.begin(), rx_counts, handle.get_stream()); + handle.sync_stream(); - // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where group, - // hoever it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. - thrust::sort_by_key(handle.get_thrust_policy(), - thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), - thrust::make_zip_iterator(cp_edgelist_srcs.end(), cp_edgelist_dsts.end()), - edge_exists.begin()); + raft::update_host(h_tx_counts.data(), + d_tx_counts.data(), + d_tx_counts.size(), + handle.get_stream()); + + std::tie(srcs, rx_counts) = + shuffle_values( + handle.get_comms(), + cp_edgelist_srcs->begin(), h_tx_counts, handle.get_stream()); + + std::tie(dsts, std::ignore) = + shuffle_values( + handle.get_comms(), + cp_edgelist_dsts->begin(), h_tx_counts, handle.get_stream()); + } + std::optional> edge_exists{std::nullopt}; + edge_exists = graph_view.has_edge( + handle, + srcs ? raft::device_span(srcs->data(), srcs->size()) + : raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), + dsts ? raft::device_span(dsts->data(), dsts->size()) + : raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()) + ); + + if constexpr (multi_gpu) { + + // Send the result back + std::tie(edge_exists, std::ignore) = + shuffle_values(handle.get_comms(), edge_exists->begin(), rx_counts, handle.get_stream()); + + // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where grouped, + // however it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. + thrust::sort_by_key( + handle.get_thrust_policy(), + thrust::make_zip_iterator( + cp_edgelist_srcs->begin(), + cp_edgelist_dsts->begin()), + thrust::make_zip_iterator( + cp_edgelist_srcs->end(), + cp_edgelist_dsts->end()), + edge_exists->begin()); + + auto num_unique_pair = thrust::unique_count( + handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end())); + + rmm::device_uvector tmp_srcs(num_unique_pair, handle.get_stream()); + rmm::device_uvector tmp_dsts(num_unique_pair, handle.get_stream()); + rmm::device_uvector tmp_edge_exists(num_unique_pair, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + thrust::make_zip_iterator( + cp_edgelist_srcs->begin(), + cp_edgelist_dsts->begin()), + thrust::make_zip_iterator( + cp_edgelist_srcs->end(), + cp_edgelist_dsts->end()), + edge_exists->begin(), + thrust::make_zip_iterator( + tmp_srcs.begin(), + tmp_dsts.begin()), + tmp_edge_exists.begin(), + thrust::equal_to>{}); + + cp_edgelist_srcs = std::move(tmp_srcs); + cp_edgelist_dsts = std::move(tmp_dsts); + edge_exists = std::move(tmp_edge_exists); + + // Match DODG edges + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(edgelist_srcs.size()), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + [ + edge_exists = edge_exists->data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + cp_edgelist_first = thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()) + ] __device__(auto idx) { + auto src = thrust::get<0>(edgelist_first[idx]); + auto dst = thrust::get<1>(edgelist_first[idx]); + + auto itr_pair = thrust::find( // FIXME: replace by lower bound + thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); + + + return edge_exists[idx_pair] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); + } + ); + + } else { + + + // Match DODG edges thrust::transform( handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(edgelist_srcs.size()), - edge_exists.begin(), - [edge_exists = edge_exists.data(), - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - cp_edgelist_first = - thrust::make_zip_iterator(cp_edgelist_srcs.begin(), cp_edgelist_dsts.begin()), - cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs.end(), - cp_edgelist_dsts.end())] __device__(auto idx) { + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + [ + edge_exists = edge_exists->data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()) + ] __device__(auto idx) { auto src = thrust::get<0>(edgelist_first[idx]); auto dst = thrust::get<1>(edgelist_first[idx]); - auto itr_pair = thrust::lower_bound( - thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); - auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); - - return edge_exists[idx_pair]; - }); + return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); + } + ); + } - // Match DODG edges - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(edgelist_srcs.size()), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - [edge_exists = edge_exists.data(), - edgelist_first = thrust::make_zip_iterator( - edgelist_srcs.begin(), edgelist_dsts.begin())] __device__(auto idx) { - auto src = thrust::get<0>(edgelist_first[idx]); - auto dst = thrust::get<1>(edgelist_first[idx]); - - return edge_exists[idx] ? thrust::make_tuple(src, dst) - : thrust::make_tuple(dst, src); - }); } template @@ -173,7 +243,8 @@ struct extract_weak_edges { __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - return ((count < k - 2) && (count > 0)) + + return ((count < k - 2) && (count > 0)) ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } @@ -187,9 +258,10 @@ struct extract_triangles_from_weak_edges { raft::device_span weak_srcs{}; raft::device_span weak_dsts{}; - __device__ thrust::tuple operator()( - edge_t i) const + __device__ thrust::tuple + operator()(edge_t i) const { + auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); @@ -202,12 +274,10 @@ struct extract_triangles_from_weak_edges { // Extract (q, r) edges auto edge_q_r = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - return thrust::make_tuple(thrust::get<0>(edge_p_q), - thrust::get<1>(edge_p_q), - thrust::get<0>(edge_p_r), - thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), - thrust::get<1>(edge_q_r)); + return thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r)); } }; @@ -284,8 +354,7 @@ k_truss(raft::handle_t const& handle, std::optional> modified_graph{std::nullopt}; std::optional> modified_graph_view{std::nullopt}; - std::optional> undirected_graph_view{ - std::nullopt}; + std::optional> undirected_graph_view{std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; @@ -293,8 +362,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_bucket_t edgelist_dodg(handle); - cugraph::edge_property_t, bool> dodg_mask( - handle, graph_view); + cugraph::edge_property_t, bool> dodg_mask(handle, graph_view); // Ideally, leverage the undirected graph derived from k-core undirected_graph_view = graph_view; @@ -393,6 +461,7 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { + auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -434,466 +503,490 @@ k_truss(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); } + cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); // Masking edges not part of the DODG - edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); - + edgelist_dodg.insert(srcs.begin(), + srcs.end(), + dsts.begin()); + cugraph::transform_e( - handle, - cur_graph_view, - edgelist_dodg, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return true; - }, - dodg_mask.mutable_view(), - false); - + handle, + cur_graph_view, + edgelist_dodg, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return true; + }, + dodg_mask.mutable_view(), + false); + edgelist_dodg.clear(); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { + auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - cugraph::edge_property_t weak_edges_mask(handle, - cur_graph_view); + cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); - + // Attach mask cur_graph_view.attach_edge_mask(dodg_mask.view()); auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view, false); + edge_triangle_count(handle, cur_graph_view, false); cugraph::edge_bucket_t edgelist_weak(handle); cugraph::edge_bucket_t edges_to_decrement_count(handle); - size_t prev_chunk_size = 0; // FIXME: Add support for chunking + size_t prev_chunk_size = 0; // FIXME: Add support for chunking + + while (true) { + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = thrust::make_zip_iterator( + weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = thrust::make_zip_iterator( + weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = \ + per_v_pair_dst_nbr_intersection( + handle, + cur_graph_view, + weak_edgelist_first, + weak_edgelist_last, + false); + + // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + + auto triangles_from_weak_edges = + allocate_dataframe_buffer>( + intersection_indices.size(), + handle.get_stream()); + + // Form (p, q) edges + // Extract triangle from weak + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + extract_triangles_from_weak_edges{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + } + ); - while (true) { - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(dodg_mask.view()); + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + rmm::device_uvector edge_exists(0, handle.get_stream()); + + + // Handling (p, r) edges - // Extract weak edges - auto [weak_edgelist_srcs, weak_edgelist_dsts] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - extract_weak_edges{k}); - - auto weak_edgelist_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - auto weak_edgelist_last = - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); - - // Perform nbr_intersection of the weak edges from the undirected - // graph view - cur_graph_view.clear_edge_mask(); - - // Attach the weak edge mask - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( - handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, false); - - // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - - auto triangles_from_weak_edges = allocate_dataframe_buffer< - thrust::tuple>( - intersection_indices.size(), handle.get_stream()); - - // Form (p, q) edges - // Extract triangle from weak - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - extract_triangles_from_weak_edges{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); - - cur_graph_view.clear_edge_mask(); - // Check for edge existance on the directed graph view - cur_graph_view.attach_edge_mask(dodg_mask.view()); - - rmm::device_uvector edge_exists(0, handle.get_stream()); - - // Handling (p, r) edges - if constexpr (multi_gpu) { // (p, q) edges are owned by the current GPU while (p, r) and (q, r) // can be owned by different GPUs // Ordering (p, r) edges based on the DODG order_edge_based_on_dodg( handle, cur_graph_view, - raft::device_span(std::get<2>(triangles_from_weak_edges).data(), - std::get<2>(triangles_from_weak_edges).size()), - raft::device_span(std::get<3>(triangles_from_weak_edges).data(), - std::get<3>(triangles_from_weak_edges).size())); - - } else { - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span(std::get<2>(triangles_from_weak_edges).data(), - intersection_indices.size()), - raft::device_span(std::get<3>(triangles_from_weak_edges).data(), - intersection_indices.size())); - } + raft::device_span( + std::get<2>(triangles_from_weak_edges).data(), + std::get<2>(triangles_from_weak_edges).size()), + raft::device_span( + std::get<3>(triangles_from_weak_edges).data(), + std::get<3>(triangles_from_weak_edges).size()) + ); - // Handling (q, r) edges + // Handling (q, r) edges - if constexpr (multi_gpu) { // (p, q) edges are owned by the current GPU while (p, r) and (q, r) // can be owned by different GPUs // Ordering (q, r) edges based on the DODG order_edge_based_on_dodg( handle, cur_graph_view, - raft::device_span(std::get<4>(triangles_from_weak_edges).data(), - std::get<4>(triangles_from_weak_edges).size()), - raft::device_span(std::get<5>(triangles_from_weak_edges).data(), - std::get<5>(triangles_from_weak_edges).size())); - - } else { - edge_exists = cur_graph_view.has_edge( - handle, - raft::device_span(std::get<4>(triangles_from_weak_edges).data(), - intersection_indices.size()), - raft::device_span(std::get<5>(triangles_from_weak_edges).data(), - intersection_indices.size())); - } - - // re-order triangles - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - thrust::transform( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - get_dataframe_buffer_begin(triangles_from_weak_edges), - [] __device__(auto triangle) { - auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); - auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); - auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); - - if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { - if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { - triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), - thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), - thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_q), - thrust::get<1>(edge_p_q)); - - } else { - triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), - thrust::get<1>(edge_p_r), - thrust::get<0>(edge_p_q), - thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), - thrust::get<1>(edge_q_r)); - } - } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - triangle = thrust::make_tuple(thrust::get<0>(edge_p_q), - thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), - thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_r), - thrust::get<1>(edge_p_r)); - } - - return triangle; - }); - - thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto unique_triangle_end = - thrust::unique(handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto num_unique_triangles = thrust::distance( - get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts( - vertex_partition_range_lasts.size(), handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - // FIXME: put the redundant code above in a function - std::tie(triangles_from_weak_edges, std::ignore) = groupby_gpu_id_and_shuffle_values( - handle.get_comms(), + raft::device_span( + std::get<4>(triangles_from_weak_edges).data(), + std::get<4>(triangles_from_weak_edges).size()), + raft::device_span( + std::get<5>(triangles_from_weak_edges).data(), + std::get<5>(triangles_from_weak_edges).size()) + ); + + // re-order triangles + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + thrust::transform( + handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - - handle.get_stream()); - - unique_triangle_end = thrust::unique(handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - num_unique_triangles = thrust::distance( - get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - resize_dataframe_buffer( - triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - } - - auto edgelist_to_update_count = allocate_dataframe_buffer>( - 3 * num_unique_triangles, handle.get_stream()); - - // Flatten the triangle to a list of egdes. - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), - get_dataframe_buffer_begin(edgelist_to_update_count), - [num_unique_triangles, - triangles_from_weak_edges = - get_dataframe_buffer_begin(triangles_from_weak_edges)] __device__(auto idx) { - auto idx_triangle = idx % num_unique_triangles; - auto idx_vertex_in_triangle = idx / num_unique_triangles; - auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); - vertex_t src; - vertex_t dst; - - if (idx_vertex_in_triangle == 0) { - src = *(thrust::get<0>(triangle)); - dst = *(thrust::get<1>(triangle)); - } - - if (idx_vertex_in_triangle == 1) { - src = *(thrust::get<2>(triangle)); - dst = *(thrust::get<3>(triangle)); - } - - if (idx_vertex_in_triangle == 2) { - src = *(thrust::get<4>(triangle)); - dst = *(thrust::get<5>(triangle)); + get_dataframe_buffer_begin(triangles_from_weak_edges), + [] __device__(auto triangle) { + auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); + auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); + auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); + + if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { + if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q) + ); + + } else { + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) + ); + } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + triangle = thrust::make_tuple( + thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) + ); + + } + return triangle; } + ); - return thrust::make_tuple(src, dst); - }); - - thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto unique_pair_count = - thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - // If multi-GPU, shuffle and reduce - if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts( - vertex_partition_range_lasts.size(), handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto my_rank = handle.get_comms().get_rank(); - - std::tie(edgelist_to_update_count, std::ignore) = groupby_gpu_id_and_shuffle_values( - handle.get_comms(), + thrust::sort( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto unique_triangle_end = thrust::unique( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // FIXME: put the redundant code above in a function + std::tie(triangles_from_weak_edges, std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, + + handle.get_stream() + ); + + thrust::sort( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + unique_triangle_end = thrust::unique( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + + } + + auto edgelist_to_update_count = + allocate_dataframe_buffer>(3* num_unique_triangles, + handle.get_stream()); + + // Flatten the triangle to a list of egdes. + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) { - return key_func(thrust::get<0>(val), thrust::get<1>(val)); - }, - - handle.get_stream()); - } - - thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - - rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - - thrust::reduce_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - thrust::make_constant_iterator(size_t{1}), - get_dataframe_buffer_begin(vertex_pair_buffer_unique), - decrease_count.begin(), - thrust::equal_to>{}); + [ + num_unique_triangles, + triangles_from_weak_edges = get_dataframe_buffer_begin(triangles_from_weak_edges) + ] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } - // Update count of weak edges - edges_to_decrement_count.clear(); + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<2>(triangle)); + dst = *(thrust::get<3>(triangle)); + } - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), - std::get<0>(vertex_pair_buffer_unique).end(), - std::get<1>(vertex_pair_buffer_unique).begin()); + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<4>(triangle)); + dst = *(thrust::get<5>(triangle)); + } + + return thrust::make_tuple(src, dst); + } + ); + + // If multi-GPU, shuffle and reduce + if constexpr (multi_gpu) { + + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::tie(edgelist_to_update_count, std::ignore) = + groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, + + handle.get_stream() + ); + } + + thrust::sort( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [ + edge_buffer_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span(decrease_count.data(), decrease_count.size()) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + + auto itr_pair = thrust::find( // FIXME: Update to lowerbound + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + + count -= decrease_count[idx_pair]; + + return count; - cugraph::transform_e( - handle, - cur_graph_view, - edges_to_decrement_count, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [edge_buffer_first = - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), - std::get<1>(vertex_pair_buffer_unique).begin()), - edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), - std::get<1>(vertex_pair_buffer_unique).end()), - decrease_count = raft::device_span( - decrease_count.data(), decrease_count.size())] __device__(auto src, - auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - edge_t count) { - auto itr_pair = thrust::lower_bound( - thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); - - count -= decrease_count[idx_pair]; - - return count; - }, - edge_triangle_counts.mutable_view(), - false); - - edgelist_weak.clear(); - - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); - - edgelist_weak.insert( - weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); - - // Get undirected graph view - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return false; - }, - weak_edges_mask.mutable_view(), - false); - - edgelist_weak.clear(); - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), - thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); - - edgelist_weak.insert( - weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return false; - }, - weak_edges_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + }, + edge_triangle_counts.mutable_view(), + true); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()) + ); + + edgelist_weak.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin()); + + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return false; + }, + weak_edges_mask.mutable_view(), + false); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie( + weak_edgelist_dsts, weak_edgelist_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end()) + ); + + edgelist_weak.insert(weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + weak_edgelist_srcs.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + + return false; + }, + weak_edges_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + } - + cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); - + cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count == 0 ? false : true; - }, - dodg_mask.mutable_view(), - true); - + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + true); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -901,17 +994,19 @@ k_truss(raft::handle_t const& handle, edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - renumber_map ? std::make_optional(raft::device_span((*renumber_map).data(), - (*renumber_map).size())) - : std::nullopt); - + renumber_map + ? std::make_optional( + raft::device_span((*renumber_map).data(), (*renumber_map).size())): + std::nullopt + ); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } From 8fa9cd97f1cbe8878b487e4eecc2e70a0d7ae188 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 17:06:17 -0800 Subject: [PATCH 10/13] fix style --- cpp/src/community/k_truss_impl.cuh | 1103 +++++++++++++--------------- 1 file changed, 524 insertions(+), 579 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 540ab678437..f6e5f813929 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -17,9 +17,9 @@ #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" -#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" +#include "prims/per_v_pair_dst_nbr_intersection.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" @@ -41,20 +41,18 @@ #include #include #include + #include using namespace std::chrono; namespace cugraph { template -void order_edge_based_on_dodg( - raft::handle_t const& handle, - graph_view_t & graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts - ) +void order_edge_based_on_dodg(raft::handle_t const& handle, + graph_view_t& graph_view, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts) { - std::vector rx_counts{}; std::optional> srcs{std::nullopt}; std::optional> dsts{std::nullopt}; @@ -62,7 +60,6 @@ void order_edge_based_on_dodg( std::optional> cp_edgelist_srcs{std::nullopt}; std::optional> cp_edgelist_dsts{std::nullopt}; - // FIXME: Minor comm is not working for all cases so I believe some edges a beyong // the partitioning range if constexpr (multi_gpu) { @@ -75,118 +72,96 @@ void order_edge_based_on_dodg( auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); raft::update_device(d_vertex_partition_range_lasts.data(), vertex_partition_range_lasts.data(), vertex_partition_range_lasts.size(), handle.get_stream()); - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; rmm::device_uvector tmp_srcs(edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector tmp_dsts(edgelist_srcs.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), - thrust::make_zip_iterator(tmp_srcs.begin(), tmp_dsts.begin())); - + thrust::copy(handle.get_thrust_policy(), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end()), + thrust::make_zip_iterator(tmp_srcs.begin(), tmp_dsts.begin())); + cp_edgelist_srcs = std::move(tmp_srcs); cp_edgelist_dsts = std::move(tmp_dsts); auto d_tx_counts = cugraph::groupby_and_count( thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), - [func]__device__(auto val) { - return func(val); - }, - comm_size, - std::numeric_limits::max(), - handle.get_stream()); - + [func] __device__(auto val) { return func(val); }, + comm_size, + std::numeric_limits::max(), + handle.get_stream()); std::vector h_tx_counts(d_tx_counts.size()); handle.sync_stream(); - raft::update_host(h_tx_counts.data(), - d_tx_counts.data(), - d_tx_counts.size(), - handle.get_stream()); - - std::tie(srcs, rx_counts) = - shuffle_values( - handle.get_comms(), - cp_edgelist_srcs->begin(), h_tx_counts, handle.get_stream()); - - std::tie(dsts, std::ignore) = - shuffle_values( - handle.get_comms(), - cp_edgelist_dsts->begin(), h_tx_counts, handle.get_stream()); + raft::update_host( + h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + + std::tie(srcs, rx_counts) = shuffle_values( + handle.get_comms(), cp_edgelist_srcs->begin(), h_tx_counts, handle.get_stream()); + + std::tie(dsts, std::ignore) = shuffle_values( + handle.get_comms(), cp_edgelist_dsts->begin(), h_tx_counts, handle.get_stream()); } std::optional> edge_exists{std::nullopt}; edge_exists = graph_view.has_edge( - handle, - srcs ? raft::device_span(srcs->data(), srcs->size()) - : raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - dsts ? raft::device_span(dsts->data(), dsts->size()) - : raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()) - ); - - if constexpr (multi_gpu) { + handle, + srcs ? raft::device_span(srcs->data(), srcs->size()) + : raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), + dsts ? raft::device_span(dsts->data(), dsts->size()) + : raft::device_span(edgelist_dsts.data(), edgelist_dsts.size())); + if constexpr (multi_gpu) { // Send the result back std::tie(edge_exists, std::ignore) = - shuffle_values(handle.get_comms(), edge_exists->begin(), rx_counts, handle.get_stream()); - + shuffle_values(handle.get_comms(), edge_exists->begin(), rx_counts, handle.get_stream()); + // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where grouped, // however it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. thrust::sort_by_key( - handle.get_thrust_policy(), - thrust::make_zip_iterator( - cp_edgelist_srcs->begin(), - cp_edgelist_dsts->begin()), - thrust::make_zip_iterator( - cp_edgelist_srcs->end(), - cp_edgelist_dsts->end()), - edge_exists->begin()); - + handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), + edge_exists->begin()); + auto num_unique_pair = thrust::unique_count( - handle.get_thrust_policy(), - thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), - thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end())); - + handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end())); + rmm::device_uvector tmp_srcs(num_unique_pair, handle.get_stream()); rmm::device_uvector tmp_dsts(num_unique_pair, handle.get_stream()); rmm::device_uvector tmp_edge_exists(num_unique_pair, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - thrust::make_zip_iterator( - cp_edgelist_srcs->begin(), - cp_edgelist_dsts->begin()), - thrust::make_zip_iterator( - cp_edgelist_srcs->end(), - cp_edgelist_dsts->end()), - edge_exists->begin(), - thrust::make_zip_iterator( - tmp_srcs.begin(), - tmp_dsts.begin()), - tmp_edge_exists.begin(), - thrust::equal_to>{}); + thrust::reduce_by_key( + handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), + edge_exists->begin(), + thrust::make_zip_iterator(tmp_srcs.begin(), tmp_dsts.begin()), + tmp_edge_exists.begin(), + thrust::equal_to>{}); cp_edgelist_srcs = std::move(tmp_srcs); cp_edgelist_dsts = std::move(tmp_dsts); - edge_exists = std::move(tmp_edge_exists); + edge_exists = std::move(tmp_edge_exists); // Match DODG edges thrust::transform( @@ -194,47 +169,42 @@ void order_edge_based_on_dodg( thrust::make_counting_iterator(0), thrust::make_counting_iterator(edgelist_srcs.size()), thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - [ - edge_exists = edge_exists->data(), - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - cp_edgelist_first = thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), - cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()) - ] __device__(auto idx) { + [edge_exists = edge_exists->data(), + edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + cp_edgelist_first = + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + cp_edgelist_last = thrust::make_zip_iterator(cp_edgelist_srcs->end(), + cp_edgelist_dsts->end())] __device__(auto idx) { auto src = thrust::get<0>(edgelist_first[idx]); auto dst = thrust::get<1>(edgelist_first[idx]); - auto itr_pair = thrust::find( // FIXME: replace by lower bound - thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); + auto itr_pair = thrust::find( // FIXME: replace by lower bound + thrust::seq, + cp_edgelist_first, + cp_edgelist_last, + thrust::make_tuple(src, dst)); + auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); return edge_exists[idx_pair] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); - } - ); - + }); + } else { - - - // Match DODG edges - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(edgelist_srcs.size()), - thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), - [ - edge_exists = edge_exists->data(), - edgelist_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()) - ] __device__(auto idx) { - auto src = thrust::get<0>(edgelist_first[idx]); - auto dst = thrust::get<1>(edgelist_first[idx]); - - - return edge_exists[idx] ? thrust::make_tuple(src, dst) : thrust::make_tuple(dst, src); - } - ); + // Match DODG edges + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(edgelist_srcs.size()), + thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()), + [edge_exists = edge_exists->data(), + edgelist_first = thrust::make_zip_iterator( + edgelist_srcs.begin(), edgelist_dsts.begin())] __device__(auto idx) { + auto src = thrust::get<0>(edgelist_first[idx]); + auto dst = thrust::get<1>(edgelist_first[idx]); + + return edge_exists[idx] ? thrust::make_tuple(src, dst) + : thrust::make_tuple(dst, src); + }); } - } template @@ -243,8 +213,7 @@ struct extract_weak_edges { __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - - return ((count < k - 2) && (count > 0)) + return ((count < k - 2) && (count > 0)) ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } @@ -258,10 +227,9 @@ struct extract_triangles_from_weak_edges { raft::device_span weak_srcs{}; raft::device_span weak_dsts{}; - __device__ thrust::tuple - operator()(edge_t i) const + __device__ thrust::tuple operator()( + edge_t i) const { - auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); @@ -274,10 +242,12 @@ struct extract_triangles_from_weak_edges { // Extract (q, r) edges auto edge_q_r = thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - return thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r)); + return thrust::make_tuple(thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r)); } }; @@ -354,7 +324,8 @@ k_truss(raft::handle_t const& handle, std::optional> modified_graph{std::nullopt}; std::optional> modified_graph_view{std::nullopt}; - std::optional> undirected_graph_view{std::nullopt}; + std::optional> undirected_graph_view{ + std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; @@ -362,7 +333,8 @@ k_truss(raft::handle_t const& handle, cugraph::edge_bucket_t edgelist_dodg(handle); - cugraph::edge_property_t, bool> dodg_mask(handle, graph_view); + cugraph::edge_property_t, bool> dodg_mask( + handle, graph_view); // Ideally, leverage the undirected graph derived from k-core undirected_graph_view = graph_view; @@ -461,7 +433,6 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -503,490 +474,466 @@ k_truss(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); } - cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); // Masking edges not part of the DODG - edgelist_dodg.insert(srcs.begin(), - srcs.end(), - dsts.begin()); - + edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); + cugraph::transform_e( - handle, - cur_graph_view, - edgelist_dodg, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return true; - }, - dodg_mask.mutable_view(), - false); - + handle, + cur_graph_view, + edgelist_dodg, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return true; + }, + dodg_mask.mutable_view(), + false); + edgelist_dodg.clear(); } // 4. Compute triangle count using nbr_intersection and unroll weak edges { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); + cugraph::edge_property_t weak_edges_mask(handle, + cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); - + // Attach mask cur_graph_view.attach_edge_mask(dodg_mask.view()); auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view, false); + edge_triangle_count(handle, cur_graph_view, false); cugraph::edge_bucket_t edgelist_weak(handle); cugraph::edge_bucket_t edges_to_decrement_count(handle); - size_t prev_chunk_size = 0; // FIXME: Add support for chunking - - while (true) { - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(dodg_mask.view()); - - // Extract weak edges - auto [weak_edgelist_srcs, weak_edgelist_dsts] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - extract_weak_edges{k}); - - auto weak_edgelist_first = thrust::make_zip_iterator( - weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - auto weak_edgelist_last = thrust::make_zip_iterator( - weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); - - // Perform nbr_intersection of the weak edges from the undirected - // graph view - cur_graph_view.clear_edge_mask(); - - // Attach the weak edge mask - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto [intersection_offsets, intersection_indices] = \ - per_v_pair_dst_nbr_intersection( - handle, - cur_graph_view, - weak_edgelist_first, - weak_edgelist_last, - false); - - // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - - auto triangles_from_weak_edges = - allocate_dataframe_buffer>( - intersection_indices.size(), - handle.get_stream()); - - // Form (p, q) edges - // Extract triangle from weak - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - extract_triangles_from_weak_edges{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + size_t prev_chunk_size = 0; // FIXME: Add support for chunking + + while (true) { + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + // Extract weak edges + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + extract_weak_edges{k}); + + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto weak_edgelist_last = + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()); + + // Perform nbr_intersection of the weak edges from the undirected + // graph view + cur_graph_view.clear_edge_mask(); + + // Attach the weak edge mask + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( + handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, false); + + // Identify (p, q) edges, and form edges (p, q), (p, r) and (q, r) + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + + auto triangles_from_weak_edges = allocate_dataframe_buffer< + thrust::tuple>( + intersection_indices.size(), handle.get_stream()); + + // Form (p, q) edges + // Extract triangle from weak + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + extract_triangles_from_weak_edges{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + cur_graph_view.clear_edge_mask(); + // Check for edge existance on the directed graph view + cur_graph_view.attach_edge_mask(dodg_mask.view()); + + rmm::device_uvector edge_exists(0, handle.get_stream()); + + // Handling (p, r) edges + + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (p, r) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span(std::get<2>(triangles_from_weak_edges).data(), + std::get<2>(triangles_from_weak_edges).size()), + raft::device_span(std::get<3>(triangles_from_weak_edges).data(), + std::get<3>(triangles_from_weak_edges).size())); + + // Handling (q, r) edges + + // (p, q) edges are owned by the current GPU while (p, r) and (q, r) + // can be owned by different GPUs + // Ordering (q, r) edges based on the DODG + order_edge_based_on_dodg( + handle, + cur_graph_view, + raft::device_span(std::get<4>(triangles_from_weak_edges).data(), + std::get<4>(triangles_from_weak_edges).size()), + raft::device_span(std::get<5>(triangles_from_weak_edges).data(), + std::get<5>(triangles_from_weak_edges).size())); + + // re-order triangles + // To avoid overcompensation, redirect all edges in the triangle to follow this unique + // pattern: (p, q) then (q, r) then (p, r) + thrust::transform( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges), + get_dataframe_buffer_begin(triangles_from_weak_edges), + [] __device__(auto triangle) { + auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); + auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); + auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); + + if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { + if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q)); + + } else { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r), + thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r)); + } + } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { + triangle = thrust::make_tuple(thrust::get<0>(edge_p_q), + thrust::get<1>(edge_p_q), + thrust::get<0>(edge_q_r), + thrust::get<1>(edge_q_r), + thrust::get<0>(edge_p_r), + thrust::get<1>(edge_p_r)); } - ); - - cur_graph_view.clear_edge_mask(); - // Check for edge existance on the directed graph view - cur_graph_view.attach_edge_mask(dodg_mask.view()); - - rmm::device_uvector edge_exists(0, handle.get_stream()); - - - // Handling (p, r) edges - - // (p, q) edges are owned by the current GPU while (p, r) and (q, r) - // can be owned by different GPUs - // Ordering (p, r) edges based on the DODG - order_edge_based_on_dodg( - handle, - cur_graph_view, - raft::device_span( - std::get<2>(triangles_from_weak_edges).data(), - std::get<2>(triangles_from_weak_edges).size()), - raft::device_span( - std::get<3>(triangles_from_weak_edges).data(), - std::get<3>(triangles_from_weak_edges).size()) - ); - - // Handling (q, r) edges - - // (p, q) edges are owned by the current GPU while (p, r) and (q, r) - // can be owned by different GPUs - // Ordering (q, r) edges based on the DODG - order_edge_based_on_dodg( - handle, - cur_graph_view, - raft::device_span( - std::get<4>(triangles_from_weak_edges).data(), - std::get<4>(triangles_from_weak_edges).size()), - raft::device_span( - std::get<5>(triangles_from_weak_edges).data(), - std::get<5>(triangles_from_weak_edges).size()) - ); - - // re-order triangles - // To avoid overcompensation, redirect all edges in the triangle to follow this unique - // pattern: (p, q) then (q, r) then (p, r) - thrust::transform( - handle.get_thrust_policy(), + return triangle; + }); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto unique_triangle_end = + thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + auto num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + + resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + // FIXME: put the redundant code above in a function + std::tie(triangles_from_weak_edges, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), get_dataframe_buffer_begin(triangles_from_weak_edges), get_dataframe_buffer_end(triangles_from_weak_edges), - get_dataframe_buffer_begin(triangles_from_weak_edges), - [] __device__(auto triangle) { - auto edge_p_q = thrust::make_tuple(thrust::get<0>(triangle), thrust::get<1>(triangle)); - auto edge_p_r = thrust::make_tuple(thrust::get<2>(triangle), thrust::get<3>(triangle)); - auto edge_q_r = thrust::make_tuple(thrust::get<4>(triangle), thrust::get<5>(triangle)); - - if (thrust::get<1>(edge_p_q) == thrust::get<1>(edge_q_r)) { - if (thrust::get<0>(edge_p_q) == thrust::get<0>(edge_p_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q) - ); - - } else { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r), - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r) - ); - } - } else if (thrust::get<1>(edge_p_q) == thrust::get<0>(edge_q_r)) { - triangle = thrust::make_tuple( - thrust::get<0>(edge_p_q), thrust::get<1>(edge_p_q), - thrust::get<0>(edge_q_r), thrust::get<1>(edge_q_r), - thrust::get<0>(edge_p_r), thrust::get<1>(edge_p_r) - ); - - } - return triangle; - } - ); - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto unique_triangle_end = thrust::unique( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - auto num_unique_triangles = thrust::distance(get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - if constexpr (multi_gpu) { - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - // FIXME: put the redundant code above in a function - std::tie(triangles_from_weak_edges, std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - unique_triangle_end = thrust::unique( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(triangles_from_weak_edges), - get_dataframe_buffer_end(triangles_from_weak_edges)); - - num_unique_triangles = thrust::distance( - get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); - resize_dataframe_buffer(triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); - - } - - auto edgelist_to_update_count = - allocate_dataframe_buffer>(3* num_unique_triangles, - handle.get_stream()); - - // Flatten the triangle to a list of egdes. - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), - get_dataframe_buffer_begin(edgelist_to_update_count), - [ - num_unique_triangles, - triangles_from_weak_edges = get_dataframe_buffer_begin(triangles_from_weak_edges) - ] __device__(auto idx) { - auto idx_triangle = idx % num_unique_triangles; - auto idx_vertex_in_triangle = idx / num_unique_triangles; - auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); - vertex_t src; - vertex_t dst; - - if (idx_vertex_in_triangle == 0) { - src = *(thrust::get<0>(triangle)); - dst = *(thrust::get<1>(triangle)); - } + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, - if (idx_vertex_in_triangle == 1) { - src = *(thrust::get<2>(triangle)); - dst = *(thrust::get<3>(triangle)); - } + handle.get_stream()); - if (idx_vertex_in_triangle == 2) { - src = *(thrust::get<4>(triangle)); - dst = *(thrust::get<5>(triangle)); - } - - return thrust::make_tuple(src, dst); + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + unique_triangle_end = thrust::unique(handle.get_thrust_policy(), + get_dataframe_buffer_begin(triangles_from_weak_edges), + get_dataframe_buffer_end(triangles_from_weak_edges)); + + num_unique_triangles = thrust::distance( + get_dataframe_buffer_begin(triangles_from_weak_edges), unique_triangle_end); + resize_dataframe_buffer( + triangles_from_weak_edges, num_unique_triangles, handle.get_stream()); + } + + auto edgelist_to_update_count = allocate_dataframe_buffer>( + 3 * num_unique_triangles, handle.get_stream()); + + // Flatten the triangle to a list of egdes. + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(size_dataframe_buffer(edgelist_to_update_count)), + get_dataframe_buffer_begin(edgelist_to_update_count), + [num_unique_triangles, + triangles_from_weak_edges = + get_dataframe_buffer_begin(triangles_from_weak_edges)] __device__(auto idx) { + auto idx_triangle = idx % num_unique_triangles; + auto idx_vertex_in_triangle = idx / num_unique_triangles; + auto triangle = (triangles_from_weak_edges + idx_triangle).get_iterator_tuple(); + vertex_t src; + vertex_t dst; + + if (idx_vertex_in_triangle == 0) { + src = *(thrust::get<0>(triangle)); + dst = *(thrust::get<1>(triangle)); + } + + if (idx_vertex_in_triangle == 1) { + src = *(thrust::get<2>(triangle)); + dst = *(thrust::get<3>(triangle)); } - ); - - // If multi-GPU, shuffle and reduce - if constexpr (multi_gpu) { - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - std::tie(edgelist_to_update_count, std::ignore) = - groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - - [key_func = - cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}] __device__(auto val) {return key_func(thrust::get<0>(val), thrust::get<1>(val));}, - - handle.get_stream() - ); - } - - thrust::sort( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - - auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - - rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - - thrust::reduce_by_key(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count), - thrust::make_constant_iterator(size_t{1}), - get_dataframe_buffer_begin(vertex_pair_buffer_unique), - decrease_count.begin(), - thrust::equal_to>{}); - - // Update count of weak edges - edges_to_decrement_count.clear(); - - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), - std::get<0>(vertex_pair_buffer_unique).end(), - std::get<1>(vertex_pair_buffer_unique).begin()); - - // Update count of weak edges from the DODG view - cugraph::transform_e( - handle, - cur_graph_view, - edges_to_decrement_count, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [ - edge_buffer_first = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<1>(vertex_pair_buffer_unique).begin()), - edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).end()), - decrease_count = raft::device_span(decrease_count.data(), decrease_count.size()) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - - auto itr_pair = thrust::find( // FIXME: Update to lowerbound - thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); - - auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); - - count -= decrease_count[idx_pair]; - - return count; + if (idx_vertex_in_triangle == 2) { + src = *(thrust::get<4>(triangle)); + dst = *(thrust::get<5>(triangle)); + } + + return thrust::make_tuple(src, dst); + }); + + // If multi-GPU, shuffle and reduce + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + auto vertex_partition_range_lasts = cur_graph_view.vertex_partition_range_lasts(); + + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + std::tie(edgelist_to_update_count, std::ignore) = groupby_gpu_id_and_shuffle_values( + handle.get_comms(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + + [key_func = + cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); }, - edge_triangle_counts.mutable_view(), - true); - - edgelist_weak.clear(); - - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()) - ); - - edgelist_weak.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); - - // Get undirected graph view - cur_graph_view.clear_edge_mask(); - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return false; - }, - weak_edges_mask.mutable_view(), - false); - - edgelist_weak.clear(); - - // shuffle the edges if multi_gpu - if constexpr (multi_gpu) { - std::tie( - weak_edgelist_dsts, weak_edgelist_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(weak_edgelist_dsts), - std::move(weak_edgelist_srcs), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - } - - thrust::sort( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), - thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end()) - ); - - edgelist_weak.insert(weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - weak_edgelist_srcs.begin()); - - cugraph::transform_e( + + handle.get_stream()); + } + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count)); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + get_dataframe_buffer_begin(edgelist_to_update_count), + get_dataframe_buffer_end(edgelist_to_update_count), + thrust::make_constant_iterator(size_t{1}), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + // Update count of weak edges + edges_to_decrement_count.clear(); + + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); + + // Update count of weak edges from the DODG view + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [edge_buffer_first = + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<1>(vertex_pair_buffer_unique).begin()), + edge_buffer_last = thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).end()), + decrease_count = raft::device_span( + decrease_count.data(), decrease_count.size())] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + edge_t count) { + auto itr_pair = thrust::find( // FIXME: Update to lowerbound + thrust::seq, + edge_buffer_first, + edge_buffer_last, + thrust::make_tuple(src, dst)); + + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); + + count -= decrease_count[idx_pair]; + + return count; + }, + edge_triangle_counts.mutable_view(), + true); + + edgelist_weak.clear(); + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end())); + + edgelist_weak.insert( + weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), weak_edgelist_dsts.begin()); + + // Get undirected graph view + cur_graph_view.clear_edge_mask(); + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + false); + + edgelist_weak.clear(); + + // shuffle the edges if multi_gpu + if constexpr (multi_gpu) { + std::tie(weak_edgelist_dsts, + weak_edgelist_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - cur_graph_view, - edgelist_weak, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - - return false; - }, - weak_edges_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } - + std::move(weak_edgelist_dsts), + std::move(weak_edgelist_srcs), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } + + thrust::sort( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_dsts.begin(), weak_edgelist_srcs.begin()), + thrust::make_zip_iterator(weak_edgelist_dsts.end(), weak_edgelist_srcs.end())); + + edgelist_weak.insert( + weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), weak_edgelist_srcs.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_weak, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return false; + }, + weak_edges_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(weak_edges_mask.view()); + + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - + cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); - + cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count == 0 ? false : true; - }, - dodg_mask.mutable_view(), - true); - + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count == 0 ? false : true; + }, + dodg_mask.mutable_view(), + true); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -994,19 +941,17 @@ k_truss(raft::handle_t const& handle, edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - renumber_map - ? std::make_optional( - raft::device_span((*renumber_map).data(), (*renumber_map).size())): - std::nullopt - ); - + renumber_map ? std::make_optional(raft::device_span((*renumber_map).data(), + (*renumber_map).size())) + : std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } From 106afb3b5b5db73adacd9319a6d11bed28062f8d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 17:24:43 -0800 Subject: [PATCH 11/13] reduce before shuffling --- cpp/src/community/k_truss_impl.cuh | 190 +++++++++++++---------------- 1 file changed, 82 insertions(+), 108 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index f6e5f813929..d1150b8d36c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -60,7 +60,7 @@ void order_edge_based_on_dodg(raft::handle_t const& handle, std::optional> cp_edgelist_srcs{std::nullopt}; std::optional> cp_edgelist_dsts{std::nullopt}; - // FIXME: Minor comm is not working for all cases so I believe some edges a beyong + // FIXME: Minor comm is not working for all cases so I believe some edges a beyond // the partitioning range if constexpr (multi_gpu) { auto& comm = handle.get_comms(); @@ -98,6 +98,18 @@ void order_edge_based_on_dodg(raft::handle_t const& handle, cp_edgelist_srcs = std::move(tmp_srcs); cp_edgelist_dsts = std::move(tmp_dsts); + auto unique_pair_end = thrust::unique( + handle.get_thrust_policy(), + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end())); + + auto num_unique_pair = thrust::distance( + thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), + unique_pair_end); + + cp_edgelist_srcs->resize(num_unique_pair, handle.get_stream()); + cp_edgelist_dsts->resize(num_unique_pair, handle.get_stream()); + auto d_tx_counts = cugraph::groupby_and_count( thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), @@ -108,16 +120,16 @@ void order_edge_based_on_dodg(raft::handle_t const& handle, std::vector h_tx_counts(d_tx_counts.size()); - handle.sync_stream(); - raft::update_host( h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); - std::tie(srcs, rx_counts) = shuffle_values( - handle.get_comms(), cp_edgelist_srcs->begin(), h_tx_counts, handle.get_stream()); + handle.sync_stream(); + + std::tie(srcs, rx_counts) = + shuffle_values(comm, cp_edgelist_srcs->begin(), h_tx_counts, handle.get_stream()); - std::tie(dsts, std::ignore) = shuffle_values( - handle.get_comms(), cp_edgelist_dsts->begin(), h_tx_counts, handle.get_stream()); + std::tie(dsts, std::ignore) = + shuffle_values(comm, cp_edgelist_dsts->begin(), h_tx_counts, handle.get_stream()); } std::optional> edge_exists{std::nullopt}; @@ -133,36 +145,12 @@ void order_edge_based_on_dodg(raft::handle_t const& handle, std::tie(edge_exists, std::ignore) = shuffle_values(handle.get_comms(), edge_exists->begin(), rx_counts, handle.get_stream()); - // The 'edge_exists' array is ordered based on 'cp_edgelist_srcs' where the edges where grouped, - // however it needs to match 'edgelist_srcs', hence re-order 'edge_exists' accordingly. thrust::sort_by_key( handle.get_thrust_policy(), thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), edge_exists->begin()); - auto num_unique_pair = thrust::unique_count( - handle.get_thrust_policy(), - thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), - thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end())); - - rmm::device_uvector tmp_srcs(num_unique_pair, handle.get_stream()); - rmm::device_uvector tmp_dsts(num_unique_pair, handle.get_stream()); - rmm::device_uvector tmp_edge_exists(num_unique_pair, handle.get_stream()); - - thrust::reduce_by_key( - handle.get_thrust_policy(), - thrust::make_zip_iterator(cp_edgelist_srcs->begin(), cp_edgelist_dsts->begin()), - thrust::make_zip_iterator(cp_edgelist_srcs->end(), cp_edgelist_dsts->end()), - edge_exists->begin(), - thrust::make_zip_iterator(tmp_srcs.begin(), tmp_dsts.begin()), - tmp_edge_exists.begin(), - thrust::equal_to>{}); - - cp_edgelist_srcs = std::move(tmp_srcs); - cp_edgelist_dsts = std::move(tmp_dsts); - edge_exists = std::move(tmp_edge_exists); - // Match DODG edges thrust::transform( handle.get_thrust_policy(), @@ -178,11 +166,8 @@ void order_edge_based_on_dodg(raft::handle_t const& handle, auto src = thrust::get<0>(edgelist_first[idx]); auto dst = thrust::get<1>(edgelist_first[idx]); - auto itr_pair = thrust::find( // FIXME: replace by lower bound - thrust::seq, - cp_edgelist_first, - cp_edgelist_last, - thrust::make_tuple(src, dst)); + auto itr_pair = thrust::lower_bound( + thrust::seq, cp_edgelist_first, cp_edgelist_last, thrust::make_tuple(src, dst)); auto idx_pair = thrust::distance(cp_edgelist_first, itr_pair); @@ -324,8 +309,6 @@ k_truss(raft::handle_t const& handle, std::optional> modified_graph{std::nullopt}; std::optional> modified_graph_view{std::nullopt}; - std::optional> undirected_graph_view{ - std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; @@ -333,12 +316,6 @@ k_truss(raft::handle_t const& handle, cugraph::edge_bucket_t edgelist_dodg(handle); - cugraph::edge_property_t, bool> dodg_mask( - handle, graph_view); - - // Ideally, leverage the undirected graph derived from k-core - undirected_graph_view = graph_view; - if (graph_view.count_self_loops(handle) > edge_t{0}) { auto [srcs, dsts] = extract_transform_e(handle, graph_view, @@ -432,69 +409,69 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. - { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; + auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); - update_edge_src_property( - handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); - update_edge_dst_property( - handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( - handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); - } - - cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); + auto vertex_partition_range_lasts = + renumber_map + ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) + : std::nullopt; - // Masking edges not part of the DODG - edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); - - cugraph::transform_e( + auto out_degrees = cur_graph_view.compute_out_degrees(handle); + edge_src_property_t edge_src_out_degrees(handle, + cur_graph_view); + edge_dst_property_t edge_dst_out_degrees(handle, + cur_graph_view); + update_edge_src_property( + handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); + update_edge_dst_property( + handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); + + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); + + edge_weight_view = edge_weight + ? std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; + if (edge_weight_view) { + std::tie(srcs, dsts, wgts) = extract_transform_e( handle, cur_graph_view, - edgelist_dodg, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return true; - }, - dodg_mask.mutable_view(), - false); - - edgelist_dodg.clear(); + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + *edge_weight_view, + extract_low_to_high_degree_weighted_edges_t{}); + } else { + std::tie(srcs, dsts) = + extract_transform_e(handle, + cur_graph_view, + edge_src_out_degrees.view(), + edge_dst_out_degrees.view(), + edge_dummy_property_t{}.view(), + extract_low_to_high_degree_edges_t{}); } + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + // Masking edges not part of the DODG + edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edgelist_dodg, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return true; + }, + dodg_mask.mutable_view(), + false); + + edgelist_dodg.clear(); + // 4. Compute triangle count using nbr_intersection and unroll weak edges { @@ -822,11 +799,8 @@ k_truss(raft::handle_t const& handle, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - auto itr_pair = thrust::find( // FIXME: Update to lowerbound - thrust::seq, - edge_buffer_first, - edge_buffer_last, - thrust::make_tuple(src, dst)); + auto itr_pair = thrust::lower_bound( + thrust::seq, edge_buffer_first, edge_buffer_last, thrust::make_tuple(src, dst)); auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); @@ -835,7 +809,7 @@ k_truss(raft::handle_t const& handle, return count; }, edge_triangle_counts.mutable_view(), - true); + false); edgelist_weak.clear(); @@ -928,7 +902,7 @@ k_truss(raft::handle_t const& handle, return count == 0 ? false : true; }, dodg_mask.mutable_view(), - true); + false); rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); From b98ddb0a84a52cad4a48b972cf2dea12e77f4dc5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 22 Jan 2025 12:19:30 -0800 Subject: [PATCH 12/13] removebenchmark print --- cpp/src/community/k_truss_impl.cuh | 204 ++++++----------------------- 1 file changed, 42 insertions(+), 162 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 7a1d0ad5635..08215e574d1 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -59,16 +59,6 @@ struct extract_weak_edges { } }; -template -struct extract_edges_with_positive_count { - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const - { - return (count > 0) - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; - } -}; template struct extract_triangles_endpoints { @@ -127,6 +117,17 @@ struct exclude_self_loop_t { } }; +template +struct exclude_self_loop_weighted_edges_t { + __device__ thrust::optional> operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, weight_t wgt) const + { + return src != dst + ? thrust::optional>{thrust::make_tuple(src, dst, wgt)} + : thrust::nullopt; + } +}; + template struct extract_low_to_high_degree_weighted_edges_t { __device__ thrust::optional> operator()( @@ -220,7 +221,6 @@ k_truss(raft::handle_t const& handle, bool do_expensive_check) { - std::cout<< "initial number of edges " << graph_view.compute_number_of_edges(handle) << std::endl; // 1. Check input arguments. CUGRAPH_EXPECTS(graph_view.is_symmetric(), @@ -237,25 +237,38 @@ k_truss(raft::handle_t const& handle, std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; + std::optional> wgts{std::nullopt}; cugraph::edge_bucket_t edgelist_dodg(handle); if (graph_view.count_self_loops(handle) > edge_t{0}) { - auto [srcs, dsts] = extract_transform_e(handle, - graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_dummy_property_t{}.view(), - exclude_self_loop_t{}); + rmm::device_uvector srcs(0, handle.get_stream()); + rmm::device_uvector dsts(0, handle.get_stream()); + if (edge_weight_view) { + std::tie(srcs, dsts, wgts) = extract_transform_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + exclude_self_loop_weighted_edges_t{}); + } else { + std::tie(srcs, dsts) = extract_transform_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_dummy_property_t{}.view(), + exclude_self_loop_t{}); + + } if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(srcs), std::move(dsts), std::nullopt, std::nullopt, std::nullopt); + handle, std::move(srcs), std::move(dsts), std::move(wgts), std::nullopt, std::nullopt); } std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, renumber_map) = @@ -264,7 +277,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::move(srcs), std::move(dsts), - std::nullopt, + std::move(wgts), std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, @@ -290,13 +303,10 @@ k_truss(raft::handle_t const& handle, raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - //std::cout<<"k = " << k << std::endl; auto [srcs, dsts, wgts] = k_core(handle, cur_graph_view, edge_weight_view, k - 1, - //1160, - //1000, // working for scale 19 and init_k = 210 std::make_optional(k_core_degree_type_t::OUT), std::make_optional(core_number_span)); @@ -339,8 +349,6 @@ k_truss(raft::handle_t const& handle, auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - std::cout<< "after k-core number of edges " << cur_graph_view.compute_number_of_edges(handle) << std::endl; - auto vertex_partition_range_lasts = renumber_map ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) @@ -415,50 +423,21 @@ k_truss(raft::handle_t const& handle, // Attach mask cur_graph_view.attach_edge_mask(dodg_mask.view()); - - std::chrono::seconds s (0); // 1 second - std::chrono::duration nbr_intersection_tc_ms = duration_cast (s); - std::chrono::duration edge_unrolling_ms = duration_cast (s); - std::chrono::duration nbr_intersection_unrolling_ms = duration_cast (s); - std::chrono::duration update_counter_and_mask_ms = duration_cast (s); - std::chrono::duration weak_edge_extraction_ms = duration_cast (s); - - std::chrono::duration extract_endpoints_ms = duration_cast (s); - std::chrono::duration flattening_ep_ms = duration_cast (s); - std::chrono::duration sort_reduce_direct_e_ms = duration_cast (s); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start = high_resolution_clock::now(); auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view, false); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - auto stop = high_resolution_clock::now(); - nbr_intersection_tc_ms = duration_cast(stop - start); cugraph::edge_bucket_t edgelist_weak(handle); cugraph::edge_bucket_t edges_to_decrement_count(handle); size_t prev_chunk_size = 0; // FIXME: Add support for chunking - - //while(cur_graph_view.compute_number_of_edges(handle) != 0) { - auto num_strong_edges = cur_graph_view.compute_number_of_edges(handle); - auto iteration = -1; - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - start = high_resolution_clock::now(); - + auto iteration = 0; while (true) { + // Extract weak edges iteration += 1; - std::cout<< "iteration = " << iteration << " k = " << k << std::endl; - //std::cout<<"number of strong edges = " << num_strong_edges << " k = " << k << std::endl; if (iteration > 0) { cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); } - - // Extract weak edges - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start_weak_e_extract = high_resolution_clock::now(); auto [weak_edgelist_srcs, weak_edgelist_dsts] = extract_transform_e(handle, cur_graph_view, @@ -466,15 +445,6 @@ k_truss(raft::handle_t const& handle, edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), extract_weak_edges{k}); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto stop_weak_e_extract = high_resolution_clock::now(); - weak_edge_extraction_ms += duration_cast(stop_weak_e_extract - start_weak_e_extract); - - if (weak_edgelist_srcs.size() == 0){ - break; - } - auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); @@ -488,24 +458,16 @@ k_truss(raft::handle_t const& handle, // Attach the weak edge mask cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start_nbr_unrolling = high_resolution_clock::now(); auto [intersection_offsets, intersection_indices] = per_v_pair_dst_nbr_intersection( handle, cur_graph_view, weak_edgelist_first, weak_edgelist_last, false); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - auto stop_nbr_unrolling = high_resolution_clock::now(); - - nbr_intersection_unrolling_ms += duration_cast(stop_nbr_unrolling - start_nbr_unrolling); // This array stores (p, q, r) which are endpoints for the triangles with weak edges - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start_extract_ep = high_resolution_clock::now(); auto triangles_endpoints = allocate_dataframe_buffer< thrust::tuple>( intersection_indices.size(), handle.get_stream()); - // Extract endpoints for weak edges in triangles + // Extract endpoints for triangles with weak edges thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(triangles_endpoints), @@ -532,11 +494,6 @@ k_truss(raft::handle_t const& handle, resize_dataframe_buffer(triangles_endpoints, num_unique_triangles, handle.get_stream()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto stop_extract_ep = high_resolution_clock::now(); - - extract_endpoints_ms += duration_cast(stop_extract_ep - start_extract_ep); - if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -591,40 +548,15 @@ k_truss(raft::handle_t const& handle, triangles_endpoints, num_unique_triangles, handle.get_stream()); } - // FIXME: Remove as it might not be used. - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start_flattening_ep = high_resolution_clock::now(); auto edgelist_to_update_count = allocate_dataframe_buffer>( 3 * num_unique_triangles, handle.get_stream()); - // Flatten the triangle to a list of egdes by directing the edges from low degree to high - // This operation should be done on the original graph // FIXME: The outdegree needs to be computed on the original graph without masking any edges. // because it is from the original degree distribution that the DODG is created. cur_graph_view.clear_edge_mask(); - /* - // FIXME: No need to recompute it. Simply make the one from DODG global and reuse it - // FIXME: Same for the 4 statements below - - auto out_degrees = cur_graph_view.compute_out_degrees(handle); - edge_src_property_t edge_src_out_degrees(handle, - cur_graph_view); - edge_dst_property_t edge_dst_out_degrees(handle, - cur_graph_view); - - update_edge_src_property( - handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); - update_edge_dst_property( - handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - */ - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - // The order no longer matters since triangle duplicates have been removed + // The order no longer matters since duplicated triangles have been removed // Flatten the endpoints to a list of egdes. thrust::transform( handle.get_thrust_policy(), @@ -657,20 +589,6 @@ k_truss(raft::handle_t const& handle, return thrust::make_tuple(src, dst); }); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto stop_flattening_ep = high_resolution_clock::now(); - - flattening_ep_ms += duration_cast(stop_flattening_ep - start_flattening_ep); - - // Instead of leveraging the endpoints, simply flatten it endpoints and - // use degree to re-order the edges - /* - // FIXME: Is this sorting necessary******************************************************** - thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(edgelist_to_update_count), - get_dataframe_buffer_end(edgelist_to_update_count)); - */ // Attach the weak edge mask // mask edges with count = 0 when shuffling since they are not needed and reduce the search space @@ -698,9 +616,6 @@ k_truss(raft::handle_t const& handle, cur_graph_view.vertex_partition_range_lasts()); } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto start_sort_reduce_direct_e = high_resolution_clock::now(); - thrust::sort(handle.get_thrust_policy(), get_dataframe_buffer_begin(edgelist_to_update_count), get_dataframe_buffer_end(edgelist_to_update_count)); @@ -744,11 +659,6 @@ k_truss(raft::handle_t const& handle, decrease_count.size()) }); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto stop_sort_reduce_direct_e = high_resolution_clock::now(); - - sort_reduce_direct_e_ms += duration_cast(stop_sort_reduce_direct_e - start_sort_reduce_direct_e); - if constexpr (multi_gpu) { auto& comm = handle.get_comms(); @@ -803,9 +713,6 @@ k_truss(raft::handle_t const& handle, // Update count of weak edges edges_to_decrement_count.clear(); - //RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - auto update_counter_and_mask_start = high_resolution_clock::now(); edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).begin()); @@ -921,35 +828,12 @@ k_truss(raft::handle_t const& handle, cur_graph_view.attach_edge_mask(weak_edges_mask.view()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - auto update_counter_and_mask_stop= high_resolution_clock::now(); - - update_counter_and_mask_ms += duration_cast(update_counter_and_mask_stop - update_counter_and_mask_start); - - std::cout<<"prev_num_edges = " << prev_number_of_edges << " cur_num_edges = " << cur_graph_view.compute_number_of_edges(handle) << std::endl; - std::cout<<"weak edgelist size = " << weak_edgelist_srcs.size() << std::endl; - //if (cur_graph_view.compute_number_of_edges(handle) == 0) { break; } - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { + break; } + + iteration += 1; } - //#if 0 - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - stop = high_resolution_clock::now(); - - edge_unrolling_ms = duration_cast(stop - start); - - auto workflow_runtime = ((nbr_intersection_tc_ms.count()/1000) + (edge_unrolling_ms.count()/1000)); - std::cout<<"kmax = " << k << std::endl; - std::cout << "1) entire workflow took: " << workflow_runtime << " milliseconds" << std::endl; - std::cout << "2) edge triangle count took: " << nbr_intersection_tc_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3) while loop - edge unrolling took: " << edge_unrolling_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.1) weak edge extraction took: " << weak_edge_extraction_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.2) nbr_intersection_unrolling took: " << nbr_intersection_unrolling_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.3) extract_endpoints took: " << extract_endpoints_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.4) flattening_ep took: " << flattening_ep_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.5) sort_reduce_direct_e took: " << sort_reduce_direct_e_ms.count()/1000 << " milliseconds" << std::endl; - std::cout << "3.6) update counter and mask took: " << update_counter_and_mask_ms.count()/1000 << " milliseconds" << std::endl; - cur_graph_view.clear_edge_mask(); cur_graph_view.attach_edge_mask(dodg_mask.view()); @@ -963,13 +847,12 @@ k_truss(raft::handle_t const& handle, return count == 0 ? false : true; }, dodg_mask.mutable_view(), - false); + true); rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; - //#if 0 std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -987,9 +870,6 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_dsts), std::move(edgelist_wgts), false); - //#endif - - std::cout<< "result size = " << edgelist_srcs.size() << std::endl; return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); From cd4a104538703b747ce62f949536cc135f5a64f2 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 24 Jan 2025 07:42:33 -0800 Subject: [PATCH 13/13] create dodg without extracting the edges --- cpp/src/community/k_truss_impl.cuh | 122 +++++++++++++++++------------ 1 file changed, 71 insertions(+), 51 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 08215e574d1..ef47a137053 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -60,6 +60,16 @@ struct extract_weak_edges { }; +template +struct extract_edges { + __device__ thrust::optional> operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, weight_t wgt) const + { + return thrust::optional>{thrust::make_tuple(src, dst, wgt)}; + } +}; + + template struct extract_triangles_endpoints { size_t chunk_start{}; @@ -349,73 +359,54 @@ k_truss(raft::handle_t const& handle, auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - auto vertex_partition_range_lasts = - renumber_map - ? std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) - : std::nullopt; - - auto out_degrees = cur_graph_view.compute_out_degrees(handle); edge_src_property_t edge_src_out_degrees(handle, cur_graph_view); edge_dst_property_t edge_dst_out_degrees(handle, cur_graph_view); - update_edge_src_property( - handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); - update_edge_dst_property( - handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); - - rmm::device_uvector srcs(0, handle.get_stream()); - rmm::device_uvector dsts(0, handle.get_stream()); - - edge_weight_view = edge_weight + + cugraph::edge_property_t, bool> dodg_mask( + handle, cur_graph_view); + { + auto out_degrees = cur_graph_view.compute_out_degrees(handle); + update_edge_src_property( + handle, cur_graph_view, out_degrees.begin(), edge_src_out_degrees.mutable_view()); + update_edge_dst_property( + handle, cur_graph_view, out_degrees.begin(), edge_dst_out_degrees.mutable_view()); + + edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - if (edge_weight_view) { - std::tie(srcs, dsts, wgts) = extract_transform_e( + + cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); + + std::cout<<"number of edges before dodg= " << cur_graph_view.compute_number_of_edges(handle) << std::endl; + cugraph::transform_e( handle, cur_graph_view, edge_src_out_degrees.view(), edge_dst_out_degrees.view(), - *edge_weight_view, - extract_low_to_high_degree_weighted_edges_t{}); - } else { - std::tie(srcs, dsts) = - extract_transform_e(handle, - cur_graph_view, - edge_src_out_degrees.view(), - edge_dst_out_degrees.view(), - edge_dummy_property_t{}.view(), - extract_low_to_high_degree_edges_t{}); + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_out_degree, auto dst_out_degree, thrust::nullopt_t) { + return (src_out_degree < dst_out_degree) + ? true + : ((src_out_degree == dst_out_degree) && + (src < dst) /* tie-breaking using vertex ID */) + ? true : false; + }, + dodg_mask.mutable_view(), + false); + + if (cur_graph_view.has_edge_mask()) { cur_graph_view.clear_edge_mask(); } + cur_graph_view.attach_edge_mask(dodg_mask.view()); } - cugraph::edge_property_t, bool> dodg_mask( - handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, dodg_mask.mutable_view(), bool{false}); - - // Masking edges not part of the DODG - edgelist_dodg.insert(srcs.begin(), srcs.end(), dsts.begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edgelist_dodg, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - return true; - }, - dodg_mask.mutable_view(), - false); - - - edgelist_dodg.clear(); - // 4. Compute triangle count using nbr_intersection and unroll weak edges { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + std::cout<<"num_edges = " << cur_graph_view.compute_number_of_edges(handle) << std::endl; + cugraph::edge_property_t weak_edges_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, weak_edges_mask.mutable_view(), bool{true}); @@ -853,6 +844,7 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -863,6 +855,26 @@ k_truss(raft::handle_t const& handle, renumber_map ? std::make_optional(raft::device_span((*renumber_map).data(), (*renumber_map).size())) : std::nullopt); + + + /* + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + extract_edges{}); + */ + + + + auto weight_size = 0; + if (edge_weight_view){ + weight_size = edgelist_wgts->size(); + //raft::print_device_vector("edgelist_wgts", edgelist_wgts->data(), 20, std::cout); + } + + std::cout<<"edgelist size = " << edgelist_srcs.size() << " edgelist weight size = " << weight_size << std::endl; std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, @@ -870,9 +882,17 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + + if (edge_weight_view){ + weight_size = edgelist_wgts->size(); + } + std::cout<<"after sym - edgelist size = " << edgelist_srcs.size() << " edgelist weight size = " << weight_size << std::endl; return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } } } // namespace cugraph + + + +// Current state of k-truss as of Thursday. the next version will be leveraging edge masking \ No newline at end of file