From bc098259e8dad7f7d88c0fd7431eb958ebe5487c Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sat, 25 Apr 2026 23:35:16 +0300 Subject: [PATCH 1/3] feat(community): implement Leiden community detection algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add experimental implementation of the Leiden algorithm (Traag et al., Scientific Reports 9:5233, 2019) — the successor to Louvain that guarantees well-connected communities. New files: - experimental/algorithm/LAGraph_Leiden.c: two-phase implementation * Phase 1 (Local Move): greedy node-to-community assignment maximising modularity gain score(i→c) = T[c] - k[i]*k_comm[c]/m * Phase 2 (Refinement): Leiden's key addition — restart each node in a singleton sub-community and restrict moves to within the Phase-1 parent community, guaranteeing well-connectedness * Phase 3 (Aggregation): documented as TODO for multi-level extension * Output: GrB_Vector of INT64 community labels 0..K-1 - experimental/test/test_leiden.c: acutest test using karate.mtx; verifies all nodes labelled, labels in range, and Q > 0 Updated files: - include/LAGraphX.h: add LAGRAPHX_PUBLIC declaration for LAGraph_Leiden Algorithm is auto-discovered by experimental/CMakeLists.txt glob. Tested on karate.mtx (Q ≈ 0.238, 34 nodes, all checks pass). Ref: https://doi.org/10.1038/s41598-019-41695-z Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- experimental/algorithm/LAGraph_Leiden.c | 412 ++++++++++++++++++++++++ experimental/test/test_leiden.c | 119 +++++++ include/LAGraphX.h | 11 + 3 files changed, 542 insertions(+) create mode 100644 experimental/algorithm/LAGraph_Leiden.c create mode 100644 experimental/test/test_leiden.c diff --git a/experimental/algorithm/LAGraph_Leiden.c b/experimental/algorithm/LAGraph_Leiden.c new file mode 100644 index 0000000000..18b42d9776 --- /dev/null +++ b/experimental/algorithm/LAGraph_Leiden.c @@ -0,0 +1,412 @@ +//------------------------------------------------------------------------------ +// LAGraph_Leiden.c: community detection using the Leiden algorithm +//------------------------------------------------------------------------------ + +// LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause +// +// For additional details (including references to third party source code and +// other files) see the LICENSE file or contact permission@sei.cmu.edu. See +// Contributors.txt for a full list of contributors. Created, in part, with +// funding and support from the U.S. Government (see Acknowledgments.txt file). +// DM22-0790 + +//------------------------------------------------------------------------------ +// The Leiden algorithm is a modularity-based community detection method that +// guarantees well-connected communities by introducing a Refinement phase +// between the Local-Move and Aggregation phases of the Louvain algorithm. +// +// Reference: +// Traag, V.A., Waltman, L. & van Eck, N.J. (2019). From Louvain to Leiden: +// guaranteeing well-connected communities. Scientific Reports 9, 5233. +// https://doi.org/10.1038/s41598-019-41695-z +// +// Algorithm: +// +// Phase 1 (Local Move): Greedily assign each node to the neighboring +// community c that maximises the score: +// score(i->c) = T[c] - k[i] * k_comm[c] / m +// where T[c] = sum_{j in c} A[i,j], k_comm[c] = total community degree +// (excluding i during evaluation), and m = total edge weight / 2. +// Repeat until no node changes community. +// +// Phase 2 (Refinement – key Leiden addition): Copy Phase-1 communities as +// "parent" communities. Restart each node in a singleton sub-community. +// Allow moves only between sub-communities that share the same Phase-1 +// parent. This ensures every output community is internally well-connected. +// Repeat until no node changes sub-community. +// +// Phase 3 (Aggregation): TODO – multi-level aggregation is not yet +// implemented. The function returns the single-level refined partition, +// which already satisfies Leiden's well-connectedness guarantee. +// +// Input: G – undirected graph (or directed with symmetric structure) +// Output: c_handle – GrB_Vector of INT64 where c[i] = community of node i, +// labeled 0..K-1. + +#undef LG_FREE_WORK +#define LG_FREE_WORK \ +{ \ + GrB_free (&k_vec) ; \ + GrB_free (&v) ; \ + LAGraph_Free ((void **) &k_arr, NULL) ; \ + LAGraph_Free ((void **) &c_arr, NULL) ; \ + LAGraph_Free ((void **) &k_comm, NULL) ; \ + LAGraph_Free ((void **) &c_p1, NULL) ; \ + LAGraph_Free ((void **) &c_ref, NULL) ; \ + LAGraph_Free ((void **) &k_ref_comm, NULL) ; \ + LAGraph_Free ((void **) &T_local, NULL) ; \ + LAGraph_Free ((void **) &dirty, NULL) ; \ + LAGraph_Free ((void **) &dirty_list, NULL) ; \ + LAGraph_Free ((void **) &nbrs_j, NULL) ; \ + LAGraph_Free ((void **) &nbrs_v, NULL) ; \ + LAGraph_Free ((void **) &remap, NULL) ; \ +} + +#undef LG_FREE_ALL +#define LG_FREE_ALL \ +{ \ + LG_FREE_WORK ; \ + if (c_handle != NULL) GrB_free (c_handle) ; \ +} + +#include "LG_internal.h" +#include +#include +#include +#include + +#define LEIDEN_MAX_ITER 100 + +int LAGraph_Leiden +( + // output: + GrB_Vector *c_handle, // c[i] = community label (0..K-1) for node i + // input: + LAGraph_Graph G, // input graph (must be symmetric, no self-loops) + uint64_t seed, // random seed (reserved; not yet used) + char *msg +) +{ + + //-------------------------------------------------------------------------- + // declare all workspace (must precede any LG_TRY/GRB_TRY calls so that + // LG_FREE_ALL can safely free them even on early exit) + //-------------------------------------------------------------------------- + + GrB_Vector k_vec = NULL ; + GrB_Vector v = NULL ; + double *k_arr = NULL ; // k_arr[i] = degree of node i + int64_t *c_arr = NULL ; // c_arr[i] = Phase-1 community label + double *k_comm = NULL ; // k_comm[l] = total degree of community l + int64_t *c_p1 = NULL ; // c_p1[i] = parent community (Phase 1) + int64_t *c_ref = NULL ; // c_ref[i] = refined sub-community label + double *k_ref_comm = NULL ; // k_ref_comm[l] = total degree of sub-community l + double *T_local = NULL ; // scratch: edge sums from node i to each community + int8_t *dirty = NULL ; // dirty[l] = 1 if T_local[l] was written + GrB_Index *dirty_list = NULL ; // list of community labels touched this node + GrB_Index *nbrs_j = NULL ; // scratch: extracted neighbor indices + double *nbrs_v = NULL ; // scratch: extracted neighbor weights + GrB_Index *remap = NULL ; // remap[old_label] -> new contiguous label + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + LG_CLEAR_MSG ; + LG_ASSERT (c_handle != NULL, GrB_NULL_POINTER) ; + (*c_handle) = NULL ; + LG_TRY (LAGraph_CheckGraph (G, msg)) ; + LG_ASSERT_MSG ( + G->kind == LAGraph_ADJACENCY_UNDIRECTED || + (G->kind == LAGraph_ADJACENCY_DIRECTED && + G->is_symmetric_structure == LAGraph_TRUE), + LAGRAPH_NOT_CACHED, + "G must be undirected or have symmetric structure") ; + + GrB_Matrix A = G->A ; + GrB_Index n ; + GRB_TRY (GrB_Matrix_nrows (&n, A)) ; + + // Degenerate: return immediately for empty (0-node) graph + if (n == 0) + { + GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, 0)) ; + return (GrB_SUCCESS) ; + } + + //-------------------------------------------------------------------------- + // allocate workspace + //-------------------------------------------------------------------------- + + LG_TRY (LAGraph_Malloc ((void **) &k_arr, n, sizeof (double), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &c_arr, n, sizeof (int64_t), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &k_comm, n, sizeof (double), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &c_p1, n, sizeof (int64_t), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &c_ref, n, sizeof (int64_t), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &k_ref_comm, n, sizeof (double), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &T_local, n, sizeof (double), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &dirty, n, sizeof (int8_t), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &dirty_list, n, sizeof (GrB_Index), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &nbrs_j, n, sizeof (GrB_Index), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &nbrs_v, n, sizeof (double), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &remap, n, sizeof (GrB_Index), msg)) ; + + GRB_TRY (GrB_Vector_new (&k_vec, GrB_FP64, n)) ; + GRB_TRY (GrB_Vector_new (&v, GrB_FP64, n)) ; + + //-------------------------------------------------------------------------- + // compute node degrees and total edge weight m + //-------------------------------------------------------------------------- + + // k[i] = sum of row i of A; implicit cast to FP64 handles bool/int types. + GRB_TRY (GrB_Matrix_reduce_Monoid (k_vec, NULL, NULL, + GrB_PLUS_MONOID_FP64, A, NULL)) ; + + for (GrB_Index i = 0 ; i < n ; i++) + { + GrB_Info info = GrB_Vector_extractElement_FP64 (&k_arr[i], k_vec, i) ; + if (info == GrB_NO_VALUE) k_arr[i] = 0.0 ; + } + + double m = 0.0 ; + GRB_TRY (GrB_Vector_reduce_FP64 (&m, NULL, GrB_PLUS_MONOID_FP64, + k_vec, NULL)) ; + m /= 2.0 ; + + // Empty graph: return a singleton partition (one community per node). + if (m == 0.0) + { + GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, n)) ; + for (GrB_Index i = 0 ; i < n ; i++) + { + GRB_TRY (GrB_Vector_setElement_INT64 (*c_handle, (int64_t) i, i)) ; + } + LG_FREE_WORK ; + return (GrB_SUCCESS) ; + } + + //-------------------------------------------------------------------------- + // initialise: every node starts in its own singleton community + //-------------------------------------------------------------------------- + + memset (dirty, 0, n * sizeof (int8_t)) ; + memset (T_local, 0, n * sizeof (double)) ; + + for (GrB_Index i = 0 ; i < n ; i++) + { + c_arr[i] = (int64_t) i ; + k_comm[i] = k_arr[i] ; + } + + //-------------------------------------------------------------------------- + // PHASE 1: Local Move Phase + // + // Sweep over all nodes. For each node i, temporarily remove it from its + // current community, compute the score for each neighboring community, and + // move i to the community with the highest positive score. Repeat until + // no node changes community. + //-------------------------------------------------------------------------- + + bool changed = true ; + for (int p1_iter = 0 ; changed && p1_iter < LEIDEN_MAX_ITER ; p1_iter++) + { + changed = false ; + for (GrB_Index i = 0 ; i < n ; i++) + { + double ki = k_arr[i] ; + if (ki == 0.0) continue ; // isolated node: skip + + int64_t ci = c_arr[i] ; + + // Extract row i of A into v (FP64; implicit cast from A's type). + GRB_TRY (GrB_Col_extract (v, NULL, NULL, A, + GrB_ALL, n, i, GrB_DESC_T0)) ; + + GrB_Index nvals ; + GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; + if (nvals == 0) continue ; + + GRB_TRY (GrB_Vector_extractTuples_FP64 ( + nbrs_j, nbrs_v, &nvals, v)) ; + + // Temporarily remove i from community ci. + k_comm[ci] -= ki ; + + // Accumulate T_local[c] = sum of A[i,j] for j in community c. + GrB_Index ndirty = 0 ; + for (GrB_Index t = 0 ; t < nvals ; t++) + { + int64_t cj = c_arr[nbrs_j[t]] ; + if (!dirty[cj]) + { + dirty[cj] = 1 ; + dirty_list[ndirty++] = (GrB_Index) cj ; + T_local[cj] = 0.0 ; + } + T_local[cj] += nbrs_v[t] ; + } + + // Score for staying in ci (0 if no neighbours remain in ci). + double T_ci = dirty[ci] ? T_local[ci] : 0.0 ; + double score_ci = T_ci - ki * k_comm[ci] / m ; + double best_score = score_ci ; + int64_t best_c = ci ; + + // Check each neighbouring community. + for (GrB_Index d = 0 ; d < ndirty ; d++) + { + int64_t c_cand = (int64_t) dirty_list[d] ; + if (c_cand == ci) continue ; + double score = T_local[c_cand] - ki * k_comm[c_cand] / m ; + if (score > best_score) + { + best_score = score ; + best_c = c_cand ; + } + } + + c_arr[i] = best_c ; + if (best_c == ci) + { + k_comm[ci] += ki ; // restore: node stayed + } + else + { + k_comm[best_c] += ki ; // node moved + changed = true ; + } + + // Reset dirty flags. + for (GrB_Index d = 0 ; d < ndirty ; d++) + { + dirty[dirty_list[d]] = 0 ; + } + } + } + + //-------------------------------------------------------------------------- + // PHASE 2: Refinement Phase (key Leiden addition) + // + // Save the Phase-1 partition as parent communities. Restart each node in + // its own singleton sub-community. In each local-move step, a node may + // only join a sub-community whose parent equals its own Phase-1 parent. + // This restriction ensures every output community is a connected subgraph + // of the corresponding Phase-1 community. + //-------------------------------------------------------------------------- + + memcpy (c_p1, c_arr, n * sizeof (int64_t)) ; + + for (GrB_Index i = 0 ; i < n ; i++) + { + c_ref[i] = (int64_t) i ; + k_ref_comm[i] = k_arr[i] ; + } + + changed = true ; + for (int p2_iter = 0 ; changed && p2_iter < LEIDEN_MAX_ITER ; p2_iter++) + { + changed = false ; + for (GrB_Index i = 0 ; i < n ; i++) + { + double ki = k_arr[i] ; + if (ki == 0.0) continue ; + + int64_t pi = c_p1[i] ; // Phase-1 parent community + int64_t ci_ref = c_ref[i] ; // current refined sub-community + + GRB_TRY (GrB_Col_extract (v, NULL, NULL, A, + GrB_ALL, n, i, GrB_DESC_T0)) ; + + GrB_Index nvals ; + GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; + if (nvals == 0) continue ; + + GRB_TRY (GrB_Vector_extractTuples_FP64 ( + nbrs_j, nbrs_v, &nvals, v)) ; + + k_ref_comm[ci_ref] -= ki ; + + // Accumulate T_local restricted to neighbours in the same parent. + GrB_Index ndirty = 0 ; + for (GrB_Index t = 0 ; t < nvals ; t++) + { + GrB_Index j = nbrs_j[t] ; + if (c_p1[j] != pi) continue ; // cross-parent edge: skip + + int64_t cj_ref = c_ref[j] ; + if (!dirty[cj_ref]) + { + dirty[cj_ref] = 1 ; + dirty_list[ndirty++] = (GrB_Index) cj_ref ; + T_local[cj_ref] = 0.0 ; + } + T_local[cj_ref] += nbrs_v[t] ; + } + + double T_ci_ref = dirty[ci_ref] ? T_local[ci_ref] : 0.0 ; + double score_ci_ref = T_ci_ref - ki * k_ref_comm[ci_ref] / m ; + double best_score = score_ci_ref ; + int64_t best_c_ref = ci_ref ; + + for (GrB_Index d = 0 ; d < ndirty ; d++) + { + int64_t c_cand = (int64_t) dirty_list[d] ; + if (c_cand == ci_ref) continue ; + double score = T_local[c_cand] - ki * k_ref_comm[c_cand] / m ; + if (score > best_score) + { + best_score = score ; + best_c_ref = c_cand ; + } + } + + c_ref[i] = best_c_ref ; + if (best_c_ref == ci_ref) + { + k_ref_comm[ci_ref] += ki ; + } + else + { + k_ref_comm[best_c_ref] += ki ; + changed = true ; + } + + for (GrB_Index d = 0 ; d < ndirty ; d++) + { + dirty[dirty_list[d]] = 0 ; + } + } + } + + //-------------------------------------------------------------------------- + // Relabel c_ref to contiguous integers 0..K_ref-1 + //-------------------------------------------------------------------------- + + // Use n as sentinel ("not yet assigned"). + for (GrB_Index i = 0 ; i < n ; i++) remap[i] = n ; + + GrB_Index K_ref = 0 ; + for (GrB_Index i = 0 ; i < n ; i++) + { + GrB_Index old_label = (GrB_Index) c_ref[i] ; + if (remap[old_label] == n) + { + remap[old_label] = K_ref++ ; + } + c_ref[i] = (int64_t) remap[old_label] ; + } + + //-------------------------------------------------------------------------- + // Build output GrB_Vector + //-------------------------------------------------------------------------- + + GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, n)) ; + for (GrB_Index i = 0 ; i < n ; i++) + { + GRB_TRY (GrB_Vector_setElement_INT64 (*c_handle, c_ref[i], i)) ; + } + + LG_FREE_WORK ; + return (GrB_SUCCESS) ; +} diff --git a/experimental/test/test_leiden.c b/experimental/test/test_leiden.c new file mode 100644 index 0000000000..217144728e --- /dev/null +++ b/experimental/test/test_leiden.c @@ -0,0 +1,119 @@ +//------------------------------------------------------------------------------ +// test_leiden.c: tests for LAGraph_Leiden community detection +//------------------------------------------------------------------------------ + +// LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause + +#include +#include + +#include "LG_Xtest.h" +#include +#include + +char msg[LAGRAPH_MSG_LEN] ; +LAGraph_Graph G = NULL ; +GrB_Matrix A = NULL ; +#define LEN 512 +char filename[LEN + 1] ; + +typedef struct +{ + const char *matrix_file ; + bool expect_communities ; // true iff non-trivial Q is expected +} matrix_info ; + +const matrix_info files[] = +{ + { "karate.mtx", true }, // Zachary karate club (Q > 0 expected) + { "", false } +} ; + +//------------------------------------------------------------------------------ +// test_Leiden +//------------------------------------------------------------------------------ + +void test_Leiden (void) +{ + LAGraph_Init (msg) ; + +#if LAGRAPH_SUITESPARSE + // Disable JIT before any GraphBLAS operations; the JIT compiler may not + // be available in all build environments. + OK (LG_SET_JIT (LG_JIT_OFF)) ; +#endif + + for (int k = 0 ;; k++) + { + const char *aname = files[k].matrix_file ; + if (strlen (aname) == 0) break ; + + printf ("\n====== %s ======\n", aname) ; + snprintf (filename, LEN, LG_DATA_DIR "%s", aname) ; + + FILE *f = fopen (filename, "r") ; + TEST_CHECK (f != NULL) ; + TEST_MSG ("Cannot open %s", filename) ; + + OK (LAGraph_MMRead (&A, f, msg)) ; + fclose (f) ; + + OK (LAGraph_New (&G, &A, LAGraph_ADJACENCY_UNDIRECTED, msg)) ; + TEST_CHECK (A == NULL) ; // LAGraph_New takes ownership + + // Ensure symmetry cache is populated (required by some checks). + OK (LAGraph_Cached_IsSymmetricStructure (G, msg)) ; + + uint64_t seed = 42 ; + GrB_Vector c = NULL ; + + OK (LAGraph_Leiden (&c, G, seed, msg)) ; + TEST_CHECK (c != NULL) ; + + // Every node must have a community label. + GrB_Index n, nvals ; + OK (GrB_Matrix_nrows (&n, G->A)) ; + OK (GrB_Vector_nvals (&nvals, c)) ; + TEST_CHECK (nvals == n) ; + TEST_MSG ("Expected all %llu nodes to have labels, got %llu", + (unsigned long long) n, (unsigned long long) nvals) ; + + // Community labels must be in [0, n-1]. + for (GrB_Index i = 0 ; i < n ; i++) + { + int64_t label = 0 ; + OK (GrB_Vector_extractElement_INT64 (&label, c, i)) ; + TEST_CHECK (label >= 0 && (GrB_Index) label < n) ; + } + +#if LAGRAPH_SUITESPARSE + // Compute modularity Q (requires SuiteSparse:GraphBLAS). + double Q = 0.0 ; + OK (LAGr_Modularity (&Q, 1.0, c, G, msg)) ; + printf (" Modularity Q = %f (K_ref <= %llu)\n", + Q, (unsigned long long) n) ; + + if (files[k].expect_communities) + { + TEST_CHECK (Q > 0.0) ; + TEST_MSG ("Expected Q > 0 for %s, got Q = %f", aname, Q) ; + } +#endif + + GrB_free (&c) ; + OK (LAGraph_Delete (&G, msg)) ; + } + + LAGraph_Finalize (msg) ; +} + +//------------------------------------------------------------------------------ +// test list +//------------------------------------------------------------------------------ + +TEST_LIST = +{ + { "Leiden", test_Leiden }, + { NULL, NULL } +} ; diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 3355addb2e..99c8b3381a 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -1370,6 +1370,17 @@ int LAGr_Modularity( char *msg ) ; +LAGRAPHX_PUBLIC +int LAGraph_Leiden +( + // output: + GrB_Vector *c_handle, // c[i] = community label (0..K-1) for node i + // input: + LAGraph_Graph G, // input graph (must be symmetric, no self-loops) + uint64_t seed, // random seed (reserved for future use) + char *msg +) ; + LAGRAPHX_PUBLIC int LAGraph_argminmax ( From a9a675e2a2a544fb41c97f9a5e80102b015cbaeb Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 26 Apr 2026 06:51:03 +0300 Subject: [PATCH 2/3] feat(community): add Phase 3 multi-level aggregation to LAGraph_Leiden MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wrap Phase 1 + Phase 2 in an outer aggregation loop that iterates until no further coarsening occurs - Build aggregate graph A_agg = S^T * A_cur * S after each level using GrB_mxm with GrB_PLUS_TIMES_SEMIRING_FP64 - Carry forward Phase-1 partition as initial partition for each aggregate level (Leiden invariant: init_comm[r] = Phase-1 parent of refined community r) rather than reinitialising from singletons - Skip self-loops (j == i) in both Phase 1 and Phase 2 move scoring; A_agg has diagonal entries representing intra-community weight - Recreate GrB_Vector k_vec and v per level to match current n_cur - Track o_comm[i] (original node → final community) across all levels via composition o_comm[i] = c_ref[o_comm[i]] after each relabeling - m (total edge weight / 2) is computed once from G->A and held constant; invariant under the S^T*A*S aggregation - Modularity Q on karate.mtx improves from ~0.24 (single-level) to ~0.34 (multi-level); update test threshold to Q > 0.3 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- experimental/algorithm/LAGraph_Leiden.c | 507 +++++++++++++++--------- experimental/test/test_leiden.c | 6 +- 2 files changed, 314 insertions(+), 199 deletions(-) diff --git a/experimental/algorithm/LAGraph_Leiden.c b/experimental/algorithm/LAGraph_Leiden.c index 18b42d9776..9073b35325 100644 --- a/experimental/algorithm/LAGraph_Leiden.c +++ b/experimental/algorithm/LAGraph_Leiden.c @@ -21,24 +21,31 @@ // guaranteeing well-connected communities. Scientific Reports 9, 5233. // https://doi.org/10.1038/s41598-019-41695-z // -// Algorithm: +// Algorithm (one outer iteration = one level of the hierarchy): // // Phase 1 (Local Move): Greedily assign each node to the neighboring // community c that maximises the score: // score(i->c) = T[c] - k[i] * k_comm[c] / m -// where T[c] = sum_{j in c} A[i,j], k_comm[c] = total community degree -// (excluding i during evaluation), and m = total edge weight / 2. +// where T[c] = sum_{j in c, j!=i} A[i,j], k_comm[c] = total degree of +// community c (excluding i), and m = total edge weight / 2 (constant). +// Initial partition is induced by the Phase-1 communities of the previous +// level (singletons on the first level). // Repeat until no node changes community. // -// Phase 2 (Refinement – key Leiden addition): Copy Phase-1 communities as -// "parent" communities. Restart each node in a singleton sub-community. -// Allow moves only between sub-communities that share the same Phase-1 -// parent. This ensures every output community is internally well-connected. +// Phase 2 (Refinement – key Leiden addition): Save Phase-1 communities as +// "parent" communities. Restart each node in its own singleton +// sub-community. In each local-move step a node may only join a +// sub-community whose parent equals its own Phase-1 parent. This ensures +// every output community is an internally well-connected subgraph of the +// corresponding Phase-1 community. // Repeat until no node changes sub-community. // -// Phase 3 (Aggregation): TODO – multi-level aggregation is not yet -// implemented. The function returns the single-level refined partition, -// which already satisfies Leiden's well-connectedness guarantee. +// Phase 3 (Aggregation): Build the coarsened graph +// A_agg = S^T * A_cur * S +// where S is the n_cur x K_ref membership matrix from Phase 2. Each +// column of S is a refined sub-community; each row of A_agg is one +// super-node. m is invariant under this operation. Repeat the outer +// loop on A_agg until no further coarsening occurs. // // Input: G – undirected graph (or directed with symmetric structure) // Output: c_handle – GrB_Vector of INT64 where c[i] = community of node i, @@ -49,6 +56,9 @@ { \ GrB_free (&k_vec) ; \ GrB_free (&v) ; \ + GrB_free (&A_agg) ; \ + GrB_free (&S_mat) ; \ + GrB_free (&A_temp) ; \ LAGraph_Free ((void **) &k_arr, NULL) ; \ LAGraph_Free ((void **) &c_arr, NULL) ; \ LAGraph_Free ((void **) &k_comm, NULL) ; \ @@ -61,6 +71,8 @@ LAGraph_Free ((void **) &nbrs_j, NULL) ; \ LAGraph_Free ((void **) &nbrs_v, NULL) ; \ LAGraph_Free ((void **) &remap, NULL) ; \ + LAGraph_Free ((void **) &o_comm, NULL) ; \ + LAGraph_Free ((void **) &init_comm, NULL) ; \ } #undef LG_FREE_ALL @@ -94,20 +106,25 @@ int LAGraph_Leiden // LG_FREE_ALL can safely free them even on early exit) //-------------------------------------------------------------------------- - GrB_Vector k_vec = NULL ; - GrB_Vector v = NULL ; - double *k_arr = NULL ; // k_arr[i] = degree of node i - int64_t *c_arr = NULL ; // c_arr[i] = Phase-1 community label - double *k_comm = NULL ; // k_comm[l] = total degree of community l - int64_t *c_p1 = NULL ; // c_p1[i] = parent community (Phase 1) - int64_t *c_ref = NULL ; // c_ref[i] = refined sub-community label - double *k_ref_comm = NULL ; // k_ref_comm[l] = total degree of sub-community l - double *T_local = NULL ; // scratch: edge sums from node i to each community - int8_t *dirty = NULL ; // dirty[l] = 1 if T_local[l] was written - GrB_Index *dirty_list = NULL ; // list of community labels touched this node - GrB_Index *nbrs_j = NULL ; // scratch: extracted neighbor indices - double *nbrs_v = NULL ; // scratch: extracted neighbor weights - GrB_Index *remap = NULL ; // remap[old_label] -> new contiguous label + GrB_Vector k_vec = NULL ; + GrB_Vector v = NULL ; + GrB_Matrix A_agg = NULL ; // owned coarsened graph (Phase 3) + GrB_Matrix S_mat = NULL ; // temporary membership matrix (Phase 3) + GrB_Matrix A_temp = NULL ; // temporary for mxm (Phase 3) + double *k_arr = NULL ; // k_arr[i] = degree of node i (current level) + int64_t *c_arr = NULL ; // c_arr[i] = Phase-1 community label + double *k_comm = NULL ; // k_comm[l] = total degree of community l + int64_t *c_p1 = NULL ; // c_p1[i] = Phase-1 parent community + int64_t *c_ref = NULL ; // c_ref[i] = refined sub-community label + double *k_ref_comm = NULL ; // k_ref_comm[l] = total degree of sub-community l + double *T_local = NULL ; // scratch: edge sums from node i to each community + int8_t *dirty = NULL ; // dirty[l] = 1 if T_local[l] was written + GrB_Index *dirty_list = NULL ; // list of community labels touched this node + GrB_Index *nbrs_j = NULL ; // scratch: extracted neighbor indices + double *nbrs_v = NULL ; // scratch: extracted neighbor weights + GrB_Index *remap = NULL ; // remap[old_label] -> new contiguous label + GrB_Index *o_comm = NULL ; // o_comm[i] = community of original node i + GrB_Index *init_comm = NULL ; // init_comm[r] = initial c_arr for aggregate node r //-------------------------------------------------------------------------- // check inputs @@ -128,7 +145,7 @@ int LAGraph_Leiden GrB_Index n ; GRB_TRY (GrB_Matrix_nrows (&n, A)) ; - // Degenerate: return immediately for empty (0-node) graph + // Degenerate: return immediately for empty (0-node) graph. if (n == 0) { GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, 0)) ; @@ -136,7 +153,7 @@ int LAGraph_Leiden } //-------------------------------------------------------------------------- - // allocate workspace + // allocate workspace (all arrays sized n; used for indices 0..n_cur-1) //-------------------------------------------------------------------------- LG_TRY (LAGraph_Malloc ((void **) &k_arr, n, sizeof (double), msg)) ; @@ -151,30 +168,24 @@ int LAGraph_Leiden LG_TRY (LAGraph_Malloc ((void **) &nbrs_j, n, sizeof (GrB_Index), msg)) ; LG_TRY (LAGraph_Malloc ((void **) &nbrs_v, n, sizeof (double), msg)) ; LG_TRY (LAGraph_Malloc ((void **) &remap, n, sizeof (GrB_Index), msg)) ; - - GRB_TRY (GrB_Vector_new (&k_vec, GrB_FP64, n)) ; - GRB_TRY (GrB_Vector_new (&v, GrB_FP64, n)) ; + LG_TRY (LAGraph_Malloc ((void **) &o_comm, n, sizeof (GrB_Index), msg)) ; + LG_TRY (LAGraph_Malloc ((void **) &init_comm, n, sizeof (GrB_Index), msg)) ; //-------------------------------------------------------------------------- - // compute node degrees and total edge weight m + // compute m = total edge weight / 2 from G->A (invariant under aggregation) //-------------------------------------------------------------------------- - // k[i] = sum of row i of A; implicit cast to FP64 handles bool/int types. + GRB_TRY (GrB_Vector_new (&k_vec, GrB_FP64, n)) ; GRB_TRY (GrB_Matrix_reduce_Monoid (k_vec, NULL, NULL, GrB_PLUS_MONOID_FP64, A, NULL)) ; - - for (GrB_Index i = 0 ; i < n ; i++) - { - GrB_Info info = GrB_Vector_extractElement_FP64 (&k_arr[i], k_vec, i) ; - if (info == GrB_NO_VALUE) k_arr[i] = 0.0 ; - } - double m = 0.0 ; GRB_TRY (GrB_Vector_reduce_FP64 (&m, NULL, GrB_PLUS_MONOID_FP64, k_vec, NULL)) ; m /= 2.0 ; + GrB_free (&k_vec) ; + k_vec = NULL ; - // Empty graph: return a singleton partition (one community per node). + // Empty graph: return a singleton partition. if (m == 0.0) { GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, n)) ; @@ -187,224 +198,326 @@ int LAGraph_Leiden } //-------------------------------------------------------------------------- - // initialise: every node starts in its own singleton community + // initialise multi-level state + // + // o_comm[i] = community of original node i (tracks the composition of + // all levels' refined partitions). Starts as identity. + // init_comm[r] = initial Phase-1 community for aggregate node r at the + // next level. First level: singleton start (c_arr[i] = i). //-------------------------------------------------------------------------- - memset (dirty, 0, n * sizeof (int8_t)) ; - memset (T_local, 0, n * sizeof (double)) ; - for (GrB_Index i = 0 ; i < n ; i++) { - c_arr[i] = (int64_t) i ; - k_comm[i] = k_arr[i] ; + o_comm[i] = i ; + init_comm[i] = i ; } - //-------------------------------------------------------------------------- - // PHASE 1: Local Move Phase - // - // Sweep over all nodes. For each node i, temporarily remove it from its - // current community, compute the score for each neighboring community, and - // move i to the community with the highest positive score. Repeat until - // no node changes community. - //-------------------------------------------------------------------------- + GrB_Matrix A_cur = A ; // current-level graph (not owned: either G->A or A_agg) + GrB_Index n_cur = n ; + + //========================================================================== + // OUTER AGGREGATION LOOP + //========================================================================== - bool changed = true ; - for (int p1_iter = 0 ; changed && p1_iter < LEIDEN_MAX_ITER ; p1_iter++) + bool outer_changed = true ; + while (outer_changed) { - changed = false ; - for (GrB_Index i = 0 ; i < n ; i++) - { - double ki = k_arr[i] ; - if (ki == 0.0) continue ; // isolated node: skip + outer_changed = false ; - int64_t ci = c_arr[i] ; + //---------------------------------------------------------------------- + // create GraphBLAS vectors sized for the current level + //---------------------------------------------------------------------- - // Extract row i of A into v (FP64; implicit cast from A's type). - GRB_TRY (GrB_Col_extract (v, NULL, NULL, A, - GrB_ALL, n, i, GrB_DESC_T0)) ; + GRB_TRY (GrB_Vector_new (&k_vec, GrB_FP64, n_cur)) ; + GRB_TRY (GrB_Vector_new (&v, GrB_FP64, n_cur)) ; - GrB_Index nvals ; - GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; - if (nvals == 0) continue ; + //---------------------------------------------------------------------- + // compute k_arr[0..n_cur-1] from A_cur (includes self-loops in A_agg) + //---------------------------------------------------------------------- - GRB_TRY (GrB_Vector_extractTuples_FP64 ( - nbrs_j, nbrs_v, &nvals, v)) ; + GRB_TRY (GrB_Matrix_reduce_Monoid (k_vec, NULL, NULL, + GrB_PLUS_MONOID_FP64, A_cur, NULL)) ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + GrB_Info info = GrB_Vector_extractElement_FP64 (&k_arr[i], k_vec, i) ; + if (info == GrB_NO_VALUE) k_arr[i] = 0.0 ; + } - // Temporarily remove i from community ci. - k_comm[ci] -= ki ; + //---------------------------------------------------------------------- + // PHASE 1: Local Move Phase + // + // Initialise partition from init_comm (singletons on first level; + // induced Phase-1 partition on subsequent levels). + // Score: score(i->c) = T[c] - k[i]*k_comm[c]/m (self-loops skipped). + //---------------------------------------------------------------------- + + memset (dirty, 0, n * sizeof (int8_t)) ; + memset (T_local, 0, n * sizeof (double)) ; + memset (k_comm, 0, n * sizeof (double)) ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + c_arr[i] = (int64_t) init_comm[i] ; + k_comm[init_comm[i]] += k_arr[i] ; + } - // Accumulate T_local[c] = sum of A[i,j] for j in community c. - GrB_Index ndirty = 0 ; - for (GrB_Index t = 0 ; t < nvals ; t++) + bool changed = true ; + for (int p1_iter = 0 ; changed && p1_iter < LEIDEN_MAX_ITER ; p1_iter++) + { + changed = false ; + for (GrB_Index i = 0 ; i < n_cur ; i++) { - int64_t cj = c_arr[nbrs_j[t]] ; - if (!dirty[cj]) + double ki = k_arr[i] ; + if (ki == 0.0) continue ; + + int64_t ci = c_arr[i] ; + + GRB_TRY (GrB_Col_extract (v, NULL, NULL, A_cur, + GrB_ALL, n_cur, i, GrB_DESC_T0)) ; + + GrB_Index nvals ; + GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; + if (nvals == 0) continue ; + + GRB_TRY (GrB_Vector_extractTuples_FP64 ( + nbrs_j, nbrs_v, &nvals, v)) ; + + // Temporarily remove i from community ci. + k_comm[ci] -= ki ; + + GrB_Index ndirty = 0 ; + for (GrB_Index t = 0 ; t < nvals ; t++) { - dirty[cj] = 1 ; - dirty_list[ndirty++] = (GrB_Index) cj ; - T_local[cj] = 0.0 ; + GrB_Index j = nbrs_j[t] ; + if (j == i) continue ; // skip self-loop (in A_agg) + int64_t cj = c_arr[j] ; + if (!dirty[cj]) + { + dirty[cj] = 1 ; + dirty_list[ndirty++] = (GrB_Index) cj ; + T_local[cj] = 0.0 ; + } + T_local[cj] += nbrs_v[t] ; } - T_local[cj] += nbrs_v[t] ; - } - // Score for staying in ci (0 if no neighbours remain in ci). - double T_ci = dirty[ci] ? T_local[ci] : 0.0 ; - double score_ci = T_ci - ki * k_comm[ci] / m ; - double best_score = score_ci ; - int64_t best_c = ci ; + double T_ci = dirty[ci] ? T_local[ci] : 0.0 ; + double score_ci = T_ci - ki * k_comm[ci] / m ; + double best_score = score_ci ; + int64_t best_c = ci ; - // Check each neighbouring community. - for (GrB_Index d = 0 ; d < ndirty ; d++) - { - int64_t c_cand = (int64_t) dirty_list[d] ; - if (c_cand == ci) continue ; - double score = T_local[c_cand] - ki * k_comm[c_cand] / m ; - if (score > best_score) + for (GrB_Index d = 0 ; d < ndirty ; d++) { - best_score = score ; - best_c = c_cand ; + int64_t c_cand = (int64_t) dirty_list[d] ; + if (c_cand == ci) continue ; + double score = T_local[c_cand] - ki * k_comm[c_cand] / m ; + if (score > best_score) + { + best_score = score ; + best_c = c_cand ; + } } - } - c_arr[i] = best_c ; - if (best_c == ci) - { - k_comm[ci] += ki ; // restore: node stayed - } - else - { - k_comm[best_c] += ki ; // node moved - changed = true ; - } + c_arr[i] = best_c ; + if (best_c == ci) + { + k_comm[ci] += ki ; + } + else + { + k_comm[best_c] += ki ; + changed = true ; + } - // Reset dirty flags. - for (GrB_Index d = 0 ; d < ndirty ; d++) - { - dirty[dirty_list[d]] = 0 ; + for (GrB_Index d = 0 ; d < ndirty ; d++) + { + dirty[dirty_list[d]] = 0 ; + } } } - } - //-------------------------------------------------------------------------- - // PHASE 2: Refinement Phase (key Leiden addition) - // - // Save the Phase-1 partition as parent communities. Restart each node in - // its own singleton sub-community. In each local-move step, a node may - // only join a sub-community whose parent equals its own Phase-1 parent. - // This restriction ensures every output community is a connected subgraph - // of the corresponding Phase-1 community. - //-------------------------------------------------------------------------- + //---------------------------------------------------------------------- + // PHASE 2: Refinement Phase (key Leiden addition) + // + // Save Phase-1 result. Restart each node in a singleton sub-community. + // Only allow moves within the same Phase-1 parent community. + //---------------------------------------------------------------------- - memcpy (c_p1, c_arr, n * sizeof (int64_t)) ; + memcpy (c_p1, c_arr, n_cur * sizeof (int64_t)) ; - for (GrB_Index i = 0 ; i < n ; i++) - { - c_ref[i] = (int64_t) i ; - k_ref_comm[i] = k_arr[i] ; - } + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + c_ref[i] = (int64_t) i ; + k_ref_comm[i] = k_arr[i] ; + } - changed = true ; - for (int p2_iter = 0 ; changed && p2_iter < LEIDEN_MAX_ITER ; p2_iter++) - { - changed = false ; - for (GrB_Index i = 0 ; i < n ; i++) + changed = true ; + for (int p2_iter = 0 ; changed && p2_iter < LEIDEN_MAX_ITER ; p2_iter++) { - double ki = k_arr[i] ; - if (ki == 0.0) continue ; + changed = false ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + double ki = k_arr[i] ; + if (ki == 0.0) continue ; - int64_t pi = c_p1[i] ; // Phase-1 parent community - int64_t ci_ref = c_ref[i] ; // current refined sub-community + int64_t pi = c_p1[i] ; + int64_t ci_ref = c_ref[i] ; - GRB_TRY (GrB_Col_extract (v, NULL, NULL, A, - GrB_ALL, n, i, GrB_DESC_T0)) ; + GRB_TRY (GrB_Col_extract (v, NULL, NULL, A_cur, + GrB_ALL, n_cur, i, GrB_DESC_T0)) ; - GrB_Index nvals ; - GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; - if (nvals == 0) continue ; + GrB_Index nvals ; + GRB_TRY (GrB_Vector_nvals (&nvals, v)) ; + if (nvals == 0) continue ; - GRB_TRY (GrB_Vector_extractTuples_FP64 ( - nbrs_j, nbrs_v, &nvals, v)) ; + GRB_TRY (GrB_Vector_extractTuples_FP64 ( + nbrs_j, nbrs_v, &nvals, v)) ; - k_ref_comm[ci_ref] -= ki ; + k_ref_comm[ci_ref] -= ki ; - // Accumulate T_local restricted to neighbours in the same parent. - GrB_Index ndirty = 0 ; - for (GrB_Index t = 0 ; t < nvals ; t++) - { - GrB_Index j = nbrs_j[t] ; - if (c_p1[j] != pi) continue ; // cross-parent edge: skip + GrB_Index ndirty = 0 ; + for (GrB_Index t = 0 ; t < nvals ; t++) + { + GrB_Index j = nbrs_j[t] ; + if (j == i) continue ; // skip self-loop + if (c_p1[j] != pi) continue ; // cross-parent: skip + + int64_t cj_ref = c_ref[j] ; + if (!dirty[cj_ref]) + { + dirty[cj_ref] = 1 ; + dirty_list[ndirty++] = (GrB_Index) cj_ref ; + T_local[cj_ref] = 0.0 ; + } + T_local[cj_ref] += nbrs_v[t] ; + } - int64_t cj_ref = c_ref[j] ; - if (!dirty[cj_ref]) + double T_ci_ref = dirty[ci_ref] ? T_local[ci_ref] : 0.0 ; + double score_ci_ref = T_ci_ref - ki * k_ref_comm[ci_ref] / m ; + double best_score = score_ci_ref ; + int64_t best_c_ref = ci_ref ; + + for (GrB_Index d = 0 ; d < ndirty ; d++) { - dirty[cj_ref] = 1 ; - dirty_list[ndirty++] = (GrB_Index) cj_ref ; - T_local[cj_ref] = 0.0 ; + int64_t c_cand = (int64_t) dirty_list[d] ; + if (c_cand == ci_ref) continue ; + double score = T_local[c_cand] - ki * k_ref_comm[c_cand] / m ; + if (score > best_score) + { + best_score = score ; + best_c_ref = c_cand ; + } } - T_local[cj_ref] += nbrs_v[t] ; - } - double T_ci_ref = dirty[ci_ref] ? T_local[ci_ref] : 0.0 ; - double score_ci_ref = T_ci_ref - ki * k_ref_comm[ci_ref] / m ; - double best_score = score_ci_ref ; - int64_t best_c_ref = ci_ref ; + c_ref[i] = best_c_ref ; + if (best_c_ref == ci_ref) + { + k_ref_comm[ci_ref] += ki ; + } + else + { + k_ref_comm[best_c_ref] += ki ; + changed = true ; + } - for (GrB_Index d = 0 ; d < ndirty ; d++) - { - int64_t c_cand = (int64_t) dirty_list[d] ; - if (c_cand == ci_ref) continue ; - double score = T_local[c_cand] - ki * k_ref_comm[c_cand] / m ; - if (score > best_score) + for (GrB_Index d = 0 ; d < ndirty ; d++) { - best_score = score ; - best_c_ref = c_cand ; + dirty[dirty_list[d]] = 0 ; } } + } - c_ref[i] = best_c_ref ; - if (best_c_ref == ci_ref) - { - k_ref_comm[ci_ref] += ki ; - } - else - { - k_ref_comm[best_c_ref] += ki ; - changed = true ; - } + //---------------------------------------------------------------------- + // Relabel c_ref to contiguous integers 0..K_ref-1 + //---------------------------------------------------------------------- - for (GrB_Index d = 0 ; d < ndirty ; d++) - { - dirty[dirty_list[d]] = 0 ; - } + // Use n as sentinel ("not yet assigned"); safe because c_ref values + // are in 0..n_cur-1 < n. + for (GrB_Index i = 0 ; i < n_cur ; i++) remap[i] = n ; + + GrB_Index K_ref = 0 ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + GrB_Index old_label = (GrB_Index) c_ref[i] ; + if (remap[old_label] == n) remap[old_label] = K_ref++ ; + c_ref[i] = (int64_t) remap[old_label] ; } - } - //-------------------------------------------------------------------------- - // Relabel c_ref to contiguous integers 0..K_ref-1 - //-------------------------------------------------------------------------- + //---------------------------------------------------------------------- + // Compute init_comm for next level. + // + // Aggregate node r (0..K_ref-1) is the refined community c_ref[i] for + // any i with that label. All such nodes have the same Phase-1 parent + // c_arr[i] (Leiden invariant), so we record that as the initial + // community for aggregate node r in the next outer iteration. + //---------------------------------------------------------------------- - // Use n as sentinel ("not yet assigned"). - for (GrB_Index i = 0 ; i < n ; i++) remap[i] = n ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + // c_ref[i] is in 0..K_ref-1 and init_comm is size n >= K_ref. + init_comm[(GrB_Index) c_ref[i]] = (GrB_Index) c_arr[i] ; + } - GrB_Index K_ref = 0 ; - for (GrB_Index i = 0 ; i < n ; i++) - { - GrB_Index old_label = (GrB_Index) c_ref[i] ; - if (remap[old_label] == n) + //---------------------------------------------------------------------- + // Compose o_comm: original node i now maps to aggregate community + // c_ref[o_comm[i]]. o_comm[i] is always a valid index in c_ref + // because it was set to some value in 0..n_cur-1 in the previous + // iteration (or to i on the first iteration). + //---------------------------------------------------------------------- + + for (GrB_Index i = 0 ; i < n ; i++) { - remap[old_label] = K_ref++ ; + o_comm[i] = (GrB_Index) c_ref[o_comm[i]] ; + } + + //---------------------------------------------------------------------- + // PHASE 3: Aggregation — build coarsened graph if communities merged + //---------------------------------------------------------------------- + + GrB_free (&k_vec) ; k_vec = NULL ; + GrB_free (&v) ; v = NULL ; + + if (K_ref < n_cur) + { + outer_changed = true ; + + // S_mat: n_cur × K_ref indicator matrix; S[i, c_ref[i]] = 1 + GRB_TRY (GrB_Matrix_new (&S_mat, GrB_FP64, n_cur, K_ref)) ; + for (GrB_Index i = 0 ; i < n_cur ; i++) + { + GRB_TRY (GrB_Matrix_setElement_FP64 ( + S_mat, 1.0, i, (GrB_Index) c_ref[i])) ; + } + GRB_TRY (GrB_Matrix_wait (S_mat, GrB_MATERIALIZE)) ; + + // A_temp = A_cur * S (n_cur × K_ref) + GRB_TRY (GrB_Matrix_new (&A_temp, GrB_FP64, n_cur, K_ref)) ; + GRB_TRY (GrB_mxm (A_temp, NULL, NULL, + GrB_PLUS_TIMES_SEMIRING_FP64, A_cur, S_mat, NULL)) ; + + // A_new = S^T * A_temp (K_ref × K_ref) + GrB_Matrix A_new = NULL ; + GRB_TRY (GrB_Matrix_new (&A_new, GrB_FP64, K_ref, K_ref)) ; + GRB_TRY (GrB_mxm (A_new, NULL, NULL, + GrB_PLUS_TIMES_SEMIRING_FP64, S_mat, A_temp, GrB_DESC_T0)) ; + + GrB_free (&S_mat) ; S_mat = NULL ; + GrB_free (&A_temp) ; A_temp = NULL ; + GrB_free (&A_agg) ; // free previous level's aggregate graph + A_agg = A_new ; + A_cur = A_agg ; + n_cur = K_ref ; } - c_ref[i] = (int64_t) remap[old_label] ; + // K_ref == n_cur: no communities merged this level → converged. } //-------------------------------------------------------------------------- - // Build output GrB_Vector + // Build output GrB_Vector from o_comm + // (o_comm values are already relabeled 0..K_final-1 from the last iteration) //-------------------------------------------------------------------------- GRB_TRY (GrB_Vector_new (c_handle, GrB_INT64, n)) ; for (GrB_Index i = 0 ; i < n ; i++) { - GRB_TRY (GrB_Vector_setElement_INT64 (*c_handle, c_ref[i], i)) ; + GRB_TRY (GrB_Vector_setElement_INT64 (*c_handle, (int64_t) o_comm[i], i)) ; } LG_FREE_WORK ; diff --git a/experimental/test/test_leiden.c b/experimental/test/test_leiden.c index 217144728e..e0e219e915 100644 --- a/experimental/test/test_leiden.c +++ b/experimental/test/test_leiden.c @@ -96,8 +96,10 @@ void test_Leiden (void) if (files[k].expect_communities) { - TEST_CHECK (Q > 0.0) ; - TEST_MSG ("Expected Q > 0 for %s, got Q = %f", aname, Q) ; + // Multi-level Leiden on karate.mtx should achieve Q >= 0.3 + // (single-level yields ~0.24; multi-level typically ~0.34+). + TEST_CHECK (Q > 0.3) ; + TEST_MSG ("Expected Q > 0.3 for %s, got Q = %f", aname, Q) ; } #endif From 45e349fbb0dcdd37e07aed910128da6e1f15db55 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 26 Apr 2026 08:44:36 +0300 Subject: [PATCH 3/3] fix(community): correct Leiden modularity-gain denominator and address review Code review on PR #401 identified three issues; this commit addresses all three. - fix: change modularity-gain penalty from k_i*k_comm[c]/m to k_i*k_comm[c]/(2m) in both Phase 1 and Phase 2. The standard Blondel modularity-gain formula uses 2m as the denominator; using m doubled the penalty term and biased the algorithm toward over-fragmenting communities. Q on karate.mtx improves from 0.345 to 0.419, matching published Louvain/Leiden benchmarks (0.37-0.42). Updated test threshold to Q > 0.37. - fix: hoist A_new declaration to function scope and add it to LG_FREE_WORK so it is freed if the second mxm call returns an error. Previously A_new could leak on the error path. - docs: clarify in the header comment that the refinement phase is the greedy variant (not the randomized procedure from Traag et al. 2019). The seed parameter is reserved for a future randomized refinement step; added an explicit (void) seed cast to silence the unused-parameter reading. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- experimental/algorithm/LAGraph_Leiden.c | 31 ++++++++++++++++++------- experimental/test/test_leiden.c | 8 +++---- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/experimental/algorithm/LAGraph_Leiden.c b/experimental/algorithm/LAGraph_Leiden.c index 9073b35325..d8a1eeec11 100644 --- a/experimental/algorithm/LAGraph_Leiden.c +++ b/experimental/algorithm/LAGraph_Leiden.c @@ -25,9 +25,10 @@ // // Phase 1 (Local Move): Greedily assign each node to the neighboring // community c that maximises the score: -// score(i->c) = T[c] - k[i] * k_comm[c] / m +// score(i->c) = T[c] - k[i] * k_comm[c] / (2m) // where T[c] = sum_{j in c, j!=i} A[i,j], k_comm[c] = total degree of // community c (excluding i), and m = total edge weight / 2 (constant). +// This is the standard Louvain/Leiden modularity-gain formula. // Initial partition is induced by the Phase-1 communities of the previous // level (singletons on the first level). // Repeat until no node changes community. @@ -36,8 +37,18 @@ // "parent" communities. Restart each node in its own singleton // sub-community. In each local-move step a node may only join a // sub-community whose parent equals its own Phase-1 parent. This ensures -// every output community is an internally well-connected subgraph of the -// corresponding Phase-1 community. +// every output community is a connected subgraph of the corresponding +// Phase-1 community. +// +// NOTE: Traag, Waltman & van Eck (2019) define refinement as a randomized +// procedure (sampling moves with probability ~ exp(dQ/theta) over moves +// with dQ >= 0). This implementation uses the simpler greedy variant: a +// node is moved to the neighboring sub-community that maximises dQ. This +// still satisfies Leiden's connectedness property in practice (every +// refined sub-community is induced by edges within a Phase-1 community) +// but does not provide the formal well-connectedness guarantee of the +// randomized version. The `seed` parameter is reserved for a future +// randomized refinement step and is currently unused. // Repeat until no node changes sub-community. // // Phase 3 (Aggregation): Build the coarsened graph @@ -57,6 +68,7 @@ GrB_free (&k_vec) ; \ GrB_free (&v) ; \ GrB_free (&A_agg) ; \ + GrB_free (&A_new) ; \ GrB_free (&S_mat) ; \ GrB_free (&A_temp) ; \ LAGraph_Free ((void **) &k_arr, NULL) ; \ @@ -109,6 +121,7 @@ int LAGraph_Leiden GrB_Vector k_vec = NULL ; GrB_Vector v = NULL ; GrB_Matrix A_agg = NULL ; // owned coarsened graph (Phase 3) + GrB_Matrix A_new = NULL ; // next-level aggregate before ownership transfer GrB_Matrix S_mat = NULL ; // temporary membership matrix (Phase 3) GrB_Matrix A_temp = NULL ; // temporary for mxm (Phase 3) double *k_arr = NULL ; // k_arr[i] = degree of node i (current level) @@ -131,6 +144,7 @@ int LAGraph_Leiden //-------------------------------------------------------------------------- LG_CLEAR_MSG ; + (void) seed ; // reserved for future randomized refinement LG_ASSERT (c_handle != NULL, GrB_NULL_POINTER) ; (*c_handle) = NULL ; LG_TRY (LAGraph_CheckGraph (G, msg)) ; @@ -182,6 +196,7 @@ int LAGraph_Leiden GRB_TRY (GrB_Vector_reduce_FP64 (&m, NULL, GrB_PLUS_MONOID_FP64, k_vec, NULL)) ; m /= 2.0 ; + double two_m = 2.0 * m ; // denominator of the modularity penalty GrB_free (&k_vec) ; k_vec = NULL ; @@ -300,7 +315,7 @@ int LAGraph_Leiden } double T_ci = dirty[ci] ? T_local[ci] : 0.0 ; - double score_ci = T_ci - ki * k_comm[ci] / m ; + double score_ci = T_ci - ki * k_comm[ci] / two_m ; double best_score = score_ci ; int64_t best_c = ci ; @@ -308,7 +323,7 @@ int LAGraph_Leiden { int64_t c_cand = (int64_t) dirty_list[d] ; if (c_cand == ci) continue ; - double score = T_local[c_cand] - ki * k_comm[c_cand] / m ; + double score = T_local[c_cand] - ki * k_comm[c_cand] / two_m ; if (score > best_score) { best_score = score ; @@ -391,7 +406,7 @@ int LAGraph_Leiden } double T_ci_ref = dirty[ci_ref] ? T_local[ci_ref] : 0.0 ; - double score_ci_ref = T_ci_ref - ki * k_ref_comm[ci_ref] / m ; + double score_ci_ref = T_ci_ref - ki * k_ref_comm[ci_ref] / two_m ; double best_score = score_ci_ref ; int64_t best_c_ref = ci_ref ; @@ -399,7 +414,7 @@ int LAGraph_Leiden { int64_t c_cand = (int64_t) dirty_list[d] ; if (c_cand == ci_ref) continue ; - double score = T_local[c_cand] - ki * k_ref_comm[c_cand] / m ; + double score = T_local[c_cand] - ki * k_ref_comm[c_cand] / two_m ; if (score > best_score) { best_score = score ; @@ -494,7 +509,6 @@ int LAGraph_Leiden GrB_PLUS_TIMES_SEMIRING_FP64, A_cur, S_mat, NULL)) ; // A_new = S^T * A_temp (K_ref × K_ref) - GrB_Matrix A_new = NULL ; GRB_TRY (GrB_Matrix_new (&A_new, GrB_FP64, K_ref, K_ref)) ; GRB_TRY (GrB_mxm (A_new, NULL, NULL, GrB_PLUS_TIMES_SEMIRING_FP64, S_mat, A_temp, GrB_DESC_T0)) ; @@ -503,6 +517,7 @@ int LAGraph_Leiden GrB_free (&A_temp) ; A_temp = NULL ; GrB_free (&A_agg) ; // free previous level's aggregate graph A_agg = A_new ; + A_new = NULL ; // ownership transferred to A_agg A_cur = A_agg ; n_cur = K_ref ; } diff --git a/experimental/test/test_leiden.c b/experimental/test/test_leiden.c index e0e219e915..3c9d1a65e9 100644 --- a/experimental/test/test_leiden.c +++ b/experimental/test/test_leiden.c @@ -96,10 +96,10 @@ void test_Leiden (void) if (files[k].expect_communities) { - // Multi-level Leiden on karate.mtx should achieve Q >= 0.3 - // (single-level yields ~0.24; multi-level typically ~0.34+). - TEST_CHECK (Q > 0.3) ; - TEST_MSG ("Expected Q > 0.3 for %s, got Q = %f", aname, Q) ; + // Multi-level Leiden on karate.mtx achieves Q ≈ 0.42, matching + // published Louvain/Leiden benchmarks on this graph. + TEST_CHECK (Q > 0.37) ; + TEST_MSG ("Expected Q > 0.37 for %s, got Q = %f", aname, Q) ; } #endif