From fcd1d07e66c3e607c9732605361988b6918400be Mon Sep 17 00:00:00 2001
From: DanyaLitva <140255125+DanyaLitva@users.noreply.github.com>
Date: Thu, 30 Apr 2026 19:00:44 +0300
Subject: [PATCH 1/2] Implement CFL all-paths path-finding algorithm adv

---
 .../algorithm/LAGraph_CFL_AllPaths_advanced.c |  416 +++++
 .../LAGraph_CFL_optimized_matrix_opt.c        | 1338 +++++++++++++++++
 .../LAGraph_CFL_optimized_matrix_opt.h        |  106 ++
 .../LAGraph_CFL_reachability_advanced.c       |  992 ++++++++++++
 include/LAGraphX.h                            |  203 +++
 5 files changed, 3055 insertions(+)
 create mode 100644 experimental/algorithm/LAGraph_CFL_AllPaths_advanced.c
 create mode 100644 experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.c
 create mode 100644 experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.h
 create mode 100644 experimental/algorithm/LAGraph_CFL_reachability_advanced.c

diff --git a/experimental/algorithm/LAGraph_CFL_AllPaths_advanced.c b/experimental/algorithm/LAGraph_CFL_AllPaths_advanced.c
new file mode 100644
index 0000000000..273c3abc95
--- /dev/null
+++ b/experimental/algorithm/LAGraph_CFL_AllPaths_advanced.c
@@ -0,0 +1,416 @@
+#define LG_FREE_WORK \
+{ \
+    if (outputs_reachability != NULL) { \
+        for (size_t i = 0; i < symbols_amount; i++) { \
+            GrB_free(&outputs_reachability[i]); \
+        } \
+    } \
+    LAGraph_Free((void**)&outputs_reachability, NULL); \
+    LAGraph_Free((void **)&T, NULL); \
+    GrB_free(&false_scalar); \
+    GrB_free(&identity_matrix); \
+    GrB_free(&v_diag); \
+    LAGraph_Free((void **)&t_empty_flags, NULL); \
+    LAGraph_Free((void **)&eps_rules, NULL); \
+    LAGraph_Free((void **)&term_rules, NULL); \
+    LAGraph_Free((void **)&bin_rules, NULL); \
+    LAGraph_Free((void **)&new_rules, NULL); \
+    GrB_free(&AllPaths_semiring); \
+    GrB_free(&AllPaths_monoid); \
+    GrB_free(&bottom_scalar); \
+    GrB_free(&IAllPaths_mult); \
+    GrB_free(&AllPaths_set); \
+    GrB_free(&AllPaths_mult); \
+    GrB_free(&AllPaths_add); \
+    GrB_free(&Theta); \
+}
+
+#include "LG_internal.h"
+#include <LAGraphX.h>
+
+//Merging two ordered arrays of internal vertices in the add function
+static GrB_Index* merge_all_paths(size_t* n, const GrB_Index* a, const size_t na, const GrB_Index* b, const size_t nb)
+{  
+  GrB_Index *tmp = malloc((na + nb) * sizeof(GrB_Index));
+  
+  size_t ia = 0, ib = 0, outn = 0;
+  // Handle the first elements of both arrays to initialize tmp and avoid checking if tmp is empty in the loop
+  if (na > 0 && nb > 0) {
+    if (a[0] < b[0]) {
+      tmp[outn++] = a[ia++];
+    } else if (b[0] < a[0]) {
+      tmp[outn++] = b[ib++];
+    } else {
+      tmp[outn++] = a[ia++];
+      ib++;
+    }
+  } else if (na > 0) {
+    tmp[outn++] = a[ia++];
+  } else {
+    tmp[outn++] = b[ib++];
+  }
+  
+  while (ia < na && ib < nb) {
+    GrB_Index va = a[ia];
+    GrB_Index vb = b[ib];
+    if (va < vb) {
+      if (tmp[outn-1] != va) tmp[outn++] = va;
+      ia++;
+    } else if (vb < va) {
+      if (tmp[outn-1] != vb) tmp[outn++] = vb;
+      ib++;
+    } else {
+      if (tmp[outn-1] != va) tmp[outn++] = va;
+      ia++; ib++;
+    }
+  }
+  while (ia < na) {
+    GrB_Index va = a[ia++];
+    if (tmp[outn-1] != va) tmp[outn++] = va;
+  }
+  while (ib < nb) {
+    GrB_Index vb = b[ib++];
+    if (tmp[outn-1] != vb) tmp[outn++] = vb;
+  }
+  
+  tmp = realloc(tmp, outn * sizeof(GrB_Index));
+  *n = outn;
+  
+  return tmp;
+}
+
+static GrB_Index* insert_all_paths(size_t* n, const GrB_Index* arr, size_t len, GrB_Index value)
+{
+  // Array is ordered, so we can use binary search to find the position to insert value
+  size_t l = 0, r = len, m = 0;
+  while (l < r) {
+    m = l + (r - l) / 2;
+    if (arr[m] < value) l = m + 1;
+    else r = m;
+  }
+
+    // If value is already in the array, return the original array
+    if (l < len && arr[l] == value) {
+        GrB_Index *tmp = malloc(len * sizeof(GrB_Index));
+        memcpy(tmp, arr, len * sizeof(GrB_Index));
+        *n = len;
+        return tmp;
+    }
+
+    GrB_Index *tmp = malloc((len + 1) * sizeof(GrB_Index));
+    memcpy(tmp, arr, l * sizeof(GrB_Index));
+    tmp[l] = value;
+    memcpy(tmp + l + 1, arr + l, (len - l) * sizeof(GrB_Index));
+
+    *n = len + 1;
+    return tmp;
+}
+
+static void add_all_paths(AllPathsElem *z, AllPathsElem *x, AllPathsElem *y)
+{
+    // temp is needed to avoid freeing the memory of z in case z == x or z == y
+    AllPathsElem temp;
+
+    // x->n and y->n are not equal to zero
+    if (x->n == 1 && y->n == 1) {
+        if (x->data.single_elem == y->data.single_elem) {
+            temp.n = 1;
+            temp.data.single_elem = x->data.single_elem;
+        } else {
+            temp.n = 2;
+            temp.data.middle = malloc(2 * sizeof(GrB_Index));
+            if (x->data.single_elem < y->data.single_elem) {
+                temp.data.middle[0] = x->data.single_elem;
+                temp.data.middle[1] = y->data.single_elem;
+            } else {
+                temp.data.middle[0] = y->data.single_elem;
+                temp.data.middle[1] = x->data.single_elem;
+            }
+        }
+    } else if (x->n == 1) {
+        temp.data.middle = insert_all_paths(&temp.n, y->data.middle, y->n, x->data.single_elem);
+    } else if (y->n == 1) {
+        temp.data.middle = insert_all_paths(&temp.n, x->data.middle, x->n, y->data.single_elem);
+    } else {
+        temp.data.middle = merge_all_paths(&temp.n, x->data.middle, x->n, y->data.middle, y->n);
+    }
+
+    if(x->n > 1){
+      free(x->data.middle);
+    }
+    if(y->n > 1){
+      free(y->data.middle);
+    }
+
+    *z = temp;
+}
+
+static void mult_all_paths_post(AllPathsElem *z,
+                     const void *x, GrB_Index ix, GrB_Index jx,
+                     const void *y, GrB_Index iy, GrB_Index jy,
+                     const void *theta)
+{
+  z->data.single_elem = jx;
+  z->n = 1;
+}
+
+static void set_all_paths(AllPathsElem *z, const AllPathsElem *x, const bool *edge_exist)
+{
+  z->data.single_elem = GrB_INDEX_MAX; // A special value to indicate that this path corresponds to a terminal rule (A->t) or an epsilon rule (A->eps)
+  z->n = 1;
+}
+
+#define MULT_PATH_POST_INDEX_DEFN                                                   \
+"static void mult_all_paths_post(AllPathsElem *z, \n"      \
+"                     const void *x, GrB_Index ix, GrB_Index jx, \n"      \
+"                     const void *y, GrB_Index iy, GrB_Index jy, \n"      \
+"                     const void *theta) \n"      \
+"{ \n"      \
+"  z->data.single_elem = jx; \n"      \
+"  z->n = 1; \n"      \
+"}"
+
+GrB_Info LAGraph_CFL_AllPaths_adv(
+    GrB_Matrix *outputs, 
+    GrB_Type *all_paths_ptr_t,
+    const GrB_Matrix *adj_matrices, 
+    size_t symbols_amount,       
+    const LAGraph_rule_EWCNF *rules,
+    size_t rules_count,
+    char *msg,
+    int8_t optimizations
+)
+{
+    LG_CLEAR_MSG;
+    size_t msg_len = 0; 
+    
+    GrB_Type AllPaths_type = NULL;
+
+    GrB_Matrix* T = NULL;
+    GrB_Scalar false_scalar = NULL;
+    GrB_Matrix* outputs_reachability = NULL;
+    bool* t_empty_flags = NULL;    
+    LAGraph_rule_EWCNF* new_rules = NULL;
+    size_t new_rules_count = 0;
+    size_t* eps_rules = NULL, eps_rules_count = 0;
+    size_t* term_rules = NULL, term_rules_count = 0;
+    size_t* bin_rules = NULL, bin_rules_count = 0;
+    GrB_Matrix identity_matrix = NULL;
+    GrB_Vector v_diag = NULL;
+
+#if GxB_IMPLEMENTATION < GxB_VERSION(9, 4, 5)
+    return (GrB_NOT_IMPLEMENTED);
+#else
+    GrB_BinaryOp AllPaths_add = NULL;
+    GrB_Monoid AllPaths_monoid = NULL;
+    GxB_IndexBinaryOp IAllPaths_mult = NULL;
+    GrB_BinaryOp AllPaths_mult = NULL;
+    GrB_Semiring AllPaths_semiring = NULL;
+    GrB_BinaryOp AllPaths_set = NULL;
+    GrB_Scalar Theta = NULL;
+    GrB_Scalar bottom_scalar = NULL;
+
+    GrB_free(all_paths_ptr_t);
+    GRB_TRY(GxB_Type_new(all_paths_ptr_t, sizeof(AllPathsElem), "AllPathsElem",
+    "typedef struct{size_t n;union{GrB_Index single_elem;GrB_Index* middle;}data;}AllPathsElem;"));
+    AllPaths_type = *all_paths_ptr_t;
+
+    GRB_TRY(GrB_Scalar_new(&Theta, GrB_BOOL));
+    GRB_TRY(GrB_Scalar_setElement_BOOL(Theta, false));
+
+    AllPathsElem bottom = { 0 };
+    GRB_TRY(GrB_Scalar_new(&bottom_scalar, AllPaths_type));
+    GRB_TRY(GrB_Scalar_setElement_UDT(bottom_scalar, (void*)(&bottom)));
+
+    GRB_TRY(GrB_BinaryOp_new(&AllPaths_add, (void*)add_all_paths, AllPaths_type, AllPaths_type, AllPaths_type));
+    GRB_TRY(GrB_Monoid_new(&AllPaths_monoid, AllPaths_add, (void*)(&bottom)));
+
+    GRB_TRY(GxB_IndexBinaryOp_new(&IAllPaths_mult, (void*)mult_all_paths_post, AllPaths_type, 
+                                  GrB_BOOL, GrB_BOOL, GrB_BOOL, "mult_all_paths_post", MULT_PATH_POST_INDEX_DEFN));
+
+    GRB_TRY(GxB_BinaryOp_new_IndexOp(&AllPaths_mult, IAllPaths_mult, Theta));
+    GRB_TRY(GrB_Semiring_new(&AllPaths_semiring, AllPaths_monoid, AllPaths_mult));
+    GRB_TRY(GrB_BinaryOp_new(&AllPaths_set, (void*)set_all_paths, AllPaths_type, AllPaths_type, GrB_BOOL));
+
+    LG_ASSERT_MSG(outputs != NULL, GrB_NULL_POINTER, "The outputs array cannot be null.");
+    LG_CLEAR_MSG;
+    msg_len = 0;
+
+    LAGraph_Calloc((void**)&outputs_reachability, symbols_amount, sizeof(GrB_Matrix), msg);
+    LG_TRY(LAGraph_CFL_reachability_adv(outputs_reachability, adj_matrices, symbols_amount, rules, rules_count, msg, optimizations));
+
+    for (size_t i = 0; i < symbols_amount; i++) {
+        GrB_set(outputs_reachability[i], GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT);
+    }
+    
+    for (size_t i = 0; i < rules_count; i++) {
+        new_rules_count += (rules[i].indexed_count == 0) ? 1 : rules[i].indexed_count;
+    }
+    
+    LG_TRY(LAGraph_Calloc((void**)&new_rules, new_rules_count, sizeof(LAGraph_rule_EWCNF), msg));
+    size_t nr_idx = 0;
+    for (size_t i = 0; i < rules_count; i++) {
+        LAGraph_rule_EWCNF rule = rules[i];
+        if (rule.indexed_count == 0) {
+            new_rules[nr_idx++] = rule;
+        } else {
+            for (size_t rule_index = 0; rule_index < rule.indexed_count; rule_index++) {
+                LAGraph_rule_EWCNF exploded_rule = rule;
+                exploded_rule.indexed_count = 0;
+                exploded_rule.indexed = 0;
+                if (rule.nonterm != -1 && rule.indexed & LAGraph_EWNCF_INDEX_NONTERM) {
+                    exploded_rule.nonterm = rule.nonterm + rule_index;
+                }
+                if (rule.prod_A != -1 && rule.indexed & LAGraph_EWNCF_INDEX_PROD_A) {
+                    exploded_rule.prod_A = rule.prod_A + rule_index;
+                }
+                if (rule.prod_B != -1 && rule.indexed & LAGraph_EWNCF_INDEX_PROD_B) {
+                    exploded_rule.prod_B = rule.prod_B + rule_index;
+                }
+                new_rules[nr_idx++] = exploded_rule;
+            }
+        }
+    }
+
+    LG_TRY(LAGraph_Calloc((void**)&T, symbols_amount, sizeof(GrB_Matrix), msg));
+    GRB_TRY(GrB_Scalar_new(&false_scalar, GrB_BOOL));
+    GRB_TRY(GrB_Scalar_setElement_BOOL(false_scalar, false));
+    LG_TRY(LAGraph_Calloc((void**)&t_empty_flags, symbols_amount, sizeof(bool), msg));
+    
+    GrB_Index n;
+    bool found_n = false;
+    for (size_t i = 0; i < symbols_amount; i++) {
+        if (adj_matrices[i] != NULL) {
+            GRB_TRY(GrB_Matrix_ncols(&n, adj_matrices[i]));
+            found_n = true;
+            break;
+        }
+    }
+    if (!found_n) { n = 0; }
+
+    for (size_t i = 0; i < symbols_amount; i++) {
+        GRB_TRY(GrB_Matrix_new(&T[i], AllPaths_type, n, n));
+        t_empty_flags[i] = true;
+    }
+
+    LG_TRY(LAGraph_Calloc((void**)&eps_rules, new_rules_count, sizeof(size_t), msg));
+    LG_TRY(LAGraph_Calloc((void**)&term_rules, new_rules_count, sizeof(size_t), msg));
+    LG_TRY(LAGraph_Calloc((void**)&bin_rules, new_rules_count, sizeof(size_t), msg));
+
+    for (size_t i = 0; i < new_rules_count; i++) {
+        LAGraph_rule_EWCNF rule = new_rules[i];
+        bool is_rule_eps = rule.prod_A == -1 && rule.prod_B == -1;
+        bool is_rule_term = rule.prod_A != -1 && rule.prod_B == -1;
+        bool is_rule_bin = rule.prod_A != -1 && rule.prod_B != -1;
+        
+        if (is_rule_eps) { eps_rules[eps_rules_count++] = i; continue; }
+        if (is_rule_term) { term_rules[term_rules_count++] = i; continue; }
+        if (is_rule_bin) { bin_rules[bin_rules_count++] = i; continue; }
+    }
+
+    for (size_t i = 0; i < term_rules_count; i++) {
+        LAGraph_rule_EWCNF term_rule = new_rules[term_rules[i]];
+        if (adj_matrices[term_rule.prod_A] == NULL) continue;
+        
+        GrB_Index adj_matrix_nnz = 0;
+        GRB_TRY(GrB_Matrix_nvals(&adj_matrix_nnz, adj_matrices[term_rule.prod_A]));
+        if (adj_matrix_nnz == 0) continue;
+
+        GxB_eWiseUnion(
+            T[term_rule.nonterm], GrB_NULL, GrB_NULL, AllPaths_set,
+            T[term_rule.nonterm], bottom_scalar, adj_matrices[term_rule.prod_A], false_scalar, GrB_NULL
+        );
+        t_empty_flags[term_rule.nonterm] = false;
+    }
+
+    if (eps_rules_count > 0) {
+        GRB_TRY(GrB_Vector_new(&v_diag, GrB_BOOL, n));
+        GRB_TRY(GrB_Vector_assign_BOOL(v_diag, GrB_NULL, GrB_NULL, true, GrB_ALL, n, NULL));
+        GRB_TRY(GrB_Matrix_diag(&identity_matrix, v_diag, 0));
+        GRB_TRY(GrB_free(&v_diag));
+
+        for (size_t i = 0; i < eps_rules_count; i++) {
+            LAGraph_rule_EWCNF eps_rule = new_rules[eps_rules[i]];
+            GrB_BinaryOp acc_op = t_empty_flags[eps_rule.nonterm] ? GrB_NULL : AllPaths_add;
+            GxB_eWiseUnion(
+                T[eps_rule.nonterm], GrB_NULL, acc_op, AllPaths_set,
+                T[eps_rule.nonterm], bottom_scalar, identity_matrix, false_scalar, GrB_NULL
+            );
+            t_empty_flags[eps_rule.nonterm] = false;
+        }
+        GrB_free(&identity_matrix);
+    }
+
+    for (size_t i = 0; i < symbols_amount; i++) {
+            GrB_Index temp_nvals = 0;
+            GrB_Matrix_nvals(&temp_nvals, outputs_reachability[i]);
+            if (temp_nvals != 0) {
+                t_empty_flags[i] = false;
+        }
+    }
+
+    for (size_t i = 0; i < bin_rules_count; i++) {
+        LAGraph_rule_EWCNF bin_rule = new_rules[bin_rules[i]];
+
+        if (t_empty_flags[bin_rule.prod_A] || t_empty_flags[bin_rule.prod_B]) continue;
+
+        GrB_BinaryOp acc_op = t_empty_flags[bin_rule.nonterm] ? GrB_NULL : AllPaths_add;
+        GRB_TRY(GrB_mxm(
+            T[bin_rule.nonterm], GrB_NULL, acc_op,
+            AllPaths_semiring, outputs_reachability[bin_rule.prod_A], outputs_reachability[bin_rule.prod_B],
+            GrB_NULL
+        ));
+    }
+    
+    for (size_t i = 0; i < symbols_amount; i++) {
+        outputs[i] = T[i];
+        GrB_Matrix_wait(outputs[i], GrB_MATERIALIZE);
+    }
+
+    LG_FREE_WORK;
+    return GrB_SUCCESS;
+#endif
+}
+
+// Helper function to free the output matrix of LAGraph_CFL_AllPaths, which contains elements of type AllPathsElem with dynamically allocated arrays of intermediate vertices. 
+static void free_AllPaths_matrix(GrB_Matrix* ptr_output) 
+{
+  GxB_Iterator iterator;
+  GxB_Iterator_new(&iterator);
+  GrB_Info info = GxB_Matrix_Iterator_attach(iterator, *ptr_output, NULL);
+  info = GxB_Matrix_Iterator_seek(iterator, 0);
+  AllPathsElem val;
+
+  while (info != GxB_EXHAUSTED)
+  {
+    GxB_Iterator_get_UDT(iterator, (void*)&val);
+    if (val.n > 1 && val.data.middle != NULL) {
+      free(val.data.middle);
+    }
+    info = GxB_Matrix_Iterator_next(iterator);
+  }
+
+  GrB_free(&iterator);
+  GrB_free(ptr_output);
+}
+
+// Free outputs and all_paths_ptr_t after you have finished working with the output matrices from LAGraph_CFL_AllPaths.
+// do outputs = NULL, all_paths_ptr_t = NULL after LAGraph_CFL_AllPaths_free_outputs
+GrB_Info LAGraph_CFL_AllPaths_adv_free_outputs(GrB_Matrix* outputs, int64_t nonterms_count, GrB_Type* all_paths_ptr_t)
+{
+#if GxB_IMPLEMENTATION < GxB_VERSION(9, 4, 5)
+  return (GrB_NOT_IMPLEMENTED);
+#else
+  if (outputs) {
+    for (size_t i = 0; i < nonterms_count; i++) {
+      if (outputs[i] == NULL)
+        continue;
+      free_AllPaths_matrix(&outputs[i]);
+      outputs[i] = NULL;
+    }
+    free(outputs);
+  }
+  GrB_free(all_paths_ptr_t);
+  return GrB_SUCCESS;
+#endif
+}
\ No newline at end of file
diff --git a/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.c b/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.c
new file mode 100644
index 0000000000..ec558a59f4
--- /dev/null
+++ b/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.c
@@ -0,0 +1,1338 @@
+//------------------------------------------------------------------------------
+// LAGraph_CFL_optimized_matrix_opt.c: Implementation of operations for
+// Optimized Context-Free Language Reachability Matrix-Based Algorithm
+//------------------------------------------------------------------------------
+//
+// LAGraph, (c) 2019-2025 by The LAGraph Contributors, All Rights Reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+// Contributed by Vlasenco Daniel, Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State
+// University.
+
+//------------------------------------------------------------------------------
+
+// Code is an implementation of optimized matrix operations for CFL_reachability
+// algorithms, described in the following paper:
+// * Ilia Muravev, "Optimization of the Context-Free Language Reachability Matrix-Based
+// Algorithm" and based on the python implementation from:
+// https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_PyAlgo/tree/murav/optimize-matrix
+
+#include "LAGraph_CFL_optimized_matrix_opt.h"
+#include "LG_internal.h"
+#include <LAGraphX.h>
+
+#define BENCH_CFL_REACHBILITY false
+
+#if BENCH_CFL_REACHBILITY
+    #define IS_ISO(matrix, str)                                                          \
+        {                                                                                \
+            bool iso_flag;                                                               \
+            GrB_Index nnz;                                                               \
+            TRY(GxB_Matrix_iso(&iso_flag, matrix));                                      \
+            TRY(GrB_Matrix_nvals(&nnz, matrix));                                         \
+            if (!iso_flag && nnz) {                                                      \
+                printf("-----ISO ALERT----- (%s)\n", str);                               \
+                TRY(GxB_print(matrix, 1));                                               \
+                printf("-------------------\n");                                         \
+                TRY(-122);                                                               \
+            }                                                                            \
+        }
+
+    #define IS_ROW(matrix, str)                                                          \
+        {                                                                                \
+            if (matrix != NULL) {                                                        \
+                int32_t orientation;                                                     \
+                TRY(GrB_get(matrix, &orientation, GrB_STORAGE_ORIENTATION_HINT));        \
+                if (orientation != GrB_ROWMAJOR) {                                       \
+                    printf("-----NOT A ROW----- (%s)\n", str);                           \
+                    TRY(GxB_print(matrix, 1));                                           \
+                    printf("-------------------\n");                                     \
+                    TRY(-122);                                                           \
+                }                                                                        \
+            }                                                                            \
+        }
+
+    #define IS_COL(matrix, str)                                                          \
+        {                                                                                \
+            if (matrix != NULL) {                                                        \
+                int32_t orientation;                                                     \
+                TRY(GrB_get(matrix, &orientation, GrB_STORAGE_ORIENTATION_HINT));        \
+                if (orientation != GrB_COLMAJOR) {                                       \
+                    printf("-----NOT A COL----- (%s)\n", str);                           \
+                    TRY(GxB_print(matrix, 1));                                           \
+                    printf("-------------------\n");                                     \
+                    TRY(-122);                                                           \
+                }                                                                        \
+            }                                                                            \
+        }
+#else
+    #define IS_ISO(matrix, str)
+    #define IS_ROW(matrix, str)
+    #define IS_COL(matrix, str)
+#endif
+
+#define TRY(GrB_method)                                                                  \
+    {                                                                                    \
+        GrB_Info LG_GrB_Info = GrB_method;                                               \
+        if (LG_GrB_Info < GrB_SUCCESS) {                                                 \
+            fprintf(stderr, "LAGraph failure (file %s, line %d): (%d) \n", __FILE__,     \
+                    __LINE__, LG_GrB_Info);                                              \
+            return (LG_GrB_Info);                                                        \
+        }                                                                                \
+    }
+
+#define OPT_EMPTY (1 << 0)
+#define OPT_FORMAT (1 << 1)
+#define OPT_LAZY (1 << 2)
+#define OPT_BLOCK (1 << 3)
+
+typedef CFL_Matrix Matrix;
+typedef enum CFL_Matrix_block Matrix_block;
+
+#define TO_COL(matrix) GrB_set(matrix, GrB_COLMAJOR, GrB_STORAGE_ORIENTATION_HINT)
+#define TO_ROW(matrix) GrB_set(matrix, GrB_ROWMAJOR, GrB_STORAGE_ORIENTATION_HINT)
+
+GrB_Info matrix_print_lazy(Matrix *A, int8_t optimizations);
+
+GrB_Info matrix_to_format(Matrix *matrix, int32_t format, bool is_both) {
+    TRY(CFL_matrix_update(matrix));
+
+    // Matrix contain both formats so just switch base matrix
+    if (matrix->is_both) {
+        matrix->base = format == GrB_ROWMAJOR ? matrix->base_row : matrix->base_col;
+        matrix->format = format;
+        return GrB_SUCCESS;
+    }
+
+    // No changes required
+    if (matrix->format == format) {
+        return GrB_SUCCESS;
+    }
+
+    // Matrix contain just one matrix and format is not same
+    GrB_Matrix new_matrix, old_matrix;
+    if (matrix->format == GrB_ROWMAJOR) {
+        new_matrix = matrix->base_col;
+        old_matrix = matrix->base_row;
+    } else {
+        new_matrix = matrix->base_row;
+        old_matrix = matrix->base_col;
+    }
+
+    if (is_both) {
+        TRY(GrB_Matrix_new(&new_matrix, GrB_BOOL, matrix->nrows, matrix->ncols));
+        TRY(GrB_set(new_matrix, format, GrB_STORAGE_ORIENTATION_HINT))
+        TRY(GrB_Matrix_assign(new_matrix, GrB_NULL, GrB_NULL, old_matrix, GrB_ALL,
+                              matrix->nrows, GrB_ALL, matrix->ncols, GrB_NULL));
+        if (format == GrB_ROWMAJOR) {
+            matrix->base_row = new_matrix;
+            matrix->base_col = matrix->base;
+        } else {
+            matrix->base_row = matrix->base;
+            matrix->base_col = new_matrix;
+        }
+        matrix->is_both = true;
+        matrix->base = new_matrix;
+    } else {
+        if (format == GrB_ROWMAJOR) {
+            matrix->base_row = matrix->base;
+            matrix->base_col = NULL;
+            TRY(TO_ROW(matrix->base));
+        } else {
+            matrix->base_row = NULL;
+            matrix->base_col = matrix->base;
+            TRY(TO_COL(matrix->base));
+        }
+    }
+
+    matrix->format = format;
+
+    TRY(CFL_matrix_update(matrix));
+
+    return GrB_SUCCESS;
+}
+
+// clear methods
+
+GrB_Info matrix_clear(Matrix *A) {
+    TRY(GrB_Matrix_clear(A->base));
+    TRY(CFL_matrix_update(A));
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_clear_format(Matrix *A, int8_t optimizations) {
+    if (!(optimizations & OPT_FORMAT)) {
+        TRY(matrix_clear(A));
+        return GrB_SUCCESS;
+    }
+
+    if (!A->is_both) {
+        TRY(matrix_clear(A));
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_to_format(A, GrB_ROWMAJOR, false));
+    TRY(matrix_clear(A));
+
+    TRY(matrix_to_format(A, GrB_COLMAJOR, false));
+    TRY(matrix_clear(A));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_clear_empty(Matrix *A, int8_t optimizations) {
+    if (!(optimizations & OPT_EMPTY)) {
+        TRY(matrix_clear_format(A, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(A));
+    if (A->nvals == 0) {
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_clear_format(A, optimizations));
+    return GrB_SUCCESS;
+}
+
+// duplicate methods
+
+GrB_Info matrix_dup(Matrix *output, Matrix *input) {
+    if (output == input) {
+        return GrB_SUCCESS;
+    }
+
+    TRY(GrB_Matrix_apply(output->base, GrB_NULL, GrB_NULL, GrB_IDENTITY_BOOL, input->base,
+                         GrB_NULL));
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_dup_format(Matrix *output, Matrix *input, int8_t optimizations) {
+    if (!(optimizations & OPT_FORMAT)) {
+        TRY(matrix_dup(output, input));
+        return GrB_SUCCESS;
+    }
+
+    if (!output->is_both) {
+        TRY(CFL_matrix_update(output));
+        TRY(CFL_matrix_update(input));
+        Matrix *larger = output->nvals > input->nvals ? output : input;
+
+        TRY(matrix_to_format(output, larger->format, false));
+        TRY(matrix_to_format(input, larger->format, false));
+
+        TRY(matrix_dup(output, input));
+
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_to_format(output, GrB_ROWMAJOR, false));
+    TRY(matrix_dup(output, input));
+    TRY(matrix_to_format(output, GrB_COLMAJOR, false));
+    TRY(matrix_dup(output, input));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_dup_empty(Matrix *output, Matrix *input, int8_t optimizations) {
+    if (!(optimizations & OPT_EMPTY)) {
+        TRY(matrix_dup_format(output, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(input));
+    if (input->nvals == 0) {
+        TRY(matrix_clear_empty(output, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_dup_format(output, input, optimizations));
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_dup_lazy(Matrix *output, Matrix *input, int8_t optimizations) {
+    if (!(optimizations & OPT_LAZY)) {
+        TRY(matrix_dup_empty(output, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (!input->is_lazy) {
+        TRY(matrix_dup_empty(output, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    for (size_t i = 0; i < input->base_matrices_count; i++) {
+        TRY(matrix_dup_empty(output->base_matrices[i], input->base_matrices[i],
+                             optimizations));
+    }
+    output->base_matrices_count = input->base_matrices_count;
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info block_matrix_hyper_rotate_i(Matrix *matrix, enum CFL_Matrix_block format);
+
+GrB_Info matrix_dup_block(Matrix *output, Matrix *input, int8_t optimizations) {
+    if (!(optimizations & OPT_BLOCK)) {
+        TRY(matrix_dup_lazy(output, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (output->block_type == CELL && input->block_type == CELL) {
+        TRY(matrix_dup_lazy(output, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(block_matrix_hyper_rotate_i(input, output->block_type));
+    TRY(matrix_dup_lazy(output, input, optimizations));
+    return GrB_SUCCESS;
+}
+
+// block optimization specific methods
+
+GrB_Info block_matrix_hyper_rotate_i(Matrix *matrix, enum CFL_Matrix_block format) {
+    if (matrix->is_lazy) {
+        for (size_t i = 0; i < matrix->base_matrices_count; i++) {
+            TRY(block_matrix_hyper_rotate_i(matrix->base_matrices[i], format));
+        }
+
+        TRY(CFL_matrix_update(matrix));
+        return GrB_SUCCESS;
+    }
+
+    if (matrix->block_type == CELL) {
+        return GrB_SUCCESS;
+    }
+
+    if (matrix->block_type == format) {
+        return GrB_SUCCESS;
+    }
+
+    GrB_Scalar scalar_true;
+    TRY(GrB_Scalar_new(&scalar_true, GrB_BOOL));
+    TRY(GrB_Scalar_setElement_BOOL(scalar_true, true));
+
+    TRY(CFL_matrix_update(matrix));
+
+    GrB_Index *nrows;
+    GrB_Index *ncols;
+    TRY(LAGraph_Calloc((void **)&nrows, matrix->nvals, sizeof(GrB_Index), NULL));
+    TRY(LAGraph_Calloc((void **)&ncols, matrix->nvals, sizeof(GrB_Index), NULL));
+
+    TRY(GrB_Matrix_extractTuples_BOOL(nrows, ncols, NULL, &matrix->nvals, matrix->base));
+
+    if (matrix->block_type == VEC_VERT) {
+        for (size_t i = 0; i < matrix->nvals; i++) {
+            ncols[i] = ncols[i] + nrows[i] / matrix->ncols * matrix->ncols;
+            nrows[i] = nrows[i] % matrix->ncols;
+        }
+    } else {
+        for (size_t i = 0; i < matrix->nvals; i++) {
+            nrows[i] = nrows[i] + ncols[i] / matrix->nrows * matrix->nrows;
+            ncols[i] = ncols[i] % matrix->nrows;
+        }
+    }
+
+    GrB_Matrix new;
+    TRY(GrB_Matrix_new(&new, GrB_BOOL, matrix->ncols, matrix->nrows));
+    if (matrix->is_both) {
+        TRY(GxB_Matrix_build_Scalar(new, nrows, ncols, scalar_true, matrix->nvals));
+        TRY(GrB_Matrix_free(&matrix->base_row));
+        TRY(GrB_Matrix_free(&matrix->base_col));
+        matrix->base = new;
+        matrix->base_row = new;
+        TRY(TO_ROW(matrix->base));
+        TRY(GrB_Matrix_new(&new, GrB_BOOL, matrix->ncols, matrix->nrows));
+        TRY(TO_COL(new));
+        TRY(GxB_Matrix_build_Scalar(new, nrows, ncols, scalar_true, matrix->nvals));
+        matrix->base_col = new;
+        matrix->format = GrB_ROWMAJOR;
+    } else {
+        TRY(GrB_Matrix_free(&matrix->base));
+        matrix->base = new;
+        if (matrix->base_row != NULL) {
+            TRY(GxB_Matrix_build_Scalar(new, nrows, ncols, scalar_true, matrix->nvals));
+            matrix->base_row = new;
+        } else {
+            TRY(TO_COL(new));
+            TRY(GxB_Matrix_build_Scalar(new, nrows, ncols, scalar_true, matrix->nvals));
+            matrix->base_col = new;
+        }
+        matrix->format = GrB_ROWMAJOR;
+    }
+    TRY(CFL_matrix_update(matrix));
+    // TRY(CFL_matrix_free(matrix_p));
+    // TRY(CFL_matrix_from_base(matrix_p, new));
+
+    TRY(LAGraph_Free((void **)&nrows, NULL));
+    TRY(LAGraph_Free((void **)&ncols, NULL));
+    TRY(GrB_free(&scalar_true));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info block_matrix_to_diag(Matrix *diag, Matrix *input) {
+    if (input->block_type == CELL) {
+        return GrB_SUCCESS;
+    }
+
+    GrB_Scalar scalar_true;
+    TRY(GrB_Scalar_new(&scalar_true, GrB_BOOL));
+    TRY(GrB_Scalar_setElement_BOOL(scalar_true, true));
+
+    TRY(CFL_matrix_update(input));
+
+    GrB_Index *rows;
+    GrB_Index *cols;
+    TRY(LAGraph_Calloc((void **)&rows, input->nvals, sizeof(GrB_Index), NULL));
+    TRY(LAGraph_Calloc((void **)&cols, input->nvals, sizeof(GrB_Index), NULL));
+    TRY(GrB_Matrix_extractTuples_BOOL(rows, cols, NULL, &input->nvals, input->base));
+
+    if (input->block_type == VEC_HORIZ) {
+        for (size_t i = 0; i < input->nvals; i++) {
+            rows[i] = rows[i] + cols[i] / input->nrows * input->nrows;
+        }
+    }
+
+    if (input->block_type == VEC_VERT) {
+        for (size_t i = 0; i < input->nvals; i++) {
+            cols[i] = cols[i] + rows[i] / input->ncols * input->ncols;
+        }
+    }
+
+    TRY(GxB_Matrix_build_Scalar(diag->base, rows, cols, scalar_true, input->nvals));
+    TRY(CFL_matrix_update(diag));
+
+    TRY(LAGraph_Free((void **)&rows, NULL));
+    TRY(LAGraph_Free((void **)&cols, NULL));
+    TRY(GrB_free(&scalar_true));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info block_matrix_reduce(Matrix *matrix, Matrix *input, int8_t optimizations) {
+    if (input->block_type == CELL) {
+        TRY(matrix_dup_block(matrix, input, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    GrB_Scalar scalar_true;
+    TRY(GrB_Scalar_new(&scalar_true, GrB_BOOL));
+    TRY(GrB_Scalar_setElement_BOOL(scalar_true, true));
+
+    TRY(CFL_matrix_update(input));
+
+    GrB_Index *rows;
+    GrB_Index *cols;
+    TRY(LAGraph_Calloc((void **)&rows, input->nvals, sizeof(GrB_Index), NULL));
+    TRY(LAGraph_Calloc((void **)&cols, input->nvals, sizeof(GrB_Index), NULL));
+    TRY(GrB_Matrix_extractTuples_BOOL(rows, cols, NULL, &input->nvals, input->base));
+
+    if (input->block_type == VEC_VERT) {
+        for (size_t i = 0; i < input->nvals; i++) {
+            rows[i] = rows[i] % input->ncols;
+        }
+    }
+
+    if (input->block_type == VEC_HORIZ) {
+        for (size_t i = 0; i < input->nvals; i++) {
+            cols[i] = cols[i] % input->nrows;
+        }
+    }
+
+    TRY(GxB_Matrix_build_Scalar(matrix->base, rows, cols, scalar_true, input->nvals));
+    TRY(CFL_matrix_update(matrix));
+
+    TRY(LAGraph_Free((void **)&rows, NULL));
+    TRY(LAGraph_Free((void **)&cols, NULL));
+    TRY(GrB_free(&scalar_true));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info block_matrix_repeat_into_vector(Matrix *matrix, Matrix *input,
+                                         GrB_Index block_count) {
+    GrB_Matrix *tiles;
+    TRY(LAGraph_Calloc((void **)&tiles, block_count, sizeof(GrB_Matrix), NULL));
+    for (size_t i = 0; i < block_count; i++) {
+        tiles[i] = input->base;
+    }
+
+    if (matrix->block_type == VEC_VERT) {
+        TRY(GxB_Matrix_concat(matrix->base, tiles, block_count, 1, GrB_NULL));
+    } else {
+        TRY(GxB_Matrix_concat(matrix->base, tiles, 1, block_count, GrB_NULL));
+    }
+    TRY(CFL_matrix_update(matrix));
+    TRY(LAGraph_Free((void **)&tiles, NULL));
+
+    return GrB_SUCCESS;
+}
+
+// lazy optimization specific methods
+
+GrB_Info matrix_wise_empty(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                           int8_t optimizations);
+
+GrB_Info matrix_sort_lazy(Matrix *A, bool reverse) {
+    for (size_t i = 0; i < A->base_matrices_count; i++) {
+        for (size_t j = i + 1; j < A->base_matrices_count; j++) {
+            Matrix *first = reverse ? A->base_matrices[i] : A->base_matrices[j];
+            Matrix *second = reverse ? A->base_matrices[j] : A->base_matrices[i];
+            TRY(CFL_matrix_update(first));
+            TRY(CFL_matrix_update(second));
+
+            if (first->nvals < second->nvals) {
+                Matrix *temp = A->base_matrices[i];
+                A->base_matrices[i] = A->base_matrices[j];
+                A->base_matrices[j] = temp;
+            }
+        }
+    }
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_matrix_to_base(Matrix **matrix_p, Matrix *input, int8_t optimizations) {
+    if (!input->is_lazy) {
+        Matrix *result;
+        TRY(CFL_matrix_create(&result, input->nrows, input->ncols));
+        TRY(CFL_dup(result, input, optimizations));
+        *matrix_p = result;
+
+        return GrB_SUCCESS;
+    }
+
+    Matrix *matrix;
+    TRY(CFL_matrix_create_lazy(&matrix, input->nrows, input->ncols));
+    TRY(CFL_matrix_free(&matrix->base_matrices[0]));
+    matrix->base_matrices_count = 0;
+
+    for (size_t i = 0; i < input->base_matrices_count; i++) {
+        Matrix *base;
+        TRY(CFL_matrix_create(&base, input->nrows, input->ncols));
+        matrix->base_matrices[i] = base;
+        matrix->base_matrices_count++;
+    }
+
+    TRY(CFL_matrix_update(matrix));
+    TRY(CFL_dup(matrix, input, optimizations));
+    TRY(matrix_sort_lazy(matrix, false));
+
+    Matrix *acc;
+    TRY(CFL_matrix_create(&acc, input->nrows, input->ncols));
+    for (size_t j = 0; j < matrix->base_matrices_count; j++) {
+        TRY(CFL_wise(acc, acc, matrix->base_matrices[j], false, optimizations));
+    }
+
+    TRY(CFL_matrix_free(&matrix));
+
+    *matrix_p = acc;
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_combine_lazy(Matrix *A, size_t threshold, int8_t optimizations) {
+    Matrix **new_matrices;
+    TRY(LAGraph_Calloc((void **)&new_matrices, 50, sizeof(Matrix *), NULL));
+
+    size_t new_size = 0;
+
+    TRY(matrix_sort_lazy(A, false));
+
+    for (size_t i = 0; i < A->base_matrices_count; i++) {
+        TRY(CFL_matrix_update(A->base_matrices[i]));
+        if (new_size == 0 || A->base_matrices[i]->nvals > threshold) {
+            new_matrices[new_size++] = A->base_matrices[i];
+            continue;
+        }
+
+        TRY(matrix_wise_empty(new_matrices[new_size - 1], new_matrices[new_size - 1],
+                              A->base_matrices[i], false, optimizations));
+        TRY(CFL_matrix_free(&A->base_matrices[i]));
+    }
+
+    TRY(LAGraph_Free((void **)&A->base_matrices, NULL));
+    A->base_matrices = new_matrices;
+    A->base_matrices_count = new_size;
+    TRY(CFL_matrix_update(A));
+
+    return GrB_SUCCESS;
+}
+
+// create and update methods
+GrB_Info CFL_matrix_update(Matrix *matrix) {
+    if (!matrix->is_lazy) {
+        TRY(GrB_Matrix_nvals(&matrix->nvals, matrix->base));
+    } else {
+        size_t new_nnz = 0;
+        for (size_t i = 0; i < matrix->base_matrices_count; i++) {
+            TRY(GrB_Matrix_nvals(&(matrix->base_matrices[i]->nvals),
+                                 matrix->base_matrices[i]->base));
+            new_nnz += matrix->base_matrices[i]->nvals;
+        }
+
+        matrix->nvals = new_nnz;
+    }
+
+    if (!matrix->is_lazy) {
+        TRY(GrB_Matrix_nrows(&matrix->nrows, matrix->base));
+        TRY(GrB_Matrix_ncols(&matrix->ncols, matrix->base));
+    } else {
+        TRY(GrB_Matrix_nrows(&matrix->nrows, matrix->base_matrices[0]->base));
+        TRY(GrB_Matrix_ncols(&matrix->ncols, matrix->base_matrices[0]->base));
+    }
+
+    if (matrix->nrows == matrix->ncols)
+        matrix->block_type = CELL;
+    else
+        matrix->block_type = matrix->nrows > matrix->ncols ? VEC_VERT : VEC_HORIZ;
+
+#ifdef BENCH_CFL_REACHBILITY
+    if (matrix->is_lazy) {
+        for (size_t i = 0; i < matrix->base_matrices_count; i++) {
+            IS_ISO(matrix->base_matrices[i]->base, "lazy matrix base");
+            IS_ROW(matrix->base_matrices[i]->base_row, "");
+            IS_COL(matrix->base_matrices[i]->base_col, "");
+        }
+    } else {
+        IS_ISO(matrix->base, "");
+        IS_ROW(matrix->base_row, "");
+        IS_COL(matrix->base_col, "");
+    }
+#endif
+
+    return GrB_SUCCESS;
+}
+
+// Create optimizied matrix from base GrB_Matrix
+// Input: base
+// Output: matrix
+GrB_Info CFL_matrix_from_base(Matrix **matrix, GrB_Matrix base) {
+    Matrix *result;
+    TRY(LAGraph_Calloc((void **)&result, 1, sizeof(Matrix), NULL));
+
+    result->base = base;
+    result->nvals = 0; // We will get actual info in update function
+    result->nrows = 0;
+    result->ncols = 0;
+
+    // Format optimization fields
+    result->base_row = base;
+    result->base_col = NULL;
+    result->format = GrB_ROWMAJOR;
+    result->is_both = false;
+
+    // Lazy addition optimization fields
+    result->is_lazy = false;
+    result->base_matrices = NULL;
+    result->base_matrices_count = 0;
+
+    // Block optimization fields
+    result->block_type = CELL;
+
+    TRY(CFL_matrix_update(result));
+    *matrix = result;
+
+    return GrB_SUCCESS;
+}
+
+// Create optimizied lazy matrix from base GrB_Matrix
+// Input: base
+// Output: matrix_p that holds new allocated matrix.
+//
+// Base matrix will be in array of lazy matrix
+GrB_Info CFL_matrix_from_base_lazy(Matrix **matrix_p, GrB_Matrix base) {
+    Matrix *result;
+    TRY(CFL_matrix_from_base(&result, base));
+
+    TRY(CFL_matrix_from_base(matrix_p, base));
+
+    Matrix *matrix = *matrix_p;
+    matrix->is_lazy = true;
+    TRY(LAGraph_Calloc(
+        (void **)&matrix->base_matrices, 40, sizeof(CFL_Matrix *),
+        NULL)); // this is enough for this centry i guess, because 40th matrices must
+                // have 10^40 nvals for being putten in base_matrices, this is 2^132
+    matrix->base_matrices[0] = result;
+    matrix->base_matrices_count = 1;
+    matrix->base = NULL;
+    TRY(CFL_matrix_update(matrix));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_matrix_create(Matrix **matrix, GrB_Index nrows, GrB_Index ncols) {
+    GrB_Matrix _result;
+    TRY(GrB_Matrix_new(&_result, GrB_BOOL, nrows, ncols));
+    TRY(CFL_matrix_from_base(matrix, _result));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_matrix_create_lazy(Matrix **matrix, GrB_Index nrows, GrB_Index ncols) {
+    GrB_Matrix _result;
+    TRY(GrB_Matrix_new(&_result, GrB_BOOL, nrows, ncols));
+    TRY(CFL_matrix_from_base_lazy(matrix, _result))
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_matrix_free(Matrix **matrix_p) {
+    if (*matrix_p == NULL) {
+        return GrB_SUCCESS;
+    }
+
+    Matrix *matrix = *matrix_p;
+    if (matrix->is_both) {
+        TRY(GrB_Matrix_free(&(matrix->base_col)));
+        TRY(GrB_Matrix_free(&(matrix->base_row)));
+    } else {
+        TRY(GrB_Matrix_free(&(matrix->base)));
+    }
+
+    for (size_t i = 0; i < matrix->base_matrices_count; i++) {
+        TRY(CFL_matrix_free(&(matrix->base_matrices[i])));
+    }
+    TRY(LAGraph_Free((void **)&matrix->base_matrices, NULL));
+    TRY(LAGraph_Free((void **)matrix_p, NULL));
+
+    return GrB_SUCCESS;
+
+    GrB_Matrix_free(&matrix->base);
+}
+
+// mxm operations
+
+GrB_Info matrix_mxm(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                    bool swap) {
+    Matrix *left = swap ? second : first;
+    Matrix *right = swap ? first : second;
+
+    // CFL_matrix_update(first);
+    // CFL_matrix_update(second);
+    // CFL_matrix_update(output);
+    TRY(GrB_mxm(output->base, GrB_NULL, accum ? GxB_ANY_BOOL : GrB_NULL,
+                GxB_ANY_PAIR_BOOL, left->base, right->base, GrB_NULL));
+    // IS_ISO(output->base, "MXM output");
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_mxm_format(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                           bool swap, int8_t optimizations) {
+    if (!(optimizations & OPT_FORMAT)) {
+        TRY(matrix_mxm(output, first, second, accum, swap))
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+    GrB_Index left_nvals = swap ? second->nvals : first->nvals;
+    GrB_Index right_nvals = swap ? first->nvals : second->nvals;
+
+    int32_t desired_orientation = left_nvals < right_nvals ? GrB_ROWMAJOR : GrB_COLMAJOR;
+
+    if (!first->is_both && first->format != desired_orientation &&
+        !(first->nvals > second->nvals / 3.0)) {
+        TRY(matrix_mxm(output, first, second, accum, swap))
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_to_format(first, desired_orientation, true));
+    TRY(matrix_to_format(second, desired_orientation, false));
+    TRY(matrix_to_format(output, desired_orientation, false));
+    TRY(matrix_mxm(output, first, second, accum, swap));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_mxm_empty(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                          bool swap, int8_t optimizations) {
+    if (!(optimizations & OPT_EMPTY)) {
+        TRY(matrix_mxm_format(output, first, second, accum, swap, optimizations))
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+
+    if (first->nvals == 0 || second->nvals == 0) {
+        if (accum) {
+            return GrB_SUCCESS;
+        }
+
+        TRY(CFL_matrix_update(output));
+        if (output->nvals == 0) {
+            return GrB_SUCCESS;
+        }
+
+        TRY(matrix_clear_empty(output, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_mxm_format(output, first, second, accum, swap, optimizations));
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_mxm_lazy(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                         bool swap, int8_t optimizations) {
+    if (!(optimizations & OPT_LAZY)) {
+        TRY(matrix_mxm_empty(output, first, second, accum, swap, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (!first->is_lazy) {
+        TRY(matrix_mxm_empty(output, first, second, accum, swap, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(second));
+
+    TRY(matrix_combine_lazy(first, second->nvals, optimizations));
+    TRY(matrix_sort_lazy(first, false));
+
+    GrB_Matrix *accs;
+    TRY(LAGraph_Calloc((void **)&accs, first->base_matrices_count, sizeof(GrB_Matrix),
+                       NULL));
+    Matrix **acc_matrices;
+    TRY(LAGraph_Calloc((void **)&acc_matrices, first->base_matrices_count,
+                       sizeof(Matrix *), NULL));
+    for (size_t i = 0; i < first->base_matrices_count; i++) {
+        TRY(GrB_Matrix_new(&accs[i], GrB_BOOL, swap ? second->nrows : first->nrows,
+                           swap ? first->ncols : second->ncols));
+        TRY(CFL_matrix_from_base(&acc_matrices[i], accs[i]))
+    }
+
+    for (size_t i = 0; i < first->base_matrices_count; i++) {
+        TRY(matrix_mxm_empty(acc_matrices[i], first->base_matrices[i], second, false,
+                             swap, optimizations));
+    }
+
+    GrB_Matrix acc;
+    GrB_Matrix_new(&acc, GrB_BOOL, swap ? second->nrows : first->nrows,
+                   swap ? first->ncols : second->ncols);
+    Matrix *acc_matrix;
+    CFL_matrix_from_base(&acc_matrix, acc);
+
+    for (size_t i = 0; i < first->base_matrices_count; i++) {
+        TRY(matrix_wise_empty(acc_matrix, acc_matrix, acc_matrices[i], false,
+                              optimizations));
+        TRY(CFL_matrix_free(&acc_matrices[i]));
+    }
+    LAGraph_Free((void **)&accs, NULL);
+    LAGraph_Free((void **)&acc_matrices, NULL);
+
+    if (accum) {
+        TRY(matrix_wise_empty(output, output, acc_matrix, false, optimizations));
+    } else {
+        TRY(matrix_dup_block(output, acc_matrix, optimizations));
+    }
+
+    TRY(CFL_matrix_free(&acc_matrix));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_wise_block(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                           int8_t optimizations);
+
+GrB_Info matrix_mxm_block(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                          bool swap, int8_t optimizations) {
+    if (!(optimizations & OPT_BLOCK)) {
+        TRY(matrix_mxm_lazy(output, first, second, accum, swap, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (first->block_type == CELL && second->block_type == CELL) {
+        TRY(matrix_mxm_lazy(output, first, second, accum, swap, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (first->block_type == CELL) {
+        TRY(block_matrix_hyper_rotate_i(second, swap ? VEC_VERT : VEC_HORIZ));
+        TRY(block_matrix_hyper_rotate_i(output, swap ? VEC_VERT : VEC_HORIZ));
+
+        Matrix *temp;
+        TRY(CFL_matrix_create(&temp, swap ? second->nrows : first->nrows,
+                              swap ? first->ncols : second->ncols));
+        TRY(matrix_mxm_lazy(temp, first, second, accum, swap, optimizations));
+        TRY(matrix_wise_block(output, output, temp, false, optimizations));
+        TRY(CFL_matrix_free(&temp));
+
+        return GrB_SUCCESS;
+    }
+
+    if (second->block_type == CELL) {
+        TRY(block_matrix_hyper_rotate_i(first, swap ? VEC_HORIZ : VEC_VERT));
+        TRY(block_matrix_hyper_rotate_i(output, swap ? VEC_HORIZ : VEC_VERT));
+
+        Matrix *temp;
+        TRY(CFL_matrix_create(&temp, swap ? second->nrows : first->nrows,
+                              swap ? first->ncols : first->ncols));
+        TRY(matrix_mxm_lazy(temp, first, second, accum, swap, optimizations));
+        TRY(matrix_wise_block(output, output, temp, false, optimizations));
+        TRY(CFL_matrix_free(&temp));
+
+        return GrB_SUCCESS;
+    }
+
+    GrB_Index size = first->nrows > first->ncols ? first->nrows : first->ncols;
+
+    Matrix *diag;
+    TRY(CFL_matrix_create(&diag, size, size));
+    TRY(block_matrix_to_diag(diag, second));
+    TRY(CFL_matrix_update(diag));
+
+    TRY(block_matrix_hyper_rotate_i(first, swap ? VEC_VERT : VEC_HORIZ));
+    TRY(block_matrix_hyper_rotate_i(output, swap ? VEC_VERT : VEC_HORIZ));
+
+    Matrix *temp;
+    TRY(CFL_matrix_create(&temp, first->nrows, first->ncols));
+    TRY(matrix_mxm_lazy(temp, first, diag, false, swap, optimizations));
+    TRY(matrix_wise_block(output, output, temp, false, optimizations));
+
+    TRY(CFL_matrix_free(&temp));
+    TRY(CFL_matrix_free(&diag));
+
+    return GrB_SUCCESS;
+}
+
+// wise operations
+
+GrB_Info matrix_wise(Matrix *output, Matrix *first, Matrix *second, bool accum) {
+    GrB_BinaryOp accum_op = accum ? GxB_ANY_BOOL : GrB_NULL;
+    if (output == first)
+        accum_op = GrB_NULL;
+
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+    TRY(CFL_matrix_update(output));
+
+    TRY(GrB_Matrix_eWiseAdd_BinaryOp(output->base, GrB_NULL, accum_op, GxB_ANY_BOOL,
+                                     first->base, second->base, GrB_NULL));
+
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_wise_format(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                            int8_t optimizations) {
+    if (!(optimizations & OPT_FORMAT)) {
+        TRY(matrix_wise(output, first, second, accum))
+        return GrB_SUCCESS;
+    }
+
+    if (!output->is_both) {
+        TRY(CFL_matrix_update(output));
+        TRY(CFL_matrix_update(first));
+        TRY(CFL_matrix_update(second));
+
+        Matrix *larger = output->nvals > first->nvals ? output : first;
+        larger = larger->nvals > second->nvals ? larger : second;
+
+        TRY(matrix_to_format(output, larger->format, false));
+        TRY(matrix_to_format(first, larger->format, false));
+        TRY(matrix_to_format(second, larger->format, false));
+
+        TRY(matrix_wise(output, first, second, accum));
+
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_to_format(output, GrB_ROWMAJOR, false));
+    TRY(matrix_to_format(first, output->format, false));
+    TRY(matrix_to_format(second, output->format, false));
+
+    TRY(matrix_wise(output, first, second, accum));
+
+    TRY(matrix_to_format(output, GrB_COLMAJOR, false));
+    TRY(matrix_to_format(first, output->format, false));
+    TRY(matrix_to_format(second, output->format, false));
+
+    TRY(matrix_wise(output, first, second, accum));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_wise_empty(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                           int8_t optimizations) {
+    if (!(optimizations & OPT_EMPTY)) {
+        TRY(matrix_wise_format(output, first, second, accum, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+    TRY(CFL_matrix_update(output));
+
+    if (output == first) {
+        if (first->nvals == 0) {
+            TRY(matrix_dup_empty(first, second, optimizations));
+            return GrB_SUCCESS;
+        }
+
+        if (second->nvals == 0) {
+            return GrB_SUCCESS;
+        }
+
+        TRY(matrix_wise_format(output, first, second, accum, optimizations))
+        return GrB_SUCCESS;
+    }
+
+    if (first->nvals == 0 && second->nvals == 0) {
+        if (accum || output->nvals == 0) {
+            return GrB_SUCCESS;
+        }
+
+        TRY(matrix_clear_empty(output, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (first->nvals == 0) {
+        if (accum) {
+            TRY(matrix_wise_empty(output, output, second, false, optimizations));
+            return GrB_SUCCESS;
+        }
+
+        TRY(matrix_dup_empty(output, second, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (second->nvals == 0) {
+        if (accum) {
+            TRY(matrix_wise_empty(output, output, first, false, optimizations));
+            return GrB_SUCCESS;
+        }
+
+        TRY(matrix_dup_empty(output, first, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_wise_format(output, first, second, accum, optimizations));
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_wise_lazy(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                          int8_t optimizations) {
+    if (!(optimizations & OPT_LAZY)) {
+        TRY(matrix_wise_empty(output, first, second, accum, optimizations))
+        return GrB_SUCCESS;
+    }
+
+    if (!first->is_lazy && !second->is_lazy) {
+        TRY(matrix_wise_empty(output, first, second, accum, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (!first->is_lazy && second->is_lazy) {
+        for (size_t i = 0; i < second->base_matrices_count; i++) {
+            TRY(matrix_wise_empty(output, first, second->base_matrices[i], true,
+                                  optimizations));
+        }
+
+        return GrB_SUCCESS;
+    }
+
+    Matrix *other;
+    TRY(CFL_matrix_create(&other, output->nrows, output->ncols));
+    TRY(matrix_dup_empty(other, second, optimizations));
+
+    size_t other_nvals = other->nvals >= 10 ? other->nvals : 10;
+
+    while (true) {
+        bool found = false;
+
+        for (size_t i = 0; i < first->base_matrices_count; i++) {
+            TRY(CFL_matrix_update(first->base_matrices[i]));
+            size_t self_nvals = first->base_matrices[i]->nvals >= 10
+                                    ? first->base_matrices[i]->nvals
+                                    : 10;
+
+            if (other_nvals / 10 <= self_nvals && self_nvals <= other_nvals * 10) {
+                TRY(matrix_wise_empty(other, other, first->base_matrices[i], accum,
+                                      optimizations));
+                TRY(CFL_matrix_free(&first->base_matrices[i]));
+                for (size_t j = i + 1; j < first->base_matrices_count; j++) {
+                    first->base_matrices[j - 1] = first->base_matrices[j];
+                }
+                first->base_matrices_count--;
+                found = true;
+                break;
+            }
+        }
+
+        if (found) {
+            continue;
+        }
+
+        first->base_matrices[first->base_matrices_count++] = other;
+        break;
+    }
+
+    TRY(matrix_sort_lazy(first, false));
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+// - Any operation on two hyper vectors is performed block-wise
+// - When hyper vector is added in-place to a cell, then sum of hyper vector's blocks is
+// added to a cell
+// - When cell is added in-place to a hyper vector, then cell is added to each of
+// hyper vector's blocks
+GrB_Info matrix_wise_block(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                           int8_t optimizations) {
+    if (!(optimizations & OPT_BLOCK)) {
+        TRY(matrix_wise_lazy(output, first, second, accum, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (output != first) {
+        // fprintf(stderr, "Matrix wise currently support only iadd operation");
+        return GrB_INVALID_VALUE;
+    }
+
+    if (first->block_type == CELL && second->block_type == CELL) {
+        TRY(matrix_wise_lazy(output, first, second, accum, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    // second is vector
+    if (first->block_type == CELL) {
+        Matrix *temp_reduced;
+        TRY(CFL_matrix_create(&temp_reduced, first->nrows, first->ncols));
+        TRY(block_matrix_reduce(temp_reduced, second, optimizations));
+
+        TRY(matrix_wise_lazy(output, first, temp_reduced, accum, optimizations));
+        TRY(CFL_matrix_free(&temp_reduced));
+
+        return GrB_SUCCESS;
+    }
+
+    // first is vector
+    if (second->block_type == CELL) {
+        // LG_SET_BURBLE(true);
+        Matrix *temp_vector;
+        TRY(CFL_matrix_create(&temp_vector, first->nrows, first->ncols));
+
+        GrB_Index block_count = 0;
+        if (first->nrows > first->ncols) {
+            block_count = first->nrows / first->ncols;
+        } else {
+            block_count = first->ncols / first->nrows;
+        }
+        TRY(block_matrix_repeat_into_vector(temp_vector, second, block_count));
+        TRY(block_matrix_hyper_rotate_i(temp_vector, first->block_type));
+        TRY(matrix_wise_lazy(output, first, temp_vector, accum, optimizations));
+
+        TRY(CFL_matrix_free(&temp_vector));
+
+        return GrB_SUCCESS;
+    }
+
+    // both are vector
+    TRY(block_matrix_hyper_rotate_i(second, first->block_type));
+    TRY(matrix_wise_lazy(output, first, second, accum, optimizations));
+
+    return GrB_SUCCESS;
+}
+
+// rsub methods
+
+GrB_Info matrix_rsub(Matrix *output, Matrix *mask) {
+    TRY(GrB_eWiseAdd(output->base, mask->base, GrB_NULL, GxB_ANY_BOOL, output->base,
+                     output->base, GrB_DESC_RSC));
+
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_rsub_format(Matrix *output, Matrix *mask, int8_t optimizations) {
+    if (!(optimizations & OPT_FORMAT)) {
+        TRY(matrix_rsub(output, mask));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(output));
+    TRY(CFL_matrix_update(mask));
+
+    Matrix *larger_matrix = output->nvals > mask->nvals ? output : mask;
+    TRY(matrix_to_format(output, larger_matrix->format, false));
+    TRY(matrix_to_format(mask, larger_matrix->format, false));
+
+    TRY(matrix_rsub(output, mask));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_rsub_empty(Matrix *output, Matrix *mask, int8_t optimizations) {
+    if (!(optimizations & OPT_EMPTY)) {
+        TRY(matrix_rsub_format(output, mask, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(mask));
+    TRY(CFL_matrix_update(output));
+    if (mask->nvals == 0 || output->nvals == 0) {
+        return GrB_SUCCESS;
+    }
+
+    TRY(matrix_rsub_format(output, mask, optimizations));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_rsub_lazy(Matrix *output, Matrix *mask, int8_t optimizations) {
+    if (!(optimizations & OPT_LAZY)) {
+        TRY(matrix_rsub_empty(output, mask, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    if (!mask->is_lazy) {
+        TRY(matrix_rsub_empty(output, mask, optimizations))
+        return GrB_SUCCESS;
+    }
+
+    TRY(CFL_matrix_update(output));
+    TRY(matrix_combine_lazy(mask, output->nvals, optimizations));
+    TRY(matrix_sort_lazy(mask, true));
+
+    for (size_t i = 0; i < mask->base_matrices_count; i++) {
+        TRY(matrix_rsub_empty(output, mask->base_matrices[i], optimizations));
+    }
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info matrix_rsub_block(Matrix *output, Matrix *mask, int8_t optimizations) {
+    if (!(optimizations & OPT_BLOCK)) {
+        TRY(matrix_rsub_lazy(output, mask, optimizations))
+        return GrB_SUCCESS;
+    }
+
+    if ((output->block_type == CELL && mask->block_type != CELL) ||
+        (output->block_type != CELL && mask->block_type == CELL)) {
+        // fprintf(stderr, "Don't support rsub operation between cell and vector");
+        return GrB_INVALID_VALUE;
+    }
+
+    if (output->block_type == CELL) {
+        TRY(matrix_rsub_lazy(output, mask, optimizations));
+        return GrB_SUCCESS;
+    }
+
+    TRY(block_matrix_hyper_rotate_i(output, mask->block_type));
+    TRY(matrix_rsub_lazy(output, mask, optimizations));
+
+    return GrB_SUCCESS;
+}
+
+// utility methods
+
+// GrB_Info matrix_print_lazy(Matrix *A, int8_t optimizations) {
+//     // return;
+//     GxB_Print_Level pr = 1;
+
+//     if (!A->is_lazy) {
+//         TRY(CFL_matrix_update(A));
+//         TRY(GxB_print(A->base, pr));
+//         // printf("nnz: %ld\n", A->nvals);
+//         return GrB_SUCCESS;
+//     }
+
+//     if (A->base_matrices_count == 1) {
+//         TRY(CFL_matrix_update(A));
+//         // printf("nnz: %ld\n", A->nvals);
+//         TRY(GxB_print(A->base_matrices[0]->base, pr));
+//         return GrB_SUCCESS;
+//     }
+
+//     Matrix *temp;
+//     TRY(CFL_matrix_create(&temp, A->nrows, A->ncols));
+//     for (size_t i = 0; i < A->base_matrices_count; i++) {
+//         TRY(matrix_wise_empty(temp, temp, A->base_matrices[i], false, optimizations));
+//     }
+
+//     A = temp;
+//     TRY(GxB_print(A->base, pr));
+//     TRY(CFL_matrix_update(A));
+//     // printf("nnz: %ld\n", A->nvals);
+//     TRY(CFL_matrix_free(&temp));
+
+//     return GrB_SUCCESS;
+// }
+
+// GrB_Info print_graph_info(Matrix **matrices, size_t count) {
+//     GrB_Index nnz = 0;
+
+//     for (size_t i = 0; i < count; i++) {
+//         Matrix *A = matrices[i];
+//         TRY(CFL_matrix_update(A));
+//         nnz += A->nvals;
+//     }
+
+//     printf("NNZ: %ld\n", nnz);
+//     return GrB_SUCCESS;
+// }
+
+// order of optimizations: block -> lazy -> empty -> format
+
+GrB_Info CFL_mxm(Matrix *output, Matrix *first, Matrix *second, bool accum, bool swap,
+                 int8_t optimizations) {
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+    TRY(CFL_matrix_update(output));
+
+    TRY(matrix_mxm_block(output, first, second, accum, swap, optimizations));
+
+    TRY(CFL_matrix_update(output));
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_wise(Matrix *output, Matrix *first, Matrix *second, bool accum,
+                  int8_t optimizations) {
+    TRY(CFL_matrix_update(first));
+    TRY(CFL_matrix_update(second));
+    TRY(CFL_matrix_update(output));
+
+    TRY(matrix_wise_block(output, first, second, accum, optimizations));
+
+    TRY(CFL_matrix_update(output));
+
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_rsub(Matrix *output, Matrix *mask, int8_t optimizations) {
+    TRY(CFL_matrix_update(mask));
+    TRY(CFL_matrix_update(output));
+
+    TRY(matrix_rsub_block(output, mask, optimizations))
+
+    TRY(CFL_matrix_update(output));
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_dup(Matrix *output, Matrix *input, int8_t optimizations) {
+    TRY(CFL_matrix_update(output));
+    TRY(CFL_matrix_update(input));
+
+    TRY(matrix_dup_block(output, input, optimizations))
+
+    TRY(CFL_matrix_update(output));
+    return GrB_SUCCESS;
+}
+
+GrB_Info CFL_clear(Matrix *A, int8_t optimizations) {
+    TRY(CFL_matrix_update(A));
+
+    TRY(matrix_clear_empty(A, optimizations))
+
+    TRY(CFL_matrix_update(A));
+    return GrB_SUCCESS;
+}
\ No newline at end of file
diff --git a/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.h b/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.h
new file mode 100644
index 0000000000..e80a867fcb
--- /dev/null
+++ b/experimental/algorithm/LAGraph_CFL_optimized_matrix_opt.h
@@ -0,0 +1,106 @@
+//------------------------------------------------------------------------------
+// LAGraph_CFL_optimized_matrix_opt.h: Header with operations for
+// Optimized Context-Free Language Reachability Matrix-Based Algorithm
+//------------------------------------------------------------------------------
+//
+// LAGraph, (c) 2019-2026 by The LAGraph Contributors, All Rights Reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+// Contributed by Vlasenco Daniel, Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State
+// University.
+
+//------------------------------------------------------------------------------
+
+// Code is an implementation of optimized matrix operations for CFL_reachability
+// algorithms, described in the following paper:
+// * Ilia Muravev, "Optimization of the Context-Free Language Reachability Matrix-Based
+// Algorithm" and based on the python implementation from:
+// https://github.com/FormalLanguageConstrainedPathQuerying/CFPQ_PyAlgo/tree/murav/optimize-matrix
+
+#include <GraphBLAS.h>
+
+// Several square matrices can be concatenated into a horizontal or vertical vector of
+// matrices for fast multiplication
+//
+// [CELL]      - the matrix is square: n x n
+// [VEC_HORIZ] - the matrix is a horizontal block vector: n x (n * k),
+//               where k is the number of concatenated matrices
+// [VEC_VERT]  - the matrix is a vertical block vector: (n * k) x n,
+//               where k is the number of concatenated matrices
+enum CFL_Matrix_block { CELL, VEC_HORIZ, VEC_VERT };
+
+// Matrix wrapper for CFL reachability algorithms with optional optimizations
+typedef struct CFL_Matrix {
+    GrB_Matrix base;      // Underlying GrB_Matrix
+    int8_t optimizations; // Optimizations flags
+    // Fields of base matrix
+    GrB_Index nvals;
+    GrB_Index nrows;
+    GrB_Index ncols;
+    // Fields of format optimization
+    GrB_Matrix base_row;
+    GrB_Matrix base_col;
+    int32_t format;
+    bool is_both;
+    // Fields of lazy addition optimization
+    struct CFL_Matrix **base_matrices;
+    size_t base_matrices_count;
+    bool is_lazy;
+    // Fields of block optimization
+    enum CFL_Matrix_block block_type;
+} CFL_Matrix;
+
+// Creates a CFL matrix from intialized GrB_Matrix
+//
+// Creates a CFL_Matrix wrapping an already-initialized GrB_Matrix
+// On success, *matrix is set to a newly allocated CFL_Matrix
+GrB_Info CFL_matrix_from_base(CFL_Matrix **matrix, GrB_Matrix base);
+
+// Creates a lazy CFL matrix from intialized GrB_Matrix
+//
+// Creates a CFL_Matrix wrapping an already-initialized GrB_Matrix
+// On success, *matrix is set to a newly allocated CFL_Matrix
+GrB_Info CFL_matrix_from_base_lazy(CFL_Matrix **matrix, GrB_Matrix base);
+
+// Creates an empty CFL_Matrix of the given dimensions
+//
+// The underlying GrB_Matrix is allocated internally
+// On success, *matrix is set to a newly allocated CFL_Matrix
+GrB_Info CFL_matrix_create(CFL_Matrix **matrix, GrB_Index nrows, GrB_Index ncols);
+
+// Creates an empty lazy CFL_Matrix of the given dimensions
+//
+// The underlying GrB_Matrix is allocated internally
+// On success, *matrix is set to a newly allocated CFL_Matrix with new CFL_Matrix
+GrB_Info CFL_matrix_create_lazy(CFL_Matrix **matrix, GrB_Index nrows, GrB_Index ncols);
+
+// Free a CFL_Matrix and sets *matrix to NULL
+GrB_Info CFL_matrix_free(CFL_Matrix **matrix);
+
+// Recomputes internal fields (nvals, nrows, ncols, format, block_type, etc...)
+// from the current state of the underlying base matrix
+GrB_Info CFL_matrix_update(CFL_Matrix *matrix);
+
+GrB_Info CFL_mxm(CFL_Matrix *output, CFL_Matrix *first, CFL_Matrix *second, bool accum,
+                 bool swap, int8_t optimizations);
+GrB_Info CFL_wise(CFL_Matrix *output, CFL_Matrix *first, CFL_Matrix *second, bool accum,
+                  int8_t optimizations);
+
+// Computes C = B \ A, i.e. C(i, j) = true iff B(i, j) = true and A(i, j) = false.
+GrB_Info CFL_rsub(CFL_Matrix *output, CFL_Matrix *mask, int8_t optimizations);
+GrB_Info CFL_dup(CFL_Matrix *output, CFL_Matrix *input, int8_t optimizations);
+
+// Converts a matrix (lazy or regular) into its evaluated base form by merging
+// all underlying base matrices. If the input matrix is not lazy, returns its copy
+//
+// Parameters:
+//   matrix_p       - [out] Pointer that will be holds created matrix
+//   input          - [in]  Pointer to the source matrix (may be lazy).
+//   optimizations  - [in]  Bitmask specifying enabled optimizations.
+GrB_Info CFL_matrix_to_base(
+    // output
+    CFL_Matrix **matrix_p,
+    // input
+    CFL_Matrix *matrix, int8_t optimizations);
+
+GrB_Info CFL_clear(CFL_Matrix *A, int8_t optimizations);
\ No newline at end of file
diff --git a/experimental/algorithm/LAGraph_CFL_reachability_advanced.c b/experimental/algorithm/LAGraph_CFL_reachability_advanced.c
new file mode 100644
index 0000000000..c679261849
--- /dev/null
+++ b/experimental/algorithm/LAGraph_CFL_reachability_advanced.c
@@ -0,0 +1,992 @@
+//------------------------------------------------------------------------------
+// LAGraph_CFL_reachability.c: Context-Free Language Reachability Matrix-Based
+// Algorithm
+// ------------------------------------------------------------------------------
+//
+// LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved.
+// SPDX-License-Identifier: BSD-2-Clause
+
+// Contributed by Ilhom Kombaev, Semyon Grigoriev, St. Petersburg State University.
+
+//------------------------------------------------------------------------------
+
+// Code is based on the "A matrix-based CFPQ algorithm" described in the
+// following paper: * Rustam Azimov, Semyon Grigorev, "Context-Free Path
+// Querying Using Linear Algebra", URL:
+// https://disser.spbu.ru/files/2022/disser_azimov.pdf
+
+#define LG_FREE_WORK                                                                     \
+    {                                                                                    \
+        TRY_INNER(CFL_matrix_free(&iden));                                               \
+        TRY_INNER(LAGraph_Free((void **)&to_new_symbols_map, msg));                      \
+        TRY_INNER(LAGraph_Free((void **)&new_rules, msg));                               \
+        for (size_t i = 0; i < new_symbols_amount; i++) {                                \
+            TRY_INNER(CFL_matrix_free(&temp_matrices[i]));                               \
+            TRY_INNER(CFL_matrix_free(&delta_matrices[i]));                              \
+            TRY_INNER(CFL_matrix_free(&matrices[i]));                                    \
+            if (new_adj_matrices != adj_matrices) {                                      \
+                TRY_INNER(GrB_free(&new_adj_matrices[i]));                               \
+            }                                                                            \
+        }                                                                                \
+        if (new_adj_matrices != adj_matrices) {                                          \
+            TRY_INNER(LAGraph_Free((void **)&new_adj_matrices, msg));                    \
+        }                                                                                \
+        TRY_INNER(LAGraph_Free((void **)&delta_matrices, msg));                          \
+        TRY_INNER(LAGraph_Free((void **)&matrices, msg));                                \
+        TRY_INNER(LAGraph_Free((void **)&temp_matrices, msg));                           \
+    }
+
+#define LG_FREE_ALL                                                                      \
+    {                                                                                    \
+        LG_FREE_WORK;                                                                    \
+    }
+
+#include "LAGraph_CFL_optimized_matrix_opt.h"
+#include "LG_internal.h"
+#include <LAGraphX.h>
+
+#define ERROR_RULE(msg)                                                                  \
+    {                                                                                    \
+        LG_ASSERT_MSGF(false, GrB_INVALID_VALUE, "Rule with index %ld is invalid. " msg, \
+                       i);                                                               \
+    }
+
+#define ADD_TO_MSG(...)                                                                  \
+    {                                                                                    \
+        if (msg_len == 0) {                                                              \
+            msg_len +=                                                                   \
+                snprintf(msg, LAGRAPH_MSG_LEN,                                           \
+                         "LAGraph failure (file %s, line %d): ", __FILE__, __LINE__);    \
+        }                                                                                \
+        if (msg_len < LAGRAPH_MSG_LEN) {                                                 \
+            msg_len += snprintf(msg + msg_len, LAGRAPH_MSG_LEN - msg_len, __VA_ARGS__);  \
+        }                                                                                \
+    }
+
+#define ADD_INDEX_TO_ERROR_RULE(rule, i)                                                 \
+    {                                                                                    \
+        rule.len_indexes_str += snprintf(rule.indexes_str + rule.len_indexes_str,        \
+                                         LAGRAPH_MSG_LEN - rule.len_indexes_str,         \
+                                         rule.count == 0 ? "%ld" : ", %ld", i);          \
+        rule.count++;                                                                    \
+    }
+
+#define BENCH_CFL_REACHBILITY false
+
+#if BENCH_CFL_REACHBILITY
+    #define IS_ISO(matrix, str)                                                          \
+        {                                                                                \
+            bool iso_flag;                                                               \
+            GrB_Index nnz;                                                               \
+            GxB_Matrix_iso(&iso_flag, matrix);                                           \
+            GrB_Matrix_nvals(&nnz, matrix);                                              \
+            if (!iso_flag && nnz) {                                                      \
+                printf("-----ISO ALERT----- (%s)\n", str);                               \
+                GxB_print(matrix, 1);                                                    \
+                printf("-------------------\n");                                         \
+            }                                                                            \
+        }
+
+    #define TIMER_START()                                                                \
+        {                                                                                \
+            start_time = LAGraph_WallClockTime();                                        \
+        }
+
+    #define TIMER_STOP(label, accumulator)                                               \
+        {                                                                                \
+            end_time = LAGraph_WallClockTime();                                          \
+            printf("%s %.3fs\n", label, end_time - start_time);                          \
+            if (accumulator != NULL) {                                                   \
+                *(accumulator) += (end_time - start_time);                               \
+            }                                                                            \
+        }
+
+    #define IS_ROW(matrix, str)                                                          \
+        {                                                                                \
+            int32_t orientation;                                                         \
+            GrB_get(matrix, &orientation, GrB_STORAGE_ORIENTATION_HINT);                 \
+            if (orientation != GrB_ROWMAJOR) {                                           \
+                printf("-----NOT A ROW----- (%s)\n", str);                               \
+                GxB_print(matrix, 1);                                                    \
+                printf("-------------------\n");                                         \
+            }                                                                            \
+        }
+
+    #define IS_COL(matrix, str)                                                          \
+        {                                                                                \
+            int32_t orientation;                                                         \
+            GrB_get(matrix, &orientation, GrB_STORAGE_ORIENTATION_HINT);                 \
+            if (orientation != GrB_COLMAJOR) {                                           \
+                printf("-----NOT A COL----- (%s)\n", str);                               \
+                GxB_print(matrix, 1);                                                    \
+                printf("-------------------\n");                                         \
+            }                                                                            \
+        }
+#else
+    #define IS_ISO(matrix, str)
+    #define TIMER_START()
+    #define TIMER_STOP(label, accumulator)
+    #define IS_ROW(matrix, str)
+    #define IS_COL(matrix, str)
+#endif
+// clang-format on
+
+#define OPT_EMPTY (1 << 0)
+#define OPT_FORMAT (1 << 1)
+#define OPT_LAZY (1 << 2)
+#define OPT_BLOCK (1 << 3)
+
+#define TRY(GrB_method)                                                                  \
+    {                                                                                    \
+        GrB_Info LG_GrB_Info = GrB_method;                                               \
+        if (LG_GrB_Info < GrB_SUCCESS) {                                                 \
+            fprintf(stderr, "LAGraph failure (file %s, line %d): \n", __FILE__,          \
+                    __LINE__);                                                           \
+            LG_FREE_ALL;                                                                 \
+            return (LG_GrB_Info);                                                        \
+        }                                                                                \
+    }
+
+// Checks the return value of a GraphBLAS/LAGraph call inside a helper function.
+// On failure, logs the error location to stderr, invokes FREE_INNER() to release
+// any resources allocated within the current function, and returns the error code.
+//
+// FREE_INNER() must be defined by the caller before using this macro:
+//
+//   #define FREE_INNER()       \
+//       {                      \
+//           LAGraph_Free(&a)   \
+//           GrB_free(&M)       \
+//       }
+//
+// Note: use TRY() instead when the function is not a helper (i.e., it has
+// its own top-level FREE_ALL cleanup).
+#define TRY_INNER(GrB_method)                                                            \
+    {                                                                                    \
+        GrB_Info LG_GrB_Info = GrB_method;                                               \
+        if (LG_GrB_Info < GrB_SUCCESS) {                                                 \
+            fprintf(stderr, "LAGraph failure (file %s, line %d): \n", __FILE__,          \
+                    __LINE__);                                                           \
+            FREE_INNER();                                                                \
+            return (LG_GrB_Info);                                                        \
+        }                                                                                \
+    }
+
+#define TRY_I(GrB_method)                                                                \
+    {                                                                                    \
+        GrB_Info LG_GrB_Info = GrB_method;                                               \
+        if (LG_GrB_Info < GrB_SUCCESS) {                                                 \
+            fprintf(stderr,                                                              \
+                    "LAGraph failure (file %s, line %d) (Iteration: %d, i: %d): \n",     \
+                    __FILE__, __LINE__, iteration, i);                                   \
+            return (LG_GrB_Info);                                                        \
+        }                                                                                \
+    }
+
+typedef struct {
+    int32_t index;
+    int32_t base_index;
+    int32_t count;
+} CFL_Symbol;
+
+// When using the OPT_BLOCK optimization, indexed symbols must be grouped together.
+// This produces a mapping: [old_index -> (new_index, base_index, indexed_count)]
+//   - new_index:     the index of the symbol in the new numeration
+//   - base_index:    the original index of the first symbol in the indexed group
+//   - indexed_count: the number of indexed symbols in the group (0 if not indexed)
+//
+// Example:
+//   (0) S   -> (0, 0, 0)  - non-indexed symbol
+//   (1) A_0 -> (1, 1, 3)  - indexed group of 3, starting at old index 1
+//   (2) A_1 ->              }
+//   (3) A_2 ->              } members of the A group
+//   (4) B_0 -> (2, 4, 2)  - indexed group of 2, starting at old index 4
+//   (5) B_1 ->             } member of the B group
+//   (6) C   -> (3, 6, 0)  - non-indexed symbol
+//   (7) a   -> (4, 7, 0)  - non-indexed symbol
+//
+// This mapping is used to build a compact matrix array and to expand
+// production rules, both with and without the OPT_BLOCK optimization.
+//
+// Output: CFL_Symbol **symbols and size_t *size
+static GrB_Info get_new_symbols(const LAGraph_rule_EWCNF *rules, size_t rules_count,
+                                size_t symbols_amount, CFL_Symbol **symbols, size_t *size,
+                                char *msg) {
+    *symbols = NULL;
+    bool *checked = NULL;
+
+#undef FREE_INNER_WORK
+#undef FREE_INNER
+
+#define FREE_INNER_WORK()                                                                \
+    {                                                                                    \
+        LAGraph_Free((void **)&checked, msg);                                            \
+    };
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        FREE_INNER_WORK();                                                               \
+        LAGraph_Free((void **)symbols, msg);                                             \
+    }
+
+    TRY_INNER(LAGraph_Calloc((void **)&checked, symbols_amount, sizeof(bool), msg));
+    for (size_t i = 0; i < symbols_amount; i++) {
+        checked[i] = false;
+    }
+
+    size_t capacity = 1;
+    *size = 0;
+    TRY_INNER(LAGraph_Calloc((void **)symbols, capacity, sizeof(CFL_Symbol), msg));
+
+    for (size_t i = 0; i < capacity; i++) {
+        CFL_Symbol sym = {0};
+        (*symbols)[i] = sym;
+    }
+
+    for (size_t i = 0; i < rules_count; i++) {
+        LAGraph_rule_EWCNF rule = rules[i];
+
+        int32_t prods[3] = {rule.nonterm, rule.prod_A, rule.prod_B};
+        int bitmasks[3] = {LAGraph_EWNCF_INDEX_NONTERM, LAGraph_EWNCF_INDEX_PROD_A,
+                           LAGraph_EWNCF_INDEX_PROD_B};
+
+        for (size_t j = 0; j < 3; j++) {
+            if (prods[j] == -1) {
+                continue;
+            }
+
+            if (checked[prods[j]])
+                continue;
+
+            checked[prods[j]] = true;
+
+            CFL_Symbol sym;
+            sym.base_index = prods[j];
+            sym.index = *size;
+            sym.count = rule.indexed & bitmasks[j] ? rule.indexed_count : 0;
+
+            for (size_t k = sym.base_index; k < sym.base_index + sym.count; k++) {
+                checked[k] = true;
+            }
+
+            if (*size == capacity) {
+                capacity *= 2;
+                TRY_INNER(LAGraph_Realloc((void **)symbols, capacity, capacity / 2,
+                                          sizeof(CFL_Symbol), msg));
+            }
+
+            (*symbols)[(*size)++] = sym;
+        }
+    }
+
+    for (size_t i = 0; i < symbols_amount; i++) {
+        if (checked[i])
+            continue;
+
+        if (*size == capacity) {
+            capacity *= 2;
+            TRY_INNER(LAGraph_Realloc((void **)symbols, capacity, capacity / 2,
+                                      sizeof(CFL_Symbol), msg));
+        }
+
+        CFL_Symbol sym;
+        sym.base_index = i;
+        sym.index = *size;
+        sym.count = 0;
+
+        (*symbols)[(*size)++] = sym;
+        checked[i] = true;
+        // printf("Inserted (%ld, %ld, %ld)\n", sym.index, sym.base_index, sym.count);
+    }
+
+    FREE_INNER_WORK();
+
+    return GrB_SUCCESS;
+}
+
+// Expands indexed grammar rules into a set of concrete rules.
+//
+//   Before: A_i -> B_i C     (indexed_count = 3)
+//           D   -> E F
+//   After:  A_0 -> B_0 C
+//           A_1 -> B_1 C
+//           A_2 -> B_2 C
+//           D   -> E F
+//
+// Output: LAGraph_rule_EWCNF **new_rules and size_t *new_rules_count
+static GrB_Info explode_rules(const LAGraph_rule_EWCNF *rules, size_t rules_count,
+                              LAGraph_rule_EWCNF **new_rules, size_t *new_rules_count,
+                              char *msg) {
+    *new_rules = NULL;
+
+#undef FREE_INNER
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        LAGraph_Free((void **)new_rules, msg);                                           \
+    }
+
+    size_t new_rules_size = 0;
+    size_t new_rules_capacity = 1;
+
+    TRY_INNER(LAGraph_Calloc((void **)new_rules, new_rules_capacity,
+                             sizeof(LAGraph_rule_EWCNF), msg));
+
+    // explode rules
+    for (size_t i_rule = 0; i_rule < rules_count; i_rule++) {
+        LAGraph_rule_EWCNF rule = rules[i_rule];
+
+        if (new_rules_size == new_rules_capacity) {
+            TRY_INNER(LAGraph_Realloc((void **)new_rules, new_rules_capacity * 2,
+                                      new_rules_capacity, sizeof(LAGraph_rule_EWCNF),
+                                      msg));
+            new_rules_capacity *= 2;
+        }
+
+        if (rule.indexed_count == 0) {
+            (*new_rules)[new_rules_size++] = rule;
+        } else {
+            while (new_rules_size + rule.indexed_count >= new_rules_capacity) {
+                TRY_INNER(LAGraph_Realloc((void **)new_rules, new_rules_capacity * 2,
+                                          new_rules_capacity, sizeof(LAGraph_rule_EWCNF),
+                                          msg));
+                new_rules_capacity *= 2;
+            }
+
+            for (size_t rule_index = 0; rule_index < rule.indexed_count; rule_index++) {
+                LAGraph_rule_EWCNF new_rule = rule;
+                new_rule.indexed_count = 0;
+                new_rule.indexed = 0;
+                if (rule.nonterm != -1 && rule.indexed & LAGraph_EWNCF_INDEX_NONTERM) {
+                    new_rule.nonterm = rule.nonterm + rule_index;
+                }
+                if (rule.prod_A != -1 && rule.indexed & LAGraph_EWNCF_INDEX_PROD_A) {
+                    new_rule.prod_A = rule.prod_A + rule_index;
+                }
+                if (rule.prod_B != -1 && rule.indexed & LAGraph_EWNCF_INDEX_PROD_B) {
+                    new_rule.prod_B = rule.prod_B + rule_index;
+                }
+
+                (*new_rules)[new_rules_size++] = new_rule;
+            }
+        }
+    }
+
+    *new_rules_count = new_rules_size;
+
+    return GrB_SUCCESS;
+}
+
+// Splits a CFL_Matrix into an array of GrB_Matrix matrices
+
+// If the matrix is not a horizontal or vertical block vector (i.e., its
+// block_type is CELL), the underlying GrB_Matrix is extracted and copied
+// into outputs[0]
+//
+// Otherwise, the matrix split into square sub-matrices of size (graph_size x graph_size)
+// Parameters:
+//   outputs       - [out] Caller-allocated array of GrB_Matrix to write results into
+//                         Must have enough space for all sub-matrices
+//   matrix        - [in]  Source CFL_Matrix to split
+static GrB_Info split_CFL_matrix(GrB_Matrix *outputs, CFL_Matrix *matrix,
+                                 int8_t optimizations) {
+    char msg[LAGRAPH_MSG_LEN];
+    CFL_Matrix *base_matrix;
+    GrB_Index *nrows = NULL, *ncols = NULL;
+
+#undef FREE_INNER
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        CFL_matrix_free(&base_matrix);                                                   \
+        LAGraph_Free((void **)&nrows, msg);                                              \
+        LAGraph_Free((void **)&ncols, msg);                                              \
+    }
+
+    if (matrix->block_type == CELL) {
+        CFL_Matrix *result;
+        TRY_INNER(CFL_matrix_to_base(&result, matrix, optimizations));
+        TRY_INNER(GrB_Matrix_dup(outputs, result->base));
+        TRY_INNER(CFL_matrix_free(&result));
+        return GrB_SUCCESS;
+    }
+
+    TRY_INNER(CFL_matrix_to_base(&base_matrix, matrix, optimizations));
+
+    // we can create nrows and ncols array with the same size, it will be mush easier than
+    // calculate size of each array :)
+    GrB_Index matrices_count = matrix->nrows > matrix->ncols
+                                   ? matrix->nrows / matrix->ncols
+                                   : matrix->ncols / matrix->nrows;
+
+    TRY_INNER(LAGraph_Calloc((void **)&nrows, matrices_count, sizeof(GrB_Index), msg));
+    TRY_INNER(LAGraph_Calloc((void **)&ncols, matrices_count, sizeof(GrB_Index), msg));
+
+    GrB_Index graph_size = matrix->nrows < matrix->ncols ? matrix->nrows : matrix->ncols;
+    for (size_t i = 0; i < matrices_count; i++) {
+        nrows[i] = graph_size;
+        ncols[i] = graph_size;
+    }
+
+    GrB_Index m = 0, n = 0;
+    if (matrix->block_type == VEC_VERT) {
+        m = matrices_count;
+        n = 1;
+    } else {
+        m = 1;
+        n = matrices_count;
+    }
+
+    TRY_INNER(GxB_Matrix_split(outputs, m, n, nrows, ncols, base_matrix->base, GrB_NULL));
+
+    TRY_INNER(LAGraph_Free((void **)&nrows, msg));
+    TRY_INNER(LAGraph_Free((void **)&ncols, msg));
+    TRY_INNER(CFL_matrix_free(&base_matrix));
+
+    return GrB_SUCCESS;
+}
+
+// Builds the mapping [old_index -> (new_index, base_index, indexed_count)]
+// See get_new_symbols() for a detailed description of the mapping format
+//
+// Parameters:
+//   map           - [out] Allocated array of CFL_Symbol. Caller must free
+//   size          - [out] Number of entries in map
+static GrB_Info get_new_symbols_map(const LAGraph_rule_EWCNF *rules, size_t rules_count,
+                                    size_t symbols_amount, CFL_Symbol **map, size_t *size,
+                                    char *msg, int8_t optimizations) {
+#undef FREE_INNER
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        LAGraph_Free((void **)map, msg);                                                 \
+    }
+
+    if (optimizations & OPT_BLOCK) {
+        TRY_INNER(get_new_symbols(rules, rules_count, symbols_amount, map, size, msg));
+    } else {
+        *size = symbols_amount;
+        TRY_INNER(LAGraph_Calloc((void **)map, symbols_amount, sizeof(CFL_Symbol), msg));
+        for (size_t i = 0; i < symbols_amount; i++) {
+            CFL_Symbol *sym = &(*map)[i];
+            sym->index = i;
+            sym->base_index = i;
+            sym->count = 0;
+        }
+    }
+
+    return GrB_SUCCESS;
+}
+
+// Builds a new array of adjacency matrices according to the symbol mapping
+//
+// Parameters:
+//   new_adj_matrices_p - [out] Resulting matrix array. Caller must free
+//                              (only if OPT_BLOCK is set).
+static GrB_Info get_new_adj_matrices(const GrB_Matrix *adj_matrices, CFL_Symbol *map,
+                                     GrB_Index map_size, GrB_Matrix **new_adj_matrices_p,
+                                     char *msg, int8_t optimizations) {
+#undef FREE_INNER
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        LAGraph_Free((void **)new_adj_matrices_p, msg);                                  \
+    }
+
+    GrB_Index n;
+    TRY_INNER(GrB_Matrix_ncols(&n, adj_matrices[0]));
+
+    if (!(optimizations & OPT_BLOCK)) {
+        *new_adj_matrices_p = (GrB_Matrix *)adj_matrices;
+        return GrB_SUCCESS;
+    }
+
+    TRY_INNER(
+        LAGraph_Calloc((void **)new_adj_matrices_p, map_size, sizeof(GrB_Matrix), msg));
+
+    for (size_t i = 0; i < map_size; i++) {
+        CFL_Symbol sym = map[i];
+
+        if (sym.count == 0) {
+            TRY_INNER(
+                GrB_Matrix_dup(&(*new_adj_matrices_p)[i], adj_matrices[sym.base_index]));
+            continue;
+        }
+
+        GrB_Matrix new_col_matrix;
+        TRY_INNER(GrB_Matrix_new(&new_col_matrix, GrB_BOOL, n * sym.count, n));
+        GrB_Matrix *Tiles = (GrB_Matrix *)adj_matrices + sym.base_index;
+        TRY_INNER(GxB_Matrix_concat(new_col_matrix, Tiles, sym.count, 1, GrB_NULL));
+        (*new_adj_matrices_p)[i] = new_col_matrix;
+    }
+
+    return GrB_SUCCESS;
+}
+
+// Remaps rule symbol indices according to the symbol mapping
+//
+// Parameters:
+//   new_rules       - [out] Allocated output rule array. Caller must free
+//   new_rules_count - [out] Number of rules written to new_rules
+static GrB_Info get_new_rules(const LAGraph_rule_EWCNF *rules, size_t rules_count,
+                              CFL_Symbol *map, size_t map_size,
+                              LAGraph_rule_EWCNF **new_rules, size_t *new_rules_count,
+                              char *msg, int8_t optimizations) {
+    *new_rules_count = 0;
+#undef FREE_INNER
+
+#define FREE_INNER()                                                                     \
+    {                                                                                    \
+        LAGraph_Free((void **)new_rules, msg);                                           \
+    }
+
+    if (!(optimizations & OPT_BLOCK)) {
+        TRY_INNER(explode_rules(rules, rules_count, new_rules, new_rules_count, msg));
+        return GrB_SUCCESS;
+    }
+
+    TRY_INNER(
+        LAGraph_Calloc((void **)new_rules, rules_count, sizeof(LAGraph_rule_EWCNF), msg));
+    for (size_t i = 0; i < rules_count; i++) {
+        LAGraph_rule_EWCNF rule = rules[i];
+        LAGraph_rule_EWCNF new_rule = rule;
+
+        for (size_t i_sym = 0; i_sym < map_size; i_sym++) {
+            CFL_Symbol sym = map[i_sym];
+
+            if (rule.nonterm != -1 && rule.nonterm == sym.base_index) {
+                new_rule.nonterm = sym.index;
+            }
+            if (rule.prod_A != -1 && rule.prod_A == sym.base_index) {
+                new_rule.prod_A = sym.index;
+            }
+            if (rule.prod_B != -1 && rule.prod_B == sym.base_index) {
+                new_rule.prod_B = sym.index;
+            }
+        }
+
+        (*new_rules)[(*new_rules_count)++] = new_rule;
+    }
+
+    return GrB_SUCCESS;
+}
+
+// LAGraph_CFL_reachability_adv: Context-Free Language Reachability Matrix-Based
+// Algorithm
+//
+// This function determines the set of vertex pairs (u, v) in a graph (represented by
+// adjacency matrices) such that there is a path from u to v, where the edge labels
+// form a word from the language generated by the context-free grammar (represented by
+// `rules`).
+//
+// Terminals and non-terminals are enumerated by integers starting from zero.
+// The start non-terminal is the non-terminal with index 0.
+//
+// Example:
+//
+// Graph:
+// ┌───┐   ┌───┐   ┌───┐   ┌───┐   ┌───┐
+// │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │
+// └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘
+//           │       │
+//           │ ┌───┐ │
+//          a└─► 5 ├─┘b
+//             └───┘
+//
+// Grammar: S -> aSb | ab
+//
+// There are paths from node [1] to node [3] and from node [1] to node [2] that form
+// the word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the
+// language generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will
+// be included in the result.
+//
+// Note: It doesn't matter how many paths exist from node [A] to node [B] that form a
+// word in the language. If at least one path exists, the pair ([A], [B]) will be
+// included in the result.
+//
+// In contrast, the path from node [1] to node [4] forms the word "abb"
+// ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]).
+// The words "aab" and "abbb" are not in the language, so the pair (1, 4) will not be
+// included in the result.
+//
+// With this graph and grammar, we obtain the following results:
+// (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb"
+// (1, 3) - because there exists a path (1-2-3) that forms "ab"
+// (1, 2) - because there exists a path (1-5-2) that forms the word "ab"
+// (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb"
+GrB_Info LAGraph_CFL_reachability_adv(
+    // Output
+    GrB_Matrix *outputs, // Array of matrices containing results.
+                         // The size of the array must be equal to the count
+                         // of symbols (symbols_amount).
+                         //
+                         // Each matrix is square, with size equal to the number of
+                         // vertices in the graph. Matrices are allocated by the
+                         // caller, not by this method.
+                         //
+                         // outputs[k]: (i, j) = true if and only if there is a path
+                         // from node i to node j whose edge labels form a word
+                         // derivable from the symbol 'k' of the specified CFG.
+                         //
+                         // Note: output[t], where t is the index of a terminal, will
+                         // be an exact copy of adj_matrices[t].
+    // Input
+    const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph.
+                                    // The length of this array is equal to the count of
+                                    // symbols (symbols_amount).
+                                    //
+                                    // Each matrix is square, with size equal to the
+                                    // number of vertices in the graph.
+                                    //
+                                    // adj_matrices[i]: (i, j) == 1 if and only if there
+                                    // is an edge between nodes i and j with the label of
+                                    // the symbol corresponding to index 'i' (where i is
+                                    // in the range [0, symbols_amount - 1]).
+                                    //
+                                    // Note: adj_matrices[N], where N is the index of a
+                                    // nonterminal, must be initialized as an empty square
+                                    // matrix of size symbols_amount.
+    size_t symbols_amount,          // Count of terminal and nonterminals
+    const LAGraph_rule_EWCNF *rules, // The rules of the CFG.
+                                     // Warning: now not ready for N -> A rules, where
+                                     // is N and A are nonterminals.
+    size_t rules_count,              // The total number of rules in the CFG.
+    char *msg,                       // Message string for error reporting.
+    int8_t optimizations             // Optimizations flags
+) {
+#undef FREE_INNER
+
+#define FREE_INNER()
+
+    // Declare workspace and clear the msg string, if not NULL
+    CFL_Matrix **delta_matrices, **matrices, **temp_matrices;
+    CFL_Matrix *iden = NULL;
+    GrB_Matrix identity_matrix = NULL;
+
+    // for OPT_BLOCK optimization
+    size_t new_symbols_amount = 0;
+    GrB_Matrix *new_adj_matrices = NULL;
+    LAGraph_rule_EWCNF *new_rules = NULL;
+    size_t new_rules_count = 0;
+    CFL_Symbol *to_new_symbols_map = NULL;
+
+    LG_CLEAR_MSG;
+    size_t msg_len = 0; // For error formatting
+
+    TRY(LAGraph_Calloc((void **)&delta_matrices, symbols_amount, sizeof(CFL_Matrix *),
+                       msg));
+    TRY(LAGraph_Calloc((void **)&matrices, symbols_amount, sizeof(CFL_Matrix *), msg));
+    TRY(LAGraph_Calloc((void **)&temp_matrices, symbols_amount, sizeof(CFL_Matrix *),
+                       msg));
+
+    LG_ASSERT_MSG(symbols_amount > 0, GrB_INVALID_VALUE,
+                  "The number of symbols must be greater than zero.");
+    LG_ASSERT_MSG(rules_count > 0, GrB_INVALID_VALUE,
+                  "The number of rules must be greater than zero.");
+    LG_ASSERT_MSG(outputs != NULL, GrB_NULL_POINTER, "The outputs array cannot be null.");
+    LG_ASSERT_MSG(rules != NULL, GrB_NULL_POINTER, "The rules array cannot be null.");
+    LG_ASSERT_MSG(adj_matrices != NULL, GrB_NULL_POINTER,
+                  "The adjacency matrices array cannot be null.");
+
+    // Find null adjacency matrices
+    bool found_null = false;
+    for (size_t i = 0; i < symbols_amount; i++) {
+        if (adj_matrices[i] != NULL)
+            continue;
+
+        if (!found_null) {
+            ADD_TO_MSG("Adjacency matrices with these indexes are null: ");
+            ADD_TO_MSG("%ld", i);
+        } else {
+            ADD_TO_MSG(", %ld", i);
+        }
+
+        found_null = true;
+    }
+
+    if (found_null) {
+        LG_FREE_ALL;
+        return GrB_NULL_POINTER;
+    }
+
+    GrB_Index n;
+    TRY(GrB_Matrix_ncols(&n, adj_matrices[0]));
+
+    TRY(get_new_symbols_map(rules, rules_count, symbols_amount, &to_new_symbols_map,
+                            &new_symbols_amount, msg, optimizations));
+    TRY(get_new_adj_matrices(adj_matrices, to_new_symbols_map, new_symbols_amount,
+                             &new_adj_matrices, msg, optimizations));
+    TRY(get_new_rules(rules, rules_count, to_new_symbols_map, new_symbols_amount,
+                      &new_rules, &new_rules_count, msg, optimizations));
+
+    // Arrays for processing rules
+    size_t eps_rules[new_rules_count], eps_rules_count = 0;   // [Variable -> eps]
+    size_t term_rules[new_rules_count], term_rules_count = 0; // [Variable -> term]
+    size_t bin_rules[new_rules_count], bin_rules_count = 0;   // [Variable -> AB]
+
+    // Process rules
+    typedef struct {
+        size_t count;
+        size_t len_indexes_str;
+        char indexes_str[LAGRAPH_MSG_LEN];
+    } rule_error_s;
+    rule_error_s term_err = {0};
+    rule_error_s nonterm_err = {0};
+    rule_error_s invalid_err = {0};
+    for (size_t i = 0; i < new_rules_count; i++) {
+        LAGraph_rule_EWCNF rule = new_rules[i];
+
+        bool is_rule_eps = rule.prod_A == -1 && rule.prod_B == -1;
+        bool is_rule_term = rule.prod_A != -1 && rule.prod_B == -1;
+        bool is_rule_bin = rule.prod_A != -1 && rule.prod_B != -1;
+
+        // Check that all rules are well-formed
+        if (rule.nonterm < 0 || (size_t)rule.nonterm >= new_symbols_amount) {
+            ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
+        }
+
+        // [Variable -> eps]
+        if (is_rule_eps) {
+            eps_rules[eps_rules_count++] = i;
+
+            continue;
+        }
+
+        // [Variable -> term]
+        if (is_rule_term) {
+            term_rules[term_rules_count++] = i;
+
+            if (rule.prod_A < -1 || (size_t)rule.prod_A >= new_symbols_amount) {
+                ADD_INDEX_TO_ERROR_RULE(term_err, i);
+            }
+
+            continue;
+        }
+
+        // [Variable -> A B]
+        if (is_rule_bin) {
+            bin_rules[bin_rules_count++] = i;
+
+            if (rule.prod_A < -1 || (size_t)rule.prod_A >= new_symbols_amount ||
+                rule.prod_B < -1 || (size_t)rule.prod_B >= new_symbols_amount) {
+                ADD_INDEX_TO_ERROR_RULE(nonterm_err, i);
+            }
+
+            continue;
+        }
+
+        // [Variable -> _ B]
+        ADD_INDEX_TO_ERROR_RULE(invalid_err, i);
+    }
+
+    if (term_err.count + nonterm_err.count + invalid_err.count > 0) {
+        ADD_TO_MSG("Count of invalid rules: %ld.\n",
+                   term_err.count + nonterm_err.count + invalid_err.count);
+
+        if (nonterm_err.count > 0) {
+            ADD_TO_MSG("Non-terminals must be in range [0, nonterms_count). ");
+            ADD_TO_MSG("Indexes of invalid rules: %s\n", nonterm_err.indexes_str)
+        }
+        if (term_err.count > 0) {
+            ADD_TO_MSG("Terminals must be in range [-1, nonterms_count). ");
+            ADD_TO_MSG("Indexes of invalid rules: %s\n", term_err.indexes_str)
+        }
+        if (invalid_err.count > 0) {
+            ADD_TO_MSG("[Variable -> _ B] type of rule is not acceptable. ");
+            ADD_TO_MSG("Indexes of invalid rules: %.120s\n", invalid_err.indexes_str)
+        }
+
+        LG_FREE_ALL;
+        return GrB_INVALID_VALUE;
+    }
+
+    // Create symbol matrices
+    for (size_t i = 0; i < new_symbols_amount; i++) {
+        GrB_Index nrows;
+        TRY(GrB_Matrix_nrows(&nrows, new_adj_matrices[i]));
+        GrB_Index ncols;
+        TRY(GrB_Matrix_ncols(&ncols, new_adj_matrices[i]));
+
+        GrB_Matrix new_adj_matrix;
+        TRY(GrB_Matrix_dup(&new_adj_matrix, new_adj_matrices[i]));
+        TRY(CFL_matrix_from_base(&delta_matrices[i], new_adj_matrix));
+
+        if (optimizations & OPT_LAZY) {
+            TRY(CFL_matrix_create_lazy(&matrices[i], nrows, ncols));
+        } else {
+            TRY(CFL_matrix_create(&matrices[i], nrows, ncols));
+        }
+
+        TRY(CFL_matrix_create(&temp_matrices[i], nrows, ncols));
+    }
+
+    // Rule [Variable -> term]
+    for (size_t i = 0; i < term_rules_count; i++) {
+        LAGraph_rule_EWCNF term_rule = new_rules[term_rules[i]];
+        CFL_Matrix *nonterm_matrix = delta_matrices[term_rule.nonterm];
+        CFL_Matrix *term_matrix = delta_matrices[term_rule.prod_A];
+
+        TRY(CFL_wise(nonterm_matrix, nonterm_matrix, term_matrix, true, optimizations));
+    }
+
+    GrB_Vector v_diag;
+    TRY(GrB_Vector_new(&v_diag, GrB_BOOL, n));
+    TRY(GrB_Vector_assign_BOOL(v_diag, GrB_NULL, GrB_NULL, true, GrB_ALL, n, NULL));
+    TRY(GrB_Matrix_diag(&identity_matrix, v_diag, 0));
+    TRY(GrB_Vector_free(&v_diag));
+    TRY(CFL_matrix_from_base(&iden, identity_matrix));
+
+    // Rule [Variable -> eps]
+    for (size_t i = 0; i < eps_rules_count; i++) {
+        LAGraph_rule_EWCNF eps_rule = new_rules[eps_rules[i]];
+        CFL_Matrix *nonterm_matrix = delta_matrices[eps_rule.nonterm];
+
+        TRY(CFL_wise(nonterm_matrix, nonterm_matrix, iden, true, optimizations));
+    }
+
+    // Rule [Variable -> Variable1 Variable2]
+    double start_time, end_time;
+    bool changed = true;
+    size_t iteration = 0;
+    double mxm1 = 0.0;
+    double wise1 = 0.0;
+    double mxm2 = 0.0;
+    double wise2 = 0.0;
+    double rsubt = 0.0;
+    while (changed) {
+        iteration++;
+        changed = false;
+
+#if BENCH_CFL_REACHBILITY
+        printf("\n--- ITERATARION %ld ---\n", iteration);
+#endif
+
+        for (size_t i = 0; i < new_symbols_amount; i++) {
+            TRY_I(CFL_matrix_free(&temp_matrices[i]));
+            TRY_I(CFL_matrix_create(&temp_matrices[i], matrices[i]->nrows,
+                                    matrices[i]->ncols));
+        }
+
+        TIMER_START();
+        for (size_t i = 0; i < bin_rules_count; i++) {
+            LAGraph_rule_EWCNF bin_rule = new_rules[bin_rules[i]];
+            CFL_Matrix *A = matrices[bin_rule.prod_A];
+            CFL_Matrix *B = delta_matrices[bin_rule.prod_B];
+            CFL_Matrix *C = temp_matrices[bin_rule.nonterm];
+
+            TRY_I(CFL_mxm(C, A, B, true, false, optimizations));
+        }
+        TIMER_STOP("MXM 1", &mxm1);
+
+        TIMER_START()
+        for (size_t i = 0; i < new_symbols_amount; i++) {
+            CFL_Matrix *A = delta_matrices[i];
+            CFL_Matrix *C = matrices[i];
+
+            TRY_I(CFL_wise(C, C, A, false, optimizations));
+        }
+        TIMER_STOP("WISE 1", &wise1);
+
+        TIMER_START()
+        for (size_t i = 0; i < bin_rules_count; i++) {
+            LAGraph_rule_EWCNF bin_rule = new_rules[bin_rules[i]];
+            CFL_Matrix *A = matrices[bin_rule.prod_B];
+            CFL_Matrix *B = delta_matrices[bin_rule.prod_A];
+            CFL_Matrix *C = temp_matrices[bin_rule.nonterm];
+
+            TRY(CFL_mxm(C, A, B, true, true, optimizations));
+        }
+        TIMER_STOP("MXM 2", &mxm2);
+
+        // Rule [Variable -> term]
+        for (size_t i = 0; i < term_rules_count; i++) {
+            LAGraph_rule_EWCNF term_rule = new_rules[term_rules[i]];
+            CFL_Matrix *A = temp_matrices[term_rule.nonterm];
+            CFL_Matrix *B = delta_matrices[term_rule.prod_A];
+
+            TRY_I(CFL_wise(A, A, B, true, optimizations));
+        }
+
+        TIMER_START();
+        for (size_t i = 0; i < new_symbols_amount; i++) {
+            TRY_I(CFL_dup(delta_matrices[i], temp_matrices[i], optimizations));
+        }
+        TIMER_STOP("WISE 2 (copy)", &wise2);
+
+        TIMER_START();
+        for (size_t i = 0; i < new_symbols_amount; i++) {
+            CFL_Matrix *A = matrices[i];
+            CFL_Matrix *C = delta_matrices[i];
+
+            TRY_I(CFL_rsub(C, A, optimizations));
+        }
+        TIMER_STOP("WISE 3 (MASK)", &rsubt);
+
+        size_t new_nnz = 0;
+        for (size_t i = 0; i < new_symbols_amount; i++) {
+            TRY(CFL_matrix_update(delta_matrices[i]));
+            new_nnz += delta_matrices[i]->nvals;
+        }
+
+        if (new_nnz != 0) {
+            changed = true;
+        }
+    }
+
+#if BENCH_CFL_REACHBILITY
+    printf("MXM1: %.3f, wise1: %.3f, MXM2: %.3f, wise2: %.3f, rsub: %.3f", mxm1, wise1,
+           mxm2, wise2, rsubt);
+#endif
+
+    // get outputs matrices
+    for (size_t i = 0; i < new_symbols_amount; i++) {
+        CFL_Symbol sym = to_new_symbols_map[i];
+        TRY(split_CFL_matrix(outputs + sym.base_index, matrices[i], optimizations));
+    }
+
+    LG_FREE_WORK;
+    return GrB_SUCCESS;
+}
+
+// Helper function to free the output matrix of LAGraph_CFL_AllPaths, which contains elements of type AllPathsElem with dynamically allocated arrays of intermediate vertices. 
+static void free_AllPaths_matrix(GrB_Matrix* ptr_output) 
+{
+  GxB_Iterator iterator;
+  GxB_Iterator_new(&iterator);
+  GrB_Info info = GxB_Matrix_Iterator_attach(iterator, *ptr_output, NULL);
+  info = GxB_Matrix_Iterator_seek(iterator, 0);
+  AllPathsElem val;
+
+  while (info != GxB_EXHAUSTED)
+  {
+    GxB_Iterator_get_UDT(iterator, (void*)&val);
+    if (val.n > 1 && val.data.middle != NULL) {
+      free(val.data.middle);
+    }
+    info = GxB_Matrix_Iterator_next(iterator);
+  }
+
+  GrB_free(&iterator);
+  GrB_free(ptr_output);
+}
+
+// Free outputs and all_paths_ptr_t after you have finished working with the output matrices from LAGraph_CFL_AllPaths.
+// do outputs = NULL, all_paths_ptr_t = NULL after LAGraph_CFL_AllPaths_free_outputs
+GrB_Info LAGraph_CFL_AllPaths_free_outputs(GrB_Matrix* outputs, int64_t nonterms_count, GrB_Type* all_paths_ptr_t)
+{
+#if GxB_IMPLEMENTATION < GxB_VERSION(9, 4, 5)
+  return (GrB_NOT_IMPLEMENTED);
+#else
+  if (outputs) {
+    for (size_t i = 0; i < nonterms_count; i++) {
+      if (outputs[i] == NULL)
+        continue;
+      free_AllPaths_matrix(&outputs[i]);
+      outputs[i] = NULL;
+    }
+    free(outputs);
+  }
+  GrB_free(all_paths_ptr_t);
+  return GrB_SUCCESS;
+#endif
+}
diff --git a/include/LAGraphX.h b/include/LAGraphX.h
index 3355addb2e..9f86617219 100644
--- a/include/LAGraphX.h
+++ b/include/LAGraphX.h
@@ -1028,6 +1028,105 @@ int LAGraph_SquareClustering
     int32_t index;   // For rules that can be grouped by index
  } LAGraph_rule_WCNF;
 
+typedef struct
+{
+    size_t n;
+    union
+    {
+        GrB_Index single_elem;
+        GrB_Index* middle;
+    } data;
+} AllPathsElem;
+
+// Flags for indecies of EWCNF rules
+enum {
+    LAGraph_EWNCF_INDEX_NONTERM = 1 << 0,
+    LAGraph_EWNCF_INDEX_PROD_A  = 1 << 1,
+    LAGraph_EWNCF_INDEX_PROD_B = 1 << 2,
+};
+
+// Production rule of Context-free grammar in Extended Weak Chomsky Normal Form
+//
+// All grammar symbols (terminal and nonterminals) are presented by natural numbers from range [0; symbols_amount).
+//
+// Rule without indecies defined by tuple of [NONTERM, PROD_A, PROD_B, INDEXED_COUNT, INDEXED]
+// in Extended Weak Chomsky Normal Form where INDEXED_COUNT and INDEXED are zero:
+// Variable -> eps: [NONTERM, -1, -1, 0, 0]
+// Variable -> X: [NONTERM, PROD_X, -1, 0, 0]
+// Variable -> X Y: [NONTERM, PROD_X, PROD_Y, 0, 0]
+//
+// Where X, Y are arbitrary grammar symbols (terminal or nonterminal)
+//
+// Example:
+// Nonterms: [0 S] [1 A] [2 B] [3 C]
+// Terms: [4 a] [5 b]
+// S -> A B  [0  1  2 0 0]
+// S -> A b  [0  1  5 0 0]
+// S -> a B  [0  4  2 0 0]
+// S -> C    [0  3 -1 0 0]
+// A -> a    [1  4 -1 0 0]
+// B -> b    [2  5 -1 0 0]
+// C -> b    [3  5 -1 0 0]
+// S -> eps  [0 -1 -1 0 0]
+//
+// Warning: 
+// Variable -> _ B: [NONTERM, -1, PROD_B, INDEXED_COUNT, INDEXED] is not valid rule and may causes errors
+//
+// --------------- Indexed rules --------------- 
+// Rule may represent a family of indexed productions of size `indexed_count`.
+//
+// Indexed symbols are specified by 'indexed' bit mask
+// Posible indexed forms:
+// 
+//   1) N   -> A_i B        [N, A, B, INDEXED_COUNT, INDEX_PROD_A]
+//   2) N   -> A B_i        [N, A, B, INDEXED_COUNT, INDEX_PROD_A]
+//   3) N   -> A_i B_i      [N, A, B, INDEXED_COUNT, INDEX_PROD_A | INDEX_PROD_B]
+//   4) N_i -> A B          [N, A, B, INDEXED_COUNT, INDEX_NONTERM]
+//   5) N_i -> A_i B        [N, A, B, INDEXED_COUNT, INDEX_NONTERM | INDEX_PROD_A]
+//   6) N_i -> A B_i        [N, A, B, INDEXED_COUNT, INDEX_NONTERM | INDEX_PROD_B]
+//   7) N_i -> A_i B_i      [N, A, B, INDEXED_COUNT, INDEX_NONTERM | INDEX_PROD_A | INDEX_PROD_B]
+//
+// There are constraints about indexed rules:
+// 
+// 1) If symbol A is indexed in one rule, it must be indexed
+//    in all rules where it appears.
+// 2) If multiple symbols are indexed in the same rule,
+//    their indexed_count must be equal.
+// 3) Indexed symbols must be numbered consecutively:
+//    A_0, A_1, ..., A_{k-1} -> [base, base+1, ..., base+k-1]
+// 4) For indexed rules, base symbol A must represent by A_0.
+//
+// Actual rules are expanded as:
+//
+// for i in range [0; indexed_count)
+// N[_i] -> A[_i] B[_i]    (only for symbols marked as indexed)
+//
+// Example:
+// Z -> X_i Y (represented by [Z; X, Y, 3, INDEX_PROD_A]) expanded as:
+// 
+// Z -> X_0 Y
+// Z -> X_1 Y
+// Z -> X_2 Y
+//
+// --------------- Difference from standard Weak Chomsky Normal Form (WCNF) ---------------
+//
+// Standard WCNF allows only:
+//   N -> A B   (A,B are nonterminals)
+//   N -> a     (a are terminal)
+//   N -> eps
+//
+// EWCNF allows:
+//   N -> X Y   (X are terminal or nonterminal, Y are terminal or nonterminal)
+//   N -> X     (X are terminal or nonterminal)
+//   N -> eps
+//   and indexed rules.
+ typedef struct {
+    int32_t nonterm;       // LHS nonterminal symbol id
+    int32_t prod_A;        // first RHS symbol id or -1
+    int32_t prod_B;        // second RHS symbol id or -1
+    uint32_t indexed_count; // Number of indexed rules (zero if not indexed)
+    uint8_t indexed;        // Bitmask of indexed symbols
+} LAGraph_rule_EWCNF;
 
 // LAGraph_CFL_reachability: Context-Free Language Reachability Matrix-Based Algorithm
 //
@@ -1096,6 +1195,110 @@ GrB_Info LAGraph_CFL_reachability
     char *msg                       // Message string for error reporting.
 ) ;
 
+
+// LAGraph_CFL_reachability_adv: Context-Free Language Reachability Matrix-Based
+// Algorithm
+//
+// This function determines the set of vertex pairs (u, v) in a graph (represented by
+// adjacency matrices) such that there is a path from u to v, where the edge labels
+// form a word from the language generated by the context-free grammar (represented by
+// `rules`).
+//
+// Terminals and non-terminals are enumerated by integers starting from zero.
+// The start non-terminal is the non-terminal with index 0.
+//
+// Example:
+//
+// Graph:
+// ┌───┐   ┌───┐   ┌───┐   ┌───┐   ┌───┐
+// │ 0 ├───► 1 ├───► 2 ├───► 3 ├───► 4 │
+// └───┘ a └─┬─┘ a └─▲─┘ b └───┘ b └───┘
+//           │       │
+//           │ ┌───┐ │
+//          a└─► 5 ├─┘b
+//             └───┘
+//
+// Grammar: S -> aSb | ab
+//
+// There are paths from node [1] to node [3] and from node [1] to node [2] that form
+// the word "ab" ([1]-a->[2]-b->[3] and [1]-a->[5]-b->[2]). The word "ab" is in the
+// language generated by our context-free grammar, so the pairs (1, 3) and (1, 2) will
+// be included in the result.
+//
+// Note: It doesn't matter how many paths exist from node [A] to node [B] that form a
+// word in the language. If at least one path exists, the pair ([A], [B]) will be
+// included in the result.
+//
+// In contrast, the path from node [1] to node [4] forms the word "abb"
+// ([1]-a->[2]-b->[3]-b->[4]) and the word "abbb" ([1]-a->[5]-b->[2]-b->[3]-b->[4]).
+// The words "aab" and "abbb" are not in the language, so the pair (1, 4) will not be
+// included in the result.
+//
+// With this graph and grammar, we obtain the following results:
+// (0, 4) - because there exists a path (0-1-2-3-4) that forms the word "aabb"
+// (1, 3) - because there exists a path (1-2-3) that forms "ab"
+// (1, 2) - because there exists a path (1-5-2) that forms the word "ab"
+// (0, 3) - because there exists a path (0-1-5-2-3) that forms the word "aabb"
+GrB_Info LAGraph_CFL_reachability_adv
+(
+    // Output
+    GrB_Matrix *outputs, // Array of matrices containing results.
+                         // The size of the array must be equal to the count
+                         // of symbols (symbols_amount).
+                         //
+                         // Each matrix is square, with size equal to the number of
+                         // vertices in the graph. Matrices are allocated by the
+                         // caller, not by this method.
+                         //
+                         // outputs[k]: (i, j) = true if and only if there is a path
+                         // from node i to node j whose edge labels form a word
+                         // derivable from the symbol 'k' of the specified CFG.
+                         //
+                         // Note: output[t], where t is the index of a terminal, will
+                         // be an exact copy of adj_matrices[t].
+    // Input
+    const GrB_Matrix *adj_matrices, // Array of adjacency matrices representing the graph.
+                                    // The length of this array is equal to the count of
+                                    // symbols (symbols_amount).
+                                    //
+                                    // Each matrix is square, with size equal to the
+                                    // number of vertices in the graph.
+                                    //
+                                    // adj_matrices[i]: (i, j) == 1 if and only if there
+                                    // is an edge between nodes i and j with the label of
+                                    // the symbol corresponding to index 'i' (where i is
+                                    // in the range [0, symbols_amount - 1]).
+                                    //
+                                    // Note: adj_matrices[N], where N is the index of a
+                                    // nonterminal, must be initialized as an empty square
+                                    // matrix of size symbols_amount.
+    size_t symbols_amount,          // Count of terminal and nonterminals
+    const LAGraph_rule_EWCNF *rules, // The rules of the CFG.
+                                     // Warning: now not ready for N -> A rules, where
+                                     // is N and A are nonterminals.
+    size_t rules_count,              // The total number of rules in the CFG.
+    char *msg,                       // Message string for error reporting.
+    int8_t optimizations             // Optimizations flags
+) ;
+
+GrB_Info LAGraph_CFL_AllPaths_adv(
+    GrB_Matrix *outputs, 
+    GrB_Type *all_paths_ptr_t,
+    const GrB_Matrix *adj_matrices, 
+    size_t symbols_amount,       
+    const LAGraph_rule_EWCNF *rules,
+    size_t rules_count,
+    char *msg,
+    int8_t optimizations
+);
+
+// Free outputs and all_paths_ptr_t after you have finished working with the output matrices from LAGraph_CFL_AllPaths_adv.
+// do outputs = NULL, all_paths_ptr_t = NULL after LAGraph_CFL_AllPaths_adv_free_outputs
+GrB_Info LAGraph_CFL_AllPaths_adv_free_outputs(
+  GrB_Matrix* outputs,
+  int64_t nonterms_count,
+  GrB_Type* all_paths_ptr_t);
+
 //------------------------------------------------------------------------------
 // a simple example of an algorithm
 //------------------------------------------------------------------------------

From 8399e80992a2bb2b7fe6528a54e6777f2f121dde Mon Sep 17 00:00:00 2001
From: Litvyakov Daniil <140255125+DanyaLitva@users.noreply.github.com>
Date: Thu, 30 Apr 2026 19:53:57 +0300
Subject: [PATCH 2/2] Update LAGraph_CFL_reachability_advanced.c

---
 .../LAGraph_CFL_reachability_advanced.c       | 43 -------------------
 1 file changed, 43 deletions(-)

diff --git a/experimental/algorithm/LAGraph_CFL_reachability_advanced.c b/experimental/algorithm/LAGraph_CFL_reachability_advanced.c
index c679261849..433f305ccf 100644
--- a/experimental/algorithm/LAGraph_CFL_reachability_advanced.c
+++ b/experimental/algorithm/LAGraph_CFL_reachability_advanced.c
@@ -947,46 +947,3 @@ GrB_Info LAGraph_CFL_reachability_adv(
     LG_FREE_WORK;
     return GrB_SUCCESS;
 }
-
-// Helper function to free the output matrix of LAGraph_CFL_AllPaths, which contains elements of type AllPathsElem with dynamically allocated arrays of intermediate vertices. 
-static void free_AllPaths_matrix(GrB_Matrix* ptr_output) 
-{
-  GxB_Iterator iterator;
-  GxB_Iterator_new(&iterator);
-  GrB_Info info = GxB_Matrix_Iterator_attach(iterator, *ptr_output, NULL);
-  info = GxB_Matrix_Iterator_seek(iterator, 0);
-  AllPathsElem val;
-
-  while (info != GxB_EXHAUSTED)
-  {
-    GxB_Iterator_get_UDT(iterator, (void*)&val);
-    if (val.n > 1 && val.data.middle != NULL) {
-      free(val.data.middle);
-    }
-    info = GxB_Matrix_Iterator_next(iterator);
-  }
-
-  GrB_free(&iterator);
-  GrB_free(ptr_output);
-}
-
-// Free outputs and all_paths_ptr_t after you have finished working with the output matrices from LAGraph_CFL_AllPaths.
-// do outputs = NULL, all_paths_ptr_t = NULL after LAGraph_CFL_AllPaths_free_outputs
-GrB_Info LAGraph_CFL_AllPaths_free_outputs(GrB_Matrix* outputs, int64_t nonterms_count, GrB_Type* all_paths_ptr_t)
-{
-#if GxB_IMPLEMENTATION < GxB_VERSION(9, 4, 5)
-  return (GrB_NOT_IMPLEMENTED);
-#else
-  if (outputs) {
-    for (size_t i = 0; i < nonterms_count; i++) {
-      if (outputs[i] == NULL)
-        continue;
-      free_AllPaths_matrix(&outputs[i]);
-      outputs[i] = NULL;
-    }
-    free(outputs);
-  }
-  GrB_free(all_paths_ptr_t);
-  return GrB_SUCCESS;
-#endif
-}