From 1f227416a146b613d7b76dd22ac71648de94eea0 Mon Sep 17 00:00:00 2001
From: RXGottlieb <36906916+RXGottlieb@users.noreply.github.com>
Date: Wed, 18 Mar 2026 17:48:29 -0400
Subject: [PATCH 1/4] Update for Julia v1.12

- Explicitly use `Float64(pi)` in some places in written kernels
- Switch to using MultiFloats in `quadrant` to avoid use of `BigFloat` on GPU
- Improve sparsity detection for more complex expressions
- Move `@register_symbolic` to global scope for `SCMC_sigmoid`
---
 Project.toml                             |   4 +-
 src/SourceCodeMcCormick.jl               |   1 +
 src/kernel_writer/kernel_write.jl        | 116 ++++++++---------------
 src/kernel_writer/math_kernels.jl        |  18 +++-
 src/kernel_writer/string_math_kernels.jl |  12 +--
 5 files changed, 61 insertions(+), 90 deletions(-)

diff --git a/Project.toml b/Project.toml
index 4f4d295..89f5a47 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "SourceCodeMcCormick"
 uuid = "a7283dc5-4ecf-47fb-a95b-1412723fc960"
 authors = ["Robert Gottlieb <Robert.x.gottlieb@uconn.edu>"]
-version = "0.5.1"
+version = "0.5.2"
 
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
@@ -9,6 +9,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
+MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
@@ -20,6 +21,7 @@ CUDA = "5"
 DocStringExtensions = "0.8 - 0.9"
 Graphs = "1"
 IfElse = "0.1.0 - 0.1.1"
+MultiFloats = "3.2.3"
 PrecompileTools = "~1"
 Reexport = "~1"
 StaticArrays = "~1"
diff --git a/src/SourceCodeMcCormick.jl b/src/SourceCodeMcCormick.jl
index a4a331a..6db41c6 100644
--- a/src/SourceCodeMcCormick.jl
+++ b/src/SourceCodeMcCormick.jl
@@ -9,6 +9,7 @@ using DocStringExtensions
 using Graphs
 using CUDA
 using StaticArrays: @MVector
+using MultiFloats
 import Dates
 import SymbolicUtils: BasicSymbolic, exprtype, SYM, TERM, ADD, MUL, POW, DIV
 
diff --git a/src/kernel_writer/kernel_write.jl b/src/kernel_writer/kernel_write.jl
index 7d43b4d..ac84a11 100644
--- a/src/kernel_writer/kernel_write.jl
+++ b/src/kernel_writer/kernel_write.jl
@@ -109,13 +109,10 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
         error("Splitting must be one of: {:low, :default, :high, :max}")
     end
 
-    # Pull out sparsity information in the factorization
-    sparsity = detect_sparsity(factored, gradlist)
-
     # Decide if the kernel needs to be split
     if (n_vars[end] < 31) && ((n_lines[end] <= max_size) || (findfirst(x -> x > split_point, n_lines)==length(n_lines)))
         # Complexity is fairly low; only a single kernel needed
-        create_kernel!(expr_hash, 1, num, get_name.(gradlist), func_outputs, constants, factored, sparsity)
+        create_kernel!(expr_hash, 1, num, get_name.(gradlist), func_outputs, constants, factored)
         push!(kernel_nums, 1)
         push!(inputs, string.(indep_vars))
         push!(outputs, "OUT")
@@ -169,7 +166,7 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
             #### Start of alternative to experimental section
 
             # Send the element at `new_ID` to create_kernel!()
-            create_kernel!(expr_hash, kernel_count, extract(factored, new_ID), get_name.(gradlist), func_outputs, constants, factored, sparsity)
+            create_kernel!(expr_hash, kernel_count, extract(factored, new_ID), get_name.(gradlist), func_outputs, constants, factored)
             push!(kernel_nums, kernel_count)
             push!(inputs, string.(get_name.(pull_vars(extract(factored, new_ID)))))
             push!(outputs, string(factored[new_ID].lhs))
@@ -188,7 +185,7 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
             # If the total number of lines (not including the final line) is below the max size 
             # and the number of variables is below 32, we can make the final kernel and be done
             if (n_vars[end] < 32) && (all(n_lines[1:end-1] .<= max_size))
-                create_kernel!(expr_hash, kernel_count, extract(factored), get_name.(gradlist), func_outputs, constants, factored, sparsity)
+                create_kernel!(expr_hash, kernel_count, extract(factored), get_name.(gradlist), func_outputs, constants, factored)
                 push!(kernel_nums, kernel_count)
                 push!(inputs, string.(get_name.(pull_vars(extract(factored)))))
                 push!(outputs, "OUT")
@@ -435,8 +432,8 @@ end
 # This function takes information about the file name, kernel ID, and
 # the expression that a SINGLE kernel is being created for, and creates
 # that kernel in the specified file.
-create_kernel!(expr_hash::String, kernel_ID::Int, num::Num, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation}, orig_sparsity::Vector{Vector{Int}}) = create_kernel!(expr_hash, kernel_ID, num.val, gradlist, func_outputs, constants, orig_factored, orig_sparsity)
-function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Real}, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation}, orig_sparsity::Vector{Vector{Int}})
+create_kernel!(expr_hash::String, kernel_ID::Int, num::Num, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation}) = create_kernel!(expr_hash, kernel_ID, num.val, gradlist, func_outputs, constants, orig_factored)
+function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Real}, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation})
     # This function will create a kernel for `num`, with the name:
     # "f_" * expr_hash * "_$n". This name will be pushed to `kernels`,
     # and a vector of the required inputs variables will be pushed to
@@ -466,41 +463,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
     # normally, unless it's a temporary variable, in which case we have to refer to the original
     # factorization and sparsity.
     string_gradlist = string.(gradlist)
-    sparsity = Vector{Vector{Int}}(undef, length(varorder))
-    for i in eachindex(varorder)
-        if varorder[i] in string_gradlist
-            # Mark sparsity if the variable is already in gradlist
-            sparsity[i] = [findfirst(==(varorder[i]), string_gradlist)]
-        else
-            # Find out what index we're on
-            idx = findfirst(x -> isequal(string(x.lhs), varorder[i]), factorized)
-
-            if isnothing(idx)
-                sparsity[i] = orig_sparsity[findfirst(x -> isequal(string(x.lhs), varorder[i]), orig_factored)]
-            else
-                # Extract all the variables for this index
-                vars = pull_vars(extract(factorized, idx))
-
-                # For each variable in the expanded expression, add in sparsity information
-                curr_sparsity = Int[]
-                for var in vars
-                    ID = findfirst(==(string(get_name(var))), string_gradlist)
-                    if isnothing(ID)
-                        # If we didn't find the variable, we need to scan the original factorization,
-                        # and then pull sparsity info from the original sparsity list
-                        ID = findfirst(x -> isequal(string(x.lhs), string(var)), orig_factored)
-                        push!(curr_sparsity, orig_sparsity[ID]...)
-                    else
-                        # If we do find the variable, we can add this variable directly into the sparsity
-                        push!(curr_sparsity, ID)
-                    end
-                end
-
-                # Add a sorted, unique list to the sparsity tracker
-                sparsity[i] = sort(unique(curr_sparsity))
-            end
-        end
-    end
+    sparsity = get_sparsity(varorder, string_gradlist, factorized)
 
     # Check if we need temporary variables at all. We don't need
     # temporary variables if we only have addition, or if we have
@@ -536,7 +499,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
     maxtemp = 0
     # if need_temps #(Skip for now)
         for i in eachindex(varorder) # Loop through every participating variable
-            if (varorder[i] in string.(get_name.(vars)))
+            if (varorder[i] in string.(vars))
                 # Skip the variable if it's an input
                 continue
             end
@@ -608,7 +571,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
     file = open(joinpath(@__DIR__, "storage", "f_"*expr_hash*".jl"), "a")
 
     # Put in the preamble.
-    write(file, preamble_string(expr_hash, ["OUT"; string.(get_name.(vars))], kernel_ID, maxtemp, length(gradlist)))
+    write(file, preamble_string(expr_hash, ["OUT"; string.(vars)], kernel_ID, maxtemp, length(gradlist)))
 
     # Loop through the topological list to add calculations in order
     temp_endlist = []
@@ -616,7 +579,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
     name_tracker = copy(varids)
     for i in eachindex(varorder) # Order in which variables are calculated
         # Skip calculation if the variable is one of the inputs
-        if (varorder[i] in string.(get_name.(vars)))
+        if (varorder[i] in string.(vars))
             continue
         end
 
@@ -628,20 +591,17 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
         factorized_ID = findfirst(x -> isequal(string(x.lhs), varorder[i]), factorized)
         participants = get_name.(pull_vars(factorized[factorized_ID].rhs))
         inputs = []
+        input_IDs = Int[]
         for p in string.(participants)
             # Find the corresponding element in varids
             varids_ID = findfirst(x -> isequal(x, p), varids)
-            # Push the name_tracker name to the input list
+            # Push the name_tracker name to the input list and the ID to the list of input IDs
             push!(inputs, name_tracker[varids_ID])
-        end
 
-        # [Deprecating; I'll use temporary variables the whole way and then set
-        # the output variable at the end for final copying]
-        # # If this is the final variable, it'll be called "OUT". No need
-        # # for temp variables
-        # if i==length(varorder)
-        #     name_tracker[ID] = "OUT"
-        # else
+            # Find the corresponding element in varorder
+            varorder_ID = findfirst(x -> isequal(x, p), varorder)
+            push!(input_IDs, varorder_ID)
+        end
 
         # Determine which tempID to use/override. temp_endlist keeps
         # track of where variables will be used in the future (stored
@@ -695,6 +655,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
         # Now we can append this temporary variable to the list of inputs
         # for the correct operation
         inputs = [name_tracker[ID]; inputs]
+        input_IDs = [i; input_IDs]
 
         # Now we can pass the equation's RHS and the inputs to call the correct
         # writer function
@@ -702,14 +663,12 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
             # Special case. We're adding inputs[3] to inputs[2], so we only want
             # to pass the sparsity information of inputs[3] (rather than the
             # sparsity information of inputs[2] and inputs[3])
-            corrected_i = findfirst(x->x==inputs[3], varorder)
-            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[corrected_i])
+            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[3]])
         elseif length(inputs)>2 && inputs[1]==inputs[3]
             # Special case. We're adding inputs[2] to inputs[3], so we only want
             # to pass the sparsity information of inputs[2] (rather than the
             # sparsity information of inputs[2] and inputs[3])
-            corrected_i = findfirst(x->x==inputs[2], varorder)
-            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[corrected_i])
+            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[2]])
         else
             write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[i])
         end
@@ -765,10 +724,6 @@ end
 # 1) (a^x1)^b = (a^b)^x1 [EAGO paper] (Can't do powers besides integers)
 function perform_substitutions(old_factored::Vector{Equation})
     factored = deepcopy(old_factored)
-
-    # Register any terms we want to substitute
-    @eval @register_symbolic SCMC_sigmoid(x)
-
     scan_flag = true
     while scan_flag
         scan_flag = false
@@ -1468,23 +1423,28 @@ function pull_mult(factors, index; args=[])
 end
 
 
-# A helper function to calculate the sparsity of a factorization, given
-# the full gradlist
-function detect_sparsity(factored, gradlist)
-    # Idea is to check every element of "factored" and pull out a list of indices
-    # in gradlist that the variables depend on.
-    sparsity = Vector{Int}[]
-    string_gradlist = string.(gradlist)
-
-    for i in eachindex(factored)
-        vars = string.(pull_vars(extract(factored, i)))
-        indices = zeros(Int, length(vars))
-        for j in eachindex(indices)
-            indices[j] = findfirst(==(string(vars[j])), string_gradlist)
+# A helper function to calculate the sparsity of a factorization
+function get_sparsity(varorder, string_gradlist, factored)
+    sparsity = [Int[] for _ in 1:length(varorder)]
+    function calc_sp(v)
+        if !isempty(sparsity[v])
+            return sparsity[v]
+        end
+        if varorder[v] in string_gradlist
+            sparsity[v] = [findfirst(==(varorder[v]), string_gradlist)]
+            return sparsity[v]
+        end
+        idx = findfirst(x -> isequal(string(x.lhs), varorder[v]), factored)
+        RHS_vars = pull_vars(factored[idx].rhs)
+        for var in RHS_vars
+            var_idx = findfirst(==(string(get_name(var))), varorder)
+            sparsity[v] = [sparsity[v]..., calc_sp(var_idx)...]
         end
-        push!(sparsity, indices)
+        return sort(unique(sparsity[v]))
+    end
+    for v in 1:length(varorder)
+        sparsity[v] = calc_sp(v)
     end
-
     return sparsity
 end
 
diff --git a/src/kernel_writer/math_kernels.jl b/src/kernel_writer/math_kernels.jl
index 76932c4..ee56479 100644
--- a/src/kernel_writer/math_kernels.jl
+++ b/src/kernel_writer/math_kernels.jl
@@ -1365,6 +1365,7 @@ end
 
 # Sigmoid function
 # max threads: 640
+@register_symbolic SCMC_sigmoid(x) # Register as symbolic so that we can use it later
 function SCMC_sigmoid_kernel(OUT::CuDeviceMatrix, x::CuDeviceMatrix)
     idx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x
     stride = blockDim().x * gridDim().x
@@ -4670,21 +4671,21 @@ function SCMC_cos_kernel(OUT::CuDeviceMatrix, x::CuDeviceMatrix)
         kL = Base.ceil(-0.5 - x[idx,3]/(2.0*pi))
         xL1 = x[idx,3] + 2.0*pi*kL
         xU1 = x[idx,4] + 2.0*pi*kL
-        if (xL1 < -pi) || (xL1 > pi)
+        if (xL1 < -pi) || (xL1 > Float64(pi))
             eps_min = NaN
             eps_max = NaN
         elseif xL1 <= 0.0
             if xU1 <= 0.0
                 eps_min = x[idx,3]
                 eps_max = x[idx,4]
-            elseif xU1 >= pi
+            elseif xU1 >= Float64(pi)
                 eps_min = pi - 2.0*pi*kL
                 eps_max = -2.0*pi*kL
             else
                 eps_min = (cos(xL1) <= cos(xU1)) ? x[idx,3] : x[idx,4]
                 eps_max = -2.0*pi*kL
             end
-        elseif xU1 <= pi
+        elseif xU1 <= Float64(pi)
             eps_min = x[idx,4]
             eps_max = x[idx,3]
         elseif xU1 >= 2.0*pi
@@ -5449,9 +5450,16 @@ function cos_newton_or_golden_section(x0::Float64, xL::Float64, xU::Float64, env
     return xk
 end
 
-# Directly from IntervalArithmetic.jl
+# Similar to IntervalArithmetic.jl, but not using `rem2pi`
 function quadrant(x::Float64)
-    x_mod2pi = rem2pi(x, RoundNearest)
+    bigx = MultiFloats.Float64x2(x)
+    bigpi = MultiFloats._MF{Float64,2}((3.141592653589793, 1.2246467991473532e-16))
+    rem = Float64(floor(bigx/bigpi))
+    if iseven(rem)
+        x_mod2pi = Float64(bigx - rem*bigpi)
+    else
+        x_mod2pi = Float64(bigx - (rem+1)*bigpi)
+    end
     
     x_mod2pi < -(pi/2.0) && return (Int32(2), x_mod2pi)
     x_mod2pi < 0 && return (Int32(3), x_mod2pi)
diff --git a/src/kernel_writer/string_math_kernels.jl b/src/kernel_writer/string_math_kernels.jl
index 56750db..fd742ac 100644
--- a/src/kernel_writer/string_math_kernels.jl
+++ b/src/kernel_writer/string_math_kernels.jl
@@ -10291,21 +10291,21 @@ function SCMC_cos_kernel(OUT::String, v1::String, varlist::Vector{String}, spars
         write(buffer, "        kL = Base.ceil(-0.5 - $v1_lo/(2.0*pi))\n")
         write(buffer, "        xL1 = $v1_lo + 2.0*pi*kL\n")
         write(buffer, "        xU1 = $v1_hi + 2.0*pi*kL\n")
-        write(buffer, "        if (xL1 < -pi) || (xL1 > pi)\n")
+        write(buffer, "        if (xL1 < -pi) || (xL1 > Float64(pi))\n")
         write(buffer, "            eps_min = NaN\n")
         write(buffer, "            eps_max = NaN\n")
         write(buffer, "        elseif xL1 <= 0.0\n")
         write(buffer, "            if xU1 <= 0.0\n")
         write(buffer, "                eps_min = $v1_lo\n")
         write(buffer, "                eps_max = $v1_hi\n")
-        write(buffer, "            elseif xU1 >= pi\n")
+        write(buffer, "            elseif xU1 >= Float64(pi)\n")
         write(buffer, "                eps_min = pi - 2.0*pi*kL\n")
         write(buffer, "                eps_max = -2.0*pi*kL\n")
         write(buffer, "            else\n")
         write(buffer, "                eps_min = (cos(xL1) <= cos(xU1)) ? $v1_lo : $v1_hi\n")
         write(buffer, "                eps_max = -2.0*pi*kL\n")
         write(buffer, "            end\n")
-        write(buffer, "        elseif xU1 <= pi\n")
+        write(buffer, "        elseif xU1 <= Float64(pi)\n")
         write(buffer, "            eps_min = $v1_hi\n")
         write(buffer, "            eps_max = $v1_lo\n")
         write(buffer, "        elseif xU1 >= 2.0*pi\n")
@@ -10601,21 +10601,21 @@ function SCMC_cos_kernel(OUT::String, v1::String, varlist::Vector{String}, spars
         write(buffer, "        kL = Base.ceil(-0.5 - $v1_lo/(2.0*pi))\n")
         write(buffer, "        xL1 = $v1_lo + 2.0*pi*kL\n")
         write(buffer, "        xU1 = $v1_hi + 2.0*pi*kL\n")
-        write(buffer, "        if (xL1 < -pi) || (xL1 > pi)\n")
+        write(buffer, "        if (xL1 < -pi) || (xL1 > Float64(pi))\n")
         write(buffer, "            eps_min = NaN\n")
         write(buffer, "            eps_max = NaN\n")
         write(buffer, "        elseif xL1 <= 0.0\n")
         write(buffer, "            if xU1 <= 0.0\n")
         write(buffer, "                eps_min = $v1_lo\n")
         write(buffer, "                eps_max = $v1_hi\n")
-        write(buffer, "            elseif xU1 >= pi\n")
+        write(buffer, "            elseif xU1 >= Float64(pi)\n")
         write(buffer, "                eps_min = pi - 2.0*pi*kL\n")
         write(buffer, "                eps_max = -2.0*pi*kL\n")
         write(buffer, "            else\n")
         write(buffer, "                eps_min = (cos(xL1) <= cos(xU1)) ? $v1_lo : $v1_hi\n")
         write(buffer, "                eps_max = -2.0*pi*kL\n")
         write(buffer, "            end\n")
-        write(buffer, "        elseif xU1 <= pi\n")
+        write(buffer, "        elseif xU1 <= Float64(pi)\n")
         write(buffer, "            eps_min = $v1_hi\n")
         write(buffer, "            eps_max = $v1_lo\n")
         write(buffer, "        elseif xU1 >= 2.0*pi\n")

From 6dfec28ce211b2139ec17f692c6663716caacfec Mon Sep 17 00:00:00 2001
From: RXGottlieb <36906916+RXGottlieb@users.noreply.github.com>
Date: Wed, 18 Mar 2026 17:57:15 -0400
Subject: [PATCH 2/4] Less restrictive version for MultiFloats

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 89f5a47..0ede0cc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -21,7 +21,7 @@ CUDA = "5"
 DocStringExtensions = "0.8 - 0.9"
 Graphs = "1"
 IfElse = "0.1.0 - 0.1.1"
-MultiFloats = "3.2.3"
+MultiFloats = "3.1"
 PrecompileTools = "~1"
 Reexport = "~1"
 StaticArrays = "~1"

From dde5966525bb843e469cd7f5fdf28ddf1c343c74 Mon Sep 17 00:00:00 2001
From: RXGottlieb <36906916+RXGottlieb@users.noreply.github.com>
Date: Thu, 19 Mar 2026 19:38:03 -0400
Subject: [PATCH 3/4] Efficiency improvements

- Make efficiency improvements to `kgen` and `create_kernel!`
- Add minor utilities to suppose `kgen` and `create_kernel!` improvements
- Remove some unused sections of code
- Remove some extraneous notes
---
 src/kernel_writer/kernel_write.jl | 305 +++++++++++-------------------
 src/transform/utilities.jl        |  65 ++++++-
 src/transform/write.jl            |  26 ++-
 3 files changed, 186 insertions(+), 210 deletions(-)

diff --git a/src/kernel_writer/kernel_write.jl b/src/kernel_writer/kernel_write.jl
index ac84a11..ced9c1f 100644
--- a/src/kernel_writer/kernel_write.jl
+++ b/src/kernel_writer/kernel_write.jl
@@ -29,27 +29,6 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
     write(file, "# Kernel(s) generated for the expression: $(string(num))\n\n")
     close(file)
 
-    # Parse the list of requested outputs (Not currently useful, since all outputs are generated by default)
-    func_outputs = Symbol[]
-    for output in raw_outputs
-        output == :cv     && push!(func_outputs, :cv)
-        output == :cc     && push!(func_outputs, :cc)
-        output == :lo     && push!(func_outputs, :lo)
-        output == :hi     && push!(func_outputs, :hi)
-        output == :MC     && push!(func_outputs, :cv, :cc, :lo, :hi)
-        output == :cvgrad && push!(func_outputs, :cvgrad)
-        output == :ccgrad && push!(func_outputs, :ccgrad)
-        output == :grad   && push!(func_outputs, :cvgrad, :ccgrad)
-        output == :all    && push!(func_outputs, :cv, :cc, :lo, :hi, :cvgrad, :ccgrad)
-        if ~(output in [:cv, :cc, :lo, :hi, :MC, :cvgrad, :ccgrad, :grad, :all])
-            error("Output list contains an invalid output symbol: :$output. Acceptable symbols
-                   include [:cv, :cc, :lo, :hi, :MC, :cvgrad, :ccgrad, :grad, :all]")
-        end
-    end
-    if isempty(func_outputs)
-        error("No outputs specified.")
-    end
-
     # Check the number of independent variables in the expression. Note that
     # even if gradlist has 32+ elements, if the expression itself has fewer
     # than 32 independent variables, it can be handled in a single kernel.
@@ -65,7 +44,7 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
     # and Software, 33:3, 563-593 (2018). DOI: 10.1080/10556788.2017.1335312
     # This method is also used in EAGO's `affine_relax_quadratic!` function.
     if affine_quadratic==true && is_quadratic(num) # NOTE: When switching to MOI variables, this will be easy to detect
-        func_name = kgen_affine_quadratic(expr_hash, num, gradlist, func_outputs, constants)
+        func_name = kgen_affine_quadratic(expr_hash, num, gradlist, constants)
         return func_name
     end
 
@@ -109,10 +88,14 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
         error("Splitting must be one of: {:low, :default, :high, :max}")
     end
 
+    # Pull out sparsity information in the factorization
+    all_vars = [Symbol.(get_name.(gradlist))..., get_name.(getfield.(factored, :lhs))...]
+    sparsity = get_sparsity(all_vars, Symbol.(get_name.(gradlist)), factored)
+
     # Decide if the kernel needs to be split
     if (n_vars[end] < 31) && ((n_lines[end] <= max_size) || (findfirst(x -> x > split_point, n_lines)==length(n_lines)))
         # Complexity is fairly low; only a single kernel needed
-        create_kernel!(expr_hash, 1, num, get_name.(gradlist), func_outputs, constants, factored)
+        create_kernel!(expr_hash, 1, factored, length(factored), get_name.(gradlist), constants, all_vars, sparsity)
         push!(kernel_nums, 1)
         push!(inputs, string.(indep_vars))
         push!(outputs, "OUT")
@@ -120,7 +103,6 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
         # Complexity is not low enough; need multiple kernels
         complete = false
         kernel_count = 1
-        # structure_list = String[] # Experimental
         while !complete
             # Determine which line to break at
             line_ID = findfirst(x -> x > split_point, n_lines)
@@ -133,47 +115,13 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
                 new_ID =  min(line_ID, vars_ID)
             end
 
-            # =============================================================================
-            # =============================================================================
-            # EXPERIMENTAL: 
-
-            # We want to make the element at `new_ID` into a kernel, unless the structure
-            # is exactly the same as a kernel we made previously.
-            # new_term = extract(factored, new_ID)
-            # new_structure, order = structure(new_term)
-            # @show new_structure
-            # # @show structure_list
-            # if new_structure in structure_list
-            #     # We've already made this structure. Identify which kernel that was and use that
-            #     # one instead of making a new kernel
-            #     kernel_ID = findfirst(x -> x==new_structure, structure_list)
-            #     push!(kernel_nums, kernel_ID)
-            #     push!(inputs, string.(get_name.(order)))
-            #     push!(outputs, string(factored[new_ID].lhs))          
-            #     println("Was in the list")  
-            # else
-            #     # Send the element at `new_ID` to create_kernel!()
-            #     create_kernel!(expr_hash, kernel_count, new_term, get_name.(gradlist), func_outputs, constants)
-            #     push!(structure_list, new_structure)
-            #     push!(kernel_nums, kernel_count)
-            #     push!(inputs, string.(get_name.(pull_vars(extract(factored, new_ID)))))
-            #     push!(outputs, string(factored[new_ID].lhs))
-            #     kernel_count += 1
-            # end
-
-            # =============================================================================
-            # =============================================================================
-            #### Start of alternative to experimental section
-
             # Send the element at `new_ID` to create_kernel!()
-            create_kernel!(expr_hash, kernel_count, extract(factored, new_ID), get_name.(gradlist), func_outputs, constants, factored)
+            create_kernel!(expr_hash, kernel_count, factored, new_ID, get_name.(gradlist), constants, all_vars, sparsity)
             push!(kernel_nums, kernel_count)
             push!(inputs, string.(get_name.(pull_vars(extract(factored, new_ID)))))
             push!(outputs, string(factored[new_ID].lhs))
             kernel_count += 1
 
-            #### End of alternative to experimental section
-
             # Eliminate this part of the factored list, since we've already calculated
             # it from this kernel
             factored[new_ID] = factored[new_ID].lhs ~ factored[new_ID].lhs
@@ -185,7 +133,7 @@ function kgen(num::Num, gradlist::Vector{Num}, raw_outputs::Vector{Symbol}, cons
             # If the total number of lines (not including the final line) is below the max size 
             # and the number of variables is below 32, we can make the final kernel and be done
             if (n_vars[end] < 32) && (all(n_lines[1:end-1] .<= max_size))
-                create_kernel!(expr_hash, kernel_count, extract(factored), get_name.(gradlist), func_outputs, constants, factored)
+                create_kernel!(expr_hash, kernel_count, factored, length(factored), get_name.(gradlist), constants, all_vars, sparsity)
                 push!(kernel_nums, kernel_count)
                 push!(inputs, string.(get_name.(pull_vars(extract(factored)))))
                 push!(outputs, "OUT")
@@ -309,7 +257,7 @@ end
 # A special version of kgen that only applies to quadratic functions. Instead of
 # doing McCormick relaxations, this returns either affine bounds or secant line
 # bounds, depending on where on the quadratic function the point of interest is.
-function kgen_affine_quadratic(expr_hash::String, num::Num, gradlist::Vector{Num}, func_outputs::Vector{Symbol}, constants::Vector{Num})
+function kgen_affine_quadratic(expr_hash::String, num::Num, gradlist::Vector{Num}, constants::Vector{Num})
     # Since it's quadratic, we can construct the kernel according to
     # `affine_relax_quadratic!` in EAGO.
 
@@ -432,138 +380,102 @@ end
 # This function takes information about the file name, kernel ID, and
 # the expression that a SINGLE kernel is being created for, and creates
 # that kernel in the specified file.
-create_kernel!(expr_hash::String, kernel_ID::Int, num::Num, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation}) = create_kernel!(expr_hash, kernel_ID, num.val, gradlist, func_outputs, constants, orig_factored)
-function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Real}, gradlist::Vector{Symbol}, func_outputs::Vector{Symbol}, constants::Vector{Num}, orig_factored::Vector{Equation})
-    # This function will create a kernel for `num`, with the name:
-    # "f_" * expr_hash * "_$n". This name will be pushed to `kernels`,
-    # and a vector of the required inputs variables will be pushed to
-    # `inputs`. Other inputs are needed to determine portions of the
-    # kernel itself.
-
-    # Start by factorizing the input expression
-    factorized = factor(num, split_div=true)
-
-    # Perform substitutions if possible
-    factorized = perform_substitutions(factorized)
-
-    # Collect all the LHS terms and participating variables
-    LHS = string.(getfield.(factorized, :lhs))
-    vars = get_name.(pull_vars(num))
+create_kernel!(expr_hash::String, kernel_ID::Int, factored::Vector{Equation}, stop_point::Int, gradlist::Vector{Symbol}, constants::Vector{Num}, all_vars::Vector{Symbol}, sparsity::Vector{Vector{Int}}) = create_kernel!(expr_hash, kernel_ID, factored, stop_point, gradlist, constants, all_vars, sparsity)
+function create_kernel!(expr_hash::String, kernel_ID::Int, factored::Vector{Equation}, stop_point::Int, gradlist::Vector{Symbol}, constants::Vector{Num}, all_vars::Vector{Symbol}, sparsity::Vector{Vector{Int}})
+    # This function will create a kernel for the portion of `factored`
+    # ending at `stop_point`, with the name:
+    # "f_" * expr_hash * "_$n". 
+    # This name will be pushed to `kernels`, and a vector of the required 
+    # inputs variables will be pushed to `inputs`. Other inputs are needed 
+    # to determine portions of the kernel itself.
+
+    # Start by getting a smaller version of `factored` that only contains
+    # factors relevant to the `stop_point`
+    short_factored = shorten(factored, stop_point)
+
+    # Collect vars that are needed as inputs (this may be base-level variables,
+    # auxiliary variables, or a combination)
+    input_vars = pull_vars(extract(factored, stop_point), get_names=true)
 
     # Put the factorized expression into directed acyclic graph form
-    edgelist, varids = eqn_edges(factorized) #varids includes all aux variables also
+    edgelist = eqn_edges(short_factored, all_vars)
     g = SimpleDiGraph(edgelist)
 
     # Perform a topological sort to get the order in which we should
     # perform calculations (i.e., the final entry in "varorder" is the
     # full original expression)
-    varorder = varids[topological_sort(g)]
-
-    # Now, we need sparsity information for all the variables. We can pull sparsity information
-    # normally, unless it's a temporary variable, in which case we have to refer to the original
-    # factorization and sparsity.
-    string_gradlist = string.(gradlist)
-    sparsity = get_sparsity(varorder, string_gradlist, factorized)
-
-    # Check if we need temporary variables at all. We don't need
-    # temporary variables if we only have addition, or if we have
-    # the addition of single-rule terms, since we can just keep adding
-    # new information to the existing output space. E.g.:
-    #   x +   y +     z : No temporary variables needed
-    #  3x + x*y +   z^2 : No temporary variables needed
-    # x*y + y*z + x*y*z : Temporary variable needed because x*y*z is two rules
-    # need_temps = depth_check(factorized)
-    # NOTE: Alternatively, saving data to global GPU memory will definitely
-    #       be slower than saving it to a temporary variable and copying it
-    #       to global memory only at the end. Though, that would mean more
-    #       local storage for each thread, which would limit the number of
-    #       threads per SM that could be used (and affect occupancy). So,
-    #       in some cases it may be better to store data in temporary variables,
-    #       and in other cases it might be better to store directly to the
-    #       final output location. This may require some testing, and then
-    #       perhaps a flag that overrides "need_temps" and the subtraction
-    #       of 1 from the temp count later on.
-    #  (Disabling entirely for now)
+    varorder = all_vars[topological_sort(g)]
 
     # Glossary:
-    # varids: ALL variables including base variables and aux variables
-    # varorder: A topologically sorted list of varids
-    # vars: Variables that participate in the original expression 
-    #       (i.e., NOT including ones produced through factorization)
-
-    # g.fadjlist[i]: Contains indices of varids that depend on varids[i]
-    # g.badjlist[i]: Contains indices of varids that are needed to compute varids[i]
+    # all_vars: ALL variables including base variables and aux variables
+    # input_vars: Variables needed as inputs to this expression
+    # varorder: A topologically sorted list of variables (i.e., in calculation order)
+    # gradlist: Non-auxiliary variables
+    # g.fadjlist[i]: Contains indices of all_vars that depend on all_vars[i]
+    # g.badjlist[i]: Contains indices of all_vars that are needed to compute all_vars[i]
 
     # Calculate the number of temporary variables needed. 
     temp_endlist = []
     maxtemp = 0
-    # if need_temps #(Skip for now)
-        for i in eachindex(varorder) # Loop through every participating variable
-            if (varorder[i] in string.(vars))
-                # Skip the variable if it's an input
-                continue
-            end
-            # Find which index varorder[i] is in `varids`
-            ID = findfirst(x -> occursin(varorder[i], x), varids)
-            tempID = 0
-
-            # If we are not already keeping track of temporary variables,
-            # this becomes the first one
-            if isempty(temp_endlist)
-                # Keep track of what varids depend on this temporary variable
-                push!(temp_endlist, copy(g.fadjlist[ID]))
-                tempID = 1
-            else
-                # Check if this variable's expression uses addition. If so,
-                # check if either of the RHS variables appeared earlier
-                # in varorder.
-                factorized_ID = findfirst(x -> isequal(string(x.lhs), varorder[i]), factorized)
-                if exprtype(factorized[factorized_ID].rhs) == ADD
-                    # Check through the temp_endlist to see if any temporary variables
-                    # ONLY point to this ID (i.e., they aren't used elsewhere). If so,
-                    # we can re-use that temporary variable and overwrite the results
-                    # with addition.
-                    for j in eachindex(temp_endlist)
-                        if (length(temp_endlist[j])==1) && (temp_endlist[j][1]==ID)
-                            temp_endlist[j] = copy(g.fadjlist[ID])
-                            tempID = j
-                            break
-                        end
-                    end
-                end
+    for i in eachindex(varorder) # Loop through every participating variable
+        if (varorder[i] in input_vars)
+            # Skip the variable if it's an input
+            continue
+        end
+        # Find which index varorder[i] is in `all_vars`
+        ID = findfirst(==(varorder[i]), all_vars)
+        tempID = 0
 
-                # Check if there are any temporary variables we can override
+        # If we are not already keeping track of temporary variables,
+        # this becomes the first one
+        if isempty(temp_endlist)
+            # Keep track of what varids depend on this temporary variable
+            push!(temp_endlist, copy(g.fadjlist[ID]))
+            tempID = 1
+        else
+            # Check if this variable's expression uses addition. If so,
+            # check if either of the RHS variables appeared earlier
+            # in varorder.
+            factored_ID = findfirst(==(varorder[i]), get_name.(getfield.(factored, :lhs)))
+            if exprtype(factored[factored_ID].rhs) == ADD
+                # Check through the temp_endlist to see if any temporary variables
+                # ONLY point to this ID (i.e., they aren't used elsewhere). If so,
+                # we can re-use that temporary variable and overwrite the results
+                # with addition.
                 for j in eachindex(temp_endlist)
-                    if isempty(temp_endlist[j]) # Then we can override this one
+                    if (length(temp_endlist[j])==1) && (temp_endlist[j][1]==ID)
                         temp_endlist[j] = copy(g.fadjlist[ID])
                         tempID = j
                         break
                     end
                 end
-                if tempID==0 # Then we haven't found one we can override
-                    push!(temp_endlist, copy(g.fadjlist[ID]))
-                    tempID = length(temp_endlist)
-                end
             end
 
-            # Now that we're done with this variable, look over other
-            # temporary variables to see if they're no longer needed
+            # Check if there are any temporary variables we can override
             for j in eachindex(temp_endlist)
-                if ID in temp_endlist[j]
-                    filter!(x -> x!=ID, temp_endlist[j])
+                if isempty(temp_endlist[j]) # Then we can override this one
+                    temp_endlist[j] = copy(g.fadjlist[ID])
+                    tempID = j
+                    break
                 end
             end
-            if tempID > maxtemp
-                maxtemp = tempID
+            if tempID==0 # Then we haven't found one we can override
+                push!(temp_endlist, copy(g.fadjlist[ID]))
+                tempID = length(temp_endlist)
             end
         end
-    # end # Skipping outer loop for now
 
-    # [Deprecating, using temporary variables to decrease global memory accesses]
-    # # We have one more temporary variable than we need, since at
-    # # least one result could have been stored in the final output
-    # # of this kernel.
-    # maxtemp -= 1
+        # Now that we're done with this variable, look over other
+        # temporary variables to see if they're no longer needed
+        for j in eachindex(temp_endlist)
+            if ID in temp_endlist[j]
+                filter!(x -> x!=ID, temp_endlist[j])
+            end
+        end
+        if tempID > maxtemp
+            maxtemp = tempID
+        end
+    end
 
     # At this point, we should be ready to write the kernel. Open
     # the file in "append" mode so that other info that was written
@@ -571,36 +483,32 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
     file = open(joinpath(@__DIR__, "storage", "f_"*expr_hash*".jl"), "a")
 
     # Put in the preamble.
-    write(file, preamble_string(expr_hash, ["OUT"; string.(vars)], kernel_ID, maxtemp, length(gradlist)))
+    write(file, preamble_string(expr_hash, ["OUT"; string.(input_vars)], kernel_ID, maxtemp, length(gradlist)))
 
     # Loop through the topological list to add calculations in order
     temp_endlist = []
     outvar = ""
-    name_tracker = copy(varids)
+    name_tracker = copy(all_vars)
     for i in eachindex(varorder) # Order in which variables are calculated
         # Skip calculation if the variable is one of the inputs
-        if (varorder[i] in string.(vars))
+        if (varorder[i] in input_vars)
             continue
         end
 
-        # Determine the corresponding ID of the variable in varids
-        ID = findfirst(x -> occursin(varorder[i], x), varids)
+        # Determine the index of the variable in `all_vars`
+        ID = findfirst(==(varorder[i]), all_vars)
+
+        # Determine which element of the (full) factorization we're using
+        factored_ID = findfirst(==(varorder[i]), get_name.(getfield.(factored, :lhs)))
 
-        # Get the inputs for this operation by checking the name
-        # tracker (there might be a better way to do this... ah well)
-        factorized_ID = findfirst(x -> isequal(string(x.lhs), varorder[i]), factorized)
-        participants = get_name.(pull_vars(factorized[factorized_ID].rhs))
+        # Determine the indices of the inputs to this calculation
         inputs = []
         input_IDs = Int[]
-        for p in string.(participants)
-            # Find the corresponding element in varids
-            varids_ID = findfirst(x -> isequal(x, p), varids)
-            # Push the name_tracker name to the input list and the ID to the list of input IDs
-            push!(inputs, name_tracker[varids_ID])
-
-            # Find the corresponding element in varorder
-            varorder_ID = findfirst(x -> isequal(x, p), varorder)
-            push!(input_IDs, varorder_ID)
+        participants = pull_vars(factored[factored_ID], get_names=true)
+        for p in participants
+            p_ID = findfirst(==(p), all_vars)
+            push!(inputs, string(name_tracker[p_ID]))
+            push!(input_IDs, p_ID)
         end
 
         # Determine which tempID to use/override. temp_endlist keeps
@@ -619,8 +527,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
         else
             # Check if this variable's expression uses addition. If so,
             # check if we can reuse a temporary variable.
-            factorized_ID = findfirst(x -> isequal(string(x.lhs), varorder[i]), factorized)
-            if exprtype(factorized[factorized_ID].rhs) == ADD
+            if exprtype(factored[factored_ID].rhs) == ADD
                 # Check through the temp_endlist to see if any temporary variables
                 # ONLY point to this ID (i.e., they aren't used elsewhere). If so,
                 # we can re-use that temporary variable and overwrite the results
@@ -650,11 +557,11 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
 
         # When we refer to this variable in the future, we need to know what tempID
         # the variable is using
-        name_tracker[ID] = "temp$(tempID)"
+        name_tracker[ID] = Symbol("temp$(tempID)")
 
         # Now we can append this temporary variable to the list of inputs
         # for the correct operation
-        inputs = [name_tracker[ID]; inputs]
+        inputs = [string(name_tracker[ID]); inputs]
         input_IDs = [i; input_IDs]
 
         # Now we can pass the equation's RHS and the inputs to call the correct
@@ -663,14 +570,14 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
             # Special case. We're adding inputs[3] to inputs[2], so we only want
             # to pass the sparsity information of inputs[3] (rather than the
             # sparsity information of inputs[2] and inputs[3])
-            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[3]])
+            write_operation(file, factored[factored_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[3]])
         elseif length(inputs)>2 && inputs[1]==inputs[3]
             # Special case. We're adding inputs[2] to inputs[3], so we only want
             # to pass the sparsity information of inputs[2] (rather than the
             # sparsity information of inputs[2] and inputs[3])
-            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[2]])
+            write_operation(file, factored[factored_ID].rhs, inputs, string.(gradlist), sparsity[input_IDs[2]])
         else
-            write_operation(file, factorized[factorized_ID].rhs, inputs, string.(gradlist), sparsity[i])
+            write_operation(file, factored[factored_ID].rhs, inputs, string.(gradlist), sparsity[i])
         end
 
         # Now that we're done with this variable, eliminate this variable
@@ -683,7 +590,7 @@ function create_kernel!(expr_hash::String, kernel_ID::Int, num::BasicSymbolic{Re
         
         # Keep track of the name of the output variable for setting the output
         if i==length(varorder)
-            outvar = name_tracker[ID]
+            outvar = string(name_tracker[ID])
         end
     end
 
@@ -1424,25 +1331,25 @@ end
 
 
 # A helper function to calculate the sparsity of a factorization
-function get_sparsity(varorder, string_gradlist, factored)
-    sparsity = [Int[] for _ in 1:length(varorder)]
+function get_sparsity(all_vars, symbol_gradlist, factored)
+    sparsity = [Int[] for _ in 1:length(all_vars)]
     function calc_sp(v)
         if !isempty(sparsity[v])
             return sparsity[v]
         end
-        if varorder[v] in string_gradlist
-            sparsity[v] = [findfirst(==(varorder[v]), string_gradlist)]
+        if all_vars[v] in symbol_gradlist
+            sparsity[v] = [findfirst(==(all_vars[v]), symbol_gradlist)]
             return sparsity[v]
         end
-        idx = findfirst(x -> isequal(string(x.lhs), varorder[v]), factored)
-        RHS_vars = pull_vars(factored[idx].rhs)
+        idx = findfirst(x -> isequal(Symbol(x.lhs), all_vars[v]), factored)
+        RHS_vars = Symbol.(get_name.(pull_vars(factored[idx].rhs)))
         for var in RHS_vars
-            var_idx = findfirst(==(string(get_name(var))), varorder)
+            var_idx = findfirst(==(var), all_vars)
             sparsity[v] = [sparsity[v]..., calc_sp(var_idx)...]
         end
         return sort(unique(sparsity[v]))
     end
-    for v in 1:length(varorder)
+    for v in 1:length(all_vars)
         sparsity[v] = calc_sp(v)
     end
     return sparsity
diff --git a/src/transform/utilities.jl b/src/transform/utilities.jl
index ebbc545..a7c45bb 100644
--- a/src/transform/utilities.jl
+++ b/src/transform/utilities.jl
@@ -236,18 +236,22 @@ julia> pull_vars(func)
  z
 ```
 """
-pull_vars(term::BasicSymbolic) = pull_vars(Num(term))
-function pull_vars(term::Num)
+pull_vars(term::BasicSymbolic; get_names::Bool=false) = pull_vars(Num(term), get_names=get_names)
+function pull_vars(term::Num; get_names::Bool=false)
     vars = Num[]
     strings = String[]
     if ~(typeof(term.val) <: Real)
         vars, strings = _pull_vars(term.val, vars, strings)
         vars = vars[sort_vars(strings)]
     end
-    return vars
+    if get_names
+        return get_name.(vars)
+    else
+        return vars
+    end
 end
 
-function pull_vars(terms::Vector{Num})
+function pull_vars(terms::Vector{Num}; get_names::Bool=false)
     vars = Num[]
     strings = String[]
     for term in terms
@@ -258,20 +262,28 @@ function pull_vars(terms::Vector{Num})
     if ~isempty(vars)
         vars = vars[sort_vars(strings)]
     end
-    return vars
+    if get_names
+        return get_name.(vars)
+    else
+        return vars
+    end
 end
 
-function pull_vars(eqn::Equation)
+function pull_vars(eqn::Equation; get_names::Bool=false)
     vars = Num[]
     strings = String[]
     if ~(typeof(eqn.rhs) <: Real)
         vars, strings = _pull_vars(eqn.rhs, vars, strings)
         vars = vars[sort_vars(strings)]
     end
-    return vars
+    if get_names
+        return get_name.(vars)
+    else
+        return vars
+    end
 end
 
-function pull_vars(eqns::Vector{Equation})
+function pull_vars(eqns::Vector{Equation}; get_names::Bool=false)
     vars = Num[]
     strings = String[]
     for eqn in eqns
@@ -282,9 +294,13 @@ function pull_vars(eqns::Vector{Equation})
     if ~isempty(vars)
         vars = vars[sort_vars(strings)]
     end
-    return vars
+    if get_names
+        return get_name.(vars)
+    else
+        return vars
+    end
 end
-function pull_vars(eqn::T) where T<:Real
+function pull_vars(eqn::T; get_names::Bool=false) where T<:Real
     return Num[]
 end
 
@@ -536,6 +552,35 @@ function extract(eqs::Vector{Equation}, ID::Int=length(eqs))
     return final_expr
 end
 
+
+"""
+    shorten(::Vector{Equation}, ::Int)
+
+Given a set of symbolic equations, and a specific element index,
+return a Vector{Equation} that only contains elements needed to
+evaluate the chosen element.
+```
+"""
+function shorten(eqs::Vector{Equation}, ID::Int)
+    indices = Int[]
+    function delve!(idx, indices, LHS, RHS)
+        if idx in indices
+            return nothing
+        else
+            for var in RHS[idx]
+                var_idx = findfirst(==(var), LHS)
+                if ~isnothing(var_idx) && (var_idx != idx)
+                    delve!(var_idx, indices, LHS, RHS)
+                end
+            end
+            push!(indices, idx)
+            return nothing
+        end
+    end
+    delve!(ID, indices, Symbol.(getfield.(eqs, :lhs)), pull_vars.(getfield.(eqs, :rhs), get_names=true))
+    return eqs[indices]
+end
+
 """
     convex_evaluator(::Num)
     convex_evaluator(::Equation)
diff --git a/src/transform/write.jl b/src/transform/write.jl
index c712e34..157fd00 100644
--- a/src/transform/write.jl
+++ b/src/transform/write.jl
@@ -118,6 +118,30 @@ function eqn_edges(a::Vector{Equation})
     end
     return edgelist, vars
 end
+function eqn_edges(a::Vector{Equation}, vars::Vector{Symbol})
+    # Create the list of edges
+    edgelist = Edge{Int}[]
+
+    # Create a mapping dictionary
+    varid = Dict(vars .=> collect(1:length(vars)))
+
+    # Identify LHS variables
+    LHS_id = [varid[x] for x in Symbol.(getfield.(a, :lhs))]
+
+    # Identify RHS variables
+    RHS_id = [[varid[x] for x in pull_vars(RHS, get_names=true)] for RHS in getfield.(a, :rhs)]
+
+    # Create edges of RHS -> LHS
+    for i in eachindex(LHS_id)
+        for j in eachindex(RHS_id[i])
+            if RHS_id[i][j] == LHS_id[i]
+                continue
+            end
+            push!(edgelist, Edge(RHS_id[i][j], LHS_id[i]))
+        end
+    end
+    return edgelist
+end
 
 # A new topological sort that tries to minimize the number of temporary vectors
 # that need to be preallocated
@@ -128,7 +152,7 @@ function topological_sort(g::SimpleDiGraph; order::Vector{Int64}=Int64[])
         for j in g.badjlist[i][sortperm(-lengths)]
             recursive_add(g, j, order)
         end
-        if ~in(i, order)
+        if ~in(i, order) && ~isempty(g.badjlist[i])
             push!(order, i)
         end
     end

From 1bd0daa354991ce1214fc6a2380118961d2f72bd Mon Sep 17 00:00:00 2001
From: RXGottlieb <36906916+RXGottlieb@users.noreply.github.com>
Date: Thu, 19 Mar 2026 19:41:21 -0400
Subject: [PATCH 4/4] Remove old function call

---
 src/kernel_writer/kernel_write.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/kernel_writer/kernel_write.jl b/src/kernel_writer/kernel_write.jl
index ced9c1f..d0eb221 100644
--- a/src/kernel_writer/kernel_write.jl
+++ b/src/kernel_writer/kernel_write.jl
@@ -380,7 +380,6 @@ end
 # This function takes information about the file name, kernel ID, and
 # the expression that a SINGLE kernel is being created for, and creates
 # that kernel in the specified file.
-create_kernel!(expr_hash::String, kernel_ID::Int, factored::Vector{Equation}, stop_point::Int, gradlist::Vector{Symbol}, constants::Vector{Num}, all_vars::Vector{Symbol}, sparsity::Vector{Vector{Int}}) = create_kernel!(expr_hash, kernel_ID, factored, stop_point, gradlist, constants, all_vars, sparsity)
 function create_kernel!(expr_hash::String, kernel_ID::Int, factored::Vector{Equation}, stop_point::Int, gradlist::Vector{Symbol}, constants::Vector{Num}, all_vars::Vector{Symbol}, sparsity::Vector{Vector{Int}})
     # This function will create a kernel for the portion of `factored`
     # ending at `stop_point`, with the name: