diff --git a/.github/actions/run-inference/action.yml b/.github/actions/run-inference/action.yml
new file mode 100644
index 00000000..21cb9fb3
--- /dev/null
+++ b/.github/actions/run-inference/action.yml
@@ -0,0 +1,64 @@
+name: Run Inference
+description: Run one llama-tornado inference pass and write the metrics + sidecar files.
+
+inputs:
+  backend:
+    description: 'GPU backend (opencl or ptx)'
+    required: true
+  model_file:
+    description: 'Model filename inside $MODELS_DIR (e.g. Llama-3.2-1B-Instruct-F16.gguf)'
+    required: true
+  model:
+    description: 'Human-readable model name for the sidecar (e.g. Llama-3.2-1B-Instruct)'
+    required: true
+  quantization:
+    description: 'Quantization type (e.g. F16, Q8_0)'
+    required: true
+  configuration:
+    description: 'Configuration key for the sidecar (e.g. standard, prefill-decode)'
+    required: true
+  flags:
+    description: 'Extra CLI flags passed to llama-tornado (omit for standard run)'
+    required: false
+    default: ''
+  metrics_file:
+    description: 'Absolute path for the output metrics JSON file'
+    required: true
+  prompt:
+    description: 'Prompt to pass to the model'
+    required: false
+    default: 'Say hello'
+
+runs:
+  using: composite
+  steps:
+    - name: Run inference
+      shell: bash
+      working-directory: ${{ github.workspace }}
+      env:
+        JAVA_TOOL_OPTIONS: >-
+          -Dllama.metrics.format=json
+          -Dllama.metrics.output=file
+          -Dllama.metrics.file=${{ inputs.metrics_file }}
+      run: |
+        # Run inference and emit raw metrics JSON via JAVA_TOOL_OPTIONS
+        ./llama-tornado --gpu --${{ inputs.backend }} \
+          --model $MODELS_DIR/${{ inputs.model_file }} \
+          --prompt "${{ inputs.prompt }}" \
+          ${{ inputs.flags }}
+
+        # Write metadata sidecar so process_metrics.py can identify each metrics file.
+        # Resolve the expression into a shell variable first so bash's ${var%.json}
+        # suffix strip works (an expression cannot appear inside parameter expansion).
+        METRICS_FILE="${{ inputs.metrics_file }}"
+        SIDECAR="${METRICS_FILE%.json}.meta.json"
+        python3 scripts/write_metrics_sidecar.py \
+          --out "$SIDECAR" \
+          backend="${{ inputs.backend }}" \
+          task=llama-inference \
+          model_file=${{ inputs.model_file }} \
+          model=${{ inputs.model }} \
+          quantization=${{ inputs.quantization }} \
+          configuration=${{ inputs.configuration }} \
+          "flags=${{ inputs.flags }}" \
+          prompt="${{ inputs.prompt }}"
diff --git a/.github/actions/setup-tornadovm/action.yml b/.github/actions/setup-tornadovm/action.yml
new file mode 100644
index 00000000..a5512658
--- /dev/null
+++ b/.github/actions/setup-tornadovm/action.yml
@@ -0,0 +1,93 @@
+name: Setup TornadoVM
+description: Clone (or restore from cache), build, and configure TornadoVM. Exports TORNADOVM_HOME and updates PATH for all subsequent steps.
+
+inputs:
+  backend:
+    description: 'TornadoVM backend to build (opencl or ptx)'
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Get TornadoVM HEAD SHA
+      id: tornado_sha
+      shell: bash
+      run: |
+        SHA=$(git ls-remote https://github.com/beehive-lab/TornadoVM HEAD | cut -f1)
+        echo "sha=$SHA" >> $GITHUB_OUTPUT
+
+    # actions/cache `path:` is an expression — it cannot access the calling
+    # workflow's env vars via ${{ env.* }}, so we resolve TORNADO_ROOT here.
+    - name: Resolve TORNADO_ROOT for cache path
+      id: paths
+      shell: bash
+      run: echo "tornado_root=$TORNADO_ROOT" >> $GITHUB_OUTPUT
+
+    - name: Restore TornadoVM cache
+      id: cache
+      uses: actions/cache@v4
+      with:
+        path: ${{ steps.paths.outputs.tornado_root }}
+        key: tornadovm-${{ inputs.backend }}-${{ steps.tornado_sha.outputs.sha }}
+
+    - name: Clone TornadoVM master
+      if: steps.cache.outputs.cache-hit != 'true'
+      shell: bash
+      run: |
+        git clone --depth 1 --branch master \
+          https://github.com/beehive-lab/TornadoVM.git \
+          $TORNADO_ROOT
+
+    - name: Set up Python venv for TornadoVM
+      if: steps.cache.outputs.cache-hit != 'true'
+      shell: bash
+      run: |
+        python3 -m venv $TORNADO_ROOT/venv
+        source $TORNADO_ROOT/venv/bin/activate
+        python --version
+
+    - name: Build TornadoVM
+      if: steps.cache.outputs.cache-hit != 'true'
+      shell: bash
+      run: |
+        cd $TORNADO_ROOT
+        mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
+        source venv/bin/activate
+        echo "=== Building TornadoVM ==="
+        make BACKEND=${{ inputs.backend }}
+
+        echo "=== Verifying TornadoVM SDK directory ==="
+        SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1)
+        if [ -z "$SDK_DIR" ]; then
+          echo "::error::Could not locate TornadoVM SDK directory!"
+          find dist -maxdepth 5 -type d
+          exit 1
+        fi
+
+    # Runs on both cache hit and miss — sets TORNADOVM_HOME and PATH for all
+    # subsequent steps in the calling workflow.
+    - name: Configure TornadoVM environment
+      shell: bash
+      run: |
+        cd $TORNADO_ROOT
+        SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ inputs.backend }}" | head -n 1)
+        if [ -z "$SDK_DIR" ]; then
+          echo "::error::Could not locate TornadoVM SDK directory!"
+          find dist -maxdepth 5 -type d
+          exit 1
+        fi
+        FULL_SDK="${PWD}/${SDK_DIR}"
+        echo "Detected TornadoVM SDK: $FULL_SDK"
+
+        # Export for current shell session
+        export TORNADOVM_HOME="$FULL_SDK"
+        export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"
+
+        # Save for subsequent steps
+        echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV
+        echo "$FULL_SDK/bin" >> $GITHUB_PATH
+        echo "$JAVA_HOME/bin" >> $GITHUB_PATH
+
+        echo "=== Checking tornado CLI ==="
+        which tornado || { echo "::error::tornado not in PATH"; exit 1; }
+        tornado --devices
diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml
index a17c9228..982c4cfc 100644
--- a/.github/workflows/build-and-run.yml
+++ b/.github/workflows/build-and-run.yml
@@ -48,480 +48,206 @@ jobs:
       - name: Checkout GPULlama3
         uses: actions/checkout@v4
 
-      - name: Clone TornadoVM master
-        run: |
-          git clone --depth 1 --branch master \
-            https://github.com/beehive-lab/TornadoVM.git \
-            $TORNADO_ROOT
-
-      - name: Set up Python venv for TornadoVM
-        run: |
-          python3 -m venv $TORNADO_ROOT/venv
-          source $TORNADO_ROOT/venv/bin/activate
-          python --version
-
-      - name: Build TornadoVM
-        run: |
-          cd $TORNADO_ROOT
-          mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
-          source venv/bin/activate
-          echo "=== Building TornadoVM ==="
-
-          make BACKEND=${{ matrix.backend.name }}
-
-          echo "=== Searching for TornadoVM SDK directory ==="
-          SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1)
-          if [ -z "$SDK_DIR" ]; then
-            echo "::error::Could not locate TornadoVM SDK directory!"
-            find dist -maxdepth 5 -type d
-            exit 1
-          fi
-          FULL_SDK="${PWD}/${SDK_DIR}"
-          echo "Detected TornadoVM SDK: $FULL_SDK"
-
-          # Export for current shell session
-          export TORNADOVM_HOME="$FULL_SDK"
-          export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"
-
-          # Save for subsequent steps
-          echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV
-          echo "PATH=$PATH" >> $GITHUB_ENV
-
-          echo "=== Checking tornado CLI ==="
-          which tornado || { echo "::error::tornado not in PATH"; exit 1; }
-          tornado --devices
+      - name: Setup TornadoVM
+        uses: ./.github/actions/setup-tornadovm
+        with:
+          backend: ${{ matrix.backend.name }}
 
       - name: Build GPULlama3.java
         run: |
           cd ${{ github.workspace }}
           echo "Using TORNADOVM_HOME=$TORNADOVM_HOME"
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           tornado --version
           ./mvnw clean package -DskipTests
 
+      # ── Llama-3.2-1B: standard + prefill-decode variants, all backends ──────────
       - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-F16.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Llama-3.2-1B-Instruct-F16.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json
 
       - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
-            --prompt "Say hello" \
-            --with-prefill-decode
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-F16.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=F16 \
-            configuration=prefill-decode \
-            "flags=--with-prefill-decode" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Llama-3.2-1B-Instruct-F16.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: F16
+          configuration: prefill-decode
+          flags: --with-prefill-decode
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json
 
       - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
-            --prompt "Say hello" \
-            --with-prefill-decode --batch-prefill-size 32
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-F16.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=F16 \
-            configuration=batch-prefill-decode \
-            "flags=--with-prefill-decode --batch-prefill-size 32" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Llama-3.2-1B-Instruct-F16.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: F16
+          configuration: batch-prefill-decode
+          flags: --with-prefill-decode --batch-prefill-size 32
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json
 
       # ── PTX-only: CUDA-graph variants ────────────────────────────────────────
       - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
         if: matrix.backend.name == 'ptx'
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --ptx \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
-            --prompt "Say hello" \
-            --with-prefill-decode \
-            --cuda-graphs
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.meta.json" \
-            backend=ptx \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-F16.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=F16 \
-            configuration=prefill-decode-cuda-graphs \
-            "flags=--with-prefill-decode --cuda-graphs" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ptx
+          model_file: Llama-3.2-1B-Instruct-F16.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: F16
+          configuration: prefill-decode-cuda-graphs
+          flags: --with-prefill-decode --cuda-graphs
+          metrics_file: ${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json
 
       - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
         if: matrix.backend.name == 'ptx'
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --ptx \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
-            --prompt "Say hello" \
-            --with-prefill-decode --batch-prefill-size 32 \
-            --cuda-graphs
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.meta.json" \
-            backend=ptx \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-F16.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=F16 \
-            configuration=batch-prefill-decode-cuda-graphs \
-            "flags=--with-prefill-decode --batch-prefill-size 32 --cuda-graphs" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ptx
+          model_file: Llama-3.2-1B-Instruct-F16.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: F16
+          configuration: batch-prefill-decode-cuda-graphs
+          flags: --with-prefill-decode --batch-prefill-size 32 --cuda-graphs
+          metrics_file: ${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json
 
       # ── Additional models — standard inference, all backends ─────────────────
       - name: FP16 - Run Qwen3-4B-f16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Qwen3-4B-f16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Qwen3-4B-f16.gguf \
-            model=Qwen3-4B \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Qwen3-4B-f16.gguf
+          model: Qwen3-4B
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json
 
       - name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Mistral-7B-Instruct-v0.3.fp16.gguf \
-            model=Mistral-7B-Instruct-v0.3 \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Mistral-7B-Instruct-v0.3.fp16.gguf
+          model: Mistral-7B-Instruct-v0.3
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json
 
       - name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=qwen2.5-1.5b-instruct-fp16.gguf \
-            model=Qwen2.5-1.5B-Instruct \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: qwen2.5-1.5b-instruct-fp16.gguf
+          model: Qwen2.5-1.5B-Instruct
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json
 
       - name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Phi-3-mini-4k-instruct-fp16.gguf \
-            model=Phi-3-mini-4k-instruct \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Phi-3-mini-4k-instruct-fp16.gguf
+          model: Phi-3-mini-4k-instruct
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json
 
       - name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=granite-3.2-2b-instruct-f16.gguf \
-            model=Granite-3.2-2B-Instruct \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: granite-3.2-2b-instruct-f16.gguf
+          model: Granite-3.2-2B-Instruct
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json
 
       - name: FP16 - Run Granite-4.0-1b-F16.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/granite-4.0-1b-F16.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=granite-4.0-1b-F16.gguf \
-            model=Granite-4.0-1B \
-            quantization=F16 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: granite-4.0-1b-F16.gguf
+          model: Granite-4.0-1B
+          quantization: F16
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json
 
       - name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Llama-3.2-1B-Instruct-Q8_0.gguf \
-            model=Llama-3.2-1B-Instruct \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Llama-3.2-1B-Instruct-Q8_0.gguf
+          model: Llama-3.2-1B-Instruct
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json
 
       - name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Qwen3-0.6B-Q8_0.gguf \
-            model=Qwen3-0.6B \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Qwen3-0.6B-Q8_0.gguf
+          model: Qwen3-0.6B
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json
 
       - name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Phi-3-mini-4k-instruct-Q8_0.gguf \
-            model=Phi-3-mini-4k-instruct \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Phi-3-mini-4k-instruct-Q8_0.gguf
+          model: Phi-3-mini-4k-instruct
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json
 
       - name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=qwen2.5-1.5b-instruct-q8_0.gguf \
-            model=Qwen2.5-1.5B-Instruct \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
-
-      - name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=Mistral-7B-Instruct-v0.3.Q8_0.gguf \
-            model=Mistral-7B-Instruct-v0.3 \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: qwen2.5-1.5b-instruct-q8_0.gguf
+          model: Qwen2.5-1.5B-Instruct
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json
+
+      - name: Q8 - Run Mistral-7B-Instruct-v0.3.Q8_0.gguf
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: Mistral-7B-Instruct-v0.3.Q8_0.gguf
+          model: Mistral-7B-Instruct-v0.3
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json
 
       - name: Q8 - Run Granite-3.2-2b-instruct-Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=granite-3.2-2b-instruct-Q8_0.gguf \
-            model=Granite-3.2-2B-Instruct \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: granite-3.2-2b-instruct-Q8_0.gguf
+          model: Granite-3.2-2B-Instruct
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json
 
       - name: Q8 - Run Granite-4.0-1b-Q8_0.gguf
-        env:
-          JAVA_TOOL_OPTIONS: >-
-            -Dllama.metrics.format=json
-            -Dllama.metrics.output=file
-            -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json
-        run: |
-          cd ${{ github.workspace }}
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
-          ./llama-tornado --gpu --${{ matrix.backend.name }} \
-            --model $MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
-            --prompt "Say hello"
-          python3 scripts/write_metrics_sidecar.py \
-            --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.meta.json" \
-            backend="${{ matrix.backend.name }}" \
-            task=llama-inference \
-            model_file=granite-4.0-1b-Q8_0.gguf \
-            model=Granite-4.0-1B \
-            quantization=Q8_0 \
-            configuration=standard \
-            flags="" \
-            prompt="Say hello"
+        uses: ./.github/actions/run-inference
+        with:
+          backend: ${{ matrix.backend.name }}
+          model_file: granite-4.0-1b-Q8_0.gguf
+          model: Granite-4.0-1B
+          quantization: Q8_0
+          configuration: standard
+          metrics_file: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json
 
       # ── Upload metrics for the publish job ────────────────────────────────────
       - name: Upload metrics artifacts
diff --git a/.github/workflows/integration-quarkus-langchain4j.yml b/.github/workflows/integration-quarkus-langchain4j.yml
index 8ee6e900..d4a4291e 100644
--- a/.github/workflows/integration-quarkus-langchain4j.yml
+++ b/.github/workflows/integration-quarkus-langchain4j.yml
@@ -43,53 +43,16 @@ jobs:
         uses: actions/checkout@v4
 
       # Step 1: Clone and build TornadoVM
-      - name: Clone TornadoVM master
-        run: |
-          git clone --depth 1 --branch master \
-            https://github.com/beehive-lab/TornadoVM.git \
-            $TORNADO_ROOT
-      - name: Set up Python venv for TornadoVM
-        run: |
-          python3 -m venv $TORNADO_ROOT/venv
-          source $TORNADO_ROOT/venv/bin/activate
-          python --version
-      - name: Build TornadoVM
-        run: |
-          cd $TORNADO_ROOT
-          mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
-          source venv/bin/activate
-          echo "=== Building TornadoVM ==="
-
-          make BACKEND=${{ matrix.backend.name }}
-
-          echo "=== Searching for TornadoVM SDK directory ==="
-          SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1)
-          if [ -z "$SDK_DIR" ]; then
-            echo "::error::Could not locate TornadoVM SDK directory!"
-            find dist -maxdepth 5 -type d
-            exit 1
-          fi
-          FULL_SDK="${PWD}/${SDK_DIR}"
-          echo "Detected TornadoVM SDK: $FULL_SDK"
-
-          # Export for current shell session
-          export TORNADOVM_HOME="$FULL_SDK"
-          export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"
-
-          # Save for subsequent steps
-          echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV
-          echo "PATH=$PATH" >> $GITHUB_ENV
-
-          echo "=== Checking tornado CLI ==="
-          which tornado || { echo "::error::tornado not in PATH"; exit 1; }
-          tornado --devices
+      - name: Setup TornadoVM
+        uses: ./.github/actions/setup-tornadovm
+        with:
+          backend: ${{ matrix.backend.name }}
 
       # Step 2: Build GPULlama3.java
       - name: Build GPULlama3.java
         run: |
           cd ${{ github.workspace }}
           echo "Using TORNADOVM_HOME=$TORNADOVM_HOME"
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
           tornado --version
 
       # Append SNAPSHOT to GPULlama3 version
@@ -108,13 +71,12 @@
       - name: Clone Quarkus LangChain4j
         run: |
           cd ${{ github.workspace }}
-          git clone https://github.com/quarkiverse/quarkus-langchain4j.git
+          git clone --depth 1 https://github.com/quarkiverse/quarkus-langchain4j.git
 
       # Step 4: Build Quarkus LangChain4j with current GPULlama3.java
       - name: Build Quarkus LangChain4j
         run: |
           cd ${{ github.workspace }}/quarkus-langchain4j
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
 
           # Update the GPULlama3 version used by quarkus-langchain4j
           POM="pom.xml"
@@ -136,7 +98,6 @@
       - name: Start Quarkus Application and Wait for Startup
         run: |
           cd ${{ github.workspace }}/quarkus-langchain4j/integration-tests/gpu-llama3
-          export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
 
           echo "Starting Quarkus application on port $QUARKUS_PORT..."
 
@@ -146,7 +107,8 @@
             -Dquarkus.http.port=$QUARKUS_PORT \
             -jar target/quarkus-app/quarkus-run.jar &
           APP_PID=$!
-
+          echo "APP_PID=$APP_PID" >> $GITHUB_ENV
+
           if [ -z "$APP_PID" ]; then
             echo "ERROR: Failed to start Quarkus application"
             exit 1
diff --git a/.github/workflows/rerun-workflow.yml b/.github/workflows/rerun-workflow.yml
index e4b14e75..62eec7d3 100644
--- a/.github/workflows/rerun-workflow.yml
+++ b/.github/workflows/rerun-workflow.yml
@@ -47,8 +47,30 @@
             core.setOutput('is_help', 'false');
           }
 
-      - name: Get PR SHA
+      - name: Check commenter permissions
+        id: check_permission
         if: steps.help.outputs.is_help != 'true'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { data: perm } = await github.rest.repos.getCollaboratorPermissionLevel({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              username: context.payload.comment.user.login
+            });
+            const authorized = ['write', 'admin'].includes(perm.permission);
+            if (!authorized) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: context.issue.number,
+                body: `@${context.payload.comment.user.login} You need write permission to trigger workflow reruns.`
+              });
+            }
+            core.setOutput('authorized', authorized ? 'true' : 'false');
+
+      - name: Get PR SHA
+        if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true'
         id: pr
         uses: actions/github-script@v7
         with:
@@ -64,7 +86,7 @@
           console.log(`PR head ref: ${pr.head.ref}`);
 
       - name: Add reaction to comment
-        if: steps.help.outputs.is_help != 'true'
+        if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true'
         uses: actions/github-script@v7
         with:
           script: |
@@ -76,7 +98,7 @@
           });
 
       - name: Post start comment
-        if: steps.help.outputs.is_help != 'true'
+        if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true'
         uses: actions/github-script@v7
         with:
           script: |
@@ -92,7 +114,7 @@
           });
 
       - name: Rerun failed workflows
-        if: steps.help.outputs.is_help != 'true'
+        if: steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true'
        uses: actions/github-script@v7
        with:
          script: |
@@ -167,7 +189,7 @@
           console.log(`Reran ${rerunCount} workflow(s)`);
 
       - name: Post completion comment
-        if: always() && steps.help.outputs.is_help != 'true'
+        if: always() && steps.help.outputs.is_help != 'true' && steps.check_permission.outputs.authorized == 'true'
         uses: actions/github-script@v7
         with:
           script: |
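
Note on the sidecar contract the new run-inference action depends on: it calls
scripts/write_metrics_sidecar.py with --out <path> followed by positional
key=value pairs, and process_metrics.py later matches each metrics-*.json with
the *.meta.json written next to it. The script's implementation is not part of
this diff; the following is a minimal sketch of a writer with that assumed
interface, not the repository's actual code:

    #!/usr/bin/env python3
    # Hypothetical sketch of the write_metrics_sidecar.py interface assumed by
    # the composite action: --out <path> followed by key=value metadata pairs.
    import argparse
    import json


    def main() -> None:
        parser = argparse.ArgumentParser(description="Write a metrics metadata sidecar.")
        parser.add_argument("--out", required=True, help="path of the .meta.json file to write")
        parser.add_argument("pairs", nargs="+", metavar="key=value", help="metadata entries")
        args = parser.parse_args()

        meta = {}
        for pair in args.pairs:
            key, sep, value = pair.partition("=")
            if not sep:
                parser.error(f"expected key=value, got {pair!r}")
            meta[key] = value  # empty values (e.g. flags="") are kept on purpose

        with open(args.out, "w", encoding="utf-8") as fh:
            json.dump(meta, fh, indent=2)
            fh.write("\n")


    if __name__ == "__main__":
        main()

Under that assumption, `--out m.meta.json backend=ptx task=llama-inference`
would produce {"backend": "ptx", "task": "llama-inference"}. It also explains
why the workflow quotes the whole "flags=${{ inputs.flags }}" argument: the
value may contain spaces but must reach the script as a single argv entry.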