From 27cfcc23d69e533e4c44f5b1aec69baae83c18ce Mon Sep 17 00:00:00 2001 From: SuperMarioYL Date: Sun, 5 Jul 2026 05:23:01 +0800 Subject: [PATCH 1/3] Add opt-in file persistence to the memory plugin (#111) The memory plugin builds a fresh in-RAM Memory() per request, so extracted memories never survive across calls. This adds an opt-in file-backed store, gated on the OPTILLM_MEMORY_FILE env var: - Memory(persist_path=...) loads saved items on init and writes after each add() - run() reads the path from OPTILLM_MEMORY_FILE; unset => behaviour unchanged - loads degrade gracefully on missing/corrupt/non-list files (logged, no raise) - saves are atomic (temp file + os.replace) and bounded by max_size - README documents the env var; unit test covers round-trip, corrupt/missing files, max_size truncation, and the default no-I/O path --- README.md | 2 +- optillm/plugins/memory_plugin.py | 75 ++++++++++++++++++++++++++++++-- tests/test_plugins.py | 64 ++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b3eedf54..845db067 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,7 @@ optillm | MCP Client | `mcp` | Implements the model context protocol (MCP) client, enabling you to use any LLM with any MCP Server | | Router | `router` | Uses the [optillm-modernbert-large](https://huggingface.co/codelion/optillm-modernbert-large) model to route requests to different approaches based on the user prompt | | Chain-of-Code | `coc` | Implements a chain of code approach that combines CoT with code execution and LLM based code simulation | -| Memory | `memory` | Implements a short term memory layer, enables you to use unbounded context length with any LLM | +| Memory | `memory` | Implements a short term memory layer, enables you to use unbounded context length with any LLM. Set `OPTILLM_MEMORY_FILE` to opt in to file-backed persistence so memories survive across requests | | Privacy | `privacy` | Anonymize PII data in request and deanonymize it back to original value in response | | Read URLs | `readurls` | Reads all URLs found in the request, fetches the content at the URL and adds it to the context | | Execute Code | `executecode` | Enables use of code interpreter to execute python code in requests and LLM generated responses | diff --git a/optillm/plugins/memory_plugin.py b/optillm/plugins/memory_plugin.py index 77905262..0a41f98d 100644 --- a/optillm/plugins/memory_plugin.py +++ b/optillm/plugins/memory_plugin.py @@ -1,24 +1,90 @@ +import json +import logging +import os import re -from typing import Tuple, List +import tempfile +from typing import Optional, Tuple, List import numpy as np from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity SLUG = "memory" +# Environment variable that opts a run in to file-backed memory. When it is set, +# the Memory store loads any previously saved items on init and persists after +# every add(); when it is unset the plugin behaves exactly as before (in-RAM, +# reset per request). +MEMORY_FILE_ENV = "OPTILLM_MEMORY_FILE" + +logger = logging.getLogger(__name__) + class Memory: - def __init__(self, max_size: int = 100): + def __init__(self, max_size: int = 100, persist_path: Optional[str] = None): self.max_size = max_size self.items: List[str] = [] self.vectorizer = TfidfVectorizer() self.vectors = None self.completion_tokens = 0 + self.persist_path = persist_path + if self.persist_path: + self._load_from_file() def add(self, item: str): if len(self.items) >= self.max_size: self.items.pop(0) self.items.append(item) self.vectors = None # Reset vectors to force recalculation + if self.persist_path: + self._save_to_file() + + def _load_from_file(self): + """Load persisted items from ``persist_path`` (opt-in). + + A missing file (first run) or a corrupt/unreadable one degrades + gracefully to an empty in-memory store, so persistence can never make a + request fail at startup. + """ + try: + with open(self.persist_path, "r", encoding="utf-8") as f: + data = json.load(f) + except FileNotFoundError: + return # nothing persisted yet + except (OSError, ValueError) as e: + logger.warning("Could not load memory from %s: %s", self.persist_path, e) + return + + if not isinstance(data, list): + logger.warning( + "Ignoring memory file %s: expected a JSON list of strings", + self.persist_path, + ) + return + + # Keep only strings and honour max_size (most recent items win). + items = [x for x in data if isinstance(x, str)] + self.items = items[-self.max_size:] + self.vectors = None + + def _save_to_file(self): + """Atomically persist the current items to ``persist_path`` (opt-in). + + Writes to a temp file in the same directory and ``os.replace``s it into + place so a crash mid-write cannot corrupt an existing store. Any I/O + error is logged and swallowed rather than raised. + """ + try: + directory = os.path.dirname(os.path.abspath(self.persist_path)) + os.makedirs(directory, exist_ok=True) + fd, tmp_path = tempfile.mkstemp(dir=directory, suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(self.items, f) + os.replace(tmp_path, self.persist_path) + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) + except OSError as e: + logger.warning("Could not save memory to %s: %s", self.persist_path, e) def get_relevant(self, query: str, n: int = 10) -> List[str]: if not self.items: @@ -91,7 +157,10 @@ def extract_key_information(system_message, text: str, query: str, client, model return margins, response.usage.completion_tokens def run(system_prompt: str, initial_query: str, client, model: str) -> Tuple[str, int]: - memory = Memory() + # Opt-in file-backed memory: when OPTILLM_MEMORY_FILE is set, items persist + # across requests; when unset, behaviour is unchanged (fresh in-RAM store). + persist_path = os.environ.get(MEMORY_FILE_ENV) or None + memory = Memory(persist_path=persist_path) query, context = extract_query(initial_query) completion_tokens = 0 diff --git a/tests/test_plugins.py b/tests/test_plugins.py index d81e334e..0f769d5a 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -68,6 +68,62 @@ def test_memory_plugin_structure(): assert hasattr(plugin, 'Memory') # Check for Memory class +def test_memory_plugin_persistence(): + """Test opt-in file-backed persistence for the memory plugin (issue #111). + + Covers the round trip, graceful degradation on a missing/corrupt file, and + that the default (no persist_path) never touches the filesystem. + """ + import json + import tempfile + from optillm.plugins.memory_plugin import Memory + + with tempfile.TemporaryDirectory() as tmp: + path = os.path.join(tmp, "memory.json") + + # Round trip: items written by one instance load into the next. + m1 = Memory(persist_path=path) + m1.add("alpha") + m1.add("beta") + assert os.path.exists(path), "add() should persist when persist_path is set" + + m2 = Memory(persist_path=path) + assert m2.items == ["alpha", "beta"], "persisted items should load on init" + + # A missing file is a valid first run, not an error. + missing = os.path.join(tmp, "does_not_exist.json") + m3 = Memory(persist_path=missing) + assert m3.items == [] + + # A corrupt file degrades to an empty store without raising. + corrupt = os.path.join(tmp, "corrupt.json") + with open(corrupt, "w", encoding="utf-8") as f: + f.write("{not valid json") + m4 = Memory(persist_path=corrupt) + assert m4.items == [] + + # A non-list JSON payload is ignored rather than trusted. + wrong_shape = os.path.join(tmp, "wrong.json") + with open(wrong_shape, "w", encoding="utf-8") as f: + json.dump({"items": ["x"]}, f) + m5 = Memory(persist_path=wrong_shape) + assert m5.items == [] + + # max_size is honoured on load (most recent items win). + big = os.path.join(tmp, "big.json") + with open(big, "w", encoding="utf-8") as f: + json.dump([str(i) for i in range(10)], f) + m6 = Memory(max_size=3, persist_path=big) + assert m6.items == ["7", "8", "9"] + + # Default behaviour is unchanged: no persist_path means no file I/O. + no_persist = os.path.join(tmp, "should_not_be_created.json") + m7 = Memory() + m7.add("gamma") + assert not os.path.exists(no_persist) + assert m7.persist_path is None + + def test_genselect_plugin(): """Test genselect plugin module""" import optillm.plugins.genselect_plugin as plugin @@ -392,7 +448,13 @@ def test_no_relative_import_errors(): print("✅ Memory plugin structure test passed") except Exception as e: print(f"❌ Memory plugin structure test failed: {e}") - + + try: + test_memory_plugin_persistence() + print("✅ Memory plugin persistence test passed") + except Exception as e: + print(f"❌ Memory plugin persistence test failed: {e}") + try: test_genselect_plugin() print("✅ GenSelect plugin test passed") From a88baba488d5e26431ddb6c262a4f7c83aa20ccc Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 5 Jul 2026 09:23:47 +0800 Subject: [PATCH 2/3] Bump version to 0.3.18 Co-Authored-By: Claude Opus 4.8 (1M context) --- optillm/__init__.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/optillm/__init__.py b/optillm/__init__.py index b846afa0..32c5c314 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -1,5 +1,5 @@ # Version information -__version__ = "0.3.17" +__version__ = "0.3.18" import os as _os diff --git a/pyproject.toml b/pyproject.toml index 4a7cb2fd..99e9edb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "optillm" -version = "0.3.17" +version = "0.3.18" description = "An optimizing inference proxy for LLMs." readme = "README.md" license = "Apache-2.0" From bbadde1fca14937c9ba835c621a841d3eb628c48 Mon Sep 17 00:00:00 2001 From: Asankhaya Sharma Date: Sun, 5 Jul 2026 09:38:44 +0800 Subject: [PATCH 3/3] Add Frame SAST scan of PR-changed files to CI Adds a security-scan workflow that runs the Frame neuro-symbolic SAST tool (lambdasec/frame, pinned) on the Python files changed by a pull request. Scanning only the PR's added/modified files surfaces issues introduced by the change without failing on pre-existing findings elsewhere in the tree. The job fails only on high/critical severity, so lower-confidence categories (e.g. insecure_random on algorithmic sampling) do not block merges. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/security-scan.yml | 69 +++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 .github/workflows/security-scan.yml diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml new file mode 100644 index 00000000..07a0bb04 --- /dev/null +++ b/.github/workflows/security-scan.yml @@ -0,0 +1,69 @@ +name: Security Scan (Frame SAST) + +# Runs the Frame neuro-symbolic SAST tool (https://github.com/lambdasec/frame) +# on the Python files changed by a pull request. Scanning only the PR's changed +# files surfaces issues introduced by the change without failing on pre-existing +# findings elsewhere in the tree. The job fails only on high/critical severity. + +on: + pull_request: + branches: [ main ] + +permissions: + contents: read + +jobs: + frame-scan: + name: Frame SAST (changed files) + runs-on: ubuntu-latest + steps: + - name: Checkout (full history for diff) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install Frame (pinned) + run: | + git clone https://github.com/lambdasec/frame.git /tmp/frame + git -C /tmp/frame checkout 3223ac44320b4870782d9aad03514b4d3c876e0a + pip install "/tmp/frame[scan]" + + - name: Scan Python files changed in this PR + env: + BASE_SHA: ${{ github.event.pull_request.base.sha }} + run: | + set -uo pipefail + + # Added/copied/modified/renamed Python files in this PR (skip deletions). + mapfile -t FILES < <(git diff --name-only --diff-filter=ACMR "$BASE_SHA" HEAD -- '*.py') + + if [ "${#FILES[@]}" -eq 0 ]; then + echo "No Python files changed in this PR - nothing to scan." + exit 0 + fi + + echo "Scanning ${#FILES[@]} changed Python file(s) (fail on high/critical):" + printf ' %s\n' "${FILES[@]}" + + FAIL=0 + for f in "${FILES[@]}"; do + # File may have been renamed away or removed in a later commit. + [ -f "$f" ] || continue + echo "::group::Frame scan $f" + if ! frame scan "$f" --fail-on high; then + FAIL=1 + echo "::error file=$f::Frame flagged a high/critical severity issue in $f" + fi + echo "::endgroup::" + done + + if [ "$FAIL" -ne 0 ]; then + echo "Frame SAST found high/critical severity issue(s) in changed files." + exit 1 + fi + echo "No high/critical severity issues in changed files."