From a5fa3912344796285c2918ad1e0d6995b47c61bd Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Wed, 15 Apr 2026 14:03:17 -0700 Subject: [PATCH 1/7] new resources for data analysts using MCP --- core/src/org/labkey/core/CoreMcp.java | 19 ++ .../org/labkey/core/DataAnalysis_Python.md | 321 ++++++++++++++++++ 2 files changed, 340 insertions(+) create mode 100644 core/src/org/labkey/core/DataAnalysis_Python.md diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index da97a986ba9..562ff479a3e 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -132,4 +132,23 @@ public ReadResourceResult getFileBasedModuleDevelopmentGuide() throws IOExceptio ) )); } + + @McpResource( + uri = "resource://org/labkey/core/DataAnalysis_Python.md", + mimeType = "application/markdown", + name = "Python Data Analysis Development Guide", + description = "Provide documentation for developers using Python to analyze LabKey data") + public ReadResourceResult getPythonDataAnalysisGuide() throws IOException + { + incrementResourceRequestCount("Python Data Analysis"); + String markdown = IOUtils.resourceToString("org/labkey/core/DataAnalysis_Python.md", null, CoreModule.class.getClassLoader()); + return new ReadResourceResult(List.of( + new McpSchema.TextResourceContents( + "resource://org/labkey/core/DataAnalysis_Python.md", + "application/markdown", + markdown + ) + )); + } + } diff --git a/core/src/org/labkey/core/DataAnalysis_Python.md b/core/src/org/labkey/core/DataAnalysis_Python.md new file mode 100644 index 00000000000..42c7f99e510 --- /dev/null +++ b/core/src/org/labkey/core/DataAnalysis_Python.md @@ -0,0 +1,321 @@ +# LabKey Data Analysis using Python + +This project is for performing data analysis against a LabKey Server instance using AI assistance. + +**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. 
The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token. When writing Python scripts, read these values from `.mcp.json` to pre-populate the `APIWrapper` connection parameters. Before running any script, confirm with the data analyst: +- Is this the correct server? +- Should `use_ssl` be True or False? (infer from the URL scheme in `.mcp.json`) +- Do they want to use the API key from `.mcp.json` or provide a different one? +- What container path should be used? (use MCP `listContainers` to show available options) + +Confirm all of these settings with the analyst before writing any script. + +## MCP Tools Available + +A LabKey MCP server is configured (see `.mcp.json`). Use these tools to explore the server interactively: + +| Tool | Purpose | +|---|---| +| `mcp__labkey__setContainer` | **Must be called first.** Sets the active container (project/folder) for subsequent calls. Path format: `MyProject/MyFolder` (no leading slash). | +| `mcp__labkey__whereAmIWhoAmITalkingTo` | Shows current user, server info, and active container. | +| `mcp__labkey__listContainers` | Lists all containers the user has read access to. | +| `mcp__labkey__listSchemas` | Lists all schemas in the active container. | +| `mcp__labkey__listTables` | Lists tables/queries within a schema. | +| `mcp__labkey__listColumns` | Shows column metadata (name, type, description) for a table. Also returns SQL source for saved queries. | +| `mcp__labkey__getSourceForSavedQuery` | Returns the SQL source of a saved query. | +| `mcp__labkey__validateSQL` | Validates LabKey SQL syntax without executing it. | + +### MCP Workflow + +1. Call `listContainers` to find available containers +2. Call `setContainer` with the desired container path +3. Use `listSchemas` -> `listTables` -> `listColumns` to explore the data model +4. Use `validateSQL` to check queries before running them +5. 
To actually retrieve data, write a Python script using the `labkey` Python API (see below) + +## LabKey Python API (`labkey` package) + +**Install:** `pip install labkey` +**Requires:** Python 3.11+, LabKey Server v15.1+ +**Repo:** https://github.com/LabKey/labkey-api-python + +### Connection Setup + +```python +from labkey.api_wrapper import APIWrapper + +api = APIWrapper( + "localhost:8080", # domain (hostname or hostname:port) + "MyProject/MyFolder", # container path + context_path=None, # URL path segment after domain, e.g. "labkey" + use_ssl=False, # True for https + verify_ssl=True, # False for self-signed dev certs + api_key="your-api-key", # alternative: use ~/.netrc +) +``` + +Authentication options: +- **API key**: Pass `api_key=` to APIWrapper (sent as `apikey` header) +- **.netrc file**: Create `~/.netrc` with `machine`, `login`, `password` fields (chmod 600) + +### Data Retrieval APIs + +#### select_rows -- Query a table/view + +```python +result = api.query.select_rows( + schema_name, # str: e.g. "lists", "core", "study" + query_name, # str: table or query name + view_name=None, # str: named view to use + filter_array=None, # list[QueryFilter]: row filters + columns=None, # str: comma-separated column names + max_rows=-1, # int: -1 = ALL rows (default!) + sort=None, # str: comma-separated, prefix '-' for desc + offset=None, # int: rows to skip (pagination) + container_path=None, # str: override container + container_filter=None, # str: e.g. 
"CurrentAndSubfolders" + parameters=None, # dict: for parameterized queries + include_total_count=None, # bool: include total in response + timeout=300, # int: seconds +) +``` + +#### execute_sql -- Run LabKey SQL + +```python +result = api.query.execute_sql( + schema_name, # str: target schema + sql, # str: LabKey SQL + max_rows=None, # int: row limit + sort=None, # str: sort columns + offset=None, # int: row offset + container_path=None, # str: override container + container_filter=None, # str: container scope + parameters=None, # dict: query parameters + timeout=300, # int: seconds + waf_encode_sql=True, # bool: WAF encoding (needs Server v23.09+) +) +``` + +Set `waf_encode_sql=False` if targeting LabKey Server older than v23.09. + +#### get_queries -- List available tables/queries + +```python +result = api.query.get_queries( + schema_name, # str: schema to explore + container_path=None, # str: override container + include_columns=None, # bool: include column metadata + include_system_queries=None, # bool: include system-generated queries + include_title=None, # bool: include custom display titles + include_user_queries=None, # bool: include user-defined saved queries + include_view_data_url=None, # bool: include URLs for viewing data in browser + query_detail_columns=None, # bool: include detailed column info + timeout=300, # int: seconds +) +``` + +### Response Format + +Both `select_rows` and `execute_sql` return a dict: + +```python +{ + "schemaName": "lists", + "queryName": "MyTable", + "rowCount": 25, # total rows (if include_total_count=True) + "rows": [ # list of row dicts + {"Col1": "value", "Col2": 42, ...}, + ... + ], + "metaData": { + "id": "Key", # primary key column + "fields": [{"name": "Col1", "type": "string"}, ...] 
+ } +} +``` + +Working with results: +```python +result = api.query.select_rows("lists", "MyTable") +for row in result["rows"]: + print(row["Name"], row["Value"]) + +# Convert to pandas DataFrame: +import pandas as pd +df = pd.DataFrame(result["rows"]) + +# Date columns come back as strings -- convert explicitly: +df["StartDate"] = pd.to_datetime(df["StartDate"]) + +# Numeric columns may contain None for missing values -- pandas handles this +# but be aware when doing arithmetic: +df["Age"] = pd.to_numeric(df["Age"], errors="coerce") + +# Lookup columns may return nested dicts (e.g. {"value": 1, "displayValue": "Group A"}). +# Extract the display value if needed: +if isinstance(df["Group"].iloc[0], dict): + df["Group"] = df["Group"].apply(lambda x: x.get("displayValue") if isinstance(x, dict) else x) +``` + +### Query Filters + +```python +from labkey.query import QueryFilter + +filters = [ + QueryFilter("Country", "Germany"), # equals (default) + QueryFilter("Age", "18,65", QueryFilter.Types.BETWEEN), # comma-delimited + QueryFilter("Status", "Active;Enrolled", QueryFilter.Types.IN), # semicolon-delimited + QueryFilter("Name", "", QueryFilter.Types.IS_NOT_BLANK), # no value needed +] +result = api.query.select_rows("study", "Demographics", filter_array=filters) +``` + +#### Filter Types Reference + +**Comparison:** `EQUAL`, `NOT_EQUAL` (alias `NEQ`), `GT` / `GREATER_THAN`, `LT` / `LESS_THAN`, `GTE` / `GREATER_THAN_OR_EQUAL`, `LTE` / `LESS_THAN_OR_EQUAL`, `NEQ_OR_NULL` / `NOT_EQUAL_OR_MISSING` + +**Date comparison:** `DATE_EQUAL`, `DATE_NOT_EQUAL`, `DATE_GREATER_THAN`, `DATE_LESS_THAN`, `DATE_GREATER_THAN_OR_EQUAL`, `DATE_LESS_THAN_OR_EQUAL` + +**String:** `STARTS_WITH`, `DOES_NOT_START_WITH`, `CONTAINS`, `DOES_NOT_CONTAIN`, `CONTAINS_ONE_OF`, `CONTAINS_NONE_OF` + +**Set/Range:** `IN` / `EQUALS_ONE_OF` (semicolons), `NOT_IN` / `EQUALS_NONE_OF` (semicolons), `BETWEEN` (commas), `NOT_BETWEEN` (commas) + +**Null checks (no value needed):** `IS_BLANK`, `IS_NOT_BLANK`, 
`HAS_MISSING_VALUE`, `DOES_NOT_HAVE_MISSING_VALUE`, `HAS_ANY_VALUE` + +**Array:** `ARRAY_CONTAINS_ALL`, `ARRAY_CONTAINS_ANY`, `ARRAY_CONTAINS_NONE`, `ARRAY_CONTAINS_EXACT`, `ARRAY_CONTAINS_NOT_EXACT`, `ARRAY_ISEMPTY`, `ARRAY_ISNOTEMPTY` + +**Search:** `Q` (full-text search across table) + +**Lineage:** `EXP_CHILD_OF`, `EXP_PARENT_OF`, `EXP_LINEAGE_OF` + +**Ontology:** `ONTOLOGY_IN_SUBTREE`, `ONTOLOGY_NOT_IN_SUBTREE` + +### Pagination + +```python +# Page through results: +result = api.query.select_rows("lists", "LargeTable", + max_rows=100, + offset=0, # page 1 + include_total_count=True, + sort="Name" +) +total = result["rowCount"] +page_rows = result["rows"] + +# Pagination enum: +from labkey.query import Pagination +result = api.query.select_rows("lists", "Table", show_rows=Pagination.ALL) +# Values: PAGINATED, SELECTED, UNSELECTED, ALL, NONE +``` + +### Container Filters + +Control which containers are searched. Pass as `container_filter=` to select_rows/execute_sql: + +- `"Current"` -- only the active container +- `"CurrentAndSubfolders"` -- active container and its children +- `"CurrentPlusProject"` -- active container and its parent project +- `"CurrentAndParents"` -- active container and all ancestors +- `"CurrentPlusProjectAndShared"` -- current, project, and shared folder +- `"AllFolders"` -- everything the user can read + +### Error Handling + +```python +from labkey.exceptions import ( + RequestError, # base class for all server errors + RequestAuthorizationError, # 401 -- bad credentials + QueryNotFoundError, # 404 -- wrong schema/table name + ServerNotFoundError, # 404 -- wrong server or context_path + ServerContextError, # connection error, SSL error + UnexpectedRedirectError, # 302 -- usually http->https misconfiguration +) +from requests.exceptions import Timeout + +try: + result = api.query.select_rows("lists", "MyTable") +except QueryNotFoundError: + print("Table not found -- check schema and query names") +except RequestAuthorizationError: + 
print("Auth failed -- check API key or .netrc") +except ServerNotFoundError: + print("Server not found -- check domain and context_path") +except Timeout: + print("Request timed out") +except RequestError as e: + print(f"Server error: {e.message}") +``` + +### Data Modification APIs + +For completeness -- use these when analysis requires writing back results: + +- `api.query.insert_rows(schema, query, rows)` -- insert new rows +- `api.query.update_rows(schema, query, rows)` -- update rows (must include PK) +- `api.query.delete_rows(schema, query, rows)` -- delete rows (must include PK) +- `api.query.truncate_table(schema, query)` -- delete all rows +- `api.query.import_rows(schema, query, data_file=f)` -- bulk import from file +- `api.query.save_rows(commands)` -- batch multi-table operations + +All modification APIs accept `timeout=300`, `container_path=None`, `transacted=True`, and optional `audit_behavior` / `audit_user_comment`. + +## Important Considerations + +1. **max_rows defaults to -1 (ALL rows)** in `select_rows`. Always set an explicit `max_rows` for large tables to avoid pulling the entire dataset into memory. + +2. **Filter value delimiters are inconsistent**: `IN`/`NOT_IN` use semicolons (`"A;B;C"`), while `BETWEEN`/`NOT_BETWEEN` use commas (`"10,50"`). This is a historical API quirk. + +3. **columns is a comma-separated string**, not a list: `columns="Name,Age,Country"`. + +4. **Sort syntax**: comma-separated column names, prefix `-` for descending: `sort="Age,-Name"`. + +5. **WAF encoding**: `execute_sql` WAF-encodes SQL by default (since labkey v3.0.0). Requires LabKey Server v23.09+. Set `waf_encode_sql=False` for older servers. + +6. **Container path override**: Every API method accepts `container_path=` to query a different folder without creating a new connection. + +7. **CSRF tokens** are fetched automatically on the first request. This adds slight overhead to the first call. + +8. 
**Default timeout is 300 seconds** (5 minutes) for all query operations. + +9. **Multiple filters on the same column** are supported -- they are appended, not overwritten. + +10. **select_rows sends a GET request** to `query-getQuery.api`. `execute_sql` sends a POST to `query-executeSql.api`. + +11. **LabKey SQL is not standard SQL.** It is a SQL dialect specific to LabKey. Use `mcp__labkey__validateSQL` to check syntax before executing. Refer to LabKey documentation for dialect-specific features (e.g., lookup column traversal via `/` or `.` notation). + +## Typical Analysis Workflow + +```python +from labkey.api_wrapper import APIWrapper +from labkey.query import QueryFilter +import pandas as pd + +# 1. Connect +api = APIWrapper("localhost:8080", "MyProject", use_ssl=False, api_key="...") + +# 2. Explore (or use MCP tools for interactive exploration) +schemas = api.query.get_queries("lists", include_columns=True) + +# 3. Retrieve data +result = api.query.select_rows("lists", "Participants", + columns="ParticipantId,Name,Age,Country", + filter_array=[QueryFilter("Age", "18", QueryFilter.Types.GTE)], + max_rows=1000, + sort="Age" +) + +# 4. Analyze with pandas +df = pd.DataFrame(result["rows"]) +print(df.describe()) +print(df.groupby("Country")["Age"].mean()) + +# 5. 
Complex queries with SQL +sql_result = api.query.execute_sql("lists", + "SELECT Country, COUNT(*) as N, AVG(Age) as AvgAge " + "FROM Participants GROUP BY Country ORDER BY N DESC" +) +summary = pd.DataFrame(sql_result["rows"]) +``` From f3a4a45e77ff0587b9a06c3a596730551062a55e Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Wed, 15 Apr 2026 15:28:54 -0700 Subject: [PATCH 2/7] new resources for data analysts using MCP first draft --- core/src/org/labkey/core/DataAnalysis_R.md | 473 +++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100644 core/src/org/labkey/core/DataAnalysis_R.md diff --git a/core/src/org/labkey/core/DataAnalysis_R.md b/core/src/org/labkey/core/DataAnalysis_R.md new file mode 100644 index 00000000000..648d3dbce22 --- /dev/null +++ b/core/src/org/labkey/core/DataAnalysis_R.md @@ -0,0 +1,473 @@ +# LabKey Data Analysis using R + +This project is for performing data analysis against a LabKey Server instance using AI assistance. + +**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token. When writing R scripts, read these values from `.mcp.json` (via `jsonlite::fromJSON`) to pre-populate `labkey.setDefaults()` and the `baseUrl`/`folderPath` parameters. Before running any script, confirm with the data analyst: +- Is this the correct server? +- Should the URL use `http://` or `https://`? (infer from the URL scheme in `.mcp.json`) +- Do they want to use the API key from `.mcp.json` or provide a different one? +- What container path should be used? (use MCP `listContainers` to show available options) + +Confirm all of these settings with the analyst before writing any script. + +## MCP Tools Available + +A LabKey MCP server is configured (see `.mcp.json`). 
Use these tools to explore the server interactively: + +| Tool | Purpose | +|---|---| +| `mcp__labkey__setContainer` | **Must be called first.** Sets the active container (project/folder) for subsequent calls. Path format: `MyProject/MyFolder` (no leading slash). | +| `mcp__labkey__whereAmIWhoAmITalkingTo` | Shows current user, server info, and active container. | +| `mcp__labkey__listContainers` | Lists all containers the user has read access to. | +| `mcp__labkey__listSchemas` | Lists all schemas in the active container. | +| `mcp__labkey__listTables` | Lists tables/queries within a schema. | +| `mcp__labkey__listColumns` | Shows column metadata (name, type, description) for a table. Also returns SQL source for saved queries. | +| `mcp__labkey__getSourceForSavedQuery` | Returns the SQL source of a saved query. | +| `mcp__labkey__validateSQL` | Validates LabKey SQL syntax without executing it. | + +### MCP Workflow + +1. Call `listContainers` to find available containers +2. Call `setContainer` with the desired container path +3. Use `listSchemas` -> `listTables` -> `listColumns` to explore the data model +4. Use `validateSQL` to check queries before running them +5. To actually retrieve data, write an R script using the `Rlabkey` package (see below) + +## Rlabkey R Package + +**Install:** `install.packages("Rlabkey")` +**Requires:** R 3.0+, LabKey Server v15.1+ +**Dependencies:** httr, jsonlite, Rcpp +**CRAN:** https://cran.r-project.org/package=Rlabkey + +### Connection Setup + +Rlabkey functions take `baseUrl` and `folderPath` as explicit arguments on every call. 
Use `labkey.setDefaults()` to avoid repeating credentials: + +```r +library(Rlabkey) + +# Option 1: Set defaults (recommended for scripts) +labkey.setDefaults( + baseUrl = "http://localhost:8080/", + apiKey = "your-api-key" +) + +# Then call functions without baseUrl: +rows <- labkey.selectRows( + folderPath = "/home", + schemaName = "lists", + queryName = "MyTable" +) + +# Option 2: Pass baseUrl explicitly on every call (no defaults needed) +rows <- labkey.selectRows( + baseUrl = "http://localhost:8080/", + folderPath = "/home", + schemaName = "lists", + queryName = "MyTable" +) +``` + +Authentication options: +- **API key**: Pass to `labkey.setDefaults(apiKey=)`. An API key avoids storing credentials on the client. API keys can be revoked and set to expire. If an API key is set, it takes precedence over email/password. +- **Email/password**: `labkey.setDefaults(email="user@example.com", password="pass")` +- **.netrc file**: Create `~/.netrc` with `machine`, `login`, `password` fields (chmod 600). Rlabkey reads this automatically. Use `labkey.setCurlOptions(NETRC_FILE='/path/to/_netrc')` for a custom location. +- **Session key**: Pass a session key via `labkey.setDefaults(apiKey=)`. Session keys tie R access to the user's browser session context (same authorizations, impersonation state, etc.). + +To clear credentials: `labkey.setDefaults()` (called with no arguments resets all defaults). + +### Data Retrieval APIs + +#### labkey.selectRows -- Query a table/view + +```r +rows <- labkey.selectRows( + baseUrl = NULL, # str: server URL (e.g. "http://localhost:8080/") + folderPath, # str: container path (e.g. "/home" or "/MyProject/MyFolder") + schemaName, # str: e.g. 
"lists", "core", "study" + queryName, # str: table or query name + viewName = NULL, # str: named view to use + colSelect = NULL, # vector or comma-sep string: columns to return + maxRows = NULL, # int: max rows (NULL = ALL rows) + rowOffset = NULL, # int: rows to skip (pagination) + colSort = NULL, # str: column name prefixed with "+" or "-" + colFilter = NULL, # makeFilter() result: row filters + showHidden = FALSE, # logical: include hidden columns + colNameOpt = "caption", # str: "caption", "fieldname", or "rname" + containerFilter = NULL, # str: e.g. "CurrentAndSubfolders" + parameters = NULL, # named list: for parameterized queries + includeDisplayValues = FALSE, # logical: include lookup display values + method = "POST" # str: HTTP method ("GET" or "POST") +) +``` + +**Returns:** A data frame with `stringsAsFactors = FALSE`. Column names are determined by `colNameOpt`. + +#### labkey.executeSql -- Run LabKey SQL + +```r +rows <- labkey.executeSql( + baseUrl = NULL, # str: server URL + folderPath, # str: container path + schemaName, # str: target schema + sql, # str: LabKey SQL query + maxRows = NULL, # int: row limit + rowOffset = NULL, # int: row offset + colSort = NULL, # str: sort columns + showHidden = FALSE, # logical: include hidden columns + colNameOpt = "caption", # str: column naming option + containerFilter = NULL, # str: container scope + parameters = NULL # named list: query parameters +) +``` + +**Returns:** A data frame with `stringsAsFactors = FALSE`. + +#### labkey.getQueries -- List available tables/queries + +```r +queries <- labkey.getQueries( + baseUrl = NULL, # str: server URL + folderPath, # str: container path + schemaName # str: schema to explore +) +``` + +**Returns:** A data frame listing available queries in the schema. 
+ +#### labkey.getQueryDetails -- Get column metadata + +```r +details <- labkey.getQueryDetails( + baseUrl = NULL, # str: server URL + folderPath, # str: container path + schemaName, # str: schema name + queryName # str: table/query name +) +``` + +**Returns:** A data frame with column metadata (name, type, caption, etc.). + +#### labkey.getSchemas -- List schemas + +```r +schemas <- labkey.getSchemas( + baseUrl = NULL, # str: server URL + folderPath # str: container path +) +``` + +**Returns:** A data frame listing available schemas. + +### Session-Based API (Alternative Style) + +Rlabkey also provides a session-based interface that wraps the direct functions: + +```r +# Create a session +s <- getSession( + baseUrl = "http://localhost:8080/", + folderPath = "/home" +) + +# Explore +lsProjects("http://localhost:8080/") # list projects (before session) +lsFolders(s) # list folders in session +lsSchemas(s) # list schemas in session + +# Get schema and retrieve data +scobj <- getSchema(s, "lists") # returns schema object with query names +df <- getRows(s, scobj$MyTable) # returns data frame (colNameOpt defaults to "fieldname") +``` + +The session-based `getRows` function defaults to `colNameOpt='fieldname'` (unlike `labkey.selectRows` which defaults to `'caption'`). 
+ +### Response Format + +Both `labkey.selectRows` and `labkey.executeSql` return R data frames directly: + +```r +rows <- labkey.selectRows( + baseUrl = "http://localhost:8080/", + folderPath = "/home", + schemaName = "lists", + queryName = "MyTable" +) + +# The result is already a data frame: +nrow(rows) # number of rows +colnames(rows) # column names +str(rows) # structure/types +head(rows) # preview first rows + +# Access columns directly: +rows$Name +rows$Age + +# Date columns come back as strings -- convert explicitly: +rows$StartDate <- as.Date(rows$StartDate) + +# Or for datetime with timezone: +rows$Created <- as.POSIXct(rows$Created, format = "%Y/%m/%d %H:%M:%S") +``` + +### Column Name Options (`colNameOpt`) + +The `colNameOpt` parameter controls how data frame columns are named: + +| Value | Description | Example | +|---|---|---| +| `"caption"` | Field caption/label (default for `labkey.selectRows`). Best for display, harder to script with. | `"Participant ID"` | +| `"fieldname"` | Field name as used in LabKey API calls (default for `getRows`). Best for scripting. | `"ParticipantId"` | +| `"rname"` | R-safe name: lowercase, spaces become `_`, slashes become `_`. Used by LabKey R Views. | `"participantid"` | + +### Query Filters + +```r +# Build filters with makeFilter(): +filters <- makeFilter( + c("Country", "EQUAL", "Germany"), + c("Age", "GREATER_THAN_OR_EQUAL", "18"), + c("Status", "IN", "Active;Enrolled") +) +rows <- labkey.selectRows(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "study", + queryName = "Demographics", colFilter = filters) +``` + +The `makeFilter()` function accepts any number of filter triplets in the form `c("column", "OPERATOR", "value")`. Multiple filters are ANDed together. 
+ +#### Filter Operators Reference + +**Comparison:** `EQUAL`, `NOT_EQUAL`, `GREATER_THAN`, `LESS_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN_OR_EQUAL`, `NOT_EQUAL_OR_MISSING` + +**Date comparison:** `DATE_EQUAL`, `DATE_NOT_EQUAL`, `DATE_GREATER_THAN`, `DATE_LESS_THAN`, `DATE_GREATER_THAN_OR_EQUAL`, `DATE_LESS_THAN_OR_EQUAL` + +**String:** `STARTS_WITH`, `DOES_NOT_START_WITH`, `CONTAINS`, `DOES_NOT_CONTAIN`, `CONTAINS_ONE_OF`, `CONTAINS_NONE_OF` + +**Set/Range:** `IN`, `NOT_IN` (semicolon-delimited), `BETWEEN`, `NOT_BETWEEN` (comma-delimited), `MEMBER_OF` + +**Null checks (use empty string as value):** `MISSING`, `NOT_MISSING`, `MV_INDICATOR`, `NO_MV_INDICATOR` + +**Array:** `ARRAY_CONTAINS_ALL`, `ARRAY_CONTAINS_ANY`, `ARRAY_CONTAINS_NONE`, `ARRAY_CONTAINS_EXACT`, `ARRAY_CONTAINS_NOT_EXACT`, `ARRAY_ISEMPTY`, `ARRAY_ISNOTEMPTY` + +**Search:** `Q` (full-text search across table) + +**Lineage:** `EXP_CHILD_OF`, `EXP_PARENT_OF`, `EXP_LINEAGE_OF` + +**Ontology:** `ONTOLOGY_IN_SUBTREE`, `ONTOLOGY_NOT_IN_SUBTREE` + +#### Filter Examples + +```r +# Single filter (equals is common): +makeFilter(c("Country", "EQUAL", "Germany")) + +# Multiple filters (ANDed): +makeFilter( + c("TextFld", "CONTAINS", "h"), + c("BooleanFld", "EQUAL", "TRUE") +) + +# IN operator (semicolon-delimited values): +makeFilter(c("RowId", "IN", "2;3;6")) + +# MISSING operator (empty string for value): +makeFilter(c("IntFld", "MISSING", "")) +``` + +### Pagination + +```r +# Page through results: +page1 <- labkey.selectRows(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "lists", + queryName = "LargeTable", + maxRows = 100, + rowOffset = 0, # first 100 rows + colSort = "+Name" +) + +page2 <- labkey.selectRows(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "lists", + queryName = "LargeTable", + maxRows = 100, + rowOffset = 100, # next 100 rows + colSort = "+Name" +) +``` + +Note: When `maxRows` is NULL (the default), **all rows are returned**. 
Always set an explicit `maxRows` for large tables. + +### Container Filters + +Control which containers are searched. Pass as `containerFilter=` to `labkey.selectRows`/`labkey.executeSql`: + +- `"Current"` -- only the active container (default when NULL) +- `"CurrentAndSubfolders"` -- active container and its children +- `"CurrentPlusProject"` -- active container and its parent project +- `"CurrentAndParents"` -- active container and all ancestors +- `"CurrentPlusProjectAndShared"` -- current, project, and shared folder +- `"AllFolders"` -- everything the user can read + +### Lookup Columns + +To traverse lookup (foreign key) columns, use `/` in `colSelect`: + +```r +# Include columns from a lookup target: +rows <- labkey.selectRows(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "lists", + queryName = "AllTypes", + colSelect = "TextFld,IntFld,IntFld/LookupValue" +) +``` + +Use `"*"` as `colSelect` to get all columns including those not in the default view. + +### Error Handling + +Rlabkey raises R errors (via `stop()`) on failure. Use `tryCatch` for error handling: + +```r +tryCatch({ + rows <- labkey.selectRows(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "lists", + queryName = "MyTable") +}, error = function(e) { + message("Error: ", e$message) +}) +``` + +Common error scenarios: +- **Wrong schema/query name**: HTTP 404 with "Query not found" message +- **Bad credentials**: HTTP 401 unauthorized +- **Wrong server URL**: Connection refused or "could not resolve host" +- **HTTP-to-HTTPS mismatch**: Unexpected redirect (302) + +For debugging, enable verbose output: +```r +labkey.setDebugMode(TRUE) +# ... run your query ... +labkey.setDebugMode(FALSE) +``` + +### WAF Encoding + +By default, Rlabkey WAF-encodes SQL in `labkey.executeSql` to pass through web application firewalls. This requires LabKey Server v23.9.0+. 
For older servers: + +```r +labkey.setWafEncoding(FALSE) +labkey.executeSql(baseUrl = "http://localhost:8080/", + folderPath = "/home", schemaName = "core", + sql = "SELECT * FROM Containers") +``` + +### Data Modification APIs + +For completeness -- use these when analysis requires writing back results: + +- `labkey.insertRows(baseUrl, folderPath, schemaName, queryName, toInsert)` -- insert new rows (data frame) +- `labkey.updateRows(baseUrl, folderPath, schemaName, queryName, toUpdate)` -- update rows (must include PK) +- `labkey.deleteRows(baseUrl, folderPath, schemaName, queryName, toDelete)` -- delete rows (must include PK) +- `labkey.truncateTable(baseUrl, folderPath, schemaName, queryName)` -- delete all rows +- `labkey.importRows(baseUrl, folderPath, schemaName, queryName, toImport)` -- bulk import from data frame +- `labkey.moveRows(baseUrl, folderPath, targetFolderPath, schemaName, queryName, toMove)` -- move rows to another container + +All modification APIs accept optional `provenanceParams` and `options` parameters. Common options include: +- `auditBehavior`: `"NONE"`, `"SUMMARY"`, or `"DETAILED"` +- `auditUserComment`: string attached to audit log records + +Data frames passed to modification functions must be created with `stringsAsFactors = FALSE`. Column names must match the LabKey column names. To set a value to NULL, use an empty string `""`. 
+ +### Utility Functions + +| Function | Purpose | +|---|---| +| `labkey.whoAmI(baseUrl)` | Returns current user info (displayName, id, email, impersonated status) | +| `labkey.setDefaults(apiKey, baseUrl, email, password)` | Set default connection parameters | +| `labkey.setDebugMode(debug)` | Enable/disable debug output for requests | +| `labkey.setWafEncoding(wafEncode)` | Enable/disable WAF encoding for SQL | +| `labkey.getSchemas(baseUrl, folderPath)` | List available schemas | +| `labkey.getQueries(baseUrl, folderPath, schemaName)` | List tables/queries in a schema | +| `labkey.getQueryDetails(baseUrl, folderPath, schemaName, queryName)` | Get column metadata for a table | +| `labkey.getQueryViews(baseUrl, folderPath, schemaName, queryName)` | List named views for a table | +| `labkey.getDefaultViewDetails(baseUrl, folderPath, schemaName, queryName)` | Get default view column details | +| `labkey.getLookupDetails(baseUrl, folderPath, schemaName, queryName, lookupKey)` | Get lookup target column details | +| `labkey.getFolders(baseUrl, folderPath)` | List subfolders | + +## Important Considerations + +1. **maxRows defaults to NULL (ALL rows)** in `labkey.selectRows`. Always set an explicit `maxRows` for large tables to avoid pulling the entire dataset into memory. + +2. **Filter value delimiters are inconsistent**: `IN`/`NOT_IN` use semicolons (`"A;B;C"`), while `BETWEEN`/`NOT_BETWEEN` use commas (`"10,50"`). Null-check operators (`MISSING`, `NOT_MISSING`, etc.) require an empty string `""` as the value. + +3. **colSelect accepts both vectors and comma-separated strings**: `colSelect = c("Name", "Age")` and `colSelect = "Name,Age"` both work. When using a string, do not include spaces between column names. + +4. **Sort syntax**: prefix `+` for ascending or `-` for descending: `colSort = "+Age"` or `colSort = "-Name"`. + +5. **WAF encoding**: `labkey.executeSql` WAF-encodes SQL by default. Requires LabKey Server v23.9.0+. 
Call `labkey.setWafEncoding(FALSE)` for older servers. + +6. **folderPath requires a leading slash**: Use `"/home"` or `"/MyProject/MyFolder"`, not `"home"`. + +7. **colNameOpt defaults differ**: `labkey.selectRows` defaults to `"caption"`, while `getRows` (session-based) defaults to `"fieldname"`. Use `colNameOpt = "fieldname"` for consistent, scriptable column names. + +8. **Data frames for writes must use stringsAsFactors = FALSE**: When creating data frames for `labkey.insertRows`, `labkey.updateRows`, or `labkey.deleteRows`, always set `stringsAsFactors = FALSE`. + +9. **Multiple filters on the same column** are supported -- pass multiple triplets to `makeFilter()`. + +10. **baseUrl must include the context path and trailing slash**: e.g. `"http://localhost:8080/labkey/"` if the server uses a context path, or `"http://localhost:8080/"` if it does not. + +11. **LabKey SQL is not standard SQL.** It is a SQL dialect specific to LabKey. Use `mcp__labkey__validateSQL` to check syntax before executing. Refer to LabKey documentation for dialect-specific features (e.g., lookup column traversal via `/` or `.` notation). + +12. **SSL configuration**: For HTTPS servers on Windows, you may need to set the `RLABKEY_CAINFO_FILE` environment variable pointing to a CA bundle file. Use `labkey.acceptSelfSignedCerts()` for development servers with self-signed certificates. + +## Typical Analysis Workflow + +```r +library(Rlabkey) +library(jsonlite) + +# 1. Read connection settings from .mcp.json +config <- fromJSON(".mcp.json") +server_url <- sub("/mcp$", "/", config$mcpServers$labkey$url) +api_key <- config$mcpServers$labkey$headers$apikey + +# 2. Set defaults +labkey.setDefaults(baseUrl = server_url, apiKey = api_key) + +# 3. Explore (or use MCP tools for interactive exploration) +schemas <- labkey.getSchemas(baseUrl = server_url, folderPath = "/home") +queries <- labkey.getQueries(baseUrl = server_url, folderPath = "/home", + schemaName = "lists") + +# 4. 
Retrieve data +rows <- labkey.selectRows(baseUrl = server_url, + folderPath = "/home", + schemaName = "lists", + queryName = "Participants", + colSelect = c("ParticipantId", "Name", "Age", "Country"), + colFilter = makeFilter(c("Age", "GREATER_THAN_OR_EQUAL", "18")), + maxRows = 1000, + colSort = "+Age", + colNameOpt = "fieldname" +) + +# 5. Analyze +summary(rows) +table(rows$Country) +tapply(rows$Age, rows$Country, mean, na.rm = TRUE) + +# 6. Complex queries with SQL +sql_result <- labkey.executeSql(baseUrl = server_url, + folderPath = "/home", + schemaName = "lists", + sql = "SELECT Country, COUNT(*) AS N, AVG(Age) AS AvgAge + FROM Participants + GROUP BY Country + ORDER BY N DESC", + colNameOpt = "fieldname" +) +print(sql_result) +``` From 47c3f3b2b5e59eeb6d627c68a1b9f0a23e59012e Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Wed, 15 Apr 2026 15:41:26 -0700 Subject: [PATCH 3/7] new resources for data analysts using MCP first draft --- core/src/org/labkey/core/CoreMcp.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index 562ff479a3e..3551dc2b0f1 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -151,4 +151,22 @@ public ReadResourceResult getPythonDataAnalysisGuide() throws IOException )); } + @McpResource( + uri = "resource://org/labkey/core/DataAnalysis_R.md", + mimeType = "application/markdown", + name = "R Data Analysis Development Guide", + description = "Provide documentation for developers using R to analyze LabKey data") + public ReadResourceResult getRDataAnalysisGuide() throws IOException + { + incrementResourceRequestCount("R Data Analysis"); + String markdown = IOUtils.resourceToString("org/labkey/core/DataAnalysis_R.md", null, CoreModule.class.getClassLoader()); + return new ReadResourceResult(List.of( + new McpSchema.TextResourceContents( + "resource://org/labkey/core/DataAnalysis_R.md", + 
"application/markdown", + markdown + ) + )); + } + } From ad0693a544164d23e0fec187df39e7c2651b4283 Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Thu, 16 Apr 2026 08:31:48 -0700 Subject: [PATCH 4/7] .netrc --- .../org/labkey/core/DataAnalysis_Python.md | 60 ++++++++------- core/src/org/labkey/core/DataAnalysis_R.md | 77 +++++++++---------- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/core/src/org/labkey/core/DataAnalysis_Python.md b/core/src/org/labkey/core/DataAnalysis_Python.md index 42c7f99e510..4a83b50f278 100644 --- a/core/src/org/labkey/core/DataAnalysis_Python.md +++ b/core/src/org/labkey/core/DataAnalysis_Python.md @@ -2,14 +2,28 @@ This project is for performing data analysis against a LabKey Server instance using AI assistance. -**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token. When writing Python scripts, read these values from `.mcp.json` to pre-populate the `APIWrapper` connection parameters. Before running any script, confirm with the data analyst: +**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token. + +**Do not embed API keys in generated scripts.** Instead, ensure a `.netrc` file (Linux/Mac) or `_netrc` file (Windows) exists in the user's home directory with the server credentials. If the file does not exist, offer to create it using the API key from `.mcp.json`. The format is: + +``` +machine +login apikey +password +``` + +The `machine` value is the hostname only — no protocol (`https://`), no port, no path. For example, for `https://myserver.labkey.com:8443/labkey/mcp`, use `myserver.labkey.com`. 
On Linux/Mac, set permissions to 600 (`chmod 600 ~/.netrc`). + +When writing Python scripts, read the server URL from `.mcp.json` to pre-populate the `APIWrapper` connection parameters, but omit `api_key` — the `labkey` package reads `.netrc` automatically. Before running any script, confirm with the data analyst: - Is this the correct server? - Should `use_ssl` be True or False? (infer from the URL scheme in `.mcp.json`) -- Do they want to use the API key from `.mcp.json` or provide a different one? - What container path should be used? (use MCP `listContainers` to show available options) Confirm all of these settings with the analyst before writing any script. +## Online Reference Material +https://www.labkey.org/Documentation/wiki-page.view?name=python + ## MCP Tools Available A LabKey MCP server is configured (see `.mcp.json`). Use these tools to explore the server interactively: @@ -41,6 +55,8 @@ A LabKey MCP server is configured (see `.mcp.json`). Use these tools to explore ### Connection Setup +**Preferred: `.netrc` authentication (no credentials in scripts)** + ```python from labkey.api_wrapper import APIWrapper @@ -50,13 +66,22 @@ api = APIWrapper( context_path=None, # URL path segment after domain, e.g. "labkey" use_ssl=False, # True for https verify_ssl=True, # False for self-signed dev certs - api_key="your-api-key", # alternative: use ~/.netrc ) ``` -Authentication options: -- **API key**: Pass `api_key=` to APIWrapper (sent as `apikey` header) -- **.netrc file**: Create `~/.netrc` with `machine`, `login`, `password` fields (chmod 600) +The `labkey` package automatically reads credentials from `~/.netrc` (Linux/Mac) or `~/_netrc` (Windows). The `.netrc` entry should use `apikey` as the login and the API key as the password: + +``` +machine localhost +login apikey +password TheUniqueAPIKeyGeneratedForYou +``` + +The `machine` value must be the hostname only — no protocol, no port, no path. Set file permissions to 600 on Linux/Mac (`chmod 600 ~/.netrc`). 
+ +Authentication options (in order of preference): +- **.netrc file** (recommended): Create `~/.netrc` with `machine`, `login apikey`, `password ` fields (chmod 600). No credentials appear in scripts. +- **API key in code** (avoid in generated scripts): Pass `api_key=` to APIWrapper. Only use this for quick interactive testing, not in saved scripts. ### Data Retrieval APIs @@ -192,25 +217,6 @@ result = api.query.select_rows("study", "Demographics", filter_array=filters) **Ontology:** `ONTOLOGY_IN_SUBTREE`, `ONTOLOGY_NOT_IN_SUBTREE` -### Pagination - -```python -# Page through results: -result = api.query.select_rows("lists", "LargeTable", - max_rows=100, - offset=0, # page 1 - include_total_count=True, - sort="Name" -) -total = result["rowCount"] -page_rows = result["rows"] - -# Pagination enum: -from labkey.query import Pagination -result = api.query.select_rows("lists", "Table", show_rows=Pagination.ALL) -# Values: PAGINATED, SELECTED, UNSELECTED, ALL, NONE -``` - ### Container Filters Control which containers are searched. Pass as `container_filter=` to select_rows/execute_sql: @@ -293,8 +299,8 @@ from labkey.api_wrapper import APIWrapper from labkey.query import QueryFilter import pandas as pd -# 1. Connect -api = APIWrapper("localhost:8080", "MyProject", use_ssl=False, api_key="...") +# 1. Connect (credentials read from ~/.netrc automatically) +api = APIWrapper("localhost:8080", "MyProject", use_ssl=False) # 2. Explore (or use MCP tools for interactive exploration) schemas = api.query.get_queries("lists", include_columns=True) diff --git a/core/src/org/labkey/core/DataAnalysis_R.md b/core/src/org/labkey/core/DataAnalysis_R.md index 648d3dbce22..ee6745f33b7 100644 --- a/core/src/org/labkey/core/DataAnalysis_R.md +++ b/core/src/org/labkey/core/DataAnalysis_R.md @@ -2,14 +2,28 @@ This project is for performing data analysis against a LabKey Server instance using AI assistance. 
-**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token. When writing R scripts, read these values from `.mcp.json` (via `jsonlite::fromJSON`) to pre-populate `labkey.setDefaults()` and the `baseUrl`/`folderPath` parameters. Before running any script, confirm with the data analyst:
+**Connection defaults:** The LabKey server URL and API key can be inferred from `.mcp.json` in this directory. The `url` field (minus the `/mcp` path) provides the server endpoint, and the `apikey` header value provides the authentication token.
+
+**Do not embed API keys in generated scripts.** Instead, ensure a `.netrc` file (Linux/Mac) or `_netrc` file (Windows) exists in the user's home directory with the server credentials. If the file does not exist, offer to create it using the API key from `.mcp.json`. The format is:
+
+```
+machine <hostname>
+login apikey
+password <api-key>
+```
+
+The `machine` value is the hostname only — no protocol (`https://`), no port, no path. For example, for `https://myserver.labkey.com:8443/labkey/mcp`, use `myserver.labkey.com`. On Linux/Mac, set permissions to 600 (`chmod 600 ~/.netrc`). On Windows, ensure the `_netrc` file is a plain file (not a "Text Document") and that a `HOME` environment variable points to the directory containing it.
+
+When writing R scripts, read the server URL from `.mcp.json` (via `jsonlite::fromJSON`) to pre-populate `labkey.setDefaults(baseUrl=)`, but omit `apiKey` — Rlabkey reads `.netrc` automatically. Before running any script, confirm with the data analyst:
 - Is this the correct server?
 - Should the URL use `http://` or `https://`? (infer from the URL scheme in `.mcp.json`)
-- Do they want to use the API key from `.mcp.json` or provide a different one?
 - What container path should be used? 
(use MCP `listContainers` to show available options) Confirm all of these settings with the analyst before writing any script. +## Online Reference Material +https://www.labkey.org/Documentation/wiki-page.view?name=rAPI + ## MCP Tools Available A LabKey MCP server is configured (see `.mcp.json`). Use these tools to explore the server interactively: @@ -42,16 +56,15 @@ A LabKey MCP server is configured (see `.mcp.json`). Use these tools to explore ### Connection Setup -Rlabkey functions take `baseUrl` and `folderPath` as explicit arguments on every call. Use `labkey.setDefaults()` to avoid repeating credentials: +Rlabkey functions take `baseUrl` and `folderPath` as explicit arguments on every call. Use `labkey.setDefaults()` to set the server URL: + +**Preferred: `.netrc` authentication (no credentials in scripts)** ```r library(Rlabkey) -# Option 1: Set defaults (recommended for scripts) -labkey.setDefaults( - baseUrl = "http://localhost:8080/", - apiKey = "your-api-key" -) +# Set the server URL only — credentials are read from ~/.netrc automatically +labkey.setDefaults(baseUrl = "http://localhost:8080/") # Then call functions without baseUrl: rows <- labkey.selectRows( @@ -60,7 +73,7 @@ rows <- labkey.selectRows( queryName = "MyTable" ) -# Option 2: Pass baseUrl explicitly on every call (no defaults needed) +# Or pass baseUrl explicitly on every call: rows <- labkey.selectRows( baseUrl = "http://localhost:8080/", folderPath = "/home", @@ -69,10 +82,20 @@ rows <- labkey.selectRows( ) ``` -Authentication options: -- **API key**: Pass to `labkey.setDefaults(apiKey=)`. An API key avoids storing credentials on the client. API keys can be revoked and set to expire. If an API key is set, it takes precedence over email/password. +Rlabkey automatically reads credentials from `~/.netrc` (Linux/Mac) or `~/_netrc` (Windows). 
The `.netrc` entry should use `apikey` as the login and the API key as the password: + +``` +machine localhost +login apikey +password TheUniqueAPIKeyGeneratedForYou +``` + +The `machine` value must be the hostname only — no protocol, no port, no path. Set file permissions to 600 on Linux/Mac (`chmod 600 ~/.netrc`). Use `labkey.setCurlOptions(NETRC_FILE='/path/to/_netrc')` for a non-standard location. + +Authentication options (in order of preference): +- **.netrc file** (recommended): Create `~/.netrc` with `machine`, `login apikey`, `password ` fields (chmod 600). No credentials appear in scripts. +- **API key in code** (avoid in generated scripts): Pass to `labkey.setDefaults(apiKey=)`. Only use this for quick interactive testing, not in saved scripts. - **Email/password**: `labkey.setDefaults(email="user@example.com", password="pass")` -- **.netrc file**: Create `~/.netrc` with `machine`, `login`, `password` fields (chmod 600). Rlabkey reads this automatically. Use `labkey.setCurlOptions(NETRC_FILE='/path/to/_netrc')` for a custom location. - **Session key**: Pass a session key via `labkey.setDefaults(apiKey=)`. Session keys tie R access to the user's browser session context (same authorizations, impersonation state, etc.). To clear credentials: `labkey.setDefaults()` (called with no arguments resets all defaults). @@ -277,29 +300,6 @@ makeFilter(c("RowId", "IN", "2;3;6")) makeFilter(c("IntFld", "MISSING", "")) ``` -### Pagination - -```r -# Page through results: -page1 <- labkey.selectRows(baseUrl = "http://localhost:8080/", - folderPath = "/home", schemaName = "lists", - queryName = "LargeTable", - maxRows = 100, - rowOffset = 0, # first 100 rows - colSort = "+Name" -) - -page2 <- labkey.selectRows(baseUrl = "http://localhost:8080/", - folderPath = "/home", schemaName = "lists", - queryName = "LargeTable", - maxRows = 100, - rowOffset = 100, # next 100 rows - colSort = "+Name" -) -``` - -Note: When `maxRows` is NULL (the default), **all rows are returned**. 
Always set an explicit `maxRows` for large tables. - ### Container Filters Control which containers are searched. Pass as `containerFilter=` to `labkey.selectRows`/`labkey.executeSql`: @@ -429,13 +429,12 @@ Data frames passed to modification functions must be created with `stringsAsFact library(Rlabkey) library(jsonlite) -# 1. Read connection settings from .mcp.json +# 1. Read server URL from .mcp.json (credentials come from ~/.netrc) config <- fromJSON(".mcp.json") server_url <- sub("/mcp$", "/", config$mcpServers$labkey$url) -api_key <- config$mcpServers$labkey$headers$apikey -# 2. Set defaults -labkey.setDefaults(baseUrl = server_url, apiKey = api_key) +# 2. Set defaults (no apiKey — .netrc provides authentication) +labkey.setDefaults(baseUrl = server_url) # 3. Explore (or use MCP tools for interactive exploration) schemas <- labkey.getSchemas(baseUrl = server_url, folderPath = "/home") From 3c0de23b7122cfe7e1156b8659f7f15eee3b93ed Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Thu, 16 Apr 2026 12:30:20 -0700 Subject: [PATCH 5/7] Accept URL it's very convenient. --- core/src/org/labkey/core/CoreMcp.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index 3551dc2b0f1..59117cd9d2c 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -18,6 +18,7 @@ import org.labkey.api.study.Study; import org.labkey.api.study.StudyService; import org.labkey.api.util.HtmlString; +import org.labkey.api.view.ActionURL; import org.springframework.ai.chat.model.ToolContext; import org.springframework.ai.mcp.annotation.McpResource; import org.springframework.ai.tool.annotation.Tool; @@ -92,7 +93,9 @@ String listContainers(ToolContext toolContext) @Tool(description = "Every tool in this MCP requires a container path, e.g. MyProject/MyFolder. A container is also called a folder or project. 
" + "Please prompt the user for a container path and use this tool to save the path for this MCP session. The user can also change the container " + "during the session using this tool. The user must have read permissions in the container, in other words, the path must be on the list that " + - "the listContainers tool returns. Don't suggest a leading slash on the path because typing a slash in some LLM clients triggers custom shortcuts.") + "the listContainers tool returns. Don't suggest a leading slash on the path because typing a slash in some LLM clients triggers custom shortcuts." + + "Alternately, the user may provide a LabKey server URL like http://localhost:8080/StudyVerifyProject/My%20Study/project-begin.view. " + + "The container path is encoded in the URL and can be accepted as a valid parameter.") @RequiresNoPermission // Because we don't have a container yet, but the tool will verify read permission before setting the container String setContainer(ToolContext context, @ToolParam(description = "Container path, e.g. MyProject/MyFolder") String containerPath) { @@ -100,6 +103,18 @@ String setContainer(ToolContext context, @ToolParam(description = "Container pat Container container = ContainerManager.getForPath(containerPath); + if (null == container) + { + try + { + var url = new ActionURL(containerPath); + container = ContainerManager.getForURL(url); + } + catch (IllegalArgumentException x) + { + } + } + // Must exist and user must have read permission to set a container. Note: Send the same message in either // case to prevent information exposure. 
if (container == null || !container.hasPermission(getUser(context), ReadPermission.class)) From c87b30c5e1bd7fba52311702b952dc51090d7dd0 Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Thu, 16 Apr 2026 15:16:26 -0700 Subject: [PATCH 6/7] Apply suggestions from code review Co-authored-by: Josh Eckels --- core/src/org/labkey/core/CoreMcp.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/org/labkey/core/CoreMcp.java b/core/src/org/labkey/core/CoreMcp.java index 59117cd9d2c..fecba61406d 100644 --- a/core/src/org/labkey/core/CoreMcp.java +++ b/core/src/org/labkey/core/CoreMcp.java @@ -93,7 +93,7 @@ String listContainers(ToolContext toolContext) @Tool(description = "Every tool in this MCP requires a container path, e.g. MyProject/MyFolder. A container is also called a folder or project. " + "Please prompt the user for a container path and use this tool to save the path for this MCP session. The user can also change the container " + "during the session using this tool. The user must have read permissions in the container, in other words, the path must be on the list that " + - "the listContainers tool returns. Don't suggest a leading slash on the path because typing a slash in some LLM clients triggers custom shortcuts." + + "the listContainers tool returns. Don't suggest a leading slash on the path because typing a slash in some LLM clients triggers custom shortcuts. " + "Alternately, the user may provide a LabKey server URL like http://localhost:8080/StudyVerifyProject/My%20Study/project-begin.view. " + "The container path is encoded in the URL and can be accepted as a valid parameter.") @RequiresNoPermission // Because we don't have a container yet, but the tool will verify read permission before setting the container From e7e70dcf1e2870ccf2c281cc225be2d100a568b2 Mon Sep 17 00:00:00 2001 From: labkey-matthewb Date: Thu, 16 Apr 2026 15:18:57 -0700 Subject: [PATCH 7/7] LabKey SQL is read-only. 
--- query/src/org/labkey/query/controllers/LabKeySql.md | 1 + 1 file changed, 1 insertion(+) diff --git a/query/src/org/labkey/query/controllers/LabKeySql.md b/query/src/org/labkey/query/controllers/LabKeySql.md index e39099d2f76..5e96934f52d 100644 --- a/query/src/org/labkey/query/controllers/LabKeySql.md +++ b/query/src/org/labkey/query/controllers/LabKeySql.md @@ -1,6 +1,7 @@ ### **LabKey SQL Documentation** LabKey SQL is a unique SQL dialect that extends standard SQL functionality with features tailored for the LabKey Server platform, particularly for scientific data management. +LabKey SQL only implements data read operations. It does not support INSERT/UPDATE/DELETE, nor does it support creating or altering tables. -----