diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java
index 5df6b067..b71625a1 100644
--- a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIAgentConfig.java
@@ -30,8 +30,9 @@ public final class AIAgentConfig extends AIConfig {
       String instructions,
       JudgeConfiguration judgeConfiguration,
       Map<String, Tool> tools,
-      Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.AGENT, model, provider, trackerFactory);
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
+    super(key, enabled, Mode.AGENT, model, provider, trackerFactory, evaluator);
     this.instructions = instructions;
     this.judgeConfiguration = judgeConfiguration;
     this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java
index 0a15aca0..e13801b4 100644
--- a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AICompletionConfig.java
@@ -32,8 +32,9 @@ public final class AICompletionConfig extends AIConfig {
       List<Message> messages,
       JudgeConfiguration judgeConfiguration,
       Map<String, Tool> tools,
-      Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory);
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
+    super(key, enabled, Mode.COMPLETION, model, provider, trackerFactory, evaluator);
     this.messages = messages == null ? null : Collections.unmodifiableList(messages);
     this.judgeConfiguration = judgeConfiguration;
     this.tools = tools == null ? null : Collections.unmodifiableMap(tools);
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java
index 22820f08..f39c264b 100644
--- a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIConfig.java
@@ -24,6 +24,7 @@ public abstract class AIConfig {
   private final Model model;
   private final Provider provider;
   private final Supplier<LDAIConfigTracker> trackerFactory;
+  private final Evaluator evaluator;
 
   AIConfig(
       String key,
@@ -31,13 +32,15 @@ public abstract class AIConfig {
       Mode mode,
       Model model,
       Provider provider,
-      Supplier<LDAIConfigTracker> trackerFactory) {
+      Supplier<LDAIConfigTracker> trackerFactory,
+      Evaluator evaluator) {
     this.key = key;
     this.enabled = enabled;
     this.mode = mode;
     this.model = model;
     this.provider = provider;
     this.trackerFactory = Objects.requireNonNull(trackerFactory, "trackerFactory");
+    this.evaluator = Objects.requireNonNull(evaluator, "evaluator");
   }
 
   /**
@@ -102,4 +105,17 @@ public Provider getProvider() {
   public LDAIConfigTracker createTracker() {
     return trackerFactory.get();
   }
+
+  /**
+   * Returns the evaluator responsible for running judges against this configuration's output.
+   * <p>
+   * An {@link AIJudgeConfig} always carries {@link Evaluator#noop()}. An
+   * {@link AICompletionConfig} or {@link AIAgentConfig} carries whichever evaluator was passed
+   * to its constructor, which is {@link Evaluator#noop()} unless a custom one is wired in.
+   *
+   * @return the evaluator, never {@code null}
+   */
+  public Evaluator getEvaluator() {
+    return this.evaluator;
+  }
 }
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java
index 0c6245b1..2c905886 100644
--- a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/AIJudgeConfig.java
@@ -29,7 +29,7 @@ public final class AIJudgeConfig extends AIConfig {
       List<Message> messages,
       String evaluationMetricKey,
       Supplier<LDAIConfigTracker> trackerFactory) {
-    super(key, enabled, Mode.JUDGE, model, provider, trackerFactory);
+    super(key, enabled, Mode.JUDGE, model, provider, trackerFactory, Evaluator.noop());
     this.messages = messages == null ? null : Collections.unmodifiableList(messages);
     this.evaluationMetricKey = evaluationMetricKey;
   }
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Evaluator.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Evaluator.java
new file mode 100644
index 00000000..287a7a36
--- /dev/null
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Evaluator.java
@@ -0,0 +1,110 @@
+package com.launchdarkly.sdk.server.ai;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.JudgeConfiguration;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Coordinates evaluation of an AI Config output by running a set of {@link Judge} instances.
+ * <p>
+ * An {@code Evaluator} is attached to an {@link AICompletionConfig} or {@link AIAgentConfig} and
+ * invoked by managed AI types. In v1.0, the evaluator returned by the config retrieval
+ * methods is always a noop that returns an empty list immediately.
+ * <p>
+ * Instances are immutable and thread-safe.
+ */
+public final class Evaluator {
+  private static final Evaluator NOOP = new Evaluator();
+
+  private final Map<String, Judge> judges;
+  private final JudgeConfiguration judgeConfiguration;
+  private final LDLogger logger;
+  private final boolean isNoop;
+
+  private Evaluator() {
+    this.judges = Collections.emptyMap();
+    this.judgeConfiguration = null;
+    this.logger = null;
+    this.isNoop = true;
+  }
+
+  /**
+   * Constructs an evaluator with the given judges and configuration.
+   *
+   * @param judges a map from judge config key to {@link Judge} instance; must not be {@code null}
+   * @param judgeConfiguration the judge configuration listing which judges to run and their sampling
+   *     rates; must not be {@code null}
+   * @param logger the logger; must not be {@code null}
+   */
+  public Evaluator(Map<String, Judge> judges, JudgeConfiguration judgeConfiguration, LDLogger logger) {
+    this.judges = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(judges, "judges")));
+    this.judgeConfiguration = Objects.requireNonNull(judgeConfiguration, "judgeConfiguration");
+    this.logger = Objects.requireNonNull(logger, "logger");
+    this.isNoop = false;
+  }
+
+  /**
+   * Returns the shared noop evaluator, which immediately returns an empty result list without
+   * logging any warnings.
+   *
+   * @return the noop singleton, never {@code null}
+   */
+  public static Evaluator noop() {
+    return NOOP;
+  }
+
+  /**
+   * Runs all configured judges against the given input/output pair and returns their results.
+   * <p>
+   * When this is the noop evaluator, returns a completed future holding an empty list immediately.
+   * Otherwise, judges are run sequentially in the order specified by the {@link JudgeConfiguration}.
+   * Judges referenced in the configuration but absent from the judges map are skipped with a
+   * warning; this is not an error.
+   * <p>
+   * A judge that throws an unchecked exception does not stop the remaining judges: the failure is
+   * logged as a warning and recorded as an unsuccessful {@link JudgeResult} for that judge.
+   * <p>
+   * This method does NOT call {@code trackJudgeResult} — that is the caller's responsibility.
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @return a completed future holding the list of judge results; never {@code null}
+   */
+  public CompletableFuture<List<JudgeResult>> evaluate(String input, String output) {
+    if (isNoop) {
+      return CompletableFuture.completedFuture(Collections.emptyList());
+    }
+
+    List<JudgeResult> results = new ArrayList<>();
+    for (JudgeConfiguration.Judge entry : judgeConfiguration.getJudges()) {
+      Judge judge = judges.get(entry.getKey());
+      if (judge == null) {
+        logger.warn("Evaluator: no judge found for key '{}', skipping", entry.getKey());
+        continue;
+      }
+      try {
+        results.add(judge.evaluate(input, output, entry.getSamplingRate()));
+      } catch (RuntimeException ex) {
+        // Isolate the failure so one misbehaving judge cannot abort the rest of the run or escape
+        // as a synchronous throw from a method that reports through a future.
+        logger.warn("Evaluator: judge '{}' failed: {}", entry.getKey(), ex.toString());
+        results.add(JudgeResult.builder()
+            .sampled(true)
+            .success(false)
+            .judgeConfigKey(judge.getConfig().getKey())
+            .metricKey(judge.getConfig().getEvaluationMetricKey())
+            .errorMessage(ex.toString())
+            .build());
+      }
+    }
+    return CompletableFuture.completedFuture(results);
+  }
+}
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Judge.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Judge.java
new file mode 100644
index 00000000..91e2855c
--- /dev/null
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Judge.java
@@ -0,0 +1,225 @@
+package com.launchdarkly.sdk.server.ai;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.stream.Collectors;
+
+/**
+ * Evaluates an AI model output against a judge prompt, returning a scored {@link JudgeResult}.
+ * <p>
+ * A {@code Judge} wraps an {@link AIJudgeConfig} and a {@link Runner}. Each call to
+ * {@link #evaluate} or {@link #evaluateMessages} invokes the runner with a formatted evaluation
+ * prompt and parses the structured {@code {score, reasoning}} response. Evaluation can be sampled
+ * to reduce cost: pass a {@code samplingRate} of {@code 0.0} to always skip, or {@code 1.0} to
+ * always run.
+ * <p>
+ * Instances are immutable and thread-safe.
+ */
+public final class Judge {
+  /**
+   * JSON-Schema fragment sent to the runner as the {@code outputType}, requesting structured
+   * {@code {score, reasoning}} output.
+   */
+  private static final Map<String, Object> EVALUATION_SCHEMA;
+  static {
+    Map<String, Object> scoreSchema = new HashMap<>();
+    scoreSchema.put("type", "number");
+
+    Map<String, Object> reasoningSchema = new HashMap<>();
+    reasoningSchema.put("type", "string");
+
+    Map<String, Object> properties = new HashMap<>();
+    properties.put("score", Collections.unmodifiableMap(scoreSchema));
+    properties.put("reasoning", Collections.unmodifiableMap(reasoningSchema));
+
+    Map<String, Object> schema = new HashMap<>();
+    schema.put("type", "object");
+    schema.put("properties", Collections.unmodifiableMap(properties));
+    schema.put("required", Collections.singletonList("score"));
+
+    EVALUATION_SCHEMA = Collections.unmodifiableMap(schema);
+  }
+
+  private final AIJudgeConfig config;
+  private final Runner runner;
+  private final LDLogger logger;
+
+  /**
+   * Constructs a judge.
+   *
+   * @param config the judge AI Config; must not be {@code null}
+   * @param runner the runner to invoke; must not be {@code null}
+   * @param logger the logger; must not be {@code null}
+   */
+  public Judge(AIJudgeConfig config, Runner runner, LDLogger logger) {
+    this.config = Objects.requireNonNull(config, "config");
+    this.runner = Objects.requireNonNull(runner, "runner");
+    this.logger = Objects.requireNonNull(logger, "logger");
+  }
+
+  /**
+   * Evaluates the given input/output pair, always running (sampling rate {@code 1.0}).
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluate(String input, String output) {
+    return evaluate(input, output, 1.0);
+  }
+
+  /**
+   * Evaluates the given input/output pair, subject to the given sampling rate.
+   * <p>
+   * Failures are reported through the returned result rather than thrown: if creating the tracker
+   * or invoking the runner throws, or the structured output is missing or invalid, an
+   * unsuccessful result is returned. When the failure is an {@link InterruptedException}, the
+   * calling thread's interrupt flag is restored before returning.
+   *
+   * @param input the message history or prompt that was sent to the model
+   * @param output the model's response to evaluate
+   * @param samplingRate the fraction of evaluations to actually run; {@code 0.0} always skips,
+   *     {@code 1.0} always runs
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluate(String input, String output, double samplingRate) {
+    // nextDouble() is in [0.0, 1.0), so a rate of 1.0 always runs and a rate of 0.0 never does.
+    if (ThreadLocalRandom.current().nextDouble() >= samplingRate) {
+      return failedResult(false, null);
+    }
+
+    String formatted = "MESSAGE HISTORY:\n" + input + "\n\nRESPONSE TO EVALUATE:\n" + output;
+
+    RunnerResult result;
+    try {
+      // Tracker creation is inside the try so a failing tracker factory is reported as a failed
+      // evaluation instead of escaping from this method.
+      LDAIConfigTracker tracker = config.createTracker();
+      result = tracker.trackMetricsOf(RunnerResult::getMetrics, () -> runner.run(formatted, EVALUATION_SCHEMA));
+    } catch (Exception ex) {
+      if (ex instanceof InterruptedException) {
+        // Preserve the interrupt for callers further up the stack.
+        Thread.currentThread().interrupt();
+      }
+      String message = ex.getMessage() != null ? ex.getMessage() : ex.toString();
+      logger.warn("Judge {}: evaluation failed: {}", config.getKey(), message);
+      return failedResult(true, message);
+    }
+
+    if (result == null) {
+      logger.warn("Judge {}: runner returned no result", config.getKey());
+      return failedResult(true, null);
+    }
+
+    Map<String, Object> parsed = result.getParsed();
+    if (parsed == null) {
+      logger.warn("Judge {}: runner returned null parsed output", config.getKey());
+      return failedResult(true, null);
+    }
+
+    Object scoreRaw = parsed.get("score");
+    if (!(scoreRaw instanceof Number)) {
+      logger.warn("Judge {}: parsed output missing numeric score", config.getKey());
+      return failedResult(true, null);
+    }
+    double score = ((Number) scoreRaw).doubleValue();
+    if (!Double.isFinite(score) || score < 0.0 || score > 1.0) {
+      logger.warn("Judge {}: score {} is outside [0.0, 1.0]", config.getKey(), score);
+      return failedResult(true, null);
+    }
+
+    JudgeResult.Builder resultBuilder = JudgeResult.builder()
+        .sampled(true)
+        .success(true)
+        .judgeConfigKey(config.getKey())
+        .metricKey(config.getEvaluationMetricKey())
+        .score(score);
+
+    Object reasoningRaw = parsed.get("reasoning");
+    if (reasoningRaw instanceof String) {
+      resultBuilder.reasoning((String) reasoningRaw);
+    } else if (reasoningRaw != null) {
+      logger.warn("Judge {}: reasoning is not a string, ignoring", config.getKey());
+    }
+
+    return resultBuilder.build();
+  }
+
+  /**
+   * Evaluates a message list and runner response, always running (sampling rate {@code 1.0}).
+   * <p>
+   * Messages are formatted as {@code role: content} lines, joined by newlines.
+   *
+   * @param messages the messages that were sent to the model
+   * @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response) {
+    return evaluateMessages(messages, response, 1.0);
+  }
+
+  /**
+   * Evaluates a message list and runner response, subject to the given sampling rate.
+   * <p>
+   * Messages are formatted as {@code role: content} lines, joined by newlines. A {@code null}
+   * message list, response, or response content is treated as empty text.
+   *
+   * @param messages the messages that were sent to the model
+   * @param response the runner result whose {@link RunnerResult#getContent() content} is evaluated
+   * @param samplingRate the fraction of evaluations to actually run
+   * @return the evaluation result; never {@code null}
+   */
+  public JudgeResult evaluateMessages(List<Message> messages, RunnerResult response, double samplingRate) {
+    String formattedMessages = messages == null ? "" : messages.stream()
+        .map(m -> m.getRole().getWireValue() + ": " + m.getContent())
+        .collect(Collectors.joining("\n"));
+    // A result without text content is evaluated as an empty response, not the literal "null".
+    String content = response == null ? null : response.getContent();
+    return evaluate(formattedMessages, content == null ? "" : content, samplingRate);
+  }
+
+  /**
+   * Returns the judge AI Config this instance was constructed with.
+   *
+   * @return the judge config, never {@code null}
+   */
+  public AIJudgeConfig getConfig() {
+    return config;
+  }
+
+  /**
+   * Returns the runner this instance was constructed with.
+   *
+   * @return the runner, never {@code null}
+   */
+  public Runner getRunner() {
+    return runner;
+  }
+
+  /**
+   * Builds an unsuccessful result carrying this judge's config key and metric key.
+   *
+   * @param sampled whether the evaluation was selected by sampling
+   * @param errorMessage the error description to attach, or {@code null} to attach none
+   * @return the failed result; never {@code null}
+   */
+  private JudgeResult failedResult(boolean sampled, String errorMessage) {
+    JudgeResult.Builder builder = JudgeResult.builder()
+        .sampled(sampled)
+        .success(false)
+        .judgeConfigKey(config.getKey())
+        .metricKey(config.getEvaluationMetricKey());
+    if (errorMessage != null) {
+      builder.errorMessage(errorMessage);
+    }
+    return builder.build();
+  }
+}
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/LDAIClientImpl.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/LDAIClientImpl.java
index 8bf81e71..dd81608a 100644
--- a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/LDAIClientImpl.java
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/LDAIClientImpl.java
@@ -199,7 +199,8 @@ private AIConfig buildConfig(
             interpolate(parsed.getInstructions(), variables, context),
             parsed.getJudgeConfiguration(),
             parsed.getTools(),
-            factory);
+            factory,
+            Evaluator.noop());
       case JUDGE:
         return new AIJudgeConfig(
             key,
@@ -219,7 +220,8 @@ private AIConfig buildConfig(
             interpolateMessages(parsed.getMessages(), variables, context),
             parsed.getJudgeConfiguration(),
             parsed.getTools(),
-            factory);
+            factory,
+            Evaluator.noop());
     }
   }
 
@@ -247,7 +249,8 @@ private AIConfig buildConfigFromDefault(
             interpolate(agent.getInstructions(), variables, context),
             agent.getJudgeConfiguration(),
             agent.getTools(),
-            factory);
+            factory,
+            Evaluator.noop());
       }
       case JUDGE: {
         AIJudgeConfigDefault judge = (AIJudgeConfigDefault) defaultValue;
@@ -271,7 +274,8 @@ private AIConfig buildConfigFromDefault(
             interpolateMessages(completion.getMessages(), variables, context),
             completion.getJudgeConfiguration(),
             completion.getTools(),
-            factory);
+            factory,
+            Evaluator.noop());
       }
     }
   }
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Runner.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Runner.java
new file mode 100644
index 00000000..e4ac6650
--- /dev/null
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/Runner.java
@@ -0,0 +1,38 @@
+package com.launchdarkly.sdk.server.ai;
+
+import java.util.Map;
+
+/**
+ * Executes an AI operation and returns a {@link RunnerResult}.
+ * <p>
+ * Implement this interface to wrap a model provider SDK so it can be used by a {@link Judge} or
+ * managed AI type. The SDK passes an optional {@code outputType} schema when it needs structured
+ * output (for example, when a judge requests a {@code {score, reasoning}} object).
+ * <p>
+ * Implementations should be thread-safe; a single instance may be called from multiple threads.
+ */
+public interface Runner {
+  /**
+   * Invokes the model with {@code input}, optionally requesting structured output.
+   *
+   * @param input the prompt or message history to send to the model; never {@code null}
+   * @param outputType a JSON-Schema-like map describing the structured output that is expected,
+   *     or {@code null} when no structured output is needed
+   * @return the outcome of the invocation; never {@code null}
+   * @throws Exception if the underlying provider call fails
+   */
+  RunnerResult run(String input, Map<String, Object> outputType) throws Exception;
+
+  /**
+   * Invokes the model with {@code input} without constraining the shape of the output.
+   * <p>
+   * Equivalent to calling {@link #run(String, Map)} with a {@code null} {@code outputType}.
+   *
+   * @param input the prompt or message history to send to the model; never {@code null}
+   * @return the outcome of the invocation; never {@code null}
+   * @throws Exception if the underlying provider call fails
+   */
+  default RunnerResult run(String input) throws Exception {
+    return this.run(input, null);
+  }
+}
diff --git a/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/RunnerResult.java b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/RunnerResult.java
new file mode 100644
index 00000000..af34dd9b
--- /dev/null
+++ b/lib/sdk/server-ai/src/main/java/com/launchdarkly/sdk/server/ai/RunnerResult.java
@@ -0,0 +1,124 @@
+package com.launchdarkly.sdk.server.ai;
+
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.AIMetrics;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+/**
+ * The result of a single {@link Runner} invocation.
+ * <p>
+ * Instances are immutable. Build them with {@link #builder(String, AIMetrics)}.
+ */
+public final class RunnerResult {
+  private final String content;
+  private final AIMetrics metrics;
+  private final Object raw;
+  private final Map<String, Object> parsed;
+
+  private RunnerResult(Builder b) {
+    this.content = b.content;
+    this.metrics = b.metrics;
+    this.raw = b.raw;
+    // Shallow defensive copy so later changes to the caller's map cannot alter this result.
+    this.parsed = b.parsed == null ? null : Collections.unmodifiableMap(new HashMap<>(b.parsed));
+  }
+
+  /**
+   * Returns the text content of the model response.
+   *
+   * @return the response text, or {@code null} if none was produced
+   */
+  public String getContent() {
+    return content;
+  }
+
+  /**
+   * Returns the metrics captured during this invocation.
+   *
+   * @return the metrics, never {@code null}
+   */
+  public AIMetrics getMetrics() {
+    return metrics;
+  }
+
+  /**
+   * Returns the unmodified provider response object, useful for provider-specific inspection.
+   *
+   * @return the raw response, or {@code null} if not set
+   */
+  public Object getRaw() {
+    return raw;
+  }
+
+  /**
+   * Returns the structured output parsed from the model response, when the runner was invoked with
+   * an {@code outputType} schema.
+   *
+   * @return an unmodifiable map of the structured output, or {@code null} if not set
+   */
+  public Map<String, Object> getParsed() {
+    return parsed;
+  }
+
+  /**
+   * Creates a builder for a {@link RunnerResult}.
+   *
+   * @param content the text content of the model response; may be {@code null}
+   * @param metrics the metrics for this invocation; must not be {@code null}
+   * @return a new {@link Builder}
+   * @throws NullPointerException if {@code metrics} is {@code null}
+   */
+  public static Builder builder(String content, AIMetrics metrics) {
+    return new Builder(content, metrics);
+  }
+
+  /**
+   * Builder for {@link RunnerResult}.
+   */
+  public static final class Builder {
+    private final String content;
+    private final AIMetrics metrics;
+    private Object raw;
+    private Map<String, Object> parsed;
+
+    private Builder(String content, AIMetrics metrics) {
+      this.content = content;
+      // Enforce the documented contract here so getMetrics() can never return null.
+      this.metrics = Objects.requireNonNull(metrics, "metrics");
+    }
+
+    /**
+     * Sets the unmodified provider response.
+     *
+     * @param raw the raw response object; may be {@code null}
+     * @return this builder
+     */
+    public Builder raw(Object raw) {
+      this.raw = raw;
+      return this;
+    }
+
+    /**
+     * Sets the structured output parsed from the model response.
+     *
+     * @param parsed the structured output map; may be {@code null}
+     * @return this builder
+     */
+    public Builder parsed(Map<String, Object> parsed) {
+      this.parsed = parsed;
+      return this;
+    }
+
+    /**
+     * Builds the immutable {@link RunnerResult}.
+     *
+     * @return a new {@link RunnerResult}
+     */
+    public RunnerResult build() {
+      return new RunnerResult(this);
+    }
+  }
+}
diff --git a/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/EvaluatorTest.java b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/EvaluatorTest.java
new file mode 100644
index 00000000..ede4d93f
--- /dev/null
+++ b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/EvaluatorTest.java
@@ -0,0 +1,176 @@
+package com.launchdarkly.sdk.server.ai;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.logging.Logs;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.JudgeConfiguration;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.AIMetrics;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.function.Function;
+
+import org.junit.Test;
+
+@SuppressWarnings("javadoc")
+public class EvaluatorTest {
+  private static final LDLogger LOGGER = LDLogger.withAdapter(Logs.toConsole(), "test");
+  private static final AIMetrics METRICS = AIMetrics.builder().success(true).build();
+
+  // ---- helpers ----------------------------------------------------------------
+
+  /**
+   * Builds a real Judge with the given key/metric, backed by a mocked Runner and tracker.
+   * The runner returns a parsed response with the given score.
+   */
+  private Judge judgeWithScore(String key, String metricKey, double score) throws Exception {
+    Runner runner = mock(Runner.class);
+    LDAIConfigTracker tracker = mock(LDAIConfigTracker.class);
+    when(tracker.trackMetricsOf(any(Function.class), any(Callable.class)))
+        .thenAnswer(inv -> {
+          Callable<?> op = inv.getArgument(1);
+          return op.call();
+        });
+
+    Map<String, Object> parsed = new HashMap<>();
+    parsed.put("score", score);
+    parsed.put("reasoning", "test reasoning");
+    RunnerResult result = RunnerResult.builder("content", METRICS).parsed(parsed).build();
+    when(runner.run(any(), any())).thenReturn(result);
+
+    AIJudgeConfig config = new AIJudgeConfig(key, true, null, null, null, metricKey, () -> tracker);
+    return new Judge(config, runner, LOGGER);
+  }
+
+  // ---- noop -------------------------------------------------------------------
+
+  @Test
+  public void noopReturnsEmptyList() throws Exception {
+    List<JudgeResult> results = Evaluator.noop().evaluate("input", "output").get();
+    assertThat(results, empty());
+  }
+
+  @Test
+  public void noopReturnsSameInstance() {
+    assertThat(Evaluator.noop(), is(Evaluator.noop()));
+  }
+
+  @Test
+  public void noopFutureIsAlreadyDone() {
+    assertThat(Evaluator.noop().evaluate("input", "output").isDone(), is(true));
+  }
+
+  // ---- single judge -----------------------------------------------------------
+
+  @Test
+  public void singleJudgeIsRun() throws Exception {
+    Judge judge = judgeWithScore("j1", "metric.1", 0.9);
+    Map<String, Judge> judges = Collections.singletonMap("j1", judge);
+    JudgeConfiguration config = new JudgeConfiguration(
+        Collections.singletonList(new JudgeConfiguration.Judge("j1", 1.0)));
+
+    Evaluator evaluator = new Evaluator(judges, config, LOGGER);
+    List<JudgeResult> results = evaluator.evaluate("input", "output").get();
+
+    assertThat(results, hasSize(1));
+    assertThat(results.get(0).isSuccess(), is(true));
+    assertThat(results.get(0).getScore(), is(0.9));
+  }
+
+  // ---- multiple judges run sequentially ---------------------------------------
+
+  @Test
+  public void multipleJudgesAreAllRun() throws Exception {
+    Judge j1 = judgeWithScore("j1", "m1", 0.8);
+    Judge j2 = judgeWithScore("j2", "m2", 0.6);
+    Map<String, Judge> judgesMap = new HashMap<>();
+    judgesMap.put("j1", j1);
+    judgesMap.put("j2", j2);
+    JudgeConfiguration config = new JudgeConfiguration(Arrays.asList(
+        new JudgeConfiguration.Judge("j1", 1.0),
+        new JudgeConfiguration.Judge("j2", 1.0)));
+
+    Evaluator evaluator = new Evaluator(judgesMap, config, LOGGER);
+    List<JudgeResult> results = evaluator.evaluate("input", "output").get();
+
+    assertThat(results, hasSize(2));
+    assertThat(results.get(0).getScore(), is(0.8));
+    assertThat(results.get(1).getScore(), is(0.6));
+  }
+
+  // ---- missing judge is skipped with a warning --------------------------------
+
+  @Test
+  public void missingJudgeIsSkipped() throws Exception {
+    Judge j1 = judgeWithScore("j1", "m1", 0.7);
+    Map<String, Judge> judgesMap = Collections.singletonMap("j1", j1);
+    JudgeConfiguration config = new JudgeConfiguration(Arrays.asList(
+        new JudgeConfiguration.Judge("j1", 1.0),
+        new JudgeConfiguration.Judge("missing-judge", 1.0)));
+
+    Evaluator evaluator = new Evaluator(judgesMap, config, LOGGER);
+    List<JudgeResult> results = evaluator.evaluate("input", "output").get();
+
+    assertThat(results, hasSize(1));
+    assertThat(results.get(0).getJudgeConfigKey(), is("j1"));
+  }
+
+  // ---- evaluator does NOT call trackJudgeResult --------------------------------
+
+  @Test
+  public void evaluatorDoesNotCallTrackJudgeResult() throws Exception {
+    // The judge's own tracker is the only tracker reachable from the evaluator, so it is the one
+    // that must never receive trackJudgeResult; a detached mock would make this check vacuous.
+    Runner runner = mock(Runner.class);
+    LDAIConfigTracker judgeTracker = mock(LDAIConfigTracker.class);
+    when(judgeTracker.trackMetricsOf(any(Function.class), any(Callable.class)))
+        .thenAnswer(inv -> {
+          Callable<?> op = inv.getArgument(1);
+          return op.call();
+        });
+    Map<String, Object> parsed = new HashMap<>();
+    parsed.put("score", 0.5);
+    when(runner.run(any(), any()))
+        .thenReturn(RunnerResult.builder("content", METRICS).parsed(parsed).build());
+
+    AIJudgeConfig judgeConfig = new AIJudgeConfig("jk", true, null, null, null, "mk", () -> judgeTracker);
+    Judge judge = new Judge(judgeConfig, runner, LOGGER);
+
+    Map<String, Judge> judgesMap = Collections.singletonMap("jk", judge);
+    JudgeConfiguration config = new JudgeConfiguration(
+        Collections.singletonList(new JudgeConfiguration.Judge("jk", 1.0)));
+
+    Evaluator evaluator = new Evaluator(judgesMap, config, LOGGER);
+    evaluator.evaluate("input", "output").get();
+
+    verify(judgeTracker, never()).trackJudgeResult(any());
+  }
+
+  // ---- returned future is already complete ------------------------------------
+
+  @Test
+  public void returnedFutureIsAlreadyDone() throws Exception {
+    Judge judge = judgeWithScore("j1", "m1", 0.5);
+    Map<String, Judge> judgesMap = Collections.singletonMap("j1", judge);
+    JudgeConfiguration config = new JudgeConfiguration(
+        Collections.singletonList(new JudgeConfiguration.Judge("j1", 1.0)));
+
+    Evaluator evaluator = new Evaluator(judgesMap, config, LOGGER);
+    assertThat(evaluator.evaluate("input", "output").isDone(), is(true));
+  }
+}
diff --git a/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/JudgeTest.java b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/JudgeTest.java
new file mode 100644
index 00000000..b607059a
--- /dev/null
+++ b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/JudgeTest.java
@@ -0,0 +1,241 @@
+package com.launchdarkly.sdk.server.ai;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import com.launchdarkly.logging.LDLogger;
+import com.launchdarkly.logging.Logs;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message.Role;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.AIMetrics;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.concurrent.Callable;
+
+import org.junit.Before;
+import org.junit.Test;
+
+// Unit tests for Judge, driven through a mocked Runner and a mocked LDAIConfigTracker.
+@SuppressWarnings("javadoc")
+public class JudgeTest {
+  private static final LDLogger LOGGER = LDLogger.withAdapter(Logs.toConsole(), "test");
+  private static final AIMetrics METRICS = AIMetrics.builder().success(true).build();
+
+  private Runner runner;
+  private LDAIConfigTracker tracker;
+  private AIJudgeConfig judgeConfig;
+  private Judge judge;
+
+  // Rebuilds the mocks and the judge under test ("judge-key" / "my.metric") for each test.
+  @Before
+  public void setUp() throws Exception {
+    runner = mock(Runner.class);
+    tracker = mock(LDAIConfigTracker.class);
+    // Stubbed trackMetricsOf just invokes the supplied callable and returns its value.
+    when(tracker.trackMetricsOf(any(Function.class), any(Callable.class)))
+        .thenAnswer(invocation -> ((Callable<?>) invocation.getArgument(1)).call());
+    judgeConfig = makeJudgeConfig("judge-key", "my.metric", tracker);
+    judge = new Judge(judgeConfig, runner, LOGGER);
+  }
+
+  // Builds a judge config for the given keys whose tracker supplier returns the given tracker.
+  private AIJudgeConfig makeJudgeConfig(String key, String metricKey, LDAIConfigTracker tracker) {
+    return new AIJudgeConfig(key, true, null, null, null, metricKey, () -> tracker);
+  }
+
+  // Wraps a parsed payload in a RunnerResult with fixed content "output" and METRICS.
+  private RunnerResult resultWithParsed(Map<String, Object> parsed) {
+    return RunnerResult.builder("output", METRICS).parsed(parsed).build();
+  }
+
+  // Makes every runner.run(...) call return a result carrying the given parsed payload.
+  private void stubRunner(Map<String, Object> parsed) throws Exception {
+    when(runner.run(any(), any())).thenReturn(resultWithParsed(parsed));
+  }
+
+  // Returns a fresh mutable payload containing only a "score" entry.
+  private static Map<String, Object> scoreOnly(double score) {
+    Map<String, Object> payload = new HashMap<>();
+    payload.put("score", score);
+    return payload;
+  }
+
+  // ---- successful evaluation --------------------------------------------------
+
+  @Test
+  public void successfulEvaluationReturnsCorrectScore() throws Exception {
+    Map<String, Object> payload = scoreOnly(0.85);
+    payload.put("reasoning", "Looks good");
+    stubRunner(payload);
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+
+    assertThat(verdict.isSampled(), is(true));
+    assertThat(verdict.isSuccess(), is(true));
+    assertThat(verdict.getScore(), is(0.85));
+    assertThat(verdict.getReasoning(), is("Looks good"));
+    assertThat(verdict.getJudgeConfigKey(), is("judge-key"));
+    assertThat(verdict.getMetricKey(), is("my.metric"));
+  }
+
+  @Test
+  public void scoreBoundaryZeroIsValid() throws Exception {
+    Map<String, Object> payload = scoreOnly(0.0);
+    payload.put("reasoning", "Terrible");
+    stubRunner(payload);
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(true));
+    assertThat(verdict.getScore(), is(0.0));
+  }
+
+  @Test
+  public void scoreBoundaryOneIsValid() throws Exception {
+    stubRunner(scoreOnly(1.0));
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(true));
+    assertThat(verdict.getScore(), is(1.0));
+  }
+
+  @Test
+  public void reasoningIsOptional() throws Exception {
+    stubRunner(scoreOnly(0.5));
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(true));
+    assertThat(verdict.getReasoning(), nullValue());
+  }
+
+  // ---- error paths ------------------------------------------------------------
+
+  @Test
+  public void runnerExceptionResultsInFailure() throws Exception {
+    when(runner.run(any(), any())).thenThrow(new RuntimeException("boom"));
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSampled(), is(true));
+    assertThat(verdict.isSuccess(), is(false));
+    assertThat(verdict.getErrorMessage(), is("boom"));
+  }
+
+  @Test
+  public void nullParsedResultsInFailure() throws Exception {
+    // No parsed(...) call on the builder, so the result carries no parsed payload.
+    RunnerResult unparsed = RunnerResult.builder("content", METRICS).build();
+    when(runner.run(any(), any())).thenReturn(unparsed);
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSampled(), is(true));
+    assertThat(verdict.isSuccess(), is(false));
+  }
+
+  @Test
+  public void missingScoreResultsInFailure() throws Exception {
+    Map<String, Object> payload = new HashMap<>();
+    payload.put("reasoning", "No score here");
+    stubRunner(payload);
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(false));
+  }
+
+  @Test
+  public void scoreAboveOneResultsInFailure() throws Exception {
+    stubRunner(scoreOnly(1.5));
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(false));
+  }
+
+  @Test
+  public void scoreBelowZeroResultsInFailure() throws Exception {
+    stubRunner(scoreOnly(-0.1));
+
+    JudgeResult verdict = judge.evaluate("input", "output");
+    assertThat(verdict.isSuccess(), is(false));
+  }
+
+  // ---- sampling ---------------------------------------------------------------
+
+  @Test
+  public void zeroSamplingRateAlwaysSkips() throws Exception {
+    JudgeResult verdict = judge.evaluate("input", "output", 0.0);
+
+    assertThat(verdict.isSampled(), is(false));
+    assertThat(verdict.isSuccess(), is(false));
+    verify(runner, never()).run(any(), any());
+  }
+
+  @Test
+  public void fullSamplingRateAlwaysRuns() throws Exception {
+    stubRunner(scoreOnly(0.5));
+
+    JudgeResult verdict = judge.evaluate("input", "output", 1.0);
+    assertThat(verdict.isSampled(), is(true));
+    verify(runner).run(any(), any());
+  }
+
+  // ---- evaluateMessages -------------------------------------------------------
+
+  @Test
+  public void evaluateMessagesFormatsCorrectly() throws Exception {
+    stubRunner(scoreOnly(0.9));
+
+    Message userTurn = new Message(Role.USER, "Hello");
+    Message assistantTurn = new Message(Role.ASSISTANT, "Hi there");
+    List<Message> conversation = Arrays.asList(userTurn, assistantTurn);
+    RunnerResult reply = RunnerResult.builder("Hi there", METRICS).build();
+    JudgeResult verdict = judge.evaluateMessages(conversation, reply);
+
+    // Asserts only success and a single runner call; the runner's arguments are not inspected.
+    assertThat(verdict.isSuccess(), is(true));
+    verify(runner).run(any(), any());
+  }
+
+  @Test
+  public void evaluateMessagesWithNullMessagesDoesNotThrow() throws Exception {
+    stubRunner(scoreOnly(0.5));
+
+    RunnerResult reply = RunnerResult.builder("content", METRICS).build();
+    JudgeResult verdict = judge.evaluateMessages(null, reply);
+    assertThat(verdict, notNullValue());
+  }
+
+  // ---- tracker delegation -----------------------------------------------------
+
+  @Test
+  public void trackerMetricsOfIsInvoked() throws Exception {
+    stubRunner(scoreOnly(0.7));
+
+    judge.evaluate("input", "output");
+
+    verify(tracker).trackMetricsOf(any(Function.class), any(Callable.class));
+  }
+
+  // ---- accessors --------------------------------------------------------------
+
+  @Test
+  public void getConfigReturnsConfig() {
+    assertThat(judge.getConfig(), is(judgeConfig));
+  }
+
+  @Test
+  public void getRunnerReturnsRunner() {
+    assertThat(judge.getRunner(), is(runner));
+  }
+}
diff --git a/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/RunnerResultTest.java b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/RunnerResultTest.java
new file mode 100644
index 00000000..3b84e270
--- /dev/null
+++ b/lib/sdk/server-ai/src/test/java/com/launchdarkly/sdk/server/ai/RunnerResultTest.java
@@ -0,0 +1,86 @@
+package com.launchdarkly.sdk.server.ai;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.notNullValue;
+
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.AIMetrics;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+// Unit tests for RunnerResult.builder: field wiring, null defaults, and parsed-map isolation.
+@SuppressWarnings("javadoc")
+public class RunnerResultTest {
+  private static final AIMetrics METRICS = AIMetrics.builder().success(true).build();
+
+  @Test
+  public void builderSetsContent() {
+    assertThat(RunnerResult.builder("hello", METRICS).build().getContent(), is("hello"));
+  }
+
+  @Test
+  public void builderSetsMetrics() {
+    assertThat(RunnerResult.builder(null, METRICS).build().getMetrics(), is(METRICS));
+  }
+
+  @Test
+  public void rawIsNullByDefault() {
+    assertThat(RunnerResult.builder("content", METRICS).build().getRaw(), nullValue());
+  }
+
+  @Test
+  public void parsedIsNullByDefault() {
+    assertThat(RunnerResult.builder("content", METRICS).build().getParsed(), nullValue());
+  }
+
+  @Test
+  public void builderSetsRaw() {
+    Object payload = new Object();
+    RunnerResult built = RunnerResult.builder("content", METRICS).raw(payload).build();
+    assertThat(built.getRaw(), is(payload));
+  }
+
+  @Test
+  public void builderSetsParsed() {
+    Map<String, Object> input = new HashMap<>();
+    input.put("score", 0.8);
+    RunnerResult built = RunnerResult.builder("content", METRICS).parsed(input).build();
+    assertThat(built.getParsed(), notNullValue());
+    assertThat(built.getParsed().get("score"), is(0.8));
+  }
+
+  @Test
+  public void parsedMapIsImmutable() {
+    Map<String, Object> input = new HashMap<>();
+    input.put("key", "value");
+    RunnerResult built = RunnerResult.builder("content", METRICS).parsed(input).build();
+    boolean rejected = false;
+    try {
+      built.getParsed().put("extra", "should fail");
+    } catch (UnsupportedOperationException ignored) {
+      // Expected: the map exposed by getParsed() must refuse writes.
+      rejected = true;
+    }
+    assertThat("Expected UnsupportedOperationException", rejected);
+  }
+
+  @Test
+  public void mutatingOriginalMapDoesNotAffectResult() {
+    Map<String, Object> input = new HashMap<>();
+    input.put("key", "original");
+    RunnerResult built = RunnerResult.builder("content", METRICS).parsed(input).build();
+    // Change the caller's map after build(); the result must still see the old value.
+    input.put("key", "mutated");
+    assertThat(built.getParsed().get("key"), is("original"));
+  }
+
+  @Test
+  public void contentCanBeNull() {
+    assertThat(RunnerResult.builder(null, METRICS).build().getContent(), nullValue());
+  }
+}