launchdarkly · mattrmc1 · Jun 22, 2026 · Jun 23, 2026 · Jun 23, 2026 · Jun 23, 2026
@@ -81,4 +81,23 @@ AIJudgeConfig judgeConfig(
       LDContext context,
       AIJudgeConfigDefault defaultValue,
       Map<String, Object> variables);
+
+  /**
+   * Reconstructs a tracker from a resumption token, preserving the original run's identity.
+   * <p>
+   * Use this when a multi-turn or streaming AI interaction spans multiple requests. The caller
+   * stores the resumption token from a previous tracker (via
+   * {@link LDAIConfigTracker#getResumptionToken()}) and passes it back here to continue tracking
+   * against the same run.
+   * <p>
+   * {@link LDAIConfigTracker#getResumptionToken()} is documented as possibly returning
+   * {@code null}; callers should check for that before calling this method, which requires a
+   * non-null token.
+   * <p>
+   * <strong>Security note:</strong> resumption tokens embed flag-evaluation details such as the
+   * variation key and config version. Keep tokens server-side and do not round-trip them through
+   * untrusted clients where they could leak flag-targeting information.
+   *
+   * @param resumptionToken the token returned by a previous tracker; must not be {@code null}
+   * @param context the evaluation context for the new request; must not be {@code null}
+   * @return a tracker with the decoded run identity, never {@code null}
+   * @throws IllegalArgumentException if the token is malformed
+   * @see LDAIConfigTracker#getResumptionToken()
+   */
+  LDAIConfigTracker createTracker(String resumptionToken, LDContext context);
 }
@@ -8,20 +8,21 @@
 import com.launchdarkly.sdk.LDContext;
 import com.launchdarkly.sdk.LDValue;
 import com.launchdarkly.sdk.LDValueType;
-import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Mode;
 import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Message;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Mode;
 import com.launchdarkly.sdk.server.ai.internal.AIConfigFlagValue;
 import com.launchdarkly.sdk.server.ai.internal.AIConfigParser;
 import com.launchdarkly.sdk.server.ai.internal.AISdkInfo;
 import com.launchdarkly.sdk.server.ai.internal.Interpolator;
-import com.launchdarkly.sdk.server.ai.internal.NoOpAIConfigTracker;
+import com.launchdarkly.sdk.server.ai.internal.LDAIConfigTrackerImpl;
 import com.launchdarkly.sdk.server.interfaces.LDClientInterface;
 
 import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.UUID;
 import java.util.function.Supplier;
 
 /**
@@ -51,8 +52,6 @@ public final class LDAIClientImpl implements LDAIClient {
       .anonymous(true)
       .build();
 
-  // Tracking is implemented in a later step; until then every config hands out the no-op tracker.
-  private static final Supplier<LDAIConfigTracker> TRACKER_FACTORY = () -> NoOpAIConfigTracker.INSTANCE;
 
   private final LDClientInterface client;
   private final LDLogger logger;
@@ -187,6 +186,9 @@ private AIConfig buildConfig(
       AIConfigFlagValue parsed,
       LDContext context,
       Map<String, Object> variables) {
+    Supplier<LDAIConfigTracker> factory = trackerFactory(
+        key, parsed.getVariationKey(), parsed.getVersion(),
+        parsed.getModel(), parsed.getProvider(), context);
     switch (mode) {
       case AGENT:
         return new AIAgentConfig(
@@ -197,7 +199,7 @@ private AIConfig buildConfig(
             interpolate(parsed.getInstructions(), variables, context),
             parsed.getJudgeConfiguration(),
             parsed.getTools(),
-            TRACKER_FACTORY);
+            factory);
       case JUDGE:
         return new AIJudgeConfig(
             key,
@@ -206,7 +208,7 @@ private AIConfig buildConfig(
             parsed.getProvider(),
             interpolateMessages(parsed.getMessages(), variables, context),
             parsed.getEvaluationMetricKey(),
-            TRACKER_FACTORY);
+            factory);
       case COMPLETION:
       default:
         return new AICompletionConfig(
@@ -217,7 +219,7 @@ private AIConfig buildConfig(
             interpolateMessages(parsed.getMessages(), variables, context),
             parsed.getJudgeConfiguration(),
             parsed.getTools(),
-            TRACKER_FACTORY);
+            factory);
     }
   }
 
@@ -231,6 +233,9 @@ private AIConfig buildConfigFromDefault(
       AIConfigDefault defaultValue,
       LDContext context,
       Map<String, Object> variables) {
+    // Default configs still get real trackers — the configKey was requested even if no flag was found.
+    // No flag was evaluated, so variationKey, version, model and provider are all passed as null.
+    Supplier<LDAIConfigTracker> factory = trackerFactory(key, null, null, null, null, context);
     switch (mode) {
       case AGENT: {
         AIAgentConfigDefault agent = (AIAgentConfigDefault) defaultValue;
@@ -242,7 +247,7 @@ private AIConfig buildConfigFromDefault(
             interpolate(agent.getInstructions(), variables, context),
             agent.getJudgeConfiguration(),
             agent.getTools(),
-            TRACKER_FACTORY);
+            factory);
       }
       case JUDGE: {
         AIJudgeConfigDefault judge = (AIJudgeConfigDefault) defaultValue;
@@ -253,7 +258,7 @@ private AIConfig buildConfigFromDefault(
             judge.getProvider(),
             interpolateMessages(judge.getMessages(), variables, context),
             judge.getEvaluationMetricKey(),
-            TRACKER_FACTORY);
+            factory);
       }
       case COMPLETION:
       default: {
@@ -266,11 +271,43 @@ private AIConfig buildConfigFromDefault(
             interpolateMessages(completion.getMessages(), variables, context),
             completion.getJudgeConfiguration(),
             completion.getTools(),
-            TRACKER_FACTORY);
+            factory);
       }
     }
   }
 
+  /**
+   * Creates a per-evaluation tracker factory. Each call to the returned {@link Supplier} produces
+   * a fresh {@link LDAIConfigTrackerImpl} with a new {@code runId}.
+   */
+  private Supplier<LDAIConfigTracker> trackerFactory(
+      String configKey,
+      String variationKey,
+      Integer version,
+      com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Model model,
+      com.launchdarkly.sdk.server.ai.datamodel.LDAIConfigTypes.Provider provider,
+      LDContext context) {
+    String modelName = model != null && model.getName() != null ? model.getName() : "";
+    String providerName = provider != null && provider.getName() != null ? provider.getName() : "";
+    int ver = version != null ? version : 1;
+    return () -> new LDAIConfigTrackerImpl(
+        client,
+        UUID.randomUUID().toString(),
+        configKey,
+        variationKey,
+        ver,
+        modelName,
+        providerName,
+        context,
+        null, // graphKey — set by agentGraph() in Plan 3
+        logger);
+  }
+
+  @Override
+  public LDAIConfigTracker createTracker(String resumptionToken, LDContext context) {
+    // Token decoding is delegated entirely to the tracker implementation; this client only
+    // supplies its own LDClient and logger alongside the caller's token and context.
+    final LDAIConfigTracker resumed =
+        LDAIConfigTrackerImpl.fromResumptionToken(resumptionToken, client, context, logger);
+    return resumed;
+  }
+
   private List<Message> interpolateMessages(
       List<Message> messages, Map<String, Object> variables, LDContext context) {
     if (messages == null) {

@@ -1,16 +1,169 @@
 package com.launchdarkly.sdk.server.ai;
 
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.AIMetrics;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.FeedbackKind;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.JudgeResult;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.MetricSummary;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.TokenUsage;
+import com.launchdarkly.sdk.server.ai.datamodel.LDAITrackingTypes.TrackData;
+
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.function.Function;
+
 /**
  * Reports events related to a single AI run of an {@link AIConfig}.
  * <p>
- * A tracker is obtained from a retrieved config via {@link AIConfig#createTracker()}. Each tracker
- * corresponds to one AI run and is used to record metrics such as model usage, duration, and
- * feedback against the AI Config it was created from.
+ * A tracker is obtained from a retrieved config via {@link AIConfig#createTracker()}, or
+ * reconstructed from a resumption token via {@link LDAIClient#createTracker(String, com.launchdarkly.sdk.LDContext)}.
+ * Each tracker corresponds to one AI run and is used to record metrics such as model usage,
+ * duration, and feedback against the AI Config it was created from.
+ * <p>
+ * Most tracking methods are at-most-once: a second call to the same method on the same tracker
+ * is silently dropped. {@link #trackToolCall(String)}, {@link #trackToolCalls(List)} and
+ * {@link #trackJudgeResult(JudgeResult)} are multi-fire — repeated calls keep recording events.
  * <p>
- * <strong>This interface is an intentional placeholder.</strong> The metric- and feedback-reporting
- * methods (and resumption-token support) are introduced in a later step of the AI SDK build-out; it
- * is defined here so that the public config types expose a stable {@code createTracker()} surface.
- * The only implementation in this release is an internal no-op.
+ * Implementations are thread-safe.
  */
 public interface LDAIConfigTracker {
+
+  /**
+   * Returns the correlation metadata for this tracker's run.
+   *
+   * @return the track data, never {@code null}
+   */
+  TrackData getTrackData();
+
+  /**
+   * Returns the resumption token for this run.
+   * <p>
+   * The resumption token encodes the run's identity and can be passed to
+   * {@link LDAIClient#createTracker(String, com.launchdarkly.sdk.LDContext)} to reconstruct a
+   * tracker on a subsequent request (for example, in a streaming scenario).
+   * <p>
+   * <strong>Security note:</strong> resumption tokens embed flag-evaluation details such as the
+   * variation key and config version. Keep tokens server-side and do not round-trip them through
+   * untrusted clients where they could leak flag-targeting information.
+   *
+   * @return the resumption token, or {@code null} if not available; check for {@code null}
+   *     before passing it to {@code createTracker}, which requires a non-null token
+   */
+  String getResumptionToken();
+
+  /**
+   * Records the duration of the AI generation.
+   * <p>
+   * At-most-once: subsequent calls on the same tracker are silently dropped.
+   *
+   * @param duration the duration; ignored if {@code null}
+   */
+  void trackDuration(Duration duration);
+
+  /**
+   * Executes the given operation and records its wall-clock duration.
+   * <p>
+   * The duration is recorded even if the operation throws. Equivalent to wrapping the operation
+   * in a try/finally that calls {@link #trackDuration(Duration)}, so the at-most-once rule of
+   * that method applies to the recorded duration as well.
+   *
+   * @param <T> the return type of the operation
+   * @param operation the operation to execute and time; must not be {@code null}
+   * @return the result of the operation
+   * @throws Exception if the operation throws
+   */
+  <T> T trackDurationOf(Callable<T> operation) throws Exception;
+
+  /**
+   * Records the time from request start to receipt of the first token.
+   * <p>
+   * At-most-once: subsequent calls on the same tracker are silently dropped.
+   *
+   * @param duration the time to first token; ignored if {@code null}
+   */
+  void trackTimeToFirstToken(Duration duration);
+
+  /**
+   * Records that the AI generation succeeded.
+   * <p>
+   * At-most-once and mutually exclusive with {@link #trackError()}: whichever is called first wins.
+   */
+  void trackSuccess();
+
+  /**
+   * Records that the AI generation failed.
+   * <p>
+   * At-most-once and mutually exclusive with {@link #trackSuccess()}: whichever is called first wins.
+   */
+  void trackError();
+
+  /**
+   * Records user feedback for this AI generation.
+   * <p>
+   * At-most-once: subsequent calls on the same tracker are silently dropped.
+   *
+   * @param kind the feedback kind; ignored if {@code null}
+   */
+  void trackFeedback(FeedbackKind kind);
+
+  /**
+   * Records token usage for this AI generation.
+   * <p>
+   * At-most-once: subsequent calls on the same tracker are silently dropped. Calls where all
+   * counts are zero do not consume the at-most-once slot.
+   *
+   * @param tokens the token usage; ignored if {@code null}
+   */
+  void trackTokens(TokenUsage tokens);
+
+  /**
+   * Records a single tool call made during this AI generation.
+   * <p>
+   * Multi-fire: every call emits an event.
+   *
+   * @param toolKey the tool key; ignored if {@code null}
+   */
+  void trackToolCall(String toolKey);
+
+  /**
+   * Records multiple tool calls made during this AI generation.
+   * <p>
+   * Equivalent to calling {@link #trackToolCall(String)} for each key, and therefore multi-fire
+   * in the same way: one event per key, on every call.
+   *
+   * @param toolKeys the tool keys; ignored if {@code null}
+   */
+  void trackToolCalls(List<String> toolKeys);
+
+  /**
+   * Records the result of a judge evaluation.
+   * <p>
+   * Multi-fire per judge metric key. The result is silently skipped if it was not sampled, if
+   * the evaluation did not succeed, or if the metric key or score is absent.
+   *
+   * @param result the judge result; ignored if {@code null}
+   */
+  void trackJudgeResult(JudgeResult result);
+
+  /**
+   * Executes the given operation and tracks its metrics using the extracted {@link AIMetrics}.
+   * <p>
+   * Tracks duration (preferring runner-reported duration when present), success or error, tokens,
+   * and tool calls. If the operation throws, {@link #trackError()} is called and the exception
+   * is re-thrown.
+   *
+   * @param <T> the return type of the operation
+   * @param metricsExtractor a function that extracts {@link AIMetrics} from the operation result;
+   *     exceptions from the extractor propagate to the caller
+   * @param operation the AI operation to execute; must not be {@code null}
+   * @return the result of the operation
+   * @throws Exception if the operation or the metrics extractor throws
+   */
+  <T> T trackMetricsOf(
+      Function<? super T, AIMetrics> metricsExtractor,
+      Callable<T> operation) throws Exception;
+
+  /**
+   * Returns a snapshot of all metrics tracked so far on this tracker.
+   *
+   * @return the metric summary, never {@code null}
+   */
+  MetricSummary getSummary();
 }