models = new HashMap<>();
models.put("completion", "gpt-4.1");
models.put("embedding", "text-embedding-3-large");
@@ -213,6 +263,8 @@ public static void main(String[] args) throws InterruptedException {
System.out.println(" Description: " + result.getDescription());
System.out.println(" Base analyzer: " + result.getBaseAnalyzerId());
System.out.println(" Fields: " + result.getFieldSchema().getFields().size());
+ System.out.println(" Knowledge sources: "
+ + (result.getKnowledgeSources() == null ? 0 : result.getKnowledgeSources().size()));
// END: com.azure.ai.contentunderstanding.createAnalyzerWithLabelsAsync
// Verify analyzer creation
@@ -225,27 +277,79 @@ public static void main(String[] args) throws InterruptedException {
System.out.println(" MerchantName: String (Extract)");
System.out.println(" Items: Array of Objects (Generate)");
System.out.println(" - Quantity, Name, Price");
- System.out.println(" Total: String (Extract)");
+ System.out.println(" TotalPrice: String (Extract)");
ContentFieldDefinition itemsFieldResult = resultFields.get("Items");
System.out.println("Items field verified:");
System.out.println(" Type: " + itemsFieldResult.getType());
System.out.println(" Item properties: " + itemsFieldResult.getItemDefinition().getProperties().size());
-
+ })
+ .flatMap(result -> {
+ // If training data was provided, test the analyzer with a sample document.
+ if (resolvedSasUrl != null && !resolvedSasUrl.trim().isEmpty()) {
+ System.out.println("\nTesting analyzer with sample document...");
+ String testDocUrl
+ = "https://github.com/Azure-Samples/cognitive-services-REST-api-samples/raw/master/curl/form-recognizer/sample-invoice.pdf";
+
+ AnalysisInput input = new AnalysisInput();
+ input.setUrl(testDocUrl);
+
+ return client.beginAnalyze(finalAnalyzerId, Arrays.asList(input))
+ .last()
+ .flatMap(pollResponse -> {
+ if (pollResponse.getStatus().isComplete()) {
+ return pollResponse.getFinalResult();
+ } else {
+ return Mono.error(new RuntimeException(
+ "Polling completed unsuccessfully with status: " + pollResponse.getStatus()));
+ }
+ })
+ .doOnNext(analyzeResult -> {
+ System.out.println("Analysis completed!");
+ if (analyzeResult.getContents() != null
+ && !analyzeResult.getContents().isEmpty()
+ && analyzeResult.getContents().get(0) instanceof DocumentContent) {
+ DocumentContent docContent = (DocumentContent) analyzeResult.getContents().get(0);
+ System.out.println("Extracted fields: " + docContent.getFields().size());
+
+ if (docContent.getFields().containsKey("MerchantName")) {
+ ContentField merchantField = docContent.getFields().get("MerchantName");
+ if (merchantField != null && merchantField.getValue() instanceof String) {
+ System.out.println(" MerchantName: " + merchantField.getValue());
+ }
+ }
+ if (docContent.getFields().containsKey("TotalPrice")) {
+ ContentField totalField = docContent.getFields().get("TotalPrice");
+ if (totalField != null && totalField.getValue() instanceof String) {
+ System.out.println(" TotalPrice: " + totalField.getValue());
+ }
+ }
+ }
+ })
+ .thenReturn(result);
+ }
+ return Mono.just(result);
+ })
+ .doOnNext(result -> {
// Display API pattern information
System.out.println("\nCreateAnalyzerWithLabels API Pattern:");
System.out.println(" 1. Define field schema with nested structures (arrays, objects)");
System.out.println(" 2. Upload training data to Azure Blob Storage:");
- System.out.println(" - Documents: receipt1.pdf, receipt2.pdf, ...");
- System.out.println(" - Labels: receipt1.pdf.labels.json, receipt2.pdf.labels.json, ...");
- System.out.println(" - OCR: receipt1.pdf.result.json, receipt2.pdf.result.json, ...");
+ System.out.println(" - Documents: receipt1.jpg, receipt2.jpg, ...");
+ System.out.println(" - Labels: receipt1.jpg.labels.json, receipt2.jpg.labels.json, ...");
+ System.out.println(" - OCR: receipt1.jpg.result.json, receipt2.jpg.result.json, ...");
System.out.println(" 3. Create LabeledDataKnowledgeSource with storage SAS URL");
System.out.println(" 4. Create analyzer with field schema and knowledge sources");
System.out.println(" 5. Use analyzer for document analysis");
System.out.println("\nCreateAnalyzerWithLabels pattern demonstration completed");
- System.out.println(" Note: This sample demonstrates the API pattern.");
- System.out.println(" For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL with labeled data.");
+ if (resolvedSasUrl == null || resolvedSasUrl.trim().isEmpty()) {
+ System.out.println(" Note: This sample demonstrates the API pattern.");
+ System.out.println(
+ " For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL (Option A)");
+ System.out.println(
+ " or CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT + ..._CONTAINER (Option B).");
+ }
})
.doFinally(signalType -> {
// Cleanup using reactive pattern
@@ -275,4 +379,90 @@ public static void main(String[] args) throws InterruptedException {
// Wait for async operations to complete
latch.await(3, TimeUnit.MINUTES);
}
+
+    /**
+     * Uploads local training data files (images, .labels.json, .result.json) to an Azure Blob
+     * container. Existing blobs with the same name are overwritten. The local directory is
+     * validated before any storage call, so a bad path never leaves an empty container behind.
+     *
+     * @param storageAccountName storage account name (no {@code .blob.core.windows.net} suffix)
+     * @param containerName container name (created if it does not exist)
+     * @param credential credential with write access to the container
+     * @param localDirectory local folder containing the label files
+     * @param prefix optional blob prefix (virtual folder) to prepend, e.g. {@code "receipt_labels/"}
+     * @throws IllegalStateException if {@code localDirectory} contains no regular files
+     */
+    public static void uploadTrainingData(
+        String storageAccountName,
+        String containerName,
+        TokenCredential credential,
+        String localDirectory,
+        String prefix) {
+        // Fail fast on a missing or empty folder; listFiles returns null for a non-directory.
+        File dir = new File(localDirectory);
+        File[] files = dir.listFiles(File::isFile);
+        if (files == null || files.length == 0) {
+            throw new IllegalStateException(
+                "No training data files found under '" + dir.getAbsolutePath() + "'."
+                    + " Set CONTENTUNDERSTANDING_TRAINING_DATA_LOCAL_DIR to a folder containing your label files.");
+        }
+
+        // Strip trailing slashes; a prefix that is blank or only slashes means "container root".
+        String trimmedPrefix = prefix == null ? "" : prefix.replaceAll("/+$", "");
+        String blobNamePrefix = trimmedPrefix.trim().isEmpty() ? "" : trimmedPrefix + "/";
+
+        BlobContainerClient containerClient = new BlobContainerClientBuilder()
+            .endpoint("https://" + storageAccountName + ".blob.core.windows.net")
+            .containerName(containerName)
+            .credential(credential)
+            .buildClient();
+
+        // One call instead of exists() + create(), which can race when two runs share a container.
+        containerClient.createIfNotExists();
+
+        for (File file : files) {
+            String blobName = blobNamePrefix + file.getName();
+            System.out.println("Uploading " + file.getName() + " -> " + blobName);
+            containerClient.getBlobClient(blobName).uploadFromFile(file.getAbsolutePath(), true);
+        }
+    }
+
+    /**
+     * Builds a container-level User Delegation SAS URL granting Read and List access. The SAS is
+     * signed with a user delegation key requested through the supplied {@link TokenCredential},
+     * so no storage account key is needed.
+     *
+     * @param storageAccountName storage account name
+     * @param containerName container name
+     * @param credential credential to obtain a user delegation key
+     * @return container-scoped SAS URL valid for 1 hour
+     */
+    public static String generateUserDelegationSasUrl(
+        String storageAccountName,
+        String containerName,
+        TokenCredential credential) {
+        String accountUrl = "https://" + storageAccountName + ".blob.core.windows.net";
+        BlobServiceClient serviceClient = new BlobServiceClientBuilder()
+            .endpoint(accountUrl)
+            .credential(credential)
+            .buildClient();
+
+        // Backdate the validity window by 5 minutes so that clock skew between this machine and
+        // the storage service does not surface as AuthenticationFailed ("SAS not valid yet").
+        OffsetDateTime validFrom = OffsetDateTime.now().minusMinutes(5);
+        OffsetDateTime validUntil = OffsetDateTime.now().plusHours(1);
+        UserDelegationKey delegationKey = serviceClient.getUserDelegationKey(validFrom, validUntil);
+
+        BlobContainerSasPermission readAndList = new BlobContainerSasPermission();
+        readAndList.setReadPermission(true);
+        readAndList.setListPermission(true);
+
+        BlobServiceSasSignatureValues signatureValues
+            = new BlobServiceSasSignatureValues(validUntil, readAndList);
+        signatureValues.setStartTime(validFrom);
+
+        String sasQuery = serviceClient.getBlobContainerClient(containerName)
+            .generateUserDelegationSas(signatureValues, delegationKey);
+        return accountUrl + "/" + containerName + "?" + sasQuery;
+    }
}
diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsAsyncTest.java b/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsAsyncTest.java
index a8ff69adff65..60cf5a04ad56 100644
--- a/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsAsyncTest.java
+++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsAsyncTest.java
@@ -16,7 +16,10 @@
import com.azure.ai.contentunderstanding.models.GenerationMethod;
import com.azure.ai.contentunderstanding.models.KnowledgeSource;
import com.azure.ai.contentunderstanding.models.LabeledDataKnowledgeSource;
+import com.azure.ai.contentunderstanding.samples.Sample16_CreateAnalyzerWithLabels;
+import com.azure.core.credential.TokenCredential;
import com.azure.core.util.polling.PollerFlux;
+import com.azure.identity.DefaultAzureCredentialBuilder;
import reactor.core.publisher.Mono;
import org.junit.jupiter.api.Test;
@@ -39,24 +42,18 @@
* For an easier labeling workflow, use Azure AI Content Understanding Studio at
* https://contentunderstanding.ai.azure.com/
*
- * Labeled receipt data is available in this repo at {@code src/samples/resources/receipt_labels}.
- * For LIVE mode with real training data: upload that folder to Azure Blob Storage, generate a
- * container SAS URL with List/Read permissions, then set the environment variables below. Use
- * {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} if you uploaded into a subfolder
- * (e.g., "receipt_labels/"); omit or leave unset if files are at the container root.
- *
- * Required environment variables:
+ * Labeled receipt data is bundled at {@code src/samples/resources/receipt_labels}. To use it
+ * for training in LIVE / RECORD modes, choose one of:
*
- * - {@code CONTENTUNDERSTANDING_ENDPOINT} – Azure Content Understanding endpoint URL
+ * - Option A: provide a pre-generated container SAS URL via
+ * {@code CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL}.
+ * - Option B: set {@code CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT} and
+ * {@code CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER}; the test will upload the bundled
+ * label files via DefaultAzureCredential and generate a User Delegation SAS URL.
*
*
- * Optional environment variables (for labeled training data; used in LIVE mode):
- *
- * - {@code CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL} – SAS URL for the Azure Blob container
- * with labeled training data.
- * - {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} – Path prefix within the container
- * (e.g., "receipt_labels/"). Omit or leave unset if files are at the container root.
- *
+ * Use {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} if files live in a subfolder
+ * (e.g., "receipt_labels/"); omit if files are at the container root.
*/
public class Sample16_CreateAnalyzerWithLabelsAsyncTest extends ContentUnderstandingClientTestBase {
@@ -71,10 +68,33 @@ public class Sample16_CreateAnalyzerWithLabelsAsyncTest extends ContentUnderstan
public void testCreateAnalyzerWithLabelsAsync() {
String analyzerId = testResourceNamer.randomName("test_receipt_analyzer_", 50);
- // In PLAYBACK mode, use a placeholder URL to ensure consistent test behavior
- String trainingDataSasUrl = getTestMode() == TestMode.PLAYBACK
- ? "https://placeholder.blob.core.windows.net/container?sv=placeholder"
- : System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL");
+ // Resolve the training-data SAS URL.
+ // PLAYBACK uses a placeholder so the recorded request body matches.
+ // RECORD / LIVE: try Option A (SAS URL env), then Option B (storage account + container env).
+ String trainingDataSasUrl;
+ if (getTestMode() == TestMode.PLAYBACK) {
+ trainingDataSasUrl = "https://placeholder.blob.core.windows.net/container?sv=placeholder";
+ } else {
+ trainingDataSasUrl = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL");
+ String storageAccount = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT");
+ String container = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER");
+ if ((trainingDataSasUrl == null || trainingDataSasUrl.trim().isEmpty())
+ && storageAccount != null
+ && !storageAccount.trim().isEmpty()
+ && container != null
+ && !container.trim().isEmpty()) {
+ TokenCredential credential = new DefaultAzureCredentialBuilder().build();
+ String localLabelDir = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_LOCAL_DIR");
+ if (localLabelDir == null || localLabelDir.trim().isEmpty()) {
+ localLabelDir = "src/samples/resources/receipt_labels";
+ }
+ String trainingDataPrefixForUpload = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX");
+ Sample16_CreateAnalyzerWithLabels.uploadTrainingData(storageAccount, container, credential,
+ localLabelDir, trainingDataPrefixForUpload);
+ trainingDataSasUrl = Sample16_CreateAnalyzerWithLabels.generateUserDelegationSasUrl(storageAccount,
+ container, credential);
+ }
+ }
// Save prefix in test proxy variable during RECORD, load back during PLAYBACK so request bodies match.
String trainingDataPrefix;
if (getTestMode() == TestMode.PLAYBACK) {
@@ -135,12 +155,12 @@ public void testCreateAnalyzerWithLabelsAsync() {
itemsField.setItemDefinition(itemDefinition);
fields.put("Items", itemsField);
- // Total field
- ContentFieldDefinition totalField = new ContentFieldDefinition();
- totalField.setType(ContentFieldType.STRING);
- totalField.setMethod(GenerationMethod.EXTRACT);
- totalField.setDescription("Total amount");
- fields.put("Total", totalField);
+ // TotalPrice field
+ ContentFieldDefinition totalPriceField = new ContentFieldDefinition();
+ totalPriceField.setType(ContentFieldType.STRING);
+ totalPriceField.setMethod(GenerationMethod.EXTRACT);
+ totalPriceField.setDescription("Total amount");
+ fields.put("TotalPrice", totalPriceField);
ContentFieldSchema fieldSchema = new ContentFieldSchema();
fieldSchema.setName("receipt_schema");
@@ -159,7 +179,12 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println("Using labeled training data from: "
+ trainingDataSasUrl.substring(0, Math.min(50, trainingDataSasUrl.length())) + "...");
} else {
- System.out.println("No TRAINING_DATA_SAS_URL set, creating analyzer without labeled training data");
+ System.out.println(
+ "DEMO MODE: no training data configured. The analyzer will be created without labeled data.");
+ System.out.println(" Set CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL (Option A), or both");
+ System.out.println(
+ " CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT and CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER (Option B),");
+ System.out.println(" to fully exercise the labeled-data API path.");
}
// Step 3: Create analyzer (with or without labeled data)
@@ -195,6 +220,8 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println(" Description: " + result.getDescription());
System.out.println(" Base analyzer: " + result.getBaseAnalyzerId());
System.out.println(" Fields: " + result.getFieldSchema().getFields().size());
+ System.out.println(" Knowledge sources: "
+ + (result.getKnowledgeSources() == null ? 0 : result.getKnowledgeSources().size()));
// END: com.azure.ai.contentunderstanding.createAnalyzerWithLabelsAsync
// BEGIN: Assertion_ContentUnderstandingCreateAnalyzerWithLabelsAsync
@@ -212,7 +239,7 @@ public void testCreateAnalyzerWithLabelsAsync() {
Map resultFields = result.getFieldSchema().getFields();
assertTrue(resultFields.containsKey("MerchantName"), "Should have MerchantName field");
assertTrue(resultFields.containsKey("Items"), "Should have Items field");
- assertTrue(resultFields.containsKey("Total"), "Should have Total field");
+ assertTrue(resultFields.containsKey("TotalPrice"), "Should have TotalPrice field");
ContentFieldDefinition itemsFieldResult = resultFields.get("Items");
assertEquals(ContentFieldType.ARRAY, itemsFieldResult.getType());
@@ -223,7 +250,7 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println(" MerchantName: String (Extract)");
System.out.println(" Items: Array of Objects (Generate)");
System.out.println(" - Quantity, Name, Price");
- System.out.println(" Total: String (Extract)");
+ System.out.println(" TotalPrice: String (Extract)");
// END: Assertion_ContentUnderstandingCreateAnalyzerWithLabelsAsync
// If training data was provided, test the analyzer with a sample document
@@ -265,11 +292,11 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println(" MerchantName: " + merchantName);
}
}
- if (docContent.getFields().containsKey("Total")) {
- ContentField totalFieldValue = docContent.getFields().get("Total");
+ if (docContent.getFields().containsKey("TotalPrice")) {
+ ContentField totalFieldValue = docContent.getFields().get("TotalPrice");
if (totalFieldValue != null) {
String total = (String) totalFieldValue.getValue();
- System.out.println(" Total: " + total);
+ System.out.println(" TotalPrice: " + total);
}
}
}
@@ -279,9 +306,9 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println("\nCreateAnalyzerWithLabels API Pattern:");
System.out.println(" 1. Define field schema with nested structures (arrays, objects)");
System.out.println(" 2. Upload training data to Azure Blob Storage:");
- System.out.println(" - Documents: receipt1.pdf, receipt2.pdf, ...");
- System.out.println(" - Labels: receipt1.pdf.labels.json, receipt2.pdf.labels.json, ...");
- System.out.println(" - OCR: receipt1.pdf.result.json, receipt2.pdf.result.json, ...");
+ System.out.println(" - Documents: receipt1.jpg, receipt2.jpg, ...");
+ System.out.println(" - Labels: receipt1.jpg.labels.json, receipt2.jpg.labels.json, ...");
+ System.out.println(" - OCR: receipt1.jpg.result.json, receipt2.jpg.result.json, ...");
System.out.println(" 3. Create LabeledDataKnowledgeSource with storage SAS URL");
System.out.println(" 4. Create analyzer with field schema and knowledge sources");
System.out.println(" 5. Use analyzer for document analysis");
@@ -289,8 +316,10 @@ public void testCreateAnalyzerWithLabelsAsync() {
System.out.println("\nCreateAnalyzerWithLabels pattern demonstration completed");
if (trainingDataSasUrl == null || trainingDataSasUrl.trim().isEmpty()) {
System.out.println(" Note: This sample demonstrates the API pattern.");
- System.out.println(
- " For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL with labeled data.");
+ System.out
+ .println(" For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL (Option A)");
+ System.out
+ .println(" or CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT + ..._CONTAINER (Option B).");
}
} finally {
diff --git a/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsTest.java b/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsTest.java
index 9a7813712e7c..529798f275ac 100644
--- a/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsTest.java
+++ b/sdk/contentunderstanding/azure-ai-contentunderstanding/src/test/java/com/azure/ai/contentunderstanding/tests/samples/Sample16_CreateAnalyzerWithLabelsTest.java
@@ -16,7 +16,10 @@
import com.azure.ai.contentunderstanding.models.GenerationMethod;
import com.azure.ai.contentunderstanding.models.KnowledgeSource;
import com.azure.ai.contentunderstanding.models.LabeledDataKnowledgeSource;
+import com.azure.ai.contentunderstanding.samples.Sample16_CreateAnalyzerWithLabels;
+import com.azure.core.credential.TokenCredential;
import com.azure.core.util.polling.SyncPoller;
+import com.azure.identity.DefaultAzureCredentialBuilder;
import org.junit.jupiter.api.Test;
import com.azure.core.test.TestMode;
@@ -38,24 +41,18 @@
* For an easier labeling workflow, use Azure AI Content Understanding Studio at
* https://contentunderstanding.ai.azure.com/
*
- * Labeled receipt data is available in this repo at {@code src/samples/resources/receipt_labels}.
- * For LIVE mode with real training data: upload that folder to Azure Blob Storage, generate a
- * container SAS URL with List/Read permissions, then set the environment variables below. Use
- * {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} if you uploaded into a subfolder
- * (e.g., "receipt_labels/"); omit or leave unset if files are at the container root.
- *
- * Required environment variables:
+ * Labeled receipt data is bundled at {@code src/samples/resources/receipt_labels}. To use it
+ * for training in LIVE / RECORD modes, choose one of:
*
- * - {@code CONTENTUNDERSTANDING_ENDPOINT} – Azure Content Understanding endpoint URL
+ * - Option A: provide a pre-generated container SAS URL via
+ * {@code CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL}.
+ * - Option B: set {@code CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT} and
+ * {@code CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER}; the test will upload the bundled
+ * label files via DefaultAzureCredential and generate a User Delegation SAS URL.
*
*
- * Optional environment variables (for labeled training data; used in LIVE mode):
- *
- * - {@code CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL} – SAS URL for the Azure Blob container
- * with labeled training data.
- * - {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} – Path prefix within the container
- * (e.g., "receipt_labels/"). Omit or leave unset if files are at the container root.
- *
+ * Use {@code CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX} if files live in a subfolder
+ * (e.g., "receipt_labels/"); omit if files are at the container root.
*/
public class Sample16_CreateAnalyzerWithLabelsTest extends ContentUnderstandingClientTestBase {
@@ -70,10 +67,33 @@ public class Sample16_CreateAnalyzerWithLabelsTest extends ContentUnderstandingC
public void testCreateAnalyzerWithLabels() {
String analyzerId = testResourceNamer.randomName("test_receipt_analyzer_", 50);
- // In PLAYBACK mode, use a placeholder URL to ensure consistent test behavior
- String trainingDataSasUrl = getTestMode() == TestMode.PLAYBACK
- ? "https://placeholder.blob.core.windows.net/container?sv=placeholder"
- : System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL");
+ // Resolve the training-data SAS URL.
+ // PLAYBACK uses a placeholder so the recorded request body matches.
+ // RECORD / LIVE: try Option A (SAS URL env), then Option B (storage account + container env).
+ String trainingDataSasUrl;
+ if (getTestMode() == TestMode.PLAYBACK) {
+ trainingDataSasUrl = "https://placeholder.blob.core.windows.net/container?sv=placeholder";
+ } else {
+ trainingDataSasUrl = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL");
+ String storageAccount = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT");
+ String container = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER");
+ if ((trainingDataSasUrl == null || trainingDataSasUrl.trim().isEmpty())
+ && storageAccount != null
+ && !storageAccount.trim().isEmpty()
+ && container != null
+ && !container.trim().isEmpty()) {
+ TokenCredential credential = new DefaultAzureCredentialBuilder().build();
+ String localLabelDir = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_LOCAL_DIR");
+ if (localLabelDir == null || localLabelDir.trim().isEmpty()) {
+ localLabelDir = "src/samples/resources/receipt_labels";
+ }
+ String trainingDataPrefixForUpload = System.getenv("CONTENTUNDERSTANDING_TRAINING_DATA_PREFIX");
+ Sample16_CreateAnalyzerWithLabels.uploadTrainingData(storageAccount, container, credential,
+ localLabelDir, trainingDataPrefixForUpload);
+ trainingDataSasUrl = Sample16_CreateAnalyzerWithLabels.generateUserDelegationSasUrl(storageAccount,
+ container, credential);
+ }
+ }
// Save prefix in test proxy variable during RECORD, load back during PLAYBACK so request bodies match.
String trainingDataPrefix;
if (getTestMode() == TestMode.PLAYBACK) {
@@ -134,12 +154,12 @@ public void testCreateAnalyzerWithLabels() {
itemsField.setItemDefinition(itemDefinition);
fields.put("Items", itemsField);
- // Total field
- ContentFieldDefinition totalField = new ContentFieldDefinition();
- totalField.setType(ContentFieldType.STRING);
- totalField.setMethod(GenerationMethod.EXTRACT);
- totalField.setDescription("Total amount");
- fields.put("Total", totalField);
+ // TotalPrice field
+ ContentFieldDefinition totalPriceField = new ContentFieldDefinition();
+ totalPriceField.setType(ContentFieldType.STRING);
+ totalPriceField.setMethod(GenerationMethod.EXTRACT);
+ totalPriceField.setDescription("Total amount");
+ fields.put("TotalPrice", totalPriceField);
ContentFieldSchema fieldSchema = new ContentFieldSchema();
fieldSchema.setName("receipt_schema");
@@ -158,7 +178,12 @@ public void testCreateAnalyzerWithLabels() {
System.out.println("Using labeled training data from: "
+ trainingDataSasUrl.substring(0, Math.min(50, trainingDataSasUrl.length())) + "...");
} else {
- System.out.println("No TRAINING_DATA_SAS_URL set, creating analyzer without labeled training data");
+ System.out.println(
+ "DEMO MODE: no training data configured. The analyzer will be created without labeled data.");
+ System.out.println(" Set CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL (Option A), or both");
+ System.out.println(
+ " CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT and CONTENTUNDERSTANDING_TRAINING_DATA_CONTAINER (Option B),");
+ System.out.println(" to fully exercise the labeled-data API path.");
}
// Step 3: Create analyzer (with or without labeled data)
@@ -184,6 +209,8 @@ public void testCreateAnalyzerWithLabels() {
System.out.println(" Description: " + result.getDescription());
System.out.println(" Base analyzer: " + result.getBaseAnalyzerId());
System.out.println(" Fields: " + result.getFieldSchema().getFields().size());
+ System.out.println(" Knowledge sources: "
+ + (result.getKnowledgeSources() == null ? 0 : result.getKnowledgeSources().size()));
// END: com.azure.ai.contentunderstanding.createAnalyzerWithLabels
// BEGIN: Assertion_ContentUnderstandingCreateAnalyzerWithLabels
@@ -201,7 +228,7 @@ public void testCreateAnalyzerWithLabels() {
Map resultFields = result.getFieldSchema().getFields();
assertTrue(resultFields.containsKey("MerchantName"), "Should have MerchantName field");
assertTrue(resultFields.containsKey("Items"), "Should have Items field");
- assertTrue(resultFields.containsKey("Total"), "Should have Total field");
+ assertTrue(resultFields.containsKey("TotalPrice"), "Should have TotalPrice field");
ContentFieldDefinition itemsFieldResult = resultFields.get("Items");
assertEquals(ContentFieldType.ARRAY, itemsFieldResult.getType());
@@ -212,7 +239,7 @@ public void testCreateAnalyzerWithLabels() {
System.out.println(" MerchantName: String (Extract)");
System.out.println(" Items: Array of Objects (Generate)");
System.out.println(" - Quantity, Name, Price");
- System.out.println(" Total: String (Extract)");
+ System.out.println(" TotalPrice: String (Extract)");
// END: Assertion_ContentUnderstandingCreateAnalyzerWithLabels
// If training data was provided, test the analyzer with a sample document
@@ -244,11 +271,11 @@ public void testCreateAnalyzerWithLabels() {
System.out.println(" MerchantName: " + merchantName);
}
}
- if (docContent.getFields().containsKey("Total")) {
- ContentField totalFieldValue = docContent.getFields().get("Total");
+ if (docContent.getFields().containsKey("TotalPrice")) {
+ ContentField totalFieldValue = docContent.getFields().get("TotalPrice");
if (totalFieldValue != null) {
String total = (String) totalFieldValue.getValue();
- System.out.println(" Total: " + total);
+ System.out.println(" TotalPrice: " + total);
}
}
}
@@ -258,9 +285,9 @@ public void testCreateAnalyzerWithLabels() {
System.out.println("\nCreateAnalyzerWithLabels API Pattern:");
System.out.println(" 1. Define field schema with nested structures (arrays, objects)");
System.out.println(" 2. Upload training data to Azure Blob Storage:");
- System.out.println(" - Documents: receipt1.pdf, receipt2.pdf, ...");
- System.out.println(" - Labels: receipt1.pdf.labels.json, receipt2.pdf.labels.json, ...");
- System.out.println(" - OCR: receipt1.pdf.result.json, receipt2.pdf.result.json, ...");
+ System.out.println(" - Documents: receipt1.jpg, receipt2.jpg, ...");
+ System.out.println(" - Labels: receipt1.jpg.labels.json, receipt2.jpg.labels.json, ...");
+ System.out.println(" - OCR: receipt1.jpg.result.json, receipt2.jpg.result.json, ...");
System.out.println(" 3. Create LabeledDataKnowledgeSource with storage SAS URL");
System.out.println(" 4. Create analyzer with field schema and knowledge sources");
System.out.println(" 5. Use analyzer for document analysis");
@@ -268,8 +295,10 @@ public void testCreateAnalyzerWithLabels() {
System.out.println("\nCreateAnalyzerWithLabels pattern demonstration completed");
if (trainingDataSasUrl == null || trainingDataSasUrl.trim().isEmpty()) {
System.out.println(" Note: This sample demonstrates the API pattern.");
- System.out.println(
- " For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL with labeled data.");
+ System.out
+ .println(" For actual training, provide CONTENTUNDERSTANDING_TRAINING_DATA_SAS_URL (Option A)");
+ System.out
+ .println(" or CONTENTUNDERSTANDING_TRAINING_DATA_STORAGE_ACCOUNT + ..._CONTAINER (Option B).");
}
} finally {