The idea is to map the fields of IDocumentData<DictionaryData> (from the IntelligentOCR namespace) to ContentValidationData (from the DocumentProcessing namespace).
This is just a sample generated by an LLM, and the IDocumentData fields it uses are wrong. If you have a specification of the IDocumentData type, please share it.
using System;
using System.Collections.Generic;
using System.IO;
using UiPath.IntelligentOCR.StudioWeb.Activities.DataExtraction;
using UiPath.DocumentProcessing.Contracts.Actions;
using UiPath.DocumentProcessing.Contracts.Results;
using UiPath.DocumentProcessing.Contracts.Dom;
/// <summary>
/// Maps an <c>IDocumentData&lt;DictionaryData&gt;</c> instance to a
/// <c>ContentValidationData</c> descriptor.
/// </summary>
/// <remarks>
/// NOTE(review): this is sample code. The members read from the document data
/// (<c>DocumentId</c>, <c>ExtractedData</c>, <c>Text</c>) have not been checked
/// against the real IDocumentData contract -- confirm before relying on them.
/// Bucket, folder and path values are hard-coded or randomly generated placeholders.
/// </remarks>
public static class DocumentDataToValidationDataConverter
{
    /// <summary>Top-level folder under which every generated path is placed.</summary>
    private const string RootFolderName = "validation-data";

    /// <summary>
    /// Builds a <c>ContentValidationData</c> for the given document data.
    /// </summary>
    /// <param name="documentData">The document data to convert; must not be null.</param>
    /// <returns>
    /// A new <c>ContentValidationData</c> whose artifact paths all live under one
    /// freshly generated folder.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="documentData"/> is null.
    /// </exception>
    public static ContentValidationData Convert(IDocumentData<DictionaryData> documentData)
    {
        if (documentData == null)
        {
            throw new ArgumentNullException(nameof(documentData));
        }

        var extractionResult = BuildExtractionResult(documentData);
        var document = BuildDocument(documentData);

        // One folder per conversion: every artifact of this document shares the
        // same parent directory instead of each path getting its own random GUID
        // folder, which scattered related files across unrelated locations.
        var dataFolder = Path.Combine(RootFolderName, Guid.NewGuid().ToString());

        return new ContentValidationData
        {
            DocumentId = extractionResult.DocumentId,
            // NOTE(review): the document id doubles as the file name here -- confirm
            // whether the real original file name is available on the input.
            OriginalDocumentFileName = document.DocumentID,
            AutomaticExtractionResultsPath = GeneratePath(dataFolder, "auto_results.json"),
            ValidatedExtractionResultsPath = GeneratePath(dataFolder, "validated_results.json"),
            DocumentObjectModelPath = GeneratePath(dataFolder, "document_model.json"),
            ExtractorPayloadsPath = GeneratePath(dataFolder, "extractor_payloads.json"),
            TextPath = GeneratePath(dataFolder, "document_text.txt"),
            TaxonomyPath = GeneratePath(dataFolder, "taxonomy.json"),
            // Placeholder storage identifiers.
            BucketId = "default-bucket-id",
            BucketName = "default-bucket",
            FolderId = Guid.NewGuid().ToString(),
            FolderKey = Guid.NewGuid().ToString(),
            DocumentPath = GeneratePath(dataFolder, document.DocumentID),
            EncodedDocumentPath = GeneratePath(dataFolder, "encoded_document.pdf"),
            CustomizationInfoPath = GeneratePath(dataFolder, "customization.json"),
            AdditionalDataPath = GeneratePath(dataFolder, "additional/"),
            ShowOnlyRelevantPageRange = true
        };
    }

    /// <summary>
    /// Creates an <c>ExtractionResult</c> from the document data: copies the
    /// document id, takes the payload of the extracted data (null when there is
    /// no extracted data), sets the results version to 0, and converts the
    /// extracted data to a results document, falling back to an empty
    /// <c>ResultsDocument</c> when that yields null.
    /// </summary>
    /// <param name="documentData">The source document data.</param>
    /// <returns>The populated extraction result.</returns>
    private static ExtractionResult BuildExtractionResult(IDocumentData<DictionaryData> documentData)
    {
        var extractedData = documentData.ExtractedData;

        return new ExtractionResult
        {
            DocumentId = documentData.DocumentId,
            ExtractorPayloads = extractedData?.Payload,
            ResultsVersion = 0,
            ResultsDocument = extractedData?.ToResultsDocument() ?? new ResultsDocument()
        };
    }

    /// <summary>
    /// Creates a <c>Document</c> from the document data. The content type is
    /// hard-coded to "application/pdf", the metadata list starts empty, and the
    /// length is the character count of the document text (0 when the text is null).
    /// </summary>
    /// <param name="documentData">The source document data.</param>
    /// <returns>The populated document object.</returns>
    private static Document BuildDocument(IDocumentData<DictionaryData> documentData)
    {
        return new Document
        {
            ContentType = "application/pdf",
            DocumentID = documentData.DocumentId,
            DocumentMetadata = new List<Metadata>(),
            Length = documentData.Text?.Length ?? 0,
            // NOTE(review): placeholder -- populate as needed, and confirm the
            // declared type of Pages (it may be a collection rather than a single Page).
            Pages = new Page()
        };
    }

    /// <summary>
    /// Combines the per-conversion folder with a file (or sub-folder) name.
    /// </summary>
    /// <param name="folder">The folder shared by all artifacts of one conversion.</param>
    /// <param name="fileName">The file or sub-folder name to append.</param>
    /// <returns>The combined relative path.</returns>
    private static string GeneratePath(string folder, string fileName)
    {
        return Path.Combine(folder, fileName);
    }
}