Exemples de codes pour les contrats d'analyseur
- Dernière mise à jour : 2024/10/31
Exemples de codes pour les contrats d'analyseur
En tant que responsable du CoE, vous devez fournir un contrat d'entrée et de sortie pour le package d'analyseur afin de l'intégrer à Automatisation de documents.
Voici des exemples de codes pour les contrats d'entrée et de sortie :
Contrat d'entrée
package aws;
import com.automationanywhere.botcommand.data.impl.StringValue;
import com.automationanywhere.commandsdk.annotations.*;
import com.automationanywhere.commandsdk.annotations.rules.LocalFile;
import com.automationanywhere.commandsdk.annotations.rules.NotEmpty;
import com.automationanywhere.commandsdk.model.AttributeType;
import com.automationanywhere.core.security.SecureString;
import static com.automationanywhere.commandsdk.model.DataType.STRING;
/**
 * Sample input contract for a parser package command.
 *
 * <p>Declares a bot command named "ExtractionCommand" that takes a file path and a
 * credential and returns a string value. The method body is a placeholder: it returns an
 * empty string rather than a real extraction response.
 */
@BotCommand
@CommandPkg(
    name = "ExtractionCommand",
    label = "Extraction Command",
    description = "Extraction Command",
    node_label = "Extraction Command",
    return_type = STRING,
    return_label = "Extraction Command Response",
    minimum_botagent_version = "21.98",
    minimum_controlroom_version = "10520")
public class ExtractionCommand {

  /**
   * Entry point of the command.
   *
   * @param inputFilePath value of the first input, declared as a non-empty local file
   *     attribute labelled "Image File Path"
   * @param serviceAccount value of the second input, declared as a credential attribute
   *     labelled "Service Account"
   * @return a {@link StringValue} wrapping an empty string (placeholder response)
   */
  @Execute
  public StringValue compute(
      @Idx(index = "1", type = AttributeType.FILE)
      @LocalFile
      @Pkg(label = "Image File Path")
      @NotEmpty
      final String inputFilePath,
      @Idx(index = "2", type = AttributeType.CREDENTIAL)
      @Pkg(label = "Service Account")
      final SecureString serviceAccount) {
    // Unwraps the credential into a plain String; the sample does not use the value further.
    final String serviceAccountPlainText = serviceAccount.getInsecureString();

    return new StringValue("");
  }
}
Contrat de sortie
La réponse de la commande doit être au format JSON avec un schéma approprié compatible avec Automatisation de documents. Voici le schéma de la sortie JSON générée par la commande :
{
"metadata": {
"documentId": "unique Id used to identify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document",
"clusterId": "unique Id used to capture Heuristic feedback",
"numberOfPages": "Number of pages in the document"
},
"imagePreprocessingResult": {
"metadata": {
"documentId": "unique Id used to identify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"pages": [
{
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"deskew": true,
"orientation": true,
"renderDpi": 0,
"width": "width of the Page",
"height": "height of the Page"
}
]
},
"ocrResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"numberOfPages": "Number of pages in the document",
"learningInstanceSetting": {
"provider": "Name of the provider",
"version": "Provider Version",
"langCodes": [
"Language code in DA"
]
},
"timeInMs": "time taken in ms to get the OCR result for the document",
"pages": [
{
"id": "unique Id to identify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"langCode": "Code that refers to the language in DA"
}
]
},
"blocks": [
{
"id": "Block Id of the word segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"blockType": "Type of the Block. Possible options: WORD/LINE/INFO_BLOCK/KEY_VAL_BLOCK/TABLE/TABLE_HEADER/TABLE_HEADER_ELEM/COLUMN/KEY_INFO_BLOCK/NO_OBJECT",
"pageNum": "Page number in the document",
"confidence": "OCR Confidence"
}
]
},
"docDetectResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"featureObjects": [
{
"id": "Feature object UUID",
"blockType": "INFO_BLOCK",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "Feature object Text",
"confidence": "confidence",
"ocrConfidence": "OCR Confidence ",
"pageNum": "Page number in the document"
}
]
},
"extractionResult": {
"metadata": {
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document",
"pages": [
{
"id": "unique Id to identify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"width": "width of the Page",
"height": "height of the Page",
"langCode": "Code that refers to the language in DA"
}
]
},
"keyValueFeatures": [
{
"id": "unique Id to identify the keyValue found in the document",
"domainFieldKey": "field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": 0.909,
"extractionScore": "Extraction score",
"key": {
"id": "unique Id to identify the key found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"value": {
"id": "unique Id to identify the value found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"extractedDataType": "Data type of the extracted field"
}
],
"tableFeatures": [
{
"id": "unique Id to identify the table in the document",
"headers": [
{
"id": "unique Id to identify the header column in the document",
"domainFieldKey": "header field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score"
}
],
"rows": [
{
"id": "unique Id to identify the row in the table",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"cells": [
{
"id": "unique Id to identify the cell in the row",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"domainFieldKey": "column field that needs to be extracted",
"text": "text from the segment",
"pageNum": "Page number in the document",
"extractedDataType": "TEXT",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score"
}
],
"pageNum": 1
}
]
}
]
}
}