파서 컨트랙트용 샘플 코드

CoE 리드는 Document Automation과 통합하기 위해 파서 패키지에 대한 입력 및 출력 컨트랙트를 제공해야 합니다.

다음은 입력 및 출력 컨트랙트의 샘플 코드입니다.

입력 컨트랙트


package aws;


import com.automationanywhere.botcommand.data.impl.StringValue;
import com.automationanywhere.commandsdk.annotations.*;
import com.automationanywhere.commandsdk.annotations.rules.LocalFile;
import com.automationanywhere.commandsdk.annotations.rules.NotEmpty;
import com.automationanywhere.commandsdk.model.AttributeType;
import com.automationanywhere.core.security.SecureString;
import static com.automationanywhere.commandsdk.model.DataType.STRING;


/**
 * Sample command that illustrates the input contract of a parser package.
 *
 * <p>The command takes a local file path and a credential as inputs and returns a string value.
 */
@BotCommand
@CommandPkg(
    name = "ExtractionCommand",
    label = "Extraction Command",
    description = "Extraction Command",
    node_label = "Extraction Command",
    return_type = STRING,
    return_label = "Extraction Command Response",
    minimum_botagent_version = "21.98",
    minimum_controlroom_version = "10520")
public class ExtractionCommand {

  /**
   * Method annotated with {@code @Execute}; declares the two inputs of the command.
   *
   * @param inputFilePath path of the input file, declared as a non-empty local file attribute
   * @param serviceAccount credential attribute received as a {@link SecureString}
   * @return a {@link StringValue} wrapping an empty string (placeholder in this sample)
   */
  @Execute
  public StringValue compute(
      @Idx(index = "1", type = AttributeType.FILE)
          @LocalFile
          @Pkg(label = "Image File Path")
          @NotEmpty
          final String inputFilePath,
      @Idx(index = "2", type = AttributeType.CREDENTIAL)
          @Pkg(label = "Service Account")
          final SecureString serviceAccount) {

    // Unwrap the credential into a plain string; this sample does not use it any further.
    final String accountSecret = serviceAccount.getInsecureString();

    return new StringValue("");
  }
}

출력 컨트랙트

명령의 응답은 Document Automation이 작동할 수 있는 적절한 스키마가 포함된 JSON 형식이어야 합니다. 다음은 명령에서 생성된 JSON 출력의 스키마입니다.

{
"metadata": {
"documentId": "unique Id used to Indentify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document",
"clusterId": "unique Id used to capture Heuristic feedback",
"numberOfPages": "Number pages in the document"
},
"imagePreprocessingResult": {
"metadata": {
"documentId": "unique Id used to Indentify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"pages": [
{
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"deskew": true,
"orientation": true,
"renderDpi": 0,
"width": "width of the Page",
"height": "height of the Page"
}
]
},
"ocrResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"numberOfPages": "Number pages in the document",
"learningInstanceSetting": {
"provider": "Name of the provider",
"version": "Provider Version",
"langCodes": [
"Language code in DA"
]
},
"timeInMs": "time taken in ms to get the OCR result for the document",
"pages": [
{
"id": "unqiue Id to indetify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"langCode": "Code that refers to the language in DA"
}
]
},
"blocks": [
{
"id": "Block Id of the word segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"text": "text from the segment",
"blockType": "Type of the Block. 
Possible options :
WORD/LINE/INFO_BLOCK/KEY_VAL_BLOCK/TABLE/TABLE_HEADER
/TABLE_HEADER_ELEM/COLUMN/KEY_INFO_BLOCK/NO_OBJECT",
"pageNum": "Page number in the document",
"confidence": "OCR Confidence"
}
]
},
"docDetectResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"featureObjects": [
{
"id": "Feature object UUID",
"blockType": "INFO_BLOCK",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"text": "Feature object Text",
"confidence": "confidence",
"ocrConfidence": "OCR Confidence ",
"pageNum": "Page number in the document"
}
]
},
"extractionResult": {
"metadata": {
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": Time taken in ms to process the entire document",
"pages": [
{
"id": "unqiue Id to indetify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"width": "width of the Page",
"height": "height of the Page"
"langCode": "Code that refers to the language in DA"
}
]
},
"keyValueFeatures": [
{
"id": "unique Id to indetify the keyValue found in the document",
"domainFieldKey": "field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": 0.909,
"extractionScore": "OCR Confidence",
"key": {
"id": "unique Id to indetify the key found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"value": {
"id": "unique Id to indetify the value found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"extractedDataType": "Data type of the extracted field"
}
],
"tableFeatures": [
{
"id": "unique Id to indetify the table in the document",
"headers": [
{
"id": "unique Id to indetify the header column in the document",
"domainFieldKey": "header field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score",
}
],
"rows": [
{
"id": "unique Id to indetify the row in the table",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"cells": [
{
"id": "unique Id to indetify the cell in the row",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y3 axis of the bounding box rectangle"
},
"domainFieldKey": "column field that needs to be extracted",
"text": "text from the segment",
"pageNum": "Page number in the document",
"extractedDataType": "TEXT",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score",
}
],
"pageNum": 1
}
]
}
]
}
}