パーサー契約書のサンプル コード

CoE リードとして、パーサー パッケージをDocument Automationと統合するために、パーサー パッケージの入力および出力契約書を提供する必要があります。

以下は、入力契約書と出力契約書のサンプル コードです。

入力契約書


package aws;


import com.automationanywhere.botcommand.data.impl.StringValue;
import com.automationanywhere.commandsdk.annotations.*;
import com.automationanywhere.commandsdk.annotations.rules.LocalFile;
import com.automationanywhere.commandsdk.annotations.rules.NotEmpty;
import com.automationanywhere.commandsdk.model.AttributeType;
import com.automationanywhere.core.security.SecureString;
import static com.automationanywhere.commandsdk.model.DataType.STRING;


/**
 * Sample input contract for a parser package command.
 *
 * <p>Declares a bot command named {@code ExtractionCommand} whose declared return type is
 * {@code STRING}. The command takes a local file path and a credential as its two inputs.
 */
@BotCommand
@CommandPkg(
    name = "ExtractionCommand",
    label = "Extraction Command",
    description = "Extraction Command",
    node_label = "Extraction Command",
    return_type = STRING,
    return_label = "Extraction Command Response",
    minimum_botagent_version = "21.98",
    minimum_controlroom_version = "10520")
public class ExtractionCommand {

    /**
     * Entry point of the command.
     *
     * <p>In this sample the credential is unwrapped but not used further, and the
     * returned value is always an empty string.
     * NOTE(review): presumably a real parser would place its extraction logic here and
     * return the JSON response as the string value; confirm against the output contract.
     *
     * @param inputFilePath  path of the local input file (input index 1, labelled "Image File Path")
     * @param serviceAccount credential input (input index 2, labelled "Service Account")
     * @return a {@link StringValue} wrapping the empty string
     */
    @Execute
    public StringValue compute(
            @Idx(index = "1", type = AttributeType.FILE)
            @LocalFile
            @Pkg(label = "Image File Path")
            @NotEmpty
            final String inputFilePath,
            @Idx(index = "2", type = AttributeType.CREDENTIAL)
            @Pkg(label = "Service Account")
            final SecureString serviceAccount) {

        // Unwrap the credential into a plain string; the sample does not use it afterwards.
        final String serviceAccountSecret = serviceAccount.getInsecureString();

        // The sample returns an empty payload.
        return new StringValue("");
    }
}

出力契約書

コマンドからの応答は、Document Automationが機能する、適切なスキーマを持つ JSON 形式である必要があります。以下は、コマンドから生成される JSON 出力のスキーマです。

{
"metadata": {
"documentId": "unique Id used to identify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document",
"clusterId": "unique Id used to capture Heuristic feedback",
"numberOfPages": "Number of pages in the document"
},
"imagePreprocessingResult": {
"metadata": {
"documentId": "unique Id used to identify document in DA",
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"pages": [
{
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"deskew": true,
"orientation": true,
"renderDpi": 0,
"width": "width of the Page",
"height": "height of the Page"
}
]
},
"ocrResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"numberOfPages": "Number of pages in the document",
"learningInstanceSetting": {
"provider": "Name of the provider",
"version": "Provider Version",
"langCodes": [
"Language code in DA"
]
},
"timeInMs": "time taken in ms to get the OCR result for the document",
"pages": [
{
"id": "unique Id to identify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"langCode": "Code that refers to the language in DA"
}
]
},
"blocks": [
{
"id": "Block Id of the word segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"blockType": "Type of the Block. Possible options: WORD/LINE/INFO_BLOCK/KEY_VAL_BLOCK/TABLE/TABLE_HEADER/TABLE_HEADER_ELEM/COLUMN/KEY_INFO_BLOCK/NO_OBJECT",
"pageNum": "Page number in the document",
"confidence": "OCR Confidence"
}
]
},
"docDetectResult": {
"metadata": {
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document"
},
"featureObjects": [
{
"id": "Feature object UUID",
"blockType": "INFO_BLOCK",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "Feature object Text",
"confidence": "confidence",
"ocrConfidence": "OCR Confidence ",
"pageNum": "Page number in the document"
}
]
},
"extractionResult": {
"metadata": {
"filepath": "Input File Path that refers to the Path used by Bot runner during execution",
"executionStatus": {
"statusCode": "Status code which indicates the response post extraction",
"statusMessage": "status message which indicates the response post extraction ",
"message": "Message that details the result"
},
"timeInMs": "Time taken in ms to process the entire document",
"pages": [
{
"id": "unique Id to identify the page in the document",
"pageNum": "Page number in the document",
"filepath": "File Path that refers to the Page in the document post splitting the document into pages",
"width": "width of the Page",
"height": "height of the Page",
"langCode": "Code that refers to the language in DA"
}
]
},
"keyValueFeatures": [
{
"id": "unique Id to identify the keyValue found in the document",
"domainFieldKey": "field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": 0.909,
"extractionScore": "Extraction score",
"key": {
"id": "unique Id to identify the key found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"value": {
"id": "unique Id to identify the value found in the document",
"text": "text from the segment",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"extractionScore": "Extraction score"
},
"extractedDataType": "Data type of the extracted field"
}
],
"tableFeatures": [
{
"id": "unique Id to identify the table in the document",
"headers": [
{
"id": "unique Id to identify the header column in the document",
"domainFieldKey": "header field that needs to be extracted",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"text": "text from the segment",
"pageNum": "Page number in the document",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score"
}
],
"rows": [
{
"id": "unique Id to identify the row in the table",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"cells": [
{
"id": "unique Id to identify the cell in the row",
"geometry": {
"x1": "x1 axis of the bounding box rectangle",
"y1": "y1 axis of the bounding box rectangle",
"x2": "x2 axis of the bounding box rectangle",
"y2": "y2 axis of the bounding box rectangle"
},
"domainFieldKey": "column field that needs to be extracted",
"text": "text from the segment",
"pageNum": "Page number in the document",
"extractedDataType": "TEXT",
"ocrConfidence": "OCR Confidence",
"extractionScore": "Extraction score"
}
],
"pageNum": 1
}
]
}
]
}
}