Skip to content

Commit 584f346

Browse files
author
awstools
committed
feat(client-textract): This release adds support for specifying and extracting information from documents using the Queries feature within Analyze Document API
1 parent ae5a8f2 commit 584f346

File tree

8 files changed

+322
-16
lines changed

8 files changed

+322
-16
lines changed

clients/client-textract/src/Textract.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,16 @@ export class Textract extends TextractClient {
7474
* All lines and words that are detected in the document are returned (including text that doesn't have a
7575
* relationship with the value of <code>FeatureTypes</code>). </p>
7676
* </li>
77+
* <li>
78+
* <p>Queries. A QUERY_RESULT Block object contains the answer to the query, the associated alias, and an ID that
79+
* connects it to the query asked. This Block also contains a location and an attached confidence score.</p>
80+
* </li>
7781
* </ul>
7882
*
7983
* <p>Selection elements such as check boxes and option buttons (radio buttons) can be detected in form data and in tables.
8084
* A SELECTION_ELEMENT <code>Block</code> object contains information about a selection element,
8185
* including the selection status.</p>
86+
*
8287
* <p>You can choose which type of analysis to perform by specifying the <code>FeatureTypes</code> list.
8388
* </p>
8489
* <p>The output is returned in a list of <code>Block</code> objects.</p>
@@ -165,7 +170,8 @@ export class Textract extends TextractClient {
165170
/**
166171
* <p>Analyzes identity documents for relevant information. This information is extracted
167172
* and returned as <code>IdentityDocumentFields</code>, which records both the normalized
168-
* field and value of the extracted text.</p>
173+
* field and value of the extracted text. Unlike other Amazon Textract operations, <code>AnalyzeID</code>
174+
* doesn't return any Geometry data.</p>
169175
*/
170176
public analyzeID(args: AnalyzeIDCommandInput, options?: __HttpHandlerOptions): Promise<AnalyzeIDCommandOutput>;
171177
public analyzeID(args: AnalyzeIDCommandInput, cb: (err: any, data?: AnalyzeIDCommandOutput) => void): void;
@@ -192,7 +198,7 @@ export class Textract extends TextractClient {
192198

193199
/**
194200
* <p>Detects text in the input document. Amazon Textract can detect lines of text and the
195-
* words that make up a line of text. The input document must be an image in JPEG or PNG
201+
* words that make up a line of text. The input document must be an image in JPEG, PNG, PDF, or TIFF
196202
* format. <code>DetectDocumentText</code> returns the detected text in an array of <a>Block</a> objects. </p>
197203
* <p>Each document page has as an associated <code>Block</code> of type PAGE. Each PAGE <code>Block</code> object
198204
* is the parent of LINE <code>Block</code> objects that represent the lines of detected text on a page. A LINE <code>Block</code> object is
@@ -262,14 +268,17 @@ export class Textract extends TextractClient {
262268
* relationship with the value of the <code>StartDocumentAnalysis</code>
263269
* <code>FeatureTypes</code> input parameter). </p>
264270
* </li>
271+
* <li>
272+
* <p>Queries. A QUERY_RESULT Block object contains the answer to the query, the associated alias, and an ID that
273+
* connects it to the query asked. This Block also contains a location and an attached confidence score.</p>
274+
* </li>
265275
* </ul>
266276
*
267277
* <p>Selection elements such as check boxes and option buttons (radio buttons) can be detected in form data and in tables.
268278
* A SELECTION_ELEMENT <code>Block</code> object contains information about a selection element,
269279
* including the selection status.</p>
270280
*
271281
*
272-
*
273282
* <p>Use the <code>MaxResults</code> parameter to limit the number of blocks that are
274283
* returned. If there are more results than specified in <code>MaxResults</code>, the value of
275284
* <code>NextToken</code> in the operation response contains a pagination token for getting

clients/client-textract/src/commands/AnalyzeDocumentCommand.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,16 @@ export interface AnalyzeDocumentCommandOutput extends AnalyzeDocumentResponse, _
4141
* All lines and words that are detected in the document are returned (including text that doesn't have a
4242
* relationship with the value of <code>FeatureTypes</code>). </p>
4343
* </li>
44+
* <li>
45+
* <p>Queries. A QUERY_RESULT Block object contains the answer to the query, the associated alias, and an ID that
46+
* connects it to the query asked. This Block also contains a location and an attached confidence score.</p>
47+
* </li>
4448
* </ul>
4549
*
4650
* <p>Selection elements such as check boxes and option buttons (radio buttons) can be detected in form data and in tables.
4751
* A SELECTION_ELEMENT <code>Block</code> object contains information about a selection element,
4852
* including the selection status.</p>
53+
*
4954
* <p>You can choose which type of analysis to perform by specifying the <code>FeatureTypes</code> list.
5055
* </p>
5156
* <p>The output is returned in a list of <code>Block</code> objects.</p>

clients/client-textract/src/commands/AnalyzeIDCommand.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ export interface AnalyzeIDCommandOutput extends AnalyzeIDResponse, __MetadataBea
2121
/**
2222
* <p>Analyzes identity documents for relevant information. This information is extracted
2323
* and returned as <code>IdentityDocumentFields</code>, which records both the normalized
24-
* field and value of the extracted text.</p>
24+
* field and value of the extracted text. Unlike other Amazon Textract operations, <code>AnalyzeID</code>
25+
* doesn't return any Geometry data.</p>
2526
* @example
2627
* Use a bare-bones client and the command you need to make an API call.
2728
* ```javascript

clients/client-textract/src/commands/DetectDocumentTextCommand.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ export interface DetectDocumentTextCommandOutput extends DetectDocumentTextRespo
2323

2424
/**
2525
* <p>Detects text in the input document. Amazon Textract can detect lines of text and the
26-
* words that make up a line of text. The input document must be an image in JPEG or PNG
26+
* words that make up a line of text. The input document must be an image in JPEG, PNG, PDF, or TIFF
2727
* format. <code>DetectDocumentText</code> returns the detected text in an array of <a>Block</a> objects. </p>
2828
* <p>Each document page has as an associated <code>Block</code> of type PAGE. Each PAGE <code>Block</code> object
2929
* is the parent of LINE <code>Block</code> objects that represent the lines of detected text on a page. A LINE <code>Block</code> object is

clients/client-textract/src/commands/GetDocumentAnalysisCommand.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,17 @@ export interface GetDocumentAnalysisCommandOutput extends GetDocumentAnalysisRes
5151
* relationship with the value of the <code>StartDocumentAnalysis</code>
5252
* <code>FeatureTypes</code> input parameter). </p>
5353
* </li>
54+
* <li>
55+
* <p>Queries. A QUERY_RESULT Block object contains the answer to the query, the associated alias, and an ID that
56+
* connects it to the query asked. This Block also contains a location and an attached confidence score.</p>
57+
* </li>
5458
* </ul>
5559
*
5660
* <p>Selection elements such as check boxes and option buttons (radio buttons) can be detected in form data and in tables.
5761
* A SELECTION_ELEMENT <code>Block</code> object contains information about a selection element,
5862
* including the selection status.</p>
5963
*
6064
*
61-
*
6265
* <p>Use the <code>MaxResults</code> parameter to limit the number of blocks that are
6366
* returned. If there are more results than specified in <code>MaxResults</code>, the value of
6467
* <code>NextToken</code> in the operation response contains a pagination token for getting

clients/client-textract/src/models/models_0.ts

Lines changed: 101 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ export namespace Document {
114114

115115
/**
 * String enum of analysis feature identifiers; each member's value is identical to its name.
 * NOTE(review): presumably these are the values accepted in the <code>FeatureTypes</code> list
 * mentioned in the operation docs - confirm against the service model.
 */
export enum FeatureType {
116116
FORMS = "FORMS",
117+
QUERIES = "QUERIES",
117118
TABLES = "TABLES",
118119
}
119120

@@ -174,11 +175,79 @@ export namespace HumanLoopConfig {
174175
});
175176
}
176177

178+
/**
179+
* <p>Each query contains the question you want to ask in the <code>Text</code> field and the alias you want to associate with it.</p>
180+
*/
181+
export interface Query {
182+
/**
183+
* <p>Question that Amazon Textract will apply to the document. An example would be "What is the customer's SSN?"</p>
184+
*/
185+
Text: string | undefined;
186+
187+
/**
188+
* <p>Alias attached to the query, for ease of location.</p>
189+
*/
190+
Alias?: string;
191+
192+
/**
193+
* <p>List of pages associated with the query. The following is a list of rules for using this parameter.</p>
194+
* <ul>
195+
* <li>
196+
* <p>If a page is not specified, it is set to <code>["1"]</code> by default.</p>
197+
* </li>
198+
* <li>
199+
* <p>The following characters are allowed in the parameter's string:
200+
* <code>0 1 2 3 4 5 6 7 8 9 - *</code>. No whitespace is allowed.</p>
201+
* </li>
202+
* <li>
203+
* <p>When using <code>*</code> to indicate all pages, it must be the only element
204+
* in the string.</p>
205+
* </li>
206+
* <li>
207+
* <p>You can use page intervals, such as <code>["1-3", "1-1", "4-*"]</code>, where <code>*</code> indicates the last page of the
208+
* document.</p>
209+
* </li>
210+
* <li>
211+
* <p>Specified pages must be greater than 0 and less than or equal to the number of pages in the document.</p>
212+
* </li>
213+
* </ul>
214+
*/
215+
Pages?: string[];
216+
}
217+
218+
/**
 * Companion namespace for the Query interface; it holds a single internal helper.
 */
export namespace Query {
219+
/**
220+
* @internal
 * Returns a new object containing a shallow copy of every property of obj.
 * No field is removed or masked here.
 * NOTE(review): the name suggests this is applied before logging - confirm against callers.
221+
*/
222+
export const filterSensitiveLog = (obj: Query): any => ({
223+
...obj,
224+
});
225+
}
226+
227+
/**
228+
* <p>Wrapper around the list of queries sent with a request. It is the type of the optional <code>QueriesConfig</code> member on <code>AnalyzeDocumentRequest</code> and <code>StartDocumentAnalysisRequest</code>.</p>
229+
*/
230+
export interface QueriesConfig {
231+
/**
232+
* <p>The list of <code>Query</code> objects, each holding a question and, optionally, an alias and a list of pages.</p>
233+
*/
234+
Queries: Query[] | undefined;
235+
}
236+
237+
/**
 * Companion namespace for the QueriesConfig interface; it holds a single internal helper.
 */
export namespace QueriesConfig {
238+
/**
239+
* @internal
 * Returns a new object containing a shallow copy of every property of obj.
 * The nested Queries array is copied by reference and no field is removed or masked.
 * NOTE(review): the name suggests this is applied before logging - confirm against callers.
240+
*/
241+
export const filterSensitiveLog = (obj: QueriesConfig): any => ({
242+
...obj,
243+
});
244+
}
245+
177246
export interface AnalyzeDocumentRequest {
178247
/**
179248
* <p>The input document as base64-encoded bytes or an Amazon S3 object. If you use the AWS CLI
180249
* to call Amazon Textract operations, you can't pass image bytes. The document must be an image
181-
* in JPEG or PNG format.</p>
250+
* in JPEG, PNG, PDF, or TIFF format.</p>
182251
* <p>If you're using an AWS SDK to call Amazon Textract, you might not need to base64-encode
183252
* image bytes that are passed using the <code>Bytes</code> field. </p>
184253
*/
@@ -197,6 +266,11 @@ export interface AnalyzeDocumentRequest {
197266
* <p>Sets the configuration for the human in the loop workflow for analyzing documents.</p>
198267
*/
199268
HumanLoopConfig?: HumanLoopConfig;
269+
270+
/**
271+
* <p>Contains Queries and the alias for those Queries, as determined by the input. </p>
272+
*/
273+
QueriesConfig?: QueriesConfig;
200274
}
201275

202276
export namespace AnalyzeDocumentRequest {
@@ -214,6 +288,8 @@ export enum BlockType {
214288
LINE = "LINE",
215289
MERGED_CELL = "MERGED_CELL",
216290
PAGE = "PAGE",
291+
QUERY = "QUERY",
292+
QUERY_RESULT = "QUERY_RESULT",
217293
SELECTION_ELEMENT = "SELECTION_ELEMENT",
218294
TABLE = "TABLE",
219295
TITLE = "TITLE",
@@ -334,6 +410,7 @@ export namespace Geometry {
334410
}
335411

336412
export enum RelationshipType {
413+
ANSWER = "ANSWER",
337414
CHILD = "CHILD",
338415
COMPLEX_FEATURES = "COMPLEX_FEATURES",
339416
MERGED_CELL = "MERGED_CELL",
@@ -463,6 +540,17 @@ export interface Block {
463540
* value of <code>SelectionStatus</code> to determine the status of the selection
464541
* element.</p>
465542
* </li>
543+
* <li>
544+
* <p>
545+
* <i>QUERY</i> - A question asked during the call of AnalyzeDocument. Contains an
546+
* alias and an ID that attaches it to its answer.</p>
547+
* </li>
548+
* <li>
549+
* <p>
550+
* <i>QUERY_RESULT</i> - A response to a question asked during the call
551+
* of AnalyzeDocument. Comes with an alias and ID for ease of locating in a
552+
* response. Also contains location and confidence score.</p>
553+
* </li>
466554
* </ul>
467555
*/
468556
BlockType?: BlockType | string;
@@ -574,6 +662,11 @@ export interface Block {
574662
* considered to be a single-page document.</p>
575663
*/
576664
Page?: number;
665+
666+
/**
667+
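* <p>The <code>Query</code> (question text, alias and pages) carried by this block, when the response includes one.</p>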
668+
*/
669+
Query?: Query;
577670
}
578671

579672
export namespace Block {
@@ -880,8 +973,8 @@ export class ThrottlingException extends __BaseException {
880973
}
881974

882975
/**
883-
* <p>The format of the input document isn't supported. Documents for synchronous operations can be in
884-
* PNG or JPEG format only. Documents for asynchronous operations can be in PDF format.</p>
976+
* <p>The format of the input document isn't supported. Documents for operations can be in
977+
* PNG, JPEG, PDF, or TIFF format.</p>
885978
*/
886979
export class UnsupportedDocumentException extends __BaseException {
887980
readonly name: "UnsupportedDocumentException" = "UnsupportedDocumentException";
@@ -1826,6 +1919,11 @@ export interface StartDocumentAnalysisRequest {
18261919
* be encrypted server side, using SSE-S3.</p>
18271920
*/
18281921
KMSKeyId?: string;
1922+
1923+
/**
1924+
* <p>Contains Queries and the alias for those Queries, as determined by the input.</p>
1925+
*/
1926+
QueriesConfig?: QueriesConfig;
18291927
}
18301928

18311929
export namespace StartDocumentAnalysisRequest {

clients/client-textract/src/protocols/Aws_json1_1.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ import {
9090
OutputConfig,
9191
Point,
9292
ProvisionedThroughputExceededException,
93+
QueriesConfig,
94+
Query,
9395
Relationship,
9496
S3Object,
9597
StartDocumentAnalysisRequest,
@@ -1117,6 +1119,10 @@ const serializeAws_json1_1AnalyzeDocumentRequest = (input: AnalyzeDocumentReques
11171119
input.HumanLoopConfig !== null && {
11181120
HumanLoopConfig: serializeAws_json1_1HumanLoopConfig(input.HumanLoopConfig, context),
11191121
}),
1122+
...(input.QueriesConfig !== undefined &&
1123+
input.QueriesConfig !== null && {
1124+
QueriesConfig: serializeAws_json1_1QueriesConfig(input.QueriesConfig, context),
1125+
}),
11201126
};
11211127
};
11221128

@@ -1265,6 +1271,44 @@ const serializeAws_json1_1OutputConfig = (input: OutputConfig, context: __SerdeC
12651271
};
12661272
};
12671273

1274+
/**
 * Serializes a list of Query entries.
 * Entries that are null or undefined are dropped; every remaining entry is passed to
 * serializeAws_json1_1Query together with the same context.
 *
 * @param input - the Query list to serialize.
 * @param context - serde context, forwarded unchanged to the per-entry serializer.
 * @returns an array with one serialized value per non-nullish entry, in input order.
 */
const serializeAws_json1_1Queries = (input: Query[], context: __SerdeContext): any => {
1275+
return input
1276+
// Loose inequality removes both null and undefined entries.
.filter((e: any) => e != null)
1277+
.map((entry) => {
1278+
// Cannot be true here: the filter above has already removed null entries.
if (entry === null) {
1279+
return null as any;
1280+
}
1281+
return serializeAws_json1_1Query(entry, context);
1282+
});
1283+
};
1284+
1285+
/**
 * Serializes a QueriesConfig into a plain object.
 *
 * @param input - the QueriesConfig to serialize.
 * @param context - serde context, forwarded to serializeAws_json1_1Queries.
 * @returns an object with a Queries key when input.Queries is neither undefined nor null;
 * otherwise an empty object.
 */
const serializeAws_json1_1QueriesConfig = (input: QueriesConfig, context: __SerdeContext): any => {
1286+
return {
1287+
// When the guard is false the spread operand is `false`, which contributes no keys.
...(input.Queries !== undefined &&
1288+
input.Queries !== null && { Queries: serializeAws_json1_1Queries(input.Queries, context) }),
1289+
};
1290+
};
1291+
1292+
/**
 * Serializes a single Query into a plain object.
 * Each of Alias, Pages and Text is emitted only when it is neither undefined nor null.
 * Alias and Text are copied as-is; Pages goes through serializeAws_json1_1QueryPages.
 *
 * @param input - the Query to serialize.
 * @param context - serde context, forwarded to the Pages serializer.
 * @returns an object containing only the members that were set on input.
 */
const serializeAws_json1_1Query = (input: Query, context: __SerdeContext): any => {
1293+
return {
1294+
...(input.Alias !== undefined && input.Alias !== null && { Alias: input.Alias }),
1295+
...(input.Pages !== undefined &&
1296+
input.Pages !== null && { Pages: serializeAws_json1_1QueryPages(input.Pages, context) }),
1297+
...(input.Text !== undefined && input.Text !== null && { Text: input.Text }),
1298+
};
1299+
};
1300+
1301+
/**
 * Serializes the Pages list of a Query.
 * Null and undefined entries are dropped; the remaining strings are returned unchanged
 * in a new array.
 *
 * @param input - the page strings to serialize.
 * @param context - serde context; not read by this function.
 * @returns a new array holding the non-nullish entries of input, in order.
 */
const serializeAws_json1_1QueryPages = (input: string[], context: __SerdeContext): any => {
1302+
return input
1303+
// Loose inequality removes both null and undefined entries.
.filter((e: any) => e != null)
1304+
.map((entry) => {
1305+
// Cannot be true here: the filter above has already removed null entries.
if (entry === null) {
1306+
return null as any;
1307+
}
1308+
return entry;
1309+
});
1310+
};
1311+
12681312
const serializeAws_json1_1S3Object = (input: S3Object, context: __SerdeContext): any => {
12691313
return {
12701314
...(input.Bucket !== undefined && input.Bucket !== null && { Bucket: input.Bucket }),
@@ -1294,6 +1338,10 @@ const serializeAws_json1_1StartDocumentAnalysisRequest = (
12941338
}),
12951339
...(input.OutputConfig !== undefined &&
12961340
input.OutputConfig !== null && { OutputConfig: serializeAws_json1_1OutputConfig(input.OutputConfig, context) }),
1341+
...(input.QueriesConfig !== undefined &&
1342+
input.QueriesConfig !== null && {
1343+
QueriesConfig: serializeAws_json1_1QueriesConfig(input.QueriesConfig, context),
1344+
}),
12971345
};
12981346
};
12991347

@@ -1430,6 +1478,10 @@ const deserializeAws_json1_1Block = (output: any, context: __SerdeContext): Bloc
14301478
: undefined,
14311479
Id: __expectString(output.Id),
14321480
Page: __expectInt32(output.Page),
1481+
Query:
1482+
output.Query !== undefined && output.Query !== null
1483+
? deserializeAws_json1_1Query(output.Query, context)
1484+
: undefined,
14331485
Relationships:
14341486
output.Relationships !== undefined && output.Relationships !== null
14351487
? deserializeAws_json1_1RelationshipList(output.Relationships, context)
@@ -1921,6 +1973,29 @@ const deserializeAws_json1_1ProvisionedThroughputExceededException = (
19211973
} as any;
19221974
};
19231975

1976+
/**
 * Builds a Query from a parsed response value.
 * Alias and Text are read through __expectString; Pages is deserialized with
 * deserializeAws_json1_1QueryPages when present, and is undefined otherwise.
 * NOTE(review): __expectString is defined outside this view; presumably it validates
 * that the value is a string (or absent) - confirm.
 *
 * @param output - the parsed value to read Alias, Pages and Text from.
 * @param context - serde context, forwarded to the Pages deserializer.
 * @returns the assembled Query.
 */
const deserializeAws_json1_1Query = (output: any, context: __SerdeContext): Query => {
1977+
return {
1978+
Alias: __expectString(output.Alias),
1979+
Pages:
1980+
output.Pages !== undefined && output.Pages !== null
1981+
? deserializeAws_json1_1QueryPages(output.Pages, context)
1982+
: undefined,
1983+
Text: __expectString(output.Text),
1984+
// The literal is cast to any, so it is not type-checked against the Query return type.
} as any;
1985+
};
1986+
1987+
/**
 * Builds the Pages string list of a Query from a parsed response value.
 * A falsy output is treated as an empty list. Null and undefined entries are dropped,
 * and each remaining entry is passed through __expectString.
 * NOTE(review): __expectString is defined outside this view; presumably it validates
 * that the entry is a string - confirm.
 *
 * @param output - the parsed array (or a falsy value) to read from.
 * @param context - serde context; not read by this function.
 * @returns an array with one value per non-nullish entry, in input order.
 */
const deserializeAws_json1_1QueryPages = (output: any, context: __SerdeContext): string[] => {
1988+
// Fall back to an empty array so a missing list yields [] rather than throwing.
const retVal = (output || [])
1989+
.filter((e: any) => e != null)
1990+
.map((entry: any) => {
1991+
// Cannot be true here: the filter above has already removed null entries.
if (entry === null) {
1992+
return null as any;
1993+
}
1994+
return __expectString(entry) as any;
1995+
});
1996+
return retVal;
1997+
};
1998+
19241999
const deserializeAws_json1_1Relationship = (output: any, context: __SerdeContext): Relationship => {
19252000
return {
19262001
Ids:

0 commit comments

Comments
 (0)