Skip to content

Commit a29ccf3

Browse files
nirupa-kumarnnegrey
authored and committed
samples: Vision beta samples (#1154)
* Add Vision beta (Localized objects & Handwriting OCR) * Updates after review * Updates after review * Update to released client library * Update client library * Update client library * Update to Inc.
1 parent 27150e7 commit a29ccf3

File tree

4 files changed

+343
-43
lines changed

4 files changed

+343
-43
lines changed
519 KB
Loading

vision/snippets/resources/puppies.jpg

280 KB
Loading

vision/snippets/src/main/java/com/example/vision/Detect.java

Lines changed: 291 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2017 Google Inc.
2+
* Copyright 2018 Google Inc.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -22,45 +22,46 @@
2222
import com.google.cloud.storage.Storage;
2323
import com.google.cloud.storage.Storage.BlobListOption;
2424
import com.google.cloud.storage.StorageOptions;
25-
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse;
26-
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder;
27-
import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest;
28-
import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse;
29-
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest;
30-
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse;
31-
import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse;
32-
import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse;
33-
import com.google.cloud.vision.v1p2beta1.Block;
34-
import com.google.cloud.vision.v1p2beta1.ColorInfo;
35-
import com.google.cloud.vision.v1p2beta1.CropHint;
36-
import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation;
37-
import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation;
38-
import com.google.cloud.vision.v1p2beta1.EntityAnnotation;
39-
import com.google.cloud.vision.v1p2beta1.FaceAnnotation;
40-
import com.google.cloud.vision.v1p2beta1.Feature;
41-
import com.google.cloud.vision.v1p2beta1.Feature.Type;
42-
import com.google.cloud.vision.v1p2beta1.GcsDestination;
43-
import com.google.cloud.vision.v1p2beta1.GcsSource;
44-
import com.google.cloud.vision.v1p2beta1.Image;
45-
import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient;
46-
import com.google.cloud.vision.v1p2beta1.ImageContext;
47-
import com.google.cloud.vision.v1p2beta1.ImageSource;
48-
import com.google.cloud.vision.v1p2beta1.InputConfig;
49-
import com.google.cloud.vision.v1p2beta1.LocationInfo;
50-
import com.google.cloud.vision.v1p2beta1.OperationMetadata;
51-
import com.google.cloud.vision.v1p2beta1.OutputConfig;
52-
import com.google.cloud.vision.v1p2beta1.Page;
53-
import com.google.cloud.vision.v1p2beta1.Paragraph;
54-
import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation;
55-
import com.google.cloud.vision.v1p2beta1.Symbol;
56-
import com.google.cloud.vision.v1p2beta1.TextAnnotation;
57-
import com.google.cloud.vision.v1p2beta1.WebDetection;
58-
import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity;
59-
import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage;
60-
import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel;
61-
import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage;
62-
import com.google.cloud.vision.v1p2beta1.WebDetectionParams;
63-
import com.google.cloud.vision.v1p2beta1.Word;
25+
import com.google.cloud.vision.v1p3beta1.AnnotateFileResponse;
26+
import com.google.cloud.vision.v1p3beta1.AnnotateFileResponse.Builder;
27+
import com.google.cloud.vision.v1p3beta1.AnnotateImageRequest;
28+
import com.google.cloud.vision.v1p3beta1.AnnotateImageResponse;
29+
import com.google.cloud.vision.v1p3beta1.AsyncAnnotateFileRequest;
30+
import com.google.cloud.vision.v1p3beta1.AsyncAnnotateFileResponse;
31+
import com.google.cloud.vision.v1p3beta1.AsyncBatchAnnotateFilesResponse;
32+
import com.google.cloud.vision.v1p3beta1.BatchAnnotateImagesResponse;
33+
import com.google.cloud.vision.v1p3beta1.Block;
34+
import com.google.cloud.vision.v1p3beta1.ColorInfo;
35+
import com.google.cloud.vision.v1p3beta1.CropHint;
36+
import com.google.cloud.vision.v1p3beta1.CropHintsAnnotation;
37+
import com.google.cloud.vision.v1p3beta1.DominantColorsAnnotation;
38+
import com.google.cloud.vision.v1p3beta1.EntityAnnotation;
39+
import com.google.cloud.vision.v1p3beta1.FaceAnnotation;
40+
import com.google.cloud.vision.v1p3beta1.Feature;
41+
import com.google.cloud.vision.v1p3beta1.Feature.Type;
42+
import com.google.cloud.vision.v1p3beta1.GcsDestination;
43+
import com.google.cloud.vision.v1p3beta1.GcsSource;
44+
import com.google.cloud.vision.v1p3beta1.Image;
45+
import com.google.cloud.vision.v1p3beta1.ImageAnnotatorClient;
46+
import com.google.cloud.vision.v1p3beta1.ImageContext;
47+
import com.google.cloud.vision.v1p3beta1.ImageSource;
48+
import com.google.cloud.vision.v1p3beta1.InputConfig;
49+
import com.google.cloud.vision.v1p3beta1.LocalizedObjectAnnotation;
50+
import com.google.cloud.vision.v1p3beta1.LocationInfo;
51+
import com.google.cloud.vision.v1p3beta1.OperationMetadata;
52+
import com.google.cloud.vision.v1p3beta1.OutputConfig;
53+
import com.google.cloud.vision.v1p3beta1.Page;
54+
import com.google.cloud.vision.v1p3beta1.Paragraph;
55+
import com.google.cloud.vision.v1p3beta1.SafeSearchAnnotation;
56+
import com.google.cloud.vision.v1p3beta1.Symbol;
57+
import com.google.cloud.vision.v1p3beta1.TextAnnotation;
58+
import com.google.cloud.vision.v1p3beta1.WebDetection;
59+
import com.google.cloud.vision.v1p3beta1.WebDetection.WebEntity;
60+
import com.google.cloud.vision.v1p3beta1.WebDetection.WebImage;
61+
import com.google.cloud.vision.v1p3beta1.WebDetection.WebLabel;
62+
import com.google.cloud.vision.v1p3beta1.WebDetection.WebPage;
63+
import com.google.cloud.vision.v1p3beta1.WebDetectionParams;
64+
import com.google.cloud.vision.v1p3beta1.Word;
6465
import com.google.protobuf.ByteString;
6566
import com.google.protobuf.util.JsonFormat;
6667

@@ -101,7 +102,8 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
101102
+ "\n"
102103
+ "Commands:\n"
103104
+ "\tfaces | labels | landmarks | logos | text | safe-search | properties"
104-
+ "| web | web-entities | web-entities-include-geo | crop | ocr \n"
105+
+ "| web | web-entities | web-entities-include-geo | crop | ocr\n"
106+
+ "| object-localization| handwritten-ocr\n"
105107
+ "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
106108
+ "resource (gs://...)\n"
107109
+ "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
@@ -189,6 +191,18 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
189191
} else if (command.equals("ocr")) {
190192
String destPath = args.length > 2 ? args[2] : "";
191193
detectDocumentsGcs(path, destPath);
194+
} else if (command.equals("object-localization")) {
195+
if (path.startsWith("gs://")) {
196+
detectLocalizedObjectsGcs(path, out);
197+
} else {
198+
detectLocalizedObjects(path, out);
199+
}
200+
} else if (command.equals("handwritten-ocr")) {
201+
if (path.startsWith("gs://")) {
202+
detectHandwrittenOcrGcs(path, out);
203+
} else {
204+
detectHandwrittenOcr(path, out);
205+
}
192206
}
193207
}
194208

@@ -1423,4 +1437,240 @@ public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinatio
14231437
}
14241438
}
14251439
// [END vision_async_detect_document_ocr]
1440+
1441+
// [START vision_localize_objects]
1442+
/**
1443+
* Detects localized objects in the specified local image.
1444+
*
1445+
* @param filePath The path to the file to perform localized object detection on.
1446+
* @param out A {@link PrintStream} to write detected objects to.
1447+
* @throws Exception on errors while closing the client.
1448+
* @throws IOException on Input/Output errors.
1449+
*/
1450+
public static void detectLocalizedObjects(String filePath, PrintStream out)
1451+
throws Exception, IOException {
1452+
List<AnnotateImageRequest> requests = new ArrayList<>();
1453+
1454+
ByteString imgBytes = ByteString.readFrom(new FileInputStream(filePath));
1455+
1456+
Image img = Image.newBuilder().setContent(imgBytes).build();
1457+
AnnotateImageRequest request =
1458+
AnnotateImageRequest.newBuilder()
1459+
.addFeatures(Feature.newBuilder().setType(Type.OBJECT_LOCALIZATION))
1460+
.setImage(img)
1461+
.build();
1462+
requests.add(request);
1463+
1464+
// Perform the request
1465+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1466+
BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
1467+
List<AnnotateImageResponse> responses = response.getResponsesList();
1468+
1469+
// Display the results
1470+
for (AnnotateImageResponse res : responses) {
1471+
for (LocalizedObjectAnnotation entity : res.getLocalizedObjectAnnotationsList()) {
1472+
out.format("Object name: %s\n", entity.getName());
1473+
out.format("Confidence: %s\n", entity.getScore());
1474+
out.format("Normalized Vertices:\n");
1475+
entity
1476+
.getBoundingPoly()
1477+
.getNormalizedVerticesList()
1478+
.forEach(vertex -> out.format("- (%s, %s)\n", vertex.getX(), vertex.getY()));
1479+
}
1480+
}
1481+
}
1482+
}
1483+
// [END vision_localize_objects]
1484+
1485+
// [START vision_localize_objects_uri]
1486+
/**
1487+
* Detects localized objects in a remote image on Google Cloud Storage.
1488+
*
1489+
* @param gcsPath The path to the remote file on Google Cloud Storage to detect localized objects
1490+
* on.
1491+
* @param out A {@link PrintStream} to write detected objects to.
1492+
* @throws Exception on errors while closing the client.
1493+
* @throws IOException on Input/Output errors.
1494+
*/
1495+
public static void detectLocalizedObjectsGcs(String gcsPath, PrintStream out)
1496+
throws Exception, IOException {
1497+
List<AnnotateImageRequest> requests = new ArrayList<>();
1498+
1499+
ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
1500+
Image img = Image.newBuilder().setSource(imgSource).build();
1501+
1502+
AnnotateImageRequest request =
1503+
AnnotateImageRequest.newBuilder()
1504+
.addFeatures(Feature.newBuilder().setType(Type.OBJECT_LOCALIZATION))
1505+
.setImage(img)
1506+
.build();
1507+
requests.add(request);
1508+
1509+
// Perform the request
1510+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1511+
BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
1512+
List<AnnotateImageResponse> responses = response.getResponsesList();
1513+
client.close();
1514+
// Display the results
1515+
for (AnnotateImageResponse res : responses) {
1516+
for (LocalizedObjectAnnotation entity : res.getLocalizedObjectAnnotationsList()) {
1517+
out.format("Object name: %s\n", entity.getName());
1518+
out.format("Confidence: %s\n", entity.getScore());
1519+
out.format("Normalized Vertices:\n");
1520+
entity
1521+
.getBoundingPoly()
1522+
.getNormalizedVerticesList()
1523+
.forEach(vertex -> out.format("- (%s, %s)\n", vertex.getX(), vertex.getY()));
1524+
}
1525+
}
1526+
}
1527+
}
1528+
// [END vision_localize_objects_uri]
1529+
1530+
// [START vision_handwritten_ocr]
1531+
/**
1532+
* Performs handwritten text detection on a local image file.
1533+
*
1534+
* @param filePath The path to the local file to detect handwritten text on.
1535+
* @param out A {@link PrintStream} to write the results to.
1536+
* @throws Exception on errors while closing the client.
1537+
* @throws IOException on Input/Output errors.
1538+
*/
1539+
public static void detectHandwrittenOcr(String filePath, PrintStream out) throws Exception {
1540+
List<AnnotateImageRequest> requests = new ArrayList<>();
1541+
1542+
ByteString imgBytes = ByteString.readFrom(new FileInputStream(filePath));
1543+
1544+
Image img = Image.newBuilder().setContent(imgBytes).build();
1545+
Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
1546+
// Set the Language Hint codes for handwritten OCR
1547+
ImageContext imageContext =
1548+
ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();
1549+
1550+
AnnotateImageRequest request =
1551+
AnnotateImageRequest.newBuilder()
1552+
.addFeatures(feat)
1553+
.setImage(img)
1554+
.setImageContext(imageContext)
1555+
.build();
1556+
requests.add(request);
1557+
1558+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1559+
BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
1560+
List<AnnotateImageResponse> responses = response.getResponsesList();
1561+
client.close();
1562+
1563+
for (AnnotateImageResponse res : responses) {
1564+
if (res.hasError()) {
1565+
out.printf("Error: %s\n", res.getError().getMessage());
1566+
return;
1567+
}
1568+
1569+
// For full list of available annotations, see http://g.co/cloud/vision/docs
1570+
TextAnnotation annotation = res.getFullTextAnnotation();
1571+
for (Page page : annotation.getPagesList()) {
1572+
String pageText = "";
1573+
for (Block block : page.getBlocksList()) {
1574+
String blockText = "";
1575+
for (Paragraph para : block.getParagraphsList()) {
1576+
String paraText = "";
1577+
for (Word word : para.getWordsList()) {
1578+
String wordText = "";
1579+
for (Symbol symbol : word.getSymbolsList()) {
1580+
wordText = wordText + symbol.getText();
1581+
out.format(
1582+
"Symbol text: %s (confidence: %f)\n",
1583+
symbol.getText(), symbol.getConfidence());
1584+
}
1585+
out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
1586+
paraText = String.format("%s %s", paraText, wordText);
1587+
}
1588+
// Output Example using Paragraph:
1589+
out.println("\nParagraph: \n" + paraText);
1590+
out.format("Paragraph Confidence: %f\n", para.getConfidence());
1591+
blockText = blockText + paraText;
1592+
}
1593+
pageText = pageText + blockText;
1594+
}
1595+
}
1596+
out.println("\nComplete annotation:");
1597+
out.println(annotation.getText());
1598+
}
1599+
}
1600+
}
1601+
// [END vision_handwritten_ocr]
1602+
1603+
// [START vision_handwritten_ocr_uri]
1604+
/**
1605+
* Performs handwritten text detection on a remote image on Google Cloud Storage.
1606+
*
1607+
* @param gcsPath The path to the remote file on Google Cloud Storage to detect handwritten text
1608+
* on.
1609+
* @param out A {@link PrintStream} to write the results to.
1610+
* @throws Exception on errors while closing the client.
1611+
* @throws IOException on Input/Output errors.
1612+
*/
1613+
public static void detectHandwrittenOcrGcs(String gcsPath, PrintStream out) throws Exception {
1614+
List<AnnotateImageRequest> requests = new ArrayList<>();
1615+
1616+
ImageSource imgSource = ImageSource.newBuilder().setGcsImageUri(gcsPath).build();
1617+
Image img = Image.newBuilder().setSource(imgSource).build();
1618+
1619+
Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
1620+
// Set the parameters for the image
1621+
ImageContext imageContext =
1622+
ImageContext.newBuilder().addLanguageHints("en-t-i0-handwrit").build();
1623+
1624+
AnnotateImageRequest request =
1625+
AnnotateImageRequest.newBuilder()
1626+
.addFeatures(feat)
1627+
.setImage(img)
1628+
.setImageContext(imageContext)
1629+
.build();
1630+
requests.add(request);
1631+
1632+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1633+
BatchAnnotateImagesResponse response = client.batchAnnotateImages(requests);
1634+
List<AnnotateImageResponse> responses = response.getResponsesList();
1635+
client.close();
1636+
1637+
for (AnnotateImageResponse res : responses) {
1638+
if (res.hasError()) {
1639+
out.printf("Error: %s\n", res.getError().getMessage());
1640+
return;
1641+
}
1642+
1643+
// For full list of available annotations, see http://g.co/cloud/vision/docs
1644+
TextAnnotation annotation = res.getFullTextAnnotation();
1645+
for (Page page : annotation.getPagesList()) {
1646+
String pageText = "";
1647+
for (Block block : page.getBlocksList()) {
1648+
String blockText = "";
1649+
for (Paragraph para : block.getParagraphsList()) {
1650+
String paraText = "";
1651+
for (Word word : para.getWordsList()) {
1652+
String wordText = "";
1653+
for (Symbol symbol : word.getSymbolsList()) {
1654+
wordText = wordText + symbol.getText();
1655+
out.format(
1656+
"Symbol text: %s (confidence: %f)\n",
1657+
symbol.getText(), symbol.getConfidence());
1658+
}
1659+
out.format("Word text: %s (confidence: %f)\n\n", wordText, word.getConfidence());
1660+
paraText = String.format("%s %s", paraText, wordText);
1661+
}
1662+
// Output Example using Paragraph:
1663+
out.println("\nParagraph: \n" + paraText);
1664+
out.format("Paragraph Confidence: %f\n", para.getConfidence());
1665+
blockText = blockText + paraText;
1666+
}
1667+
pageText = pageText + blockText;
1668+
}
1669+
}
1670+
out.println("\nComplete annotation:");
1671+
out.println(annotation.getText());
1672+
}
1673+
}
1674+
}
1675+
// [END vision_handwritten_ocr_uri]
14261676
}

0 commit comments

Comments
 (0)