Skip to content

Commit 7107943

Browse files
gguuss authored and dpebot committed
Fixes for text encoding (GoogleCloudPlatform/python-docs-samples#913)
* Fixes for non-ASCII encodings
* Adds test for UTF
* Style fix
1 parent 55e4bc4 commit 7107943

File tree

1 file changed

+22
-12
lines changed

1 file changed

+22
-12
lines changed

samples/snippets/snippets.py

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -24,12 +24,16 @@
2424
import argparse
2525

2626
from google.cloud import language
27+
import six
2728

2829

2930
def sentiment_text(text):
3031
"""Detects sentiment in the text."""
3132
language_client = language.Client()
3233

34+
if isinstance(text, six.binary_type):
35+
text = text.decode('utf-8')
36+
3337
# Instantiates a plain text document.
3438
document = language_client.document_from_text(text)
3539

@@ -60,6 +64,9 @@ def entities_text(text):
6064
"""Detects entities in the text."""
6165
language_client = language.Client()
6266

67+
if isinstance(text, six.binary_type):
68+
text = text.decode('utf-8')
69+
6370
# Instantiates a plain text document.
6471
document = language_client.document_from_text(text)
6572

@@ -69,11 +76,11 @@ def entities_text(text):
6976

7077
for entity in entities:
7178
print('=' * 20)
72-
print('{:<16}: {}'.format('name', entity.name))
73-
print('{:<16}: {}'.format('type', entity.entity_type))
74-
print('{:<16}: {}'.format('metadata', entity.metadata))
75-
print('{:<16}: {}'.format('salience', entity.salience))
76-
print('{:<16}: {}'.format('wikipedia_url',
79+
print(u'{:<16}: {}'.format('name', entity.name))
80+
print(u'{:<16}: {}'.format('type', entity.entity_type))
81+
print(u'{:<16}: {}'.format('metadata', entity.metadata))
82+
print(u'{:<16}: {}'.format('salience', entity.salience))
83+
print(u'{:<16}: {}'.format('wikipedia_url',
7784
entity.metadata.get('wikipedia_url', '-')))
7885

7986

@@ -90,18 +97,21 @@ def entities_file(gcs_uri):
9097

9198
for entity in entities:
9299
print('=' * 20)
93-
print('{:<16}: {}'.format('name', entity.name))
94-
print('{:<16}: {}'.format('type', entity.entity_type))
95-
print('{:<16}: {}'.format('metadata', entity.metadata))
96-
print('{:<16}: {}'.format('salience', entity.salience))
97-
print('{:<16}: {}'.format('wikipedia_url',
100+
print(u'{:<16}: {}'.format('name', entity.name))
101+
print(u'{:<16}: {}'.format('type', entity.entity_type))
102+
print(u'{:<16}: {}'.format('metadata', entity.metadata))
103+
print(u'{:<16}: {}'.format('salience', entity.salience))
104+
print(u'{:<16}: {}'.format('wikipedia_url',
98105
entity.metadata.get('wikipedia_url', '-')))
99106

100107

101108
def syntax_text(text):
102109
"""Detects syntax in the text."""
103110
language_client = language.Client()
104111

112+
if isinstance(text, six.binary_type):
113+
text = text.decode('utf-8')
114+
105115
# Instantiates a plain text document.
106116
document = language_client.document_from_text(text)
107117

@@ -110,7 +120,7 @@ def syntax_text(text):
110120
tokens = document.analyze_syntax().tokens
111121

112122
for token in tokens:
113-
print('{}: {}'.format(token.part_of_speech, token.text_content))
123+
print(u'{}: {}'.format(token.part_of_speech, token.text_content))
114124

115125

116126
def syntax_file(gcs_uri):
@@ -125,7 +135,7 @@ def syntax_file(gcs_uri):
125135
tokens = document.analyze_syntax().tokens
126136

127137
for token in tokens:
128-
print('{}: {}'.format(token.part_of_speech, token.text_content))
138+
print(u'{}: {}'.format(token.part_of_speech, token.text_content))
129139

130140

131141
if __name__ == '__main__':

0 commit comments

Comments
 (0)