google-assistant-sdk/pushtotalk: update sample to v1alpha2

proppy · proppy · commit bded2ca790bd · 2017-11-21T14:53:57.000+09:00
- Converse -&gt; Assist
- ConverseRequest -&gt; AssistRequest
- ConverseResponse -&gt; AssistResponse
- ConverseState -&gt; DialogStateIn
- ConverseResult -&gt; DialogStateOut
- InterimSpokenRequestTextList -&gt; SpeechRecognitionResult

Bug: 69066657
Change-Id: I0c84c743529c122cc0b7445cfb66aef8eb7cbe4e
diff --git a/google-assistant-sdk/googlesamples/assistant/grpc/assistant_helpers.py b/google-assistant-sdk/googlesamples/assistant/grpc/assistant_helpers.py
@@ -16,42 +16,39 @@
 
 import logging
 
-from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
+from google.assistant.embedded.v1alpha2 import embedded_assistant_pb2
 
 
-END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE
-
-
-def log_converse_request_without_audio(converse_request):
-    """Log ConverseRequest fields without audio data."""
+def log_assist_request_without_audio(assist_request):
+    """Log AssistRequest fields without audio data."""
     if logging.getLogger().isEnabledFor(logging.DEBUG):
-        resp_copy = embedded_assistant_pb2.ConverseRequest()
-        resp_copy.CopyFrom(converse_request)
+        resp_copy = embedded_assistant_pb2.AssistRequest()
+        resp_copy.CopyFrom(assist_request)
         if len(resp_copy.audio_in) > 0:
             size = len(resp_copy.audio_in)
             resp_copy.ClearField('audio_in')
-            logging.debug('ConverseRequest: audio_in (%d bytes)',
+            logging.debug('AssistRequest: audio_in (%d bytes)',
                           size)
             return
-        logging.debug('ConverseRequest: %s', resp_copy)
+        logging.debug('AssistRequest: %s', resp_copy)
 
 
-def log_converse_response_without_audio(converse_response):
-    """Log ConverseResponse fields without audio data."""
+def log_assist_response_without_audio(assist_response):
+    """Log AssistResponse fields without audio data."""
     if logging.getLogger().isEnabledFor(logging.DEBUG):
-        resp_copy = embedded_assistant_pb2.ConverseResponse()
-        resp_copy.CopyFrom(converse_response)
+        resp_copy = embedded_assistant_pb2.AssistResponse()
+        resp_copy.CopyFrom(assist_response)
         has_audio_data = (resp_copy.HasField('audio_out') and
                           len(resp_copy.audio_out.audio_data) > 0)
         if has_audio_data:
             size = len(resp_copy.audio_out.audio_data)
             resp_copy.audio_out.ClearField('audio_data')
             if resp_copy.audio_out.ListFields():
-                logging.debug('ConverseResponse: %s audio_data (%d bytes)',
+                logging.debug('AssistResponse: %s audio_data (%d bytes)',
                               resp_copy,
                               size)
             else:
-                logging.debug('ConverseResponse: audio_data (%d bytes)',
+                logging.debug('AssistResponse: audio_data (%d bytes)',
                               size)
             return
-        logging.debug('ConverseResponse: %s', resp_copy)
+        logging.debug('AssistResponse: %s', resp_copy)
diff --git a/google-assistant-sdk/googlesamples/assistant/grpc/pushtotalk.py b/google-assistant-sdk/googlesamples/assistant/grpc/pushtotalk.py
@@ -27,11 +27,10 @@
 import google.auth.transport.requests
 import google.oauth2.credentials
 
-from google.assistant.embedded.v1alpha1 import (
+from google.assistant.embedded.v1alpha2 import (
     embedded_assistant_pb2,
     embedded_assistant_pb2_grpc
 )
-from google.rpc import code_pb2
 from tenacity import retry, stop_after_attempt, retry_if_exception
 
 try:
@@ -47,9 +46,9 @@
 
 
 ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
-END_OF_UTTERANCE = embedded_assistant_pb2.ConverseResponse.END_OF_UTTERANCE
-DIALOG_FOLLOW_ON = embedded_assistant_pb2.ConverseResult.DIALOG_FOLLOW_ON
-CLOSE_MICROPHONE = embedded_assistant_pb2.ConverseResult.CLOSE_MICROPHONE
+END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
+DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
+CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
 DEFAULT_GRPC_DEADLINE = 60 * 3 + 5
 
 
@@ -67,16 +66,18 @@ class SampleAssistant(object):
       device_handler: callback for device actions.
     """
 
-    def __init__(self, device_model_id, device_id, conversation_stream,
+    def __init__(self, language_code, device_model_id, device_id,
+                 conversation_stream,
                  channel, deadline_sec, device_handler):
+        self.language_code = language_code
         self.device_model_id = device_model_id
         self.device_id = device_id
         self.conversation_stream = conversation_stream
 
-        # Opaque blob provided in ConverseResponse that,
-        # when provided in a follow-up ConverseRequest,
+        # Opaque blob provided in AssistResponse that,
+        # when provided in a follow-up AssistRequest,
         # gives the Assistant a context marker within the current state
-        # of the multi-Converse()-RPC "conversation".
+        # of the multi-Assist()-RPC "conversation".
         # This value, along with MicrophoneMode, supports a more natural
         # "conversation" with the Assistant.
         self.conversation_state = None
@@ -106,7 +107,7 @@ def is_grpc_error_unavailable(e):
 
     @retry(reraise=True, stop=stop_after_attempt(3),
            retry=retry_if_exception(is_grpc_error_unavailable))
-    def converse(self):
+    def assist(self):
         """Send a voice request to the Assistant and playback the response.
 
         Returns: True if conversation should continue.
@@ -117,46 +118,39 @@ def converse(self):
         self.conversation_stream.start_recording()
         logging.info('Recording audio request.')
 
-        def iter_converse_requests():
-            for c in self.gen_converse_requests():
-                assistant_helpers.log_converse_request_without_audio(c)
+        def iter_assist_requests():
+            for c in self.gen_assist_requests():
+                assistant_helpers.log_assist_request_without_audio(c)
                 yield c
             self.conversation_stream.start_playback()
 
-        # This generator yields ConverseResponse proto messages
+        # This generator yields AssistResponse proto messages
         # received from the gRPC Google Assistant API.
-        for resp in self.assistant.Converse(iter_converse_requests(),
-                                            self.deadline):
-            assistant_helpers.log_converse_response_without_audio(resp)
-            if resp.error.code != code_pb2.OK:
-                logging.error('server error: %s', resp.error.message)
-                break
+        for resp in self.assistant.Assist(iter_assist_requests(),
+                                          self.deadline):
+            assistant_helpers.log_assist_response_without_audio(resp)
             if resp.event_type == END_OF_UTTERANCE:
                 logging.info('End of audio request detected')
                 self.conversation_stream.stop_recording()
-            if resp.result.spoken_request_text:
+            if resp.speech_results:
                 logging.info('Transcript of user request: "%s".',
-                             resp.result.spoken_request_text)
+                             ' '.join(r.transcript
+                                      for r in resp.speech_results))
                 logging.info('Playing assistant response.')
             if len(resp.audio_out.audio_data) > 0:
                 self.conversation_stream.write(resp.audio_out.audio_data)
-            if resp.result.spoken_response_text:
-                logging.info(
-                    'Transcript of TTS response '
-                    '(only populated from IFTTT): "%s".',
-                    resp.result.spoken_response_text)
-            if resp.result.conversation_state:
-                self.conversation_state = resp.result.conversation_state
-            if resp.result.volume_percentage != 0:
-                logging.info('Setting volume to %s%%',
-                             resp.result.volume_percentage)
-                self.conversation_stream.volume_percentage = (
-                    resp.result.volume_percentage
-                )
-            if resp.result.microphone_mode == DIALOG_FOLLOW_ON:
+            if resp.dialog_state_out.conversation_state:
+                conversation_state = resp.dialog_state_out.conversation_state
+                logging.debug('Updating conversation state.')
+                self.conversation_state = conversation_state
+            if resp.dialog_state_out.volume_percentage != 0:
+                volume_percentage = resp.dialog_state_out.volume_percentage
+                logging.info('Setting volume to %s%%', volume_percentage)
+                self.conversation_stream.volume_percentage = volume_percentage
+            if resp.dialog_state_out.microphone_mode == DIALOG_FOLLOW_ON:
                 continue_conversation = True
                 logging.info('Expecting follow-on query from user.')
-            elif resp.result.microphone_mode == CLOSE_MICROPHONE:
+            elif resp.dialog_state_out.microphone_mode == CLOSE_MICROPHONE:
                 continue_conversation = False
             if resp.device_action.device_request_json:
                 device_request = json.loads(
@@ -174,17 +168,17 @@ def iter_converse_requests():
         self.conversation_stream.stop_playback()
         return continue_conversation
 
-    def gen_converse_requests(self):
-        """Yields: ConverseRequest messages to send to the API."""
+    def gen_assist_requests(self):
+        """Yields: AssistRequest messages to send to the API."""
 
-        converse_state = None
-        if self.conversation_state:
-            logging.debug('Sending converse_state: %s',
-                          self.conversation_state)
-            converse_state = embedded_assistant_pb2.ConverseState(
-                conversation_state=self.conversation_state,
+        dialog_state_in = embedded_assistant_pb2.DialogStateIn(
+                language_code=self.language_code,
+                conversation_state=b''
             )
-        config = embedded_assistant_pb2.ConverseConfig(
+        if self.conversation_state:
+            logging.debug('Sending conversation state.')
+            dialog_state_in.conversation_state = self.conversation_state
+        config = embedded_assistant_pb2.AssistConfig(
             audio_in_config=embedded_assistant_pb2.AudioInConfig(
                 encoding='LINEAR16',
                 sample_rate_hertz=self.conversation_stream.sample_rate,
@@ -194,18 +188,18 @@ def gen_converse_requests(self):
                 sample_rate_hertz=self.conversation_stream.sample_rate,
                 volume_percentage=self.conversation_stream.volume_percentage,
             ),
-            converse_state=converse_state,
+            dialog_state_in=dialog_state_in,
             device_config=embedded_assistant_pb2.DeviceConfig(
-                device_model_id=self.device_model_id,
                 device_id=self.device_id,
+                device_model_id=self.device_model_id,
             )
         )
-        # The first ConverseRequest must contain the ConverseConfig
+        # The first AssistRequest must contain the AssistConfig
         # and no audio data.
-        yield embedded_assistant_pb2.ConverseRequest(config=config)
+        yield embedded_assistant_pb2.AssistRequest(config=config)
         for data in self.conversation_stream:
             # Subsequent requests need audio data, but not config.
-            yield embedded_assistant_pb2.ConverseRequest(audio_in=data)
+            yield embedded_assistant_pb2.AssistRequest(audio_in=data)
 
 
 @click.command()
@@ -237,6 +231,10 @@ def gen_converse_requests(self):
                   click.get_app_dir('googlesamples-assistant'),
                   'device_config.json'),
               help='Path to save and restore the device configuration')
+@click.option('--lang', show_default=True,
+              metavar='<language code>',
+              default='en-US',
+              help='Language code of the Assistant')
 @click.option('--verbose', '-v', is_flag=True, default=False,
               help='Verbose logging.')
 @click.option('--input-audio-file', '-i',
@@ -275,7 +273,7 @@ def gen_converse_requests(self):
 @click.option('--once', default=False, is_flag=True,
               help='Force termination after a single conversation.')
 def main(api_endpoint, credentials, project,
-         device_model_id, device_id, device_config, verbose,
+         device_model_id, device_id, device_config, lang, verbose,
          input_audio_file, output_audio_file,
          audio_sample_rate, audio_sample_width,
          audio_iter_size, audio_block_size, audio_flush_size,
@@ -398,13 +396,14 @@ def onoff(on):
             with open(device_config, 'w') as f:
                 json.dump(payload, f)
 
-    with SampleAssistant(device_model_id, device_id, conversation_stream,
+    with SampleAssistant(lang, device_model_id, device_id,
+                         conversation_stream,
                          grpc_channel, grpc_deadline,
                          device_handler) as assistant:
         # If file arguments are supplied:
         # exit after the first turn of the conversation.
         if input_audio_file or output_audio_file:
-            assistant.converse()
+            assistant.assist()
             return
 
         # If no file arguments supplied:
@@ -415,7 +414,7 @@ def onoff(on):
         while True:
             if wait_for_user_trigger:
                 click.pause(info='Press Enter to send a new request...')
-            continue_conversation = assistant.converse()
+            continue_conversation = assistant.assist()
             # wait for user trigger if there is no follow-up turn in
             # the conversation.
             wait_for_user_trigger = not continue_conversation