feat(texttospeech): update the api

yoshi-automation · yoshi-automation · commit c56647a705f4 · 2025-05-13T07:10:25.000Z
#### texttospeech:v1

The following keys were added:
- schemas.SynthesisInput.properties.markup.type (Total Keys: 1)

#### texttospeech:v1beta1

The following keys were added:
- schemas.SynthesisInput.properties.markup.type (Total Keys: 1)
diff --git a/docs/dyn/texttospeech_v1.projects.locations.html b/docs/dyn/texttospeech_v1.projects.locations.html
@@ -121,6 +121,7 @@ <h3>Method Details</h3>
         },
       ],
     },
+    &quot;markup&quot;: &quot;A String&quot;, # Markup for HD voices specifically. This field may not be used with any other voices.
     &quot;multiSpeakerMarkup&quot;: { # A collection of turns for multi-speaker synthesis. # The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis.
       &quot;turns&quot;: [ # Required. Speaker turns.
         { # A multi-speaker turn.
diff --git a/docs/dyn/texttospeech_v1.text.html b/docs/dyn/texttospeech_v1.text.html
@@ -118,6 +118,7 @@ <h3>Method Details</h3>
         },
       ],
     },
+    &quot;markup&quot;: &quot;A String&quot;, # Markup for HD voices specifically. This field may not be used with any other voices.
     &quot;multiSpeakerMarkup&quot;: { # A collection of turns for multi-speaker synthesis. # The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis.
       &quot;turns&quot;: [ # Required. Speaker turns.
         { # A multi-speaker turn.
diff --git a/docs/dyn/texttospeech_v1beta1.projects.locations.html b/docs/dyn/texttospeech_v1beta1.projects.locations.html
@@ -121,6 +121,7 @@ <h3>Method Details</h3>
         },
       ],
     },
+    &quot;markup&quot;: &quot;A String&quot;, # Markup for HD voices specifically. This field may not be used with any other voices.
     &quot;multiSpeakerMarkup&quot;: { # A collection of turns for multi-speaker synthesis. # The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis.
       &quot;turns&quot;: [ # Required. Speaker turns.
         { # A multi-speaker turn.
diff --git a/docs/dyn/texttospeech_v1beta1.text.html b/docs/dyn/texttospeech_v1beta1.text.html
@@ -121,6 +121,7 @@ <h3>Method Details</h3>
         },
       ],
     },
+    &quot;markup&quot;: &quot;A String&quot;, # Markup for HD voices specifically. This field may not be used with any other voices.
     &quot;multiSpeakerMarkup&quot;: { # A collection of turns for multi-speaker synthesis. # The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis.
       &quot;turns&quot;: [ # Required. Speaker turns.
         { # A multi-speaker turn.
diff --git a/googleapiclient/discovery_cache/documents/texttospeech.v1.json b/googleapiclient/discovery_cache/documents/texttospeech.v1.json
@@ -318,7 +318,7 @@
 }
 }
 },
-"revision": "20250415",
+"revision": "20250424",
 "rootUrl": "https://texttospeech.googleapis.com/",
 "schemas": {
 "AdvancedVoiceOptions": {
@@ -403,12 +403,16 @@
 "enum": [
 "PHONETIC_ENCODING_UNSPECIFIED",
 "PHONETIC_ENCODING_IPA",
-"PHONETIC_ENCODING_X_SAMPA"
+"PHONETIC_ENCODING_X_SAMPA",
+"PHONETIC_ENCODING_JAPANESE_YOMIGANA",
+"PHONETIC_ENCODING_PINYIN"
 ],
 "enumDescriptions": [
 "Not specified.",
 "IPA, such as apple -> \u02c8\u00e6p\u0259l. https://en.wikipedia.org/wiki/International_Phonetic_Alphabet",
-"X-SAMPA, such as apple -> \"{p@l\". https://en.wikipedia.org/wiki/X-SAMPA"
+"X-SAMPA, such as apple -> \"{p@l\". https://en.wikipedia.org/wiki/X-SAMPA",
+"For reading-to-pronunciation conversion to work well, the `pronunciation` field should only contain Kanji, Hiragana, and Katakana. The pronunciation can also contain pitch accents. The start of a pitch phrase is specified with `^` and the down-pitch position is specified with `!`, for example: phrase:\u7aef pronunciation:^\u306f\u3057 phrase:\u7bb8 pronunciation:^\u306f!\u3057 phrase:\u6a4b pronunciation:^\u306f\u3057! We currently only support the Tokyo dialect, which allows at most one down-pitch per phrase (i.e. at most one `!` between consecutive `^` markers).",
+"Used to specify pronunciations for Mandarin words. See https://en.wikipedia.org/wiki/Pinyin. For example: \u671d\u9633, the pronunciation is \"chao2 yang2\". The number represents the tone, and there is a space between syllables. Neutral tones are represented by 5, for example \u5b69\u5b50 \"hai2 zi5\"."
 ],
 "type": "string"
 },
@@ -608,6 +612,10 @@
 "$ref": "CustomPronunciations",
 "description": "Optional. The pronunciation customizations are applied to the input. If this is set, the input is synthesized using the given pronunciation customizations. The initial support is for en-us, with plans to expand to other locales in the future. Instant Clone voices aren't supported. In order to customize the pronunciation of a phrase, there must be an exact match of the phrase in the input types. If using SSML, the phrase must not be inside a phoneme tag."
 },
+"markup": {
+"description": "Markup for HD voices specifically. This field may not be used with any other voices.",
+"type": "string"
+},
 "multiSpeakerMarkup": {
 "$ref": "MultiSpeakerMarkup",
 "description": "The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis."
diff --git a/googleapiclient/discovery_cache/documents/texttospeech.v1beta1.json b/googleapiclient/discovery_cache/documents/texttospeech.v1beta1.json
@@ -261,7 +261,7 @@
 }
 }
 },
-"revision": "20250415",
+"revision": "20250424",
 "rootUrl": "https://texttospeech.googleapis.com/",
 "schemas": {
 "AdvancedVoiceOptions": {
@@ -342,12 +342,16 @@
 "enum": [
 "PHONETIC_ENCODING_UNSPECIFIED",
 "PHONETIC_ENCODING_IPA",
-"PHONETIC_ENCODING_X_SAMPA"
+"PHONETIC_ENCODING_X_SAMPA",
+"PHONETIC_ENCODING_JAPANESE_YOMIGANA",
+"PHONETIC_ENCODING_PINYIN"
 ],
 "enumDescriptions": [
 "Not specified.",
 "IPA, such as apple -> \u02c8\u00e6p\u0259l. https://en.wikipedia.org/wiki/International_Phonetic_Alphabet",
-"X-SAMPA, such as apple -> \"{p@l\". https://en.wikipedia.org/wiki/X-SAMPA"
+"X-SAMPA, such as apple -> \"{p@l\". https://en.wikipedia.org/wiki/X-SAMPA",
+"For reading-to-pronunciation conversion to work well, the `pronunciation` field should only contain Kanji, Hiragana, and Katakana. The pronunciation can also contain pitch accents. The start of a pitch phrase is specified with `^` and the down-pitch position is specified with `!`, for example: phrase:\u7aef pronunciation:^\u306f\u3057 phrase:\u7bb8 pronunciation:^\u306f!\u3057 phrase:\u6a4b pronunciation:^\u306f\u3057! We currently only support the Tokyo dialect, which allows at most one down-pitch per phrase (i.e. at most one `!` between consecutive `^` markers).",
+"Used to specify pronunciations for Mandarin words. See https://en.wikipedia.org/wiki/Pinyin. For example: \u671d\u9633, the pronunciation is \"chao2 yang2\". The number represents the tone, and there is a space between syllables. Neutral tones are represented by 5, for example \u5b69\u5b50 \"hai2 zi5\"."
 ],
 "type": "string"
 },
@@ -541,6 +545,10 @@
 "$ref": "CustomPronunciations",
 "description": "Optional. The pronunciation customizations are applied to the input. If this is set, the input is synthesized using the given pronunciation customizations. The initial support is for en-us, with plans to expand to other locales in the future. Instant Clone voices aren't supported. In order to customize the pronunciation of a phrase, there must be an exact match of the phrase in the input types. If using SSML, the phrase must not be inside a phoneme tag."
 },
+"markup": {
+"description": "Markup for HD voices specifically. This field may not be used with any other voices.",
+"type": "string"
+},
 "multiSpeakerMarkup": {
 "$ref": "MultiSpeakerMarkup",
 "description": "The multi-speaker input to be synthesized. Only applicable for multi-speaker synthesis."