Factor out shared generation parameters

SBrandeis · SBrandeis · commit 399f4840b528 · 2024-01-26T16:34:17.000+01:00
diff --git a/packages/tasks/src/scripts/inference-codegen.ts b/packages/tasks/src/scripts/inference-codegen.ts
@@ -74,7 +74,6 @@ async function generateTypescript(inputData: InputData): Promise<SerializedRende
  * And writes that to the `inference.ts` file
  *
  */
-
 async function postProcessOutput(path2generated: string, outputSpec: Record<string, unknown>): Promise<void> {
 	const source = ts.createSourceFile(
 		path.basename(path2generated),
@@ -149,9 +148,12 @@ async function main() {
 			.filter((entry) => entry.name !== "placeholder")
 			.map(async (entry) => ({ task: entry.name, dirPath: path.join(entry.path, entry.name) }))
 	);
-	const allSpecFiles = allTasks
-		.flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
-		.filter((filepath) => pathExists(filepath));
+	const allSpecFiles = [
+		path.join(tasksDir, "schema-utils.json"),
+		...allTasks
+			.flatMap(({ dirPath }) => [path.join(dirPath, "spec", "input.json"), path.join(dirPath, "spec", "output.json")])
+			.filter((filepath) => pathExists(filepath)),
+	];
 
 	for (const { task, dirPath } of allTasks) {
 		const taskSpecDir = path.join(dirPath, "spec");
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts b/packages/tasks/src/tasks/automatic-speech-recognition/inference.ts
@@ -14,9 +14,35 @@ export interface AutomaticSpeechRecognitionInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: {
-		[key: string]: unknown;
-	};
+	parameters?: AutomaticSpeechRecognitionParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Automatic Speech Recognition
+ */
+export interface AutomaticSpeechRecognitionParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	/**
+	 * Whether to output corresponding timestamps with the generated text
+	 */
+	returnTimestamps?: boolean;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * TODO: placeholder description. Presumably the sampling temperature for text generation; confirm against schema-utils.json.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }
 export type AutomaticSpeechRecognitionOutput = AutomaticSpeechRecognitionOutputElement[];
diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json b/packages/tasks/src/tasks/automatic-speech-recognition/spec/input.json
@@ -18,7 +18,16 @@
 			"title": "AutomaticSpeechRecognitionParameters",
 			"description": "Additional inference parameters for Automatic Speech Recognition",
 			"type": "object",
-			"properties": {}
+			"properties": {
+				"returnTimestamps": {
+					"type": "boolean",
+					"description": "Whether to output corresponding timestamps with the generated text"
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
+				}
+			}
 		}
 	},
 	"required": ["data"]
diff --git a/packages/tasks/src/tasks/image-to-text/inference.ts b/packages/tasks/src/tasks/image-to-text/inference.ts
@@ -23,12 +23,28 @@ export interface ImageToTextInput {
  * Additional inference parameters for Image To Text
  */
 export interface ImageToTextParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
 	/**
 	 * The amount of maximum tokens to generate.
 	 */
 	maxNewTokens?: number;
 	[property: string]: unknown;
 }
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * TODO: placeholder description. Presumably the sampling temperature for text generation; confirm against schema-utils.json.
+	 */
+	temperature?: number;
+	[property: string]: unknown;
+}
 export type ImageToTextOutput = ImageToTextOutputElement[];
 /**
  * Outputs of inference for the Image To Text task
diff --git a/packages/tasks/src/tasks/image-to-text/spec/input.json b/packages/tasks/src/tasks/image-to-text/spec/input.json
@@ -22,6 +22,10 @@
 				"maxNewTokens": {
 					"type": "integer",
 					"description": "The amount of maximum tokens to generate."
+				},
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
 				}
 			}
 		}
diff --git a/packages/tasks/src/tasks/text-to-audio/inference.ts b/packages/tasks/src/tasks/text-to-audio/inference.ts
@@ -14,9 +14,31 @@ export interface TextToAudioInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: {
-		[key: string]: unknown;
-	};
+	parameters?: TextToAudioParameters;
+	[property: string]: unknown;
+}
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text To Audio
+ */
+export interface TextToAudioParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	[property: string]: unknown;
+}
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * TODO: placeholder description. Presumably the sampling temperature for text generation; confirm against schema-utils.json.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }
 export type TextToAudioOutput = TextToAudioOutputElement[];
diff --git a/packages/tasks/src/tasks/text-to-audio/spec/input.json b/packages/tasks/src/tasks/text-to-audio/spec/input.json
@@ -19,7 +19,12 @@
 			"title": "TextToAudioParameters",
 			"description": "Additional inference parameters for Text To Audio",
 			"type": "object",
-			"properties": {}
+			"properties": {
+				"generate": {
+					"description": "Parametrization of the text generation process",
+					"$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"
+				}
+			}
 		}
 	},
 	"required": ["data"]
diff --git a/packages/tasks/src/tasks/text-to-speech/inference.ts b/packages/tasks/src/tasks/text-to-speech/inference.ts
@@ -17,7 +17,33 @@ export interface TextToSpeechInput {
 	/**
 	 * Additional inference parameters
 	 */
-	parameters?: { [key: string]: unknown };
+	parameters?: TextToAudioParameters;
+	[property: string]: unknown;
+}
+
+/**
+ * Additional inference parameters
+ *
+ * Additional inference parameters for Text To Audio
+ */
+export interface TextToAudioParameters {
+	/**
+	 * Parametrization of the text generation process
+	 */
+	generate?: GenerationParameters;
+	[property: string]: unknown;
+}
+
+/**
+ * Parametrization of the text generation process
+ *
+ * Ad-hoc parametrization of the text generation process
+ */
+export interface GenerationParameters {
+	/**
+	 * TODO: placeholder description. Presumably the sampling temperature for text generation; confirm against schema-utils.json.
+	 */
+	temperature?: number;
 	[property: string]: unknown;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,10 @@`
`22`	`22`	`"maxNewTokens": {`
`23`	`23`	`"type": "integer",`
`24`	`24`	`"description": "The amount of maximum tokens to generate."`
	`25`	`+ },`
	`26`	`+ "generate": {`
	`27`	`+ "description": "Parametrization of the text generation process",`
	`28`	`+ "$ref": "/inference/schemas/schema-utils.json#/definitions/GenerationParameters"`
`25`	`29`	`}`
`26`	`30`	`}`
`27`	`31`	`}`