[mcp-client] Allow arbitrary endpoint via URL (in both McpClient and Agent) #1396

Merged · 6 commits · Apr 29, 2025
14 changes: 7 additions & 7 deletions packages/inference/README.md
@@ -1,7 +1,7 @@
# 🤗 Hugging Face Inference

A Typescript powered wrapper for the HF Inference API (serverless), Inference Endpoints (dedicated), and all supported Inference Providers.
It works with [Inference API (serverless)](https://huggingface.co/docs/api-inference/index) and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index), and even with all supported third-party Inference Providers.
A Typescript powered wrapper for Inference Providers (serverless) and Inference Endpoints (dedicated).
It works with [Inference Providers (serverless)](https://huggingface.co/docs/api-inference/index) – including all supported third-party Inference Providers – and [Inference Endpoints (dedicated)](https://huggingface.co/docs/inference-endpoints/index).

Check out the [full documentation](https://huggingface.co/docs/huggingface.js/inference/README).

@@ -25,20 +25,20 @@ yarn add @huggingface/inference

```ts
// esm.sh
import { InferenceClient } from "https://esm.sh/@huggingface/inference"
import { InferenceClient } from "https://esm.sh/@huggingface/inference";
// or npm:
import { InferenceClient } from "npm:@huggingface/inference"
import { InferenceClient } from "npm:@huggingface/inference";
```

### Initialize

```typescript
import { InferenceClient } from '@huggingface/inference'
import { InferenceClient } from '@huggingface/inference';

const hf = new InferenceClient('your access token')
const hf = new InferenceClient('your access token');
```

❗**Important note:** Using an access token is optional to get started, however you will be rate limited eventually. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**.
❗**Important note:** Always pass an access token. Join [Hugging Face](https://huggingface.co/join) and then visit [access tokens](https://huggingface.co/settings/tokens) to generate your access token for **free**.

Your access token should be kept private. If you need to protect it in front-end applications, we suggest setting up a proxy server that stores the access token.

22 changes: 16 additions & 6 deletions packages/mcp-client/cli.ts
@@ -11,6 +11,7 @@ import { version as packageVersion } from "./package.json";

const MODEL_ID = process.env.MODEL_ID ?? "Qwen/Qwen2.5-72B-Instruct";
const PROVIDER = (process.env.PROVIDER as InferenceProvider) ?? "nebius";
const BASE_URL = process.env.BASE_URL;

const SERVERS: StdioServerParameters[] = [
{
@@ -48,12 +49,21 @@ async function main() {
process.exit(1);
}

const agent = new Agent({
provider: PROVIDER,
model: MODEL_ID,
apiKey: process.env.HF_TOKEN,
servers: SERVERS,
});
const agent = new Agent(
BASE_URL
? {
baseUrl: BASE_URL,
model: MODEL_ID,
apiKey: process.env.HF_TOKEN,
servers: SERVERS,
}
: {
provider: PROVIDER,
model: MODEL_ID,
apiKey: process.env.HF_TOKEN,
servers: SERVERS,
}
);

const rl = readline.createInterface({ input: stdin, output: stdout });
let abortController = new AbortController();
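The BASE_URL branch above can be sketched in isolation. Note that `AgentOptions` and `optionsFromEnv` are hypothetical names used only for this illustration, not exports of `@huggingface/mcp-client`:

```typescript
// Standalone sketch of the env-driven selection in cli.ts: when BASE_URL is
// set, the agent targets an arbitrary OpenAI-compatible endpoint; otherwise
// it falls back to a named Inference Provider. All names are illustrative.
type AgentOptions =
	| { provider: string; model: string; apiKey?: string }
	| { baseUrl: string; model: string; apiKey?: string };

function optionsFromEnv(env: Record<string, string | undefined>): AgentOptions {
	const model = env.MODEL_ID ?? "Qwen/Qwen2.5-72B-Instruct";
	const apiKey = env.HF_TOKEN;
	return env.BASE_URL
		? { baseUrl: env.BASE_URL, model, apiKey }
		: { provider: env.PROVIDER ?? "nebius", model, apiKey };
}
```

Because the two object literals live in separate ternary arms, each one matches exactly one member of the union, which is the same trick the real cli.ts uses when constructing the `Agent`.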
16 changes: 13 additions & 3 deletions packages/mcp-client/src/Agent.ts
@@ -49,18 +49,28 @@ export class Agent extends McpClient {

constructor({
provider,
baseUrl,
model,
apiKey,
servers,
prompt,
}: {
provider: InferenceProvider;
}: (
| {
provider: InferenceProvider;
baseUrl?: undefined;
}
| {
baseUrl: string;
provider?: undefined;
}
) & {
model: string;
apiKey: string;
servers: StdioServerParameters[];
prompt?: string;
}) {
super({ provider, model, apiKey });
super(provider ? { provider, baseUrl, model, apiKey } : { provider, baseUrl, model, apiKey });
/// ^This shenanigan is just here to please an overzealous TS type-checker.
Contributor: 🫠

Member Author: cc @coyotte508 if he has an idea to "fix" (in a later PR?)

Member (@coyotte508, Apr 29, 2025): I also had to write something like this in #1322 I think (before the force-push)

this.servers = servers;
this.messages = [
{
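The "overzealous type-checker" remark in the diff above reflects a real TypeScript limitation: once a discriminated union is destructured, the compiler tracks each binding independently and forgets that exactly one of them is defined. A minimal, self-contained reproduction (all names hypothetical):

```typescript
// After destructuring, `provider` and `baseUrl` are two unrelated
// `string | undefined` values, so `{ provider, baseUrl }` no longer
// type-checks as the union. Branching on one field, as the Agent
// constructor does, rebuilds a valid union member in each arm.
type ClientOptions =
	| { provider: string; baseUrl?: undefined }
	| { baseUrl: string; provider?: undefined };

function describe({ provider, baseUrl }: ClientOptions): string {
	return provider !== undefined ? `provider:${provider}` : `baseUrl:${baseUrl}`;
}
```

This is why the constructor's `super(...)` call repeats the same object literal in both ternary arms: each arm independently satisfies one side of the union even though the two expressions look identical.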
28 changes: 23 additions & 5 deletions packages/mcp-client/src/McpClient.ts
@@ -2,7 +2,7 @@ import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import type { StdioServerParameters } from "@modelcontextprotocol/sdk/client/stdio.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import { InferenceClient } from "@huggingface/inference";
import type { InferenceProvider } from "@huggingface/inference";
import type { InferenceClientEndpoint, InferenceProvider } from "@huggingface/inference";
import type {
ChatCompletionInputMessage,
ChatCompletionInputTool,
@@ -22,14 +22,32 @@ export interface ChatCompletionInputMessageTool extends ChatCompletionInputMessa
}

export class McpClient {
protected client: InferenceClient;
protected provider: string;
protected client: InferenceClient | InferenceClientEndpoint;
protected provider: string | undefined;

protected model: string;
private clients: Map<ToolName, Client> = new Map();
public readonly availableTools: ChatCompletionInputTool[] = [];

constructor({ provider, model, apiKey }: { provider: InferenceProvider; model: string; apiKey: string }) {
this.client = new InferenceClient(apiKey);
constructor({
provider,
baseUrl,
model,
apiKey,
}: (
| {
provider: InferenceProvider;
baseUrl?: undefined;
}
| {
baseUrl: string;
provider?: undefined;
}
) & {
model: string;
apiKey: string;
}) {
this.client = baseUrl ? new InferenceClient(apiKey).endpoint(baseUrl) : new InferenceClient(apiKey);
this.provider = provider;
this.model = model;
}