Skip to content

Commit 6899bc8

Browse files
committed
precommit
1 parent 1dfb1a8 commit 6899bc8

File tree

5 files changed

+22
-28
lines changed

5 files changed

+22
-28
lines changed

examples/sglang/components/embedding_worker.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,14 @@
1717
Using SGLang and Dynamo to serve embedding models!
1818
"""
1919

20-
import asyncio
2120
import logging
22-
import random
23-
import socket
24-
from typing import Any
2521

2622
import sglang as sgl
2723
from utils.protocol import EmbeddingRequest
2824
from utils.sglang import parse_sglang_args
2925

3026
from dynamo.llm import ModelType, register_llm
31-
from dynamo.sdk import async_on_start, depends, dynamo_context, endpoint, service
27+
from dynamo.sdk import async_on_start, dynamo_context, endpoint, service
3228

3329
logger = logging.getLogger(__name__)
3430

@@ -41,7 +37,6 @@
4137
workers=1,
4238
)
4339
class SGLangEmbeddingWorker:
44-
4540
def __init__(self):
4641
class_name = self.__class__.__name__
4742
self.engine_args = parse_sglang_args(class_name, "")
@@ -70,11 +65,11 @@ async def generate(self, request: EmbeddingRequest):
7065
input = [i for i in request.input]
7166
else:
7267
raise ValueError(f"Invalid input type: {type(request.input)}")
73-
68+
7469
g = await self.engine.async_encode(
7570
prompt=input,
7671
)
77-
72+
7873
# Transform response to match OpenAI embedding format
7974
response = self._transform_response(g, request.model)
8075
yield response
@@ -83,16 +78,18 @@ def _transform_response(self, ret, model_name):
8378
"""Transform SGLang response to OpenAI embedding format"""
8479
if not isinstance(ret, list):
8580
ret = [ret]
86-
81+
8782
embedding_objects = []
8883
prompt_tokens = 0
89-
84+
9085
for idx, ret_item in enumerate(ret):
91-
embedding_objects.append({
92-
"object": "embedding",
93-
"embedding": ret_item["embedding"],
94-
"index": idx,
95-
})
86+
embedding_objects.append(
87+
{
88+
"object": "embedding",
89+
"embedding": ret_item["embedding"],
90+
"index": idx,
91+
}
92+
)
9693
prompt_tokens += ret_item["meta_info"]["prompt_tokens"]
9794

9895
return {
@@ -103,4 +100,4 @@ def _transform_response(self, ret, model_name):
103100
"prompt_tokens": prompt_tokens,
104101
"total_tokens": prompt_tokens,
105102
},
106-
}
103+
}

examples/sglang/components/frontend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
import subprocess
1818
from pathlib import Path
1919

20-
from components.worker import SGLangWorker
2120
from components.embedding_worker import SGLangEmbeddingWorker
21+
from components.worker import SGLangWorker
2222
from fastapi import FastAPI
2323
from pydantic import BaseModel
2424

examples/sglang/configs/embedding.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ Frontend:
55
SGLangEmbeddingWorker:
66
model-path: intfloat/e5-base-v2
77
served-model-name: e5
8-
is-embedding: true
98
tp: 1
109
trust-remote-code: true
1110
is-embedding: true

examples/sglang/graphs/embedding.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
32
# SPDX-License-Identifier: Apache-2.0
43
#
@@ -14,7 +13,7 @@
1413
# See the License for the specific language governing permissions and
1514
# limitations under the License.
1615

17-
from components.frontend import Frontend
1816
from components.embedding_worker import SGLangEmbeddingWorker
17+
from components.frontend import Frontend
1918

2019
Frontend.link(SGLangEmbeddingWorker)

examples/sglang/utils/protocol.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
from typing import List, Optional, Union, Literal
16+
from typing import List, Literal, Optional, Union
1717

1818
from pydantic import BaseModel, Field
1919

@@ -61,18 +61,17 @@ class DisaggPreprocessedRequest(BaseModel):
6161
bootstrap_port: int
6262
bootstrap_room: int
6363

64-
EmbeddingInput = Union[
65-
str,
66-
List[str],
67-
List[int],
68-
List[List[int]]
69-
]
64+
65+
EmbeddingInput = Union[str, List[str], List[int], List[List[int]]]
7066

7167
EncodingFormat = Literal["float", "base64"]
7268

69+
7370
class EmbeddingRequest(BaseModel):
7471
model: str
7572
input: EmbeddingInput
7673
encoding_format: Optional[EncodingFormat] = None
7774
user: Optional[str] = None
78-
dimensions: Optional[int] = None # only supported in text-embedding-3 and later models from OpenAI
75+
dimensions: Optional[
76+
int
77+
] = None # only supported in text-embedding-3 and later models from OpenAI

0 commit comments

Comments
 (0)