Skip to content

Commit 743e6f3

Browse files
authored
Fix error: Chinese characters cannot be displayed correctly (#1342)
1 parent 170581a commit 743e6f3

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

torchchat/usages/openai_api.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ def callback(x, *, done_generating=False):
388388

389389
device_sync(device=self.builder_args.device)
390390

391+
buffer = []
392+
ILLEGAL_CHAR = '\ufffd'
391393
# Process each token, metrics tuple yielded by Generator.generate.
392394
for y, _ in self.generate(
393395
model=self.model,
@@ -413,10 +415,15 @@ def callback(x, *, done_generating=False):
413415
break
414416

415417
y = y.view(-1)
418+
buffer.append(y.item())
416419
# Decode the torch.Tensor token to a string and append to the buffer. Separate the sequences with a period token.
417420
content = "".join(
418-
self.tokenizer.decode([self.tokenizer.encode(".")[0]] + y.tolist())[1:]
421+
self.tokenizer.decode([self.tokenizer.encode(".")[0]] + buffer)[1:]
419422
)
423+
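# Hold back output while the decoded text contains U+FFFD (the Unicode replacement character), i.e. the buffered tokens do not yet decode cleanly; keep buffering.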
424+
if ILLEGAL_CHAR in content:
425+
continue
426+
buffer.clear()
420427

421428
# Package the sequence into a CompletionChunkResponse and yield it.
422429
chunk_delta = ChunkDelta(

0 commit comments

Comments
 (0)