@@ -71,11 +71,11 @@ def encode_header(self, role) -> List[int]:
 
     def encode_message(self, message) -> List[int]:
         tokens = self.encode_header(message["role"])
-        if type(message["content"]) is str:
+        if isinstance(message["content"], str):
             tokens.extend(
                 self.tokenizer.encode(message["content"], bos=False, eos=False)
             )
-        elif type(message["content"]) is list:
+        elif isinstance(message["content"], list):
             for content in message["content"]:
                 if content["type"] == "text":
                     tokens.extend(
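
Why this change: isinstance(x, str) is the idiomatic type check in Python and, unlike type(x) is str, also accepts subclasses (PEP 8 recommends it for this reason). A standalone sketch of the difference, not part of this file:

    class UserString(str):
        # A hypothetical str subclass, for illustration only.
        pass

    s = UserString("hello")
    print(type(s) is str)      # False: the exact-type check rejects the subclass
    print(isinstance(s, str))  # True: isinstance accepts str and any subclass
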
@@ -190,7 +190,7 @@ def from_args(cls, args):
             for image_prompt in image_prompts
             if (not os.path.exists(image_prompt))
         ]
-        if len(non_existent_image_prompts):
+        if non_existent_image_prompts:
             raise RuntimeError(
                 f"Image prompt {non_existent_image_prompts} does not exist"
             )
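
Why this change: PEP 8 prefers relying on the truthiness of a sequence over an explicit len(...) call; an empty list is falsy, so the two conditions are equivalent. A standalone sketch:

    missing = []
    print(bool(missing))   # False: an empty list is falsy, so `if missing:` skips
    missing.append("img.png")
    print(bool(missing))   # True: a non-empty list is truthy, so the branch runs
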
@@ -238,7 +238,7 @@ def __init__(
         draft_quantize: bool,
     ):
         torch._inductor.config.coordinate_descent_tuning = (
-            False if builder_args.device == "cpu" else True
+            builder_args.device != "cpu"
         )
         torch._inductor.config.triton.unique_kernel_names = True
         torch._inductor.config.fx_graph_cache = True  # Experimental feature to reduce compilation times, will be on by default in future
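
Why this change: False if cond else True is a roundabout spelling of not cond, and not (device == "cpu") collapses further to device != "cpu". A standalone sketch verifying the equivalence:

    for device in ("cpu", "cuda", "mps"):
        old = False if device == "cpu" else True
        new = device != "cpu"
        assert old == new  # the comparison already yields the desired boolean
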
@@ -1002,11 +1002,8 @@ def chat(
             max_seq_length,
         )
 
-        max_seq_length = (
-            max_seq_length + self.speculative_builder_args.speculate_k + 1
-            if self.draft_model is not None
-            else max_seq_length
-        )
+        if self.draft_model is not None:
+            max_seq_length += self.speculative_builder_args.speculate_k + 1
 
         aggregate_metrics = {
             "tokens_per_sec": [],