23
23
24
24
# pyright: reportRedeclaration=false
25
25
26
+ DEFAULT_TIMEOUT_SECONDS = aiohttp .ClientTimeout (total = 600 )
27
+
26
28
@step ("a server listening on {server_fqdn}:{server_port}" )
27
29
def step_server_config (context , server_fqdn : str , server_port : str ):
28
30
context .server_fqdn = server_fqdn
@@ -689,7 +691,7 @@ def step_tokenize_set_add_special(context):
689
691
@async_run_until_complete
690
692
async def step_tokenize (context ):
691
693
context .tokenized_text = context_text (context )
692
- async with aiohttp .ClientSession () as session :
694
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
693
695
tokenize_args = {
694
696
"content" : context .tokenized_text ,
695
697
}
@@ -706,7 +708,7 @@ async def step_tokenize(context):
706
708
@async_run_until_complete
707
709
async def step_detokenize (context ):
708
710
assert len (context .tokens ) > 0
709
- async with aiohttp .ClientSession () as session :
711
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
710
712
async with session .post (f'{ context .base_url } /detokenize' ,
711
713
json = {
712
714
"tokens" : context .tokens ,
@@ -735,7 +737,7 @@ def step_strings_for_tokenization(context):
735
737
@step ('an OPTIONS request is sent from {origin}' )
736
738
@async_run_until_complete
737
739
async def step_options_request (context , origin ):
738
- async with aiohttp .ClientSession () as session :
740
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
739
741
headers = {'Authorization' : f'Bearer { context .user_api_key } ' , 'Origin' : origin }
740
742
async with session .options (f'{ context .base_url } /v1/chat/completions' ,
741
743
headers = headers ) as response :
@@ -751,7 +753,7 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
751
753
@step ('prometheus metrics are exposed' )
752
754
@async_run_until_complete
753
755
async def step_prometheus_metrics_exported (context ):
754
- async with aiohttp .ClientSession () as session :
756
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
755
757
async with await session .get (f'{ context .base_url } /metrics' ) as metrics_response :
756
758
assert metrics_response .status == 200
757
759
assert metrics_response .headers ['Content-Type' ] == "text/plain; version=0.0.4"
@@ -824,7 +826,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
824
826
@step ('the slot {slot_id:d} is saved with filename "{filename}"' )
825
827
@async_run_until_complete
826
828
async def step_save_slot (context , slot_id , filename ):
827
- async with aiohttp .ClientSession () as session :
829
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
828
830
async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=save' ,
829
831
json = {"filename" : filename },
830
832
headers = {"Content-Type" : "application/json" }) as response :
@@ -834,7 +836,7 @@ async def step_save_slot(context, slot_id, filename):
834
836
@step ('the slot {slot_id:d} is restored with filename "{filename}"' )
835
837
@async_run_until_complete
836
838
async def step_restore_slot (context , slot_id , filename ):
837
- async with aiohttp .ClientSession () as session :
839
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
838
840
async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=restore' ,
839
841
json = {"filename" : filename },
840
842
headers = {"Content-Type" : "application/json" }) as response :
@@ -844,7 +846,7 @@ async def step_restore_slot(context, slot_id, filename):
844
846
@step ('the slot {slot_id:d} is erased' )
845
847
@async_run_until_complete
846
848
async def step_erase_slot (context , slot_id ):
847
- async with aiohttp .ClientSession () as session :
849
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
848
850
async with session .post (f'{ context .base_url } /slots/{ slot_id } ?action=erase' ,
849
851
headers = {"Content-Type" : "application/json" }) as response :
850
852
context .response = response
@@ -853,7 +855,7 @@ async def step_erase_slot(context, slot_id):
853
855
@step ('switch {on_or_off} lora adapter {lora_id:d}' )
854
856
@async_run_until_complete
855
857
async def toggle_lora_adapter (context , on_or_off : str , lora_id : int ):
856
- async with aiohttp .ClientSession () as session :
858
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
857
859
async with session .post (f'{ context .base_url } /lora-adapters' ,
858
860
json = [{'id' : lora_id , 'scale' : 1 if on_or_off == 'on' else 0 }],
859
861
headers = {"Content-Type" : "application/json" }) as response :
@@ -889,7 +891,7 @@ async def request_completion(prompt,
889
891
print (f"Set user_api_key: { user_api_key } " )
890
892
headers ['Authorization' ] = f'Bearer { user_api_key } '
891
893
892
- async with aiohttp .ClientSession () as session :
894
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
893
895
async with session .post (f'{ base_url } /completion' ,
894
896
json = {
895
897
"input_prefix" : prompt_prefix ,
@@ -902,8 +904,7 @@ async def request_completion(prompt,
902
904
"temperature" : temperature if temperature is not None else 0.8 ,
903
905
"n_probs" : 2 ,
904
906
},
905
- headers = headers ,
906
- timeout = 3600 ) as response :
907
+ headers = headers ) as response :
907
908
if expect_api_error is None or not expect_api_error :
908
909
assert response .status == 200
909
910
assert response .headers ['Access-Control-Allow-Origin' ] == origin
@@ -961,7 +962,7 @@ async def oai_chat_completions(user_prompt,
961
962
if async_client :
962
963
origin = 'llama.cpp'
963
964
headers = {'Authorization' : f'Bearer { user_api_key } ' , 'Origin' : origin }
964
- async with aiohttp .ClientSession () as session :
965
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
965
966
async with session .post (f'{ base_url } { base_path } ' ,
966
967
json = payload ,
967
968
headers = headers ) as response :
@@ -1048,7 +1049,7 @@ async def oai_chat_completions(user_prompt,
1048
1049
1049
1050
1050
1051
async def request_embedding (content , seed , base_url = None ) -> list [list [float ]]:
1051
- async with aiohttp .ClientSession () as session :
1052
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
1052
1053
async with session .post (f'{ base_url } /embedding' ,
1053
1054
json = {
1054
1055
"content" : content ,
@@ -1068,14 +1069,13 @@ async def request_oai_embeddings(input, seed,
1068
1069
headers = []
1069
1070
if user_api_key is not None :
1070
1071
headers = {'Authorization' : f'Bearer { user_api_key } ' , 'Origin' : origin }
1071
- async with aiohttp .ClientSession () as session :
1072
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
1072
1073
async with session .post (f'{ base_url } /v1/embeddings' ,
1073
1074
json = {
1074
1075
"input" : input ,
1075
1076
"model" : model ,
1076
1077
},
1077
- headers = headers ,
1078
- timeout = 3600 ) as response :
1078
+ headers = headers ) as response :
1079
1079
assert response .status == 200 , f"received status code not expected: { response .status } "
1080
1080
assert response .headers ['Access-Control-Allow-Origin' ] == origin
1081
1081
assert response .headers ['Content-Type' ] == "application/json; charset=utf-8"
@@ -1194,7 +1194,7 @@ async def wait_for_slots_status(context,
1194
1194
if 'GITHUB_ACTIONS' in os .environ :
1195
1195
timeout *= 2
1196
1196
1197
- async with aiohttp .ClientSession () as session :
1197
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
1198
1198
while True :
1199
1199
async with await session .get (f'{ base_url } /slots' , params = params ) as slots_response :
1200
1200
status_code = slots_response .status
@@ -1237,7 +1237,7 @@ def assert_embeddings(embeddings):
1237
1237
1238
1238
1239
1239
async def request_slots_status (context , expected_slots ):
1240
- async with aiohttp .ClientSession () as session :
1240
+ async with aiohttp .ClientSession (timeout = DEFAULT_TIMEOUT_SECONDS ) as session :
1241
1241
async with await session .get (f'{ context .base_url } /slots' ) as slots_response :
1242
1242
assert slots_response .status == 200
1243
1243
slots = await slots_response .json ()
0 commit comments