File tree Expand file tree Collapse file tree 2 files changed +11
-3
lines changed Expand file tree Collapse file tree 2 files changed +11
-3
lines changed Original file line number Diff line number Diff line change @@ -62,7 +62,8 @@ model_json=$(cat <<EOF
62
62
"enforce_eager": "true",
63
63
"enable_lora": "true",
64
64
"max_lora_rank": 32,
65
- "lora_extra_vocab_size": 256
65
+ "lora_extra_vocab_size": 256,
66
+ "distributed_executor_backend":"ray"
66
67
}
67
68
EOF
68
69
)
@@ -120,7 +121,8 @@ model_json=$(cat <<EOF
120
121
"block_size": 16,
121
122
"enforce_eager": "true",
122
123
"enable_lora": "false",
123
- "lora_extra_vocab_size": 256
124
+ "lora_extra_vocab_size": 256,
125
+ "distributed_executor_backend":"ray"
124
126
}
125
127
EOF
126
128
)
Original file line number Diff line number Diff line change @@ -63,6 +63,7 @@ function run_multi_gpu_test() {
63
63
export KIND=" ${1} "
64
64
export TENSOR_PARALLELISM=" ${2} "
65
65
export INSTANCE_COUNT=" ${3} "
66
+ export DISTRIBUTED_EXECUTOR_BACKEND=" ${4} "
66
67
67
68
# Setup a clean model repository
68
69
export TEST_MODEL=" vllm_opt_${KIND} _tp${TENSOR_PARALLELISM} _count${INSTANCE_COUNT} "
@@ -73,6 +74,10 @@ function run_multi_gpu_test() {
73
74
cp -r " ${SAMPLE_MODELS_REPO} /vllm_model" " models/${TEST_MODEL} "
74
75
sed -i " s/KIND_MODEL/${KIND} /" " ${TEST_MODEL_TRITON_CONFIG} "
75
76
sed -i " 3s/^/ \" tensor_parallel_size\" : ${TENSOR_PARALLELISM} ,\n/" " ${TEST_MODEL_VLLM_CONFIG} "
77
+ if [ $TENSOR_PARALLELISM -ne " 1" ]; then
78
+ jq --arg backend $DISTRIBUTED_EXECUTOR_BACKEND ' . += {"distributed_executor_backend":$backend}' " ${TEST_MODEL_VLLM_CONFIG} " > " temp.json"
79
+ mv temp.json " ${TEST_MODEL_VLLM_CONFIG} "
80
+ fi
76
81
# Assert the correct kind is set in case the template config changes in the future
77
82
validate_file_contains " ${KIND} " " ${TEST_MODEL_TRITON_CONFIG} "
78
83
@@ -119,10 +124,11 @@ RET=0
119
124
KINDS=" KIND_MODEL KIND_GPU"
120
125
TPS=" 1 2"
121
126
INSTANCE_COUNTS=" 1 2"
127
+ DISTRIBUTED_EXECUTOR_BACKEND=" ray"
122
128
for kind in ${KINDS} ; do
123
129
for tp in ${TPS} ; do
124
130
for count in ${INSTANCE_COUNTS} ; do
125
- run_multi_gpu_test " ${kind} " " ${tp} " " ${count} "
131
+ run_multi_gpu_test " ${kind} " " ${tp} " " ${count} " " ${DISTRIBUTED_EXECUTOR_BACKEND} "
126
132
done
127
133
done
128
134
done
You can’t perform that action at this time.
0 commit comments