Skip to content

Commit 91845c2

Browse files
gwang111 authored and benieric committed
only pick from realtime inference for now
1 parent 561e7f8 commit 91845c2

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

src/sagemaker/inference_recommender/inference_recommender_mixin.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ def _update_params_for_right_size(
306306
initial_instance_count = self.inference_recommendations[0]["EndpointConfiguration"][
307307
"InitialInstanceCount"
308308
]
309-
return (instance_type, initial_instance_count)
309+
return self._filter_recommendations_for_realtime()
310310

311311
def _update_params_for_recommendation_id(
312312
self,
@@ -610,3 +610,15 @@ def _search_recommendation(self, recommendation_list, inference_recommendation_i
610610
),
611611
None,
612612
)
613+
614+
# TODO: only real-time recommendations are picked until right_size + deploy is integrated with serverless
615+
def _filter_recommendations_for_realtime(self):
    """Return the settings of the first real-time (non-serverless) recommendation.

    Walks ``self.inference_recommendations`` in order and skips every entry
    whose ``EndpointConfiguration`` carries a ``ServerlessConfig`` key.

    Returns:
        tuple: ``(instance_type, initial_instance_count)`` read from the first
        entry without a ``ServerlessConfig``, or ``(None, None)`` when every
        entry is serverless or the list is empty.
    """
    for recommendation in self.inference_recommendations:
        endpoint_config = recommendation["EndpointConfiguration"]
        # The response keys are PascalCase ("ServerlessConfig"), matching the
        # "InstanceType" / "InitialInstanceCount" keys read below.
        if "ServerlessConfig" not in endpoint_config:
            # Stop at the first match so the earliest real-time entry wins,
            # rather than being overwritten by later ones.
            return (
                endpoint_config["InstanceType"],
                endpoint_config["InitialInstanceCount"],
            )
    return (None, None)

0 commit comments

Comments
 (0)