aws
diff --git a/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/.gitignore
Lines changed: 1 addition & 0 deletions b/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/__init__.py b/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/__init__.py
diff --git a/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/clients.py
Lines changed: 62 additions & 0 deletions b/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/clients.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/concurrency_probe.py
Lines changed: 55 additions & 0 deletions b/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/concurrency_probe.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/constants.py
Lines changed: 24 additions & 0 deletions b/‎introduction_to_amazon_algorithms/jumpstart-foundation-models/text-generation-benchmarking/benchmarking/constants.py
Lines changed: 24 additions & 0 deletions
@@ -0,0 +1 @@
+*.json
@@ -0,0 +1,62 @@
+import json
+from typing import Any, Dict
+
+import boto3
+
+
+# Inputs for the AWS Pricing ``get_products`` request built in ``PricingClient.get_price_per_unit``.
+# ``SERVICE_CODE`` is passed as ``ServiceCode``; each ``*_KEY`` constant is used as the ``Field`` of a
+# ``TERM_MATCH`` filter.
+SERVICE_CODE = "AmazonSageMaker"
+PRODUCT_FAMILY = "ML Instance"  # filter value matched against PRODUCT_FAMILY_KEY
+PRODUCT_FAMILY_KEY = "productFamily"
+PRICING_SERVICE_API_REGION = "us-east-1"  # All pricing APIs are hosted in IAD
+REGION_KEY = "regionCode"  # matched against the caller-supplied region
+INSTANCE_NAME_KEY = "instanceName"  # matched against the caller-supplied instance type
+PLATO_INSTANCE_TYPE_KEY = "platoinstancetype"
+PLATO_INSTANCE_TYPE = "Hosting"  # filter value matched against PLATO_INSTANCE_TYPE_KEY
+
+
+def _create_pricing_filter(type: str, field: str, value: str) -> Dict[str, str]:
+    """Returns one filter entry with the ``Type``, ``Field`` and ``Value`` keys set from the arguments."""
+    pricing_filter = dict(Type=type, Field=field, Value=value)
+    return pricing_filter
+
+
+class PricingClient:
+    """Boto3 client to access AWS Pricing."""
+
+    def __init__(self) -> None:
+        """Creates the boto3 client for AWS pricing."""
+        self._client = boto3.client(service_name="pricing", region_name=PRICING_SERVICE_API_REGION)
+
+    def get_price_per_unit(self, instance_type: str, region: str) -> float:
+        """Returns the price per unit in USD of a SageMaker machine learning instance in a region.
+
+        Args:
+            instance_type: Instance name matched against the ``instanceName`` pricing attribute.
+            region: Region code matched against the ``regionCode`` pricing attribute.
+
+        Returns:
+            The ``pricePerUnit`` USD value of the first price dimension of the first on-demand term of the
+            first matching product, converted to ``float``.
+
+        Raises:
+            IndexError: If the pricing service returns no product for the given instance type and region.
+        """
+        filters = [
+            _create_pricing_filter(type="TERM_MATCH", field=PRODUCT_FAMILY_KEY, value=PRODUCT_FAMILY),
+            _create_pricing_filter(type="TERM_MATCH", field=REGION_KEY, value=region),
+            _create_pricing_filter(type="TERM_MATCH", field=INSTANCE_NAME_KEY, value=instance_type),
+            _create_pricing_filter(
+                type="TERM_MATCH",
+                field=PLATO_INSTANCE_TYPE_KEY,
+                value=PLATO_INSTANCE_TYPE,
+            ),
+        ]
+        response = self._client.get_products(ServiceCode=SERVICE_CODE, Filters=filters)
+        products = response["PriceList"]
+        if not products:
+            # Keep the exception type of the original bare ``[0]`` lookup, but say what was not found
+            # instead of surfacing "list index out of range".
+            raise IndexError(
+                f"No SageMaker hosting price found for instance type '{instance_type}' in region '{region}'."
+            )
+
+        # Each PriceList entry is a JSON document; only the first product, term and dimension are used.
+        on_demand_terms = json.loads(products[0])["terms"]["OnDemand"]
+        price_dimensions = list(on_demand_terms.values())[0]["priceDimensions"]
+        price_per_unit = list(price_dimensions.values())[0]["pricePerUnit"]["USD"]
+        return float(price_per_unit)
+
+
+class SageMakerClient:
+    """Boto3 SageMaker client to access endpoint and model information."""
+
+    def __init__(self) -> None:
+        """Creates the boto3 SageMaker client."""
+        self._client = boto3.client("sagemaker")
+
+    def describe_endpoint_config(self, endpoint_config_name: str) -> Dict[str, Any]:
+        """Returns the ``DescribeEndpointConfig`` response for the named endpoint configuration."""
+        return self._client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
+
+    def describe_endpoint(self, endpoint_name: str) -> Dict[str, Any]:
+        """Returns the ``DescribeEndpoint`` response for the named endpoint."""
+        return self._client.describe_endpoint(EndpointName=endpoint_name)
+
+    def describe_model(self, endpoint_name: str) -> Dict[str, Any]:
+        """Returns the ``DescribeModel`` response for the model behind an endpoint.
+
+        The model is taken from the first production variant of the endpoint's configuration.
+
+        Args:
+            endpoint_name: Name of the endpoint whose model should be described.
+        """
+        # An endpoint and its configuration may have different names, so resolve the configuration name
+        # from the endpoint instead of assuming they match.
+        endpoint = self.describe_endpoint(endpoint_name)
+        endpoint_config = self.describe_endpoint_config(endpoint["EndpointConfigName"])
+        model_name = endpoint_config["ProductionVariants"][0]["ModelName"]
+        return self._client.describe_model(ModelName=model_name)
@@ -0,0 +1,55 @@
+from abc import abstractmethod
+from typing import Any, Dict, Optional
+
+from sagemaker.predictor import Predictor
+
+
+class ConcurrentProbeIteratorBase:
+    """Base iterator that yields the number of concurrent requests to use for each load test of a probe.
+
+    Subclasses implement ``__next__``. This class does not derive from ``abc.ABC``, so the
+    ``abstractmethod`` marker is not enforced at instantiation; calling the base ``__next__`` raises
+    ``NotImplementedError``.
+    """
+
+    def __init__(self, model_id: str, payload_name: str):
+        """Stores the probe identifiers and resets the iterator state."""
+        self.model_id = model_id
+        self.payload_name = payload_name
+        # Expected to be assigned from outside the iterator; subclasses may check it to stop iterating.
+        self.exception: Optional[Exception] = None
+        self.stop_reason: str = "No stop reason set."
+        # Most recent load test result passed to ``send``; ``None`` until ``send`` is first called.
+        self.result: Optional[Dict[str, Any]] = None
+
+    def __iter__(self) -> "ConcurrentProbeIteratorBase":
+        return self
+
+    @abstractmethod
+    def __next__(self) -> int:
+        """Returns the next concurrent request setting, or raises ``StopIteration`` to end the probe."""
+        raise NotImplementedError
+
+    def send(self, result: Dict[str, Any], predictor: Predictor) -> bool:
+        """Send load test results to the iterator and return whether to use results.
+
+        Some iterators may make internal adjustments (e.g., scale endpoint instances and repeat load test for the same
+        concurrent request setting) before using the results.
+
+        The base implementation stores ``result``, ignores ``predictor`` and always returns ``True``.
+        """
+        self.result = result
+        return True
+
+
+class ConcurrentProbeExponentialScalingIterator(ConcurrentProbeIteratorBase):
+    """An iterator used during a concurrency probe to exponentially scale concurrent requests."""
+
+    def __init__(self, model_id: str, payload_name: str, start: int = 1, scale_factor: float = 2.0) -> None:
+        """Initializes the iterator.
+
+        Args:
+            model_id: Stored on the iterator by the base class.
+            payload_name: Stored on the iterator by the base class.
+            start: Number of concurrent requests returned until the first result is recorded.
+            scale_factor: Multiplier applied to the concurrent request count on each later step.
+        """
+        self.concurrent_requests = start
+        self.scale_factor = scale_factor
+        super().__init__(model_id, payload_name)
+
+    def __next__(self) -> int:
+        """Returns the next number of concurrent requests.
+
+        Raises:
+            StopIteration: If ``self.exception`` has been set; ``self.stop_reason`` then holds the exception
+                type name followed by its message, when the message is non-empty.
+        """
+        if self.exception is not None:
+            error = self.exception
+            self.stop_reason = "".join([type(error).__name__, f": {error}" if str(error) else ""])
+            raise StopIteration
+
+        # No result recorded yet: keep returning the starting value without scaling.
+        if self.result is None:
+            return self.concurrent_requests
+
+        scaled = int(self.concurrent_requests * self.scale_factor)
+        if self.scale_factor > 1:
+            # int() truncation can leave the value unchanged (e.g. int(1 * 1.5) == 1), which would repeat
+            # the same setting forever; always advance by at least one request when scaling up.
+            scaled = max(scaled, self.concurrent_requests + 1)
+        self.concurrent_requests = scaled
+
+        return self.concurrent_requests
+
+
+def num_invocation_scaler(concurrent_requests: int, num_invocation_factor: int = 3) -> int:
+    """Returns ``concurrent_requests`` multiplied by ``num_invocation_factor``."""
+    total_invocations = concurrent_requests * num_invocation_factor
+    return total_invocations
@@ -0,0 +1,24 @@
+import boto3
+from botocore.config import Config
+from sagemaker.session import Session
+from pathlib import Path
+
+
+# Benchmarking defaults. The consumers of these values are not in this module.
+# NOTE(review): the per-constant meanings below are read from the names — confirm against the callers.
+
+# Resolved against the working directory at the time this module is imported.
+SAVE_METRICS_FILE_PATH = Path.cwd() / "latency_benchmarking.json"
+CLOUDWATCH_PERIOD_SECONDS = 60.0
+MAX_CONCURRENT_INVOCATIONS_PER_MODEL = 30
+MAX_CONCURRENT_BENCHMARKS = 20
+RETRY_WAIT_TIME_SECONDS = 30.0
+MAX_TOTAL_RETRY_TIME_SECONDS = 120.0
+NUM_INVOCATIONS = 10
+SM_INVOCATION_TIMEOUT_SECONDS = 60.0
+# Module-level SageMaker session. Both boto3 clients are created when this module is imported, each
+# with its own connect timeout and retry settings.
+SM_SESSION = Session(
+    sagemaker_runtime_client=boto3.client(
+        "sagemaker-runtime",
+        config=Config(connect_timeout=5, retries={"mode": "standard", "total_max_attempts": 10}),
+    ),
+    sagemaker_client=boto3.client(
+        "sagemaker",
+        config=Config(connect_timeout=5, read_timeout=60, retries={"total_max_attempts": 20}),
+    ),
+)