feat(client-sagemaker): The CreateInferenceRecommendationsJob API now supports passing endpoint details directly. This helps customers identify the maximum invocations and maximum latency they can achieve for their model and its associated endpoint, along with getting recommendations on other instances.

awstools · awstools · commit d7d3486d1f1b · 2022-10-21T20:25:55.000Z
diff --git a/clients/client-sagemaker/src/commands/ListModelPackageGroupsCommand.ts b/clients/client-sagemaker/src/commands/ListModelPackageGroupsCommand.ts
@@ -13,12 +13,8 @@ import {
   SerdeContext as __SerdeContext,
 } from "@aws-sdk/types";
 
-import {
-  ListModelPackageGroupsInput,
-  ListModelPackageGroupsInputFilterSensitiveLog,
-  ListModelPackageGroupsOutput,
-  ListModelPackageGroupsOutputFilterSensitiveLog,
-} from "../models/models_2";
+import { ListModelPackageGroupsInput, ListModelPackageGroupsInputFilterSensitiveLog } from "../models/models_2";
+import { ListModelPackageGroupsOutput, ListModelPackageGroupsOutputFilterSensitiveLog } from "../models/models_3";
 import {
   deserializeAws_json1_1ListModelPackageGroupsCommand,
   serializeAws_json1_1ListModelPackageGroupsCommand,
diff --git a/clients/client-sagemaker/src/endpoint/EndpointParameters.ts b/clients/client-sagemaker/src/endpoint/EndpointParameters.ts
@@ -24,7 +24,7 @@ export const resolveClientEndpointParameters = <T>(
 };
 
 export interface EndpointParameters extends __EndpointParameters {
-  Region: string;
+  Region?: string;
   UseDualStack?: boolean;
   UseFIPS?: boolean;
   Endpoint?: string;
diff --git a/clients/client-sagemaker/src/endpoint/ruleset.ts b/clients/client-sagemaker/src/endpoint/ruleset.ts
@@ -6,7 +6,7 @@ export const ruleSet: RuleSetObject = {
   parameters: {
     Region: {
       builtIn: "AWS::Region",
-      required: true,
+      required: false,
       documentation: "The AWS region used to dispatch the request.",
       type: "String",
     },
diff --git a/clients/client-sagemaker/src/models/models_0.ts b/clients/client-sagemaker/src/models/models_0.ts
@@ -1666,9 +1666,9 @@ export interface TransformResources {
   /**
    * <p>The number of
    *             ML
-   *             compute instances to use in the transform job. For distributed
-   *             transform jobs, specify a value greater than 1. The default value is
-   *             <code>1</code>.</p>
+   *             compute instances to use in the transform job. The default value is
+   *                 <code>1</code>, and the maximum is <code>100</code>. For distributed transform jobs,
+   *             specify a value greater than <code>1</code>.</p>
    */
   InstanceCount: number | undefined;
 
diff --git a/clients/client-sagemaker/src/models/models_1.ts b/clients/client-sagemaker/src/models/models_1.ts
@@ -2011,6 +2011,16 @@ export interface EndpointInputConfiguration {
   EnvironmentParameterRanges?: EnvironmentParameterRanges;
 }
 
+/**
+ * <p>Details about a customer endpoint that was compared in an Inference Recommender job.</p>
+ */
+export interface EndpointInfo {
+  /**
+   * <p>The name of a customer's endpoint.</p>
+   */
+  EndpointName: string | undefined;
+}
+
 /**
  * <p>Specifies the maximum number of jobs that can run in parallel
  *     and the maximum number of jobs that can run.</p>
@@ -2143,6 +2153,11 @@ export interface RecommendationJobInputConfig {
    *          override the corresponding fields in the model package.</p>
    */
   ContainerConfig?: RecommendationJobContainerConfig;
+
+  /**
+   * <p>Existing customer endpoints on which to run an Inference Recommender job.</p>
+   */
+  Endpoints?: EndpointInfo[];
 }
 
 export enum RecommendationJobType {
@@ -9664,14 +9679,6 @@ export enum EndpointStatus {
   UPDATING = "Updating",
 }
 
-export enum VariantStatus {
-  ACTIVATING_TRAFFIC = "ActivatingTraffic",
-  BAKING = "Baking",
-  CREATING = "Creating",
-  DELETING = "Deleting",
-  UPDATING = "Updating",
-}
-
 /**
  * @internal
  */
@@ -9952,6 +9959,13 @@ export const EndpointInputConfigurationFilterSensitiveLog = (obj: EndpointInputC
   ...obj,
 });
 
+/**
+ * @internal
+ */
+export const EndpointInfoFilterSensitiveLog = (obj: EndpointInfo): any => ({
+  ...obj,
+});
+
 /**
  * @internal
  */
diff --git a/clients/client-sagemaker/src/models/models_2.ts b/clients/client-sagemaker/src/models/models_2.ts
@@ -82,6 +82,7 @@ import {
   DomainStatus,
   DriftCheckBaselines,
   EdgePackagingJobStatus,
+  EndpointInfo,
   EndpointStatus,
   ExperimentConfig,
   FlowDefinitionOutputConfig,
@@ -142,9 +143,16 @@ import {
   TrialComponentParameterValue,
   TrialComponentParameterValueFilterSensitiveLog,
   TrialComponentStatus,
-  VariantStatus,
 } from "./models_1";
 
+export enum VariantStatus {
+  ACTIVATING_TRAFFIC = "ActivatingTraffic",
+  BAKING = "Baking",
+  CREATING = "Creating",
+  DELETING = "Deleting",
+  UPDATING = "Updating",
+}
+
 /**
  * <p>Describes the status of the production variant.</p>
  */
@@ -1413,6 +1421,36 @@ export interface DescribeInferenceRecommendationsJobRequest {
   JobName: string | undefined;
 }
 
+/**
+ * <p>The metrics for an existing endpoint compared in an Inference Recommender job.</p>
+ */
+export interface InferenceMetrics {
+  /**
+   * <p>The expected maximum number of requests per minute for the instance.</p>
+   */
+  MaxInvocations: number | undefined;
+
+  /**
+   * <p>The expected model latency at maximum invocations per minute for the instance.</p>
+   */
+  ModelLatency: number | undefined;
+}
+
+/**
+ * <p>The performance results from running an Inference Recommender job on an existing endpoint.</p>
+ */
+export interface EndpointPerformance {
+  /**
+   * <p>The metrics for an existing endpoint.</p>
+   */
+  Metrics: InferenceMetrics | undefined;
+
+  /**
+   * <p>Details about a customer endpoint that was compared in an Inference Recommender job.</p>
+   */
+  EndpointInfo: EndpointInfo | undefined;
+}
+
 /**
  * <p>The endpoint configuration made by Inference Recommender during a recommendation job.</p>
  */
@@ -1595,6 +1633,11 @@ export interface DescribeInferenceRecommendationsJobResponse {
    * <p>The recommendations made by Inference Recommender.</p>
    */
   InferenceRecommendations?: InferenceRecommendation[];
+
+  /**
+   * <p>The performance results from running an Inference Recommender job on an existing endpoint.</p>
+   */
+  EndpointPerformances?: EndpointPerformance[];
 }
 
 export interface DescribeLabelingJobRequest {
@@ -8936,30 +8979,6 @@ export interface ModelPackageGroupSummary {
   ModelPackageGroupStatus: ModelPackageGroupStatus | string | undefined;
 }
 
-export interface ListModelPackageGroupsOutput {
-  /**
-   * <p>A list of summaries of the model groups in your Amazon Web Services account.</p>
-   */
-  ModelPackageGroupSummaryList: ModelPackageGroupSummary[] | undefined;
-
-  /**
-   * <p>If the response is truncated, SageMaker returns this token. To retrieve the next set
-   *             of model groups, use it in the subsequent request.</p>
-   */
-  NextToken?: string;
-}
-
-export enum ModelPackageType {
-  BOTH = "Both",
-  UNVERSIONED = "Unversioned",
-  VERSIONED = "Versioned",
-}
-
-export enum ModelPackageSortBy {
-  CREATION_TIME = "CreationTime",
-  NAME = "Name",
-}
-
 /**
  * @internal
  */
@@ -9199,6 +9218,20 @@ export const DescribeInferenceRecommendationsJobRequestFilterSensitiveLog = (
   ...obj,
 });
 
+/**
+ * @internal
+ */
+export const InferenceMetricsFilterSensitiveLog = (obj: InferenceMetrics): any => ({
+  ...obj,
+});
+
+/**
+ * @internal
+ */
+export const EndpointPerformanceFilterSensitiveLog = (obj: EndpointPerformance): any => ({
+  ...obj,
+});
+
 /**
  * @internal
  */
@@ -10706,10 +10739,3 @@ export const ListModelPackageGroupsInputFilterSensitiveLog = (obj: ListModelPack
 export const ModelPackageGroupSummaryFilterSensitiveLog = (obj: ModelPackageGroupSummary): any => ({
   ...obj,
 });
-
-/**
- * @internal
- */
-export const ListModelPackageGroupsOutputFilterSensitiveLog = (obj: ListModelPackageGroupsOutput): any => ({
-  ...obj,
-});
diff --git a/clients/client-sagemaker/src/models/models_3.ts b/clients/client-sagemaker/src/models/models_3.ts
@@ -99,9 +99,8 @@ import {
   LineageType,
   MetricData,
   ModelPackageGroupStatus,
-  ModelPackageSortBy,
+  ModelPackageGroupSummary,
   ModelPackageStatusDetails,
-  ModelPackageType,
   MonitoringExecutionSummary,
   MonitoringJobDefinitionSortKey,
   MonitoringJobDefinitionSummary,
@@ -132,6 +131,30 @@ import {
   Workteam,
 } from "./models_2";
 
+export interface ListModelPackageGroupsOutput {
+  /**
+   * <p>A list of summaries of the model groups in your Amazon Web Services account.</p>
+   */
+  ModelPackageGroupSummaryList: ModelPackageGroupSummary[] | undefined;
+
+  /**
+   * <p>If the response is truncated, SageMaker returns this token. To retrieve the next set
+   *             of model groups, use it in the subsequent request.</p>
+   */
+  NextToken?: string;
+}
+
+export enum ModelPackageType {
+  BOTH = "Both",
+  UNVERSIONED = "Unversioned",
+  VERSIONED = "Versioned",
+}
+
+export enum ModelPackageSortBy {
+  CREATION_TIME = "CreationTime",
+  NAME = "Name",
+}
+
 export interface ListModelPackagesInput {
   /**
    * <p>A filter that returns only model packages created after the specified time
@@ -5830,6 +5853,13 @@ export interface SearchRequest {
   MaxResults?: number;
 }
 
+/**
+ * @internal
+ */
+export const ListModelPackageGroupsOutputFilterSensitiveLog = (obj: ListModelPackageGroupsOutput): any => ({
+  ...obj,
+});
+
 /**
  * @internal
  */
diff --git a/clients/client-sagemaker/src/protocols/Aws_json1_1.ts b/clients/client-sagemaker/src/protocols/Aws_json1_1.ts
diff --git a/codegen/sdk-codegen/aws-models/sagemaker.json b/codegen/sdk-codegen/aws-models/sagemaker.json