Skip to content

Commit d7d3486

Browse files
author
awstools
committed
feat(client-sagemaker): The CreateInferenceRecommendationsJob API now supports passing endpoint details directly, which helps customers identify the maximum invocations and maximum latency they can achieve for their model and its associated endpoint, along with getting recommendations on other instances.
1 parent f9bc7af commit d7d3486

File tree

9 files changed

+286
-56
lines changed

9 files changed

+286
-56
lines changed

clients/client-sagemaker/src/commands/ListModelPackageGroupsCommand.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,8 @@ import {
1313
SerdeContext as __SerdeContext,
1414
} from "@aws-sdk/types";
1515

16-
import {
17-
ListModelPackageGroupsInput,
18-
ListModelPackageGroupsInputFilterSensitiveLog,
19-
ListModelPackageGroupsOutput,
20-
ListModelPackageGroupsOutputFilterSensitiveLog,
21-
} from "../models/models_2";
16+
import { ListModelPackageGroupsInput, ListModelPackageGroupsInputFilterSensitiveLog } from "../models/models_2";
17+
import { ListModelPackageGroupsOutput, ListModelPackageGroupsOutputFilterSensitiveLog } from "../models/models_3";
2218
import {
2319
deserializeAws_json1_1ListModelPackageGroupsCommand,
2420
serializeAws_json1_1ListModelPackageGroupsCommand,

clients/client-sagemaker/src/endpoint/EndpointParameters.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ export const resolveClientEndpointParameters = <T>(
2424
};
2525

2626
export interface EndpointParameters extends __EndpointParameters {
27-
Region: string;
27+
Region?: string;
2828
UseDualStack?: boolean;
2929
UseFIPS?: boolean;
3030
Endpoint?: string;

clients/client-sagemaker/src/endpoint/ruleset.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export const ruleSet: RuleSetObject = {
66
parameters: {
77
Region: {
88
builtIn: "AWS::Region",
9-
required: true,
9+
required: false,
1010
documentation: "The AWS region used to dispatch the request.",
1111
type: "String",
1212
},

clients/client-sagemaker/src/models/models_0.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,9 +1666,9 @@ export interface TransformResources {
16661666
/**
16671667
* <p>The number of
16681668
* ML
1669-
* compute instances to use in the transform job. For distributed
1670-
* transform jobs, specify a value greater than 1. The default value is
1671-
* <code>1</code>.</p>
1669+
* compute instances to use in the transform job. The default value is
1670+
* <code>1</code>, and the maximum is <code>100</code>. For distributed transform jobs,
1671+
* specify a value greater than <code>1</code>.</p>
16721672
*/
16731673
InstanceCount: number | undefined;
16741674

clients/client-sagemaker/src/models/models_1.ts

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2011,6 +2011,16 @@ export interface EndpointInputConfiguration {
20112011
EnvironmentParameterRanges?: EnvironmentParameterRanges;
20122012
}
20132013

2014+
/**
2015+
* <p>Details about a customer endpoint that was compared in an Inference Recommender job.</p>
2016+
*/
2017+
export interface EndpointInfo {
2018+
/**
2019+
* <p>The name of a customer's endpoint.</p>
2020+
*/
2021+
EndpointName: string | undefined;
2022+
}
2023+
20142024
/**
20152025
* <p>Specifies the maximum number of jobs that can run in parallel
20162026
* and the maximum number of jobs that can run.</p>
@@ -2143,6 +2153,11 @@ export interface RecommendationJobInputConfig {
21432153
* override the corresponding fields in the model package.</p>
21442154
*/
21452155
ContainerConfig?: RecommendationJobContainerConfig;
2156+
2157+
/**
2158+
* <p>Existing customer endpoints on which to run an Inference Recommender job.</p>
2159+
*/
2160+
Endpoints?: EndpointInfo[];
21462161
}
21472162

21482163
export enum RecommendationJobType {
@@ -9664,14 +9679,6 @@ export enum EndpointStatus {
96649679
UPDATING = "Updating",
96659680
}
96669681

9667-
export enum VariantStatus {
9668-
ACTIVATING_TRAFFIC = "ActivatingTraffic",
9669-
BAKING = "Baking",
9670-
CREATING = "Creating",
9671-
DELETING = "Deleting",
9672-
UPDATING = "Updating",
9673-
}
9674-
96759682
/**
96769683
* @internal
96779684
*/
@@ -9952,6 +9959,13 @@ export const EndpointInputConfigurationFilterSensitiveLog = (obj: EndpointInputC
99529959
...obj,
99539960
});
99549961

9962+
/**
9963+
* @internal
9964+
*/
9965+
export const EndpointInfoFilterSensitiveLog = (obj: EndpointInfo): any => ({
9966+
...obj,
9967+
});
9968+
99559969
/**
99569970
* @internal
99579971
*/

clients/client-sagemaker/src/models/models_2.ts

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ import {
8282
DomainStatus,
8383
DriftCheckBaselines,
8484
EdgePackagingJobStatus,
85+
EndpointInfo,
8586
EndpointStatus,
8687
ExperimentConfig,
8788
FlowDefinitionOutputConfig,
@@ -142,9 +143,16 @@ import {
142143
TrialComponentParameterValue,
143144
TrialComponentParameterValueFilterSensitiveLog,
144145
TrialComponentStatus,
145-
VariantStatus,
146146
} from "./models_1";
147147

148+
export enum VariantStatus {
149+
ACTIVATING_TRAFFIC = "ActivatingTraffic",
150+
BAKING = "Baking",
151+
CREATING = "Creating",
152+
DELETING = "Deleting",
153+
UPDATING = "Updating",
154+
}
155+
148156
/**
149157
* <p>Describes the status of the production variant.</p>
150158
*/
@@ -1413,6 +1421,36 @@ export interface DescribeInferenceRecommendationsJobRequest {
14131421
JobName: string | undefined;
14141422
}
14151423

1424+
/**
1425+
* <p>The metrics for an existing endpoint compared in an Inference Recommender job.</p>
1426+
*/
1427+
export interface InferenceMetrics {
1428+
/**
1429+
* <p>The expected maximum number of requests per minute for the instance.</p>
1430+
*/
1431+
MaxInvocations: number | undefined;
1432+
1433+
/**
1434+
* <p>The expected model latency at maximum invocations per minute for the instance.</p>
1435+
*/
1436+
ModelLatency: number | undefined;
1437+
}
1438+
1439+
/**
1440+
* <p>The performance results from running an Inference Recommender job on an existing endpoint.</p>
1441+
*/
1442+
export interface EndpointPerformance {
1443+
/**
1444+
* <p>The metrics for an existing endpoint.</p>
1445+
*/
1446+
Metrics: InferenceMetrics | undefined;
1447+
1448+
/**
1449+
* <p>Details about a customer endpoint that was compared in an Inference Recommender job.</p>
1450+
*/
1451+
EndpointInfo: EndpointInfo | undefined;
1452+
}
1453+
14161454
/**
14171455
* <p>The endpoint configuration made by Inference Recommender during a recommendation job.</p>
14181456
*/
@@ -1595,6 +1633,11 @@ export interface DescribeInferenceRecommendationsJobResponse {
15951633
* <p>The recommendations made by Inference Recommender.</p>
15961634
*/
15971635
InferenceRecommendations?: InferenceRecommendation[];
1636+
1637+
/**
1638+
* <p>The performance results from running an Inference Recommender job on an existing endpoint.</p>
1639+
*/
1640+
EndpointPerformances?: EndpointPerformance[];
15981641
}
15991642

16001643
export interface DescribeLabelingJobRequest {
@@ -8936,30 +8979,6 @@ export interface ModelPackageGroupSummary {
89368979
ModelPackageGroupStatus: ModelPackageGroupStatus | string | undefined;
89378980
}
89388981

8939-
export interface ListModelPackageGroupsOutput {
8940-
/**
8941-
* <p>A list of summaries of the model groups in your Amazon Web Services account.</p>
8942-
*/
8943-
ModelPackageGroupSummaryList: ModelPackageGroupSummary[] | undefined;
8944-
8945-
/**
8946-
* <p>If the response is truncated, SageMaker returns this token. To retrieve the next set
8947-
* of model groups, use it in the subsequent request.</p>
8948-
*/
8949-
NextToken?: string;
8950-
}
8951-
8952-
export enum ModelPackageType {
8953-
BOTH = "Both",
8954-
UNVERSIONED = "Unversioned",
8955-
VERSIONED = "Versioned",
8956-
}
8957-
8958-
export enum ModelPackageSortBy {
8959-
CREATION_TIME = "CreationTime",
8960-
NAME = "Name",
8961-
}
8962-
89638982
/**
89648983
* @internal
89658984
*/
@@ -9199,6 +9218,20 @@ export const DescribeInferenceRecommendationsJobRequestFilterSensitiveLog = (
91999218
...obj,
92009219
});
92019220

9221+
/**
9222+
* @internal
9223+
*/
9224+
export const InferenceMetricsFilterSensitiveLog = (obj: InferenceMetrics): any => ({
9225+
...obj,
9226+
});
9227+
9228+
/**
9229+
* @internal
9230+
*/
9231+
export const EndpointPerformanceFilterSensitiveLog = (obj: EndpointPerformance): any => ({
9232+
...obj,
9233+
});
9234+
92029235
/**
92039236
* @internal
92049237
*/
@@ -10706,10 +10739,3 @@ export const ListModelPackageGroupsInputFilterSensitiveLog = (obj: ListModelPack
1070610739
export const ModelPackageGroupSummaryFilterSensitiveLog = (obj: ModelPackageGroupSummary): any => ({
1070710740
...obj,
1070810741
});
10709-
10710-
/**
10711-
* @internal
10712-
*/
10713-
export const ListModelPackageGroupsOutputFilterSensitiveLog = (obj: ListModelPackageGroupsOutput): any => ({
10714-
...obj,
10715-
});

clients/client-sagemaker/src/models/models_3.ts

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,8 @@ import {
9999
LineageType,
100100
MetricData,
101101
ModelPackageGroupStatus,
102-
ModelPackageSortBy,
102+
ModelPackageGroupSummary,
103103
ModelPackageStatusDetails,
104-
ModelPackageType,
105104
MonitoringExecutionSummary,
106105
MonitoringJobDefinitionSortKey,
107106
MonitoringJobDefinitionSummary,
@@ -132,6 +131,30 @@ import {
132131
Workteam,
133132
} from "./models_2";
134133

134+
export interface ListModelPackageGroupsOutput {
135+
/**
136+
* <p>A list of summaries of the model groups in your Amazon Web Services account.</p>
137+
*/
138+
ModelPackageGroupSummaryList: ModelPackageGroupSummary[] | undefined;
139+
140+
/**
141+
* <p>If the response is truncated, SageMaker returns this token. To retrieve the next set
142+
* of model groups, use it in the subsequent request.</p>
143+
*/
144+
NextToken?: string;
145+
}
146+
147+
export enum ModelPackageType {
148+
BOTH = "Both",
149+
UNVERSIONED = "Unversioned",
150+
VERSIONED = "Versioned",
151+
}
152+
153+
export enum ModelPackageSortBy {
154+
CREATION_TIME = "CreationTime",
155+
NAME = "Name",
156+
}
157+
135158
export interface ListModelPackagesInput {
136159
/**
137160
* <p>A filter that returns only model packages created after the specified time
@@ -5830,6 +5853,13 @@ export interface SearchRequest {
58305853
MaxResults?: number;
58315854
}
58325855

5856+
/**
5857+
* @internal
5858+
*/
5859+
export const ListModelPackageGroupsOutputFilterSensitiveLog = (obj: ListModelPackageGroupsOutput): any => ({
5860+
...obj,
5861+
});
5862+
58335863
/**
58345864
* @internal
58355865
*/

0 commit comments

Comments
 (0)