Skip to content

Commit 5f38d6b

Browse files
author
awstools
committed
feat(client-sagemaker-runtime): AWS SageMaker Runtime feature: Add sticky routing to support stateful inference models.
1 parent 55553d6 commit 5f38d6b

File tree

5 files changed

+121
-0
lines changed

5 files changed

+121
-0
lines changed

clients/client-sagemaker-runtime/src/commands/InvokeEndpointCommand.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ export interface InvokeEndpointCommandOutput extends InvokeEndpointCommandOutput
8585
* InferenceId: "STRING_VALUE",
8686
* EnableExplanations: "STRING_VALUE",
8787
* InferenceComponentName: "STRING_VALUE",
88+
* SessionId: "STRING_VALUE",
8889
* };
8990
* const command = new InvokeEndpointCommand(input);
9091
* const response = await client.send(command);
@@ -93,6 +94,8 @@ export interface InvokeEndpointCommandOutput extends InvokeEndpointCommandOutput
9394
* // ContentType: "STRING_VALUE",
9495
* // InvokedProductionVariant: "STRING_VALUE",
9596
* // CustomAttributes: "STRING_VALUE",
97+
* // NewSessionId: "STRING_VALUE",
98+
* // ClosedSessionId: "STRING_VALUE",
9699
* // };
97100
*
98101
* ```

clients/client-sagemaker-runtime/src/commands/InvokeEndpointWithResponseStreamCommand.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ export interface InvokeEndpointWithResponseStreamCommandOutput
8686
* TargetContainerHostname: "STRING_VALUE",
8787
* InferenceId: "STRING_VALUE",
8888
* InferenceComponentName: "STRING_VALUE",
89+
* SessionId: "STRING_VALUE",
8990
* };
9091
* const command = new InvokeEndpointWithResponseStreamCommand(input);
9192
* const response = await client.send(command);

clients/client-sagemaker-runtime/src/models/models_0.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,28 @@ export interface InvokeEndpointInput {
146146
* @public
147147
*/
148148
InferenceComponentName?: string;
149+
150+
/**
151+
* <p>Creates a stateful session or identifies an existing one. You can do one of the
152+
* following:</p>
153+
* <ul>
154+
* <li>
155+
* <p>Create a stateful session by specifying the value
156+
* <code>NEW_SESSION</code>.</p>
157+
* </li>
158+
* <li>
159+
* <p>Send your request to an existing stateful session by specifying the ID of that
160+
* session.</p>
161+
* </li>
162+
* </ul>
163+
* <p>With a stateful session, you can send multiple requests to a stateful model. When you
164+
* create a session with a stateful model, the model must create the session ID and set the
165+
* expiration time. The model must also provide that information in the response to your
166+
* request. You can get the ID and timestamp from the <code>NewSessionId</code> response
167+
* parameter. For any subsequent request where you specify that session ID, SageMaker routes the request to the same instance that supports the session.</p>
168+
* @public
169+
*/
170+
SessionId?: string;
149171
}
150172

151173
/**
@@ -195,6 +217,19 @@ export interface InvokeEndpointOutput {
195217
* @public
196218
*/
197219
CustomAttributes?: string;
220+
221+
/**
222+
* <p>If you created a stateful session with your request, the ID and expiration time that
223+
* the model assigns to that session.</p>
224+
* @public
225+
*/
226+
NewSessionId?: string;
227+
228+
/**
229+
* <p>If you closed a stateful session with your request, the ID of that session.</p>
230+
* @public
231+
*/
232+
ClosedSessionId?: string;
198233
}
199234

200235
/**
@@ -508,6 +543,20 @@ export interface InvokeEndpointWithResponseStreamInput {
508543
* @public
509544
*/
510545
InferenceComponentName?: string;
546+
547+
/**
548+
* <p>The ID of a stateful session to handle your request.</p>
549+
* <p>You can't create a stateful session by using the
550+
* <code>InvokeEndpointWithResponseStream</code> action. Instead, you can create one by
551+
* using the <code>
552+
* <a>InvokeEndpoint</a>
553+
* </code> action. In your request, you
554+
* specify <code>NEW_SESSION</code> for the <code>SessionId</code> request parameter. The
555+
* response to that request provides the session ID for the <code>NewSessionId</code>
556+
* response parameter.</p>
557+
* @public
558+
*/
559+
SessionId?: string;
511560
}
512561

513562
/**

clients/client-sagemaker-runtime/src/protocols/Aws_restJson1.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ export const se_InvokeEndpointCommand = async (
6363
[_xasii]: input[_II]!,
6464
[_xasee]: input[_EE]!,
6565
[_xasic]: input[_ICN]!,
66+
[_xassi]: input[_SI]!,
6667
});
6768
b.bp("/endpoints/{EndpointName}/invocations");
6869
b.p("EndpointName", () => input.EndpointName!, "{EndpointName}", false);
@@ -114,6 +115,7 @@ export const se_InvokeEndpointWithResponseStreamCommand = async (
114115
[_xastch]: input[_TCH]!,
115116
[_xasii]: input[_II]!,
116117
[_xasic]: input[_ICN]!,
118+
[_xassi]: input[_SI]!,
117119
});
118120
b.bp("/endpoints/{EndpointName}/invocations-response-stream");
119121
b.p("EndpointName", () => input.EndpointName!, "{EndpointName}", false);
@@ -140,6 +142,8 @@ export const de_InvokeEndpointCommand = async (
140142
[_CT]: [, output.headers[_ct]],
141143
[_IPV]: [, output.headers[_xaipv]],
142144
[_CA]: [, output.headers[_xasca]],
145+
[_NSI]: [, output.headers[_xasnsi]],
146+
[_CSI]: [, output.headers[_xascsi]],
143147
});
144148
const data: any = await collectBody(output.body, context);
145149
contents.Body = data;
@@ -450,6 +454,7 @@ const isSerializableHeaderValue = (value: any): boolean =>
450454

451455
const _A = "Accept";
452456
const _CA = "CustomAttributes";
457+
const _CSI = "ClosedSessionId";
453458
const _CT = "ContentType";
454459
const _EE = "EnableExplanations";
455460
const _FL = "FailureLocation";
@@ -458,8 +463,10 @@ const _II = "InferenceId";
458463
const _IL = "InputLocation";
459464
const _IPV = "InvokedProductionVariant";
460465
const _ITS = "InvocationTimeoutSeconds";
466+
const _NSI = "NewSessionId";
461467
const _OL = "OutputLocation";
462468
const _RTTLS = "RequestTTLSeconds";
469+
const _SI = "SessionId";
463470
const _TCH = "TargetContainerHostname";
464471
const _TM = "TargetModel";
465472
const _TV = "TargetVariant";
@@ -468,15 +475,18 @@ const _ct = "content-type";
468475
const _xaipv = "x-amzn-invoked-production-variant";
469476
const _xasa = "x-amzn-sagemaker-accept";
470477
const _xasca = "x-amzn-sagemaker-custom-attributes";
478+
const _xascsi = "x-amzn-sagemaker-closed-session-id";
471479
const _xasct = "x-amzn-sagemaker-content-type";
472480
const _xasee = "x-amzn-sagemaker-enable-explanations";
473481
const _xasf = "x-amzn-sagemaker-failurelocation";
474482
const _xasi = "x-amzn-sagemaker-inputlocation";
475483
const _xasi_ = "x-amzn-sagemaker-invocationtimeoutseconds";
476484
const _xasic = "x-amzn-sagemaker-inference-component";
477485
const _xasii = "x-amzn-sagemaker-inference-id";
486+
const _xasnsi = "x-amzn-sagemaker-new-session-id";
478487
const _xaso = "x-amzn-sagemaker-outputlocation";
479488
const _xasr = "x-amzn-sagemaker-requestttlseconds";
489+
const _xassi = "x-amzn-sagemaker-session-id";
480490
const _xastch = "x-amzn-sagemaker-target-container-hostname";
481491
const _xastm = "x-amzn-sagemaker-target-model";
482492
const _xastv = "x-amzn-sagemaker-target-variant";

codegen/sdk-codegen/aws-models/sagemaker-runtime.json

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1449,6 +1449,13 @@
14491449
"smithy.api#documentation": "<p>If the endpoint hosts one or more inference components, this parameter specifies the\n name of the inference component to invoke.</p>",
14501450
"smithy.api#httpHeader": "X-Amzn-SageMaker-Inference-Component"
14511451
}
1452+
},
1453+
"SessionId": {
1454+
"target": "com.amazonaws.sagemakerruntime#SessionIdOrNewSessionConstantHeader",
1455+
"traits": {
1456+
"smithy.api#documentation": "<p>Creates a stateful session or identifies an existing one. You can do one of the\n following:</p>\n <ul>\n <li>\n <p>Create a stateful session by specifying the value\n <code>NEW_SESSION</code>.</p>\n </li>\n <li>\n <p>Send your request to an existing stateful session by specifying the ID of that\n session.</p>\n </li>\n </ul>\n <p>With a stateful session, you can send multiple requests to a stateful model. When you\n create a session with a stateful model, the model must create the session ID and set the\n expiration time. The model must also provide that information in the response to your\n request. You can get the ID and timestamp from the <code>NewSessionId</code> response\n parameter. For any subsequent request where you specify that session ID, SageMaker routes the request to the same instance that supports the session.</p>",
1457+
"smithy.api#httpHeader": "X-Amzn-SageMaker-Session-Id"
1458+
}
14521459
}
14531460
},
14541461
"traits": {
@@ -1487,6 +1494,20 @@
14871494
"smithy.api#documentation": "<p>Provides additional information in the response about the inference returned by a\n model hosted at an Amazon SageMaker endpoint. The information is an opaque value that is\n forwarded verbatim. You could use this value, for example, to return an ID received in\n the <code>CustomAttributes</code> header of a request or other metadata that a service\n endpoint was programmed to produce. The value must consist of no more than 1024 visible\n US-ASCII characters as specified in <a href=\"https://tools.ietf.org/html/rfc7230#section-3.2.6\">Section 3.2.6. Field Value\n Components</a> of the Hypertext Transfer Protocol (HTTP/1.1). If the customer\n wants the custom attribute returned, the model must set the custom attribute to be\n included on the way back. </p>\n <p>The code in your model is responsible for setting or updating any custom attributes in\n the response. If your code does not set this value in the response, an empty value is\n returned. For example, if a custom attribute represents the trace ID, your model can\n prepend the custom attribute with <code>Trace ID:</code> in your post-processing\n function.</p>\n <p>This feature is currently supported in the Amazon Web Services SDKs but not in the Amazon SageMaker\n Python SDK.</p>",
14881495
"smithy.api#httpHeader": "X-Amzn-SageMaker-Custom-Attributes"
14891496
}
1497+
},
1498+
"NewSessionId": {
1499+
"target": "com.amazonaws.sagemakerruntime#NewSessionResponseHeader",
1500+
"traits": {
1501+
"smithy.api#documentation": "<p>If you created a stateful session with your request, the ID and expiration time that\n the model assigns to that session.</p>",
1502+
"smithy.api#httpHeader": "X-Amzn-SageMaker-New-Session-Id"
1503+
}
1504+
},
1505+
"ClosedSessionId": {
1506+
"target": "com.amazonaws.sagemakerruntime#SessionIdHeader",
1507+
"traits": {
1508+
"smithy.api#documentation": "<p>If you closed a stateful session with your request, the ID of that session.</p>",
1509+
"smithy.api#httpHeader": "X-Amzn-SageMaker-Closed-Session-Id"
1510+
}
14901511
}
14911512
},
14921513
"traits": {
@@ -1598,6 +1619,13 @@
15981619
"smithy.api#documentation": "<p>If the endpoint hosts one or more inference components, this parameter specifies the\n name of the inference component to invoke for a streaming response.</p>",
15991620
"smithy.api#httpHeader": "X-Amzn-SageMaker-Inference-Component"
16001621
}
1622+
},
1623+
"SessionId": {
1624+
"target": "com.amazonaws.sagemakerruntime#SessionIdHeader",
1625+
"traits": {
1626+
"smithy.api#documentation": "<p>The ID of a stateful session to handle your request.</p>\n <p>You can't create a stateful session by using the\n <code>InvokeEndpointWithResponseStream</code> action. Instead, you can create one by\n using the <code>\n <a>InvokeEndpoint</a>\n </code> action. In your request, you\n specify <code>NEW_SESSION</code> for the <code>SessionId</code> request parameter. The\n response to that request provides the session ID for the <code>NewSessionId</code>\n response parameter.</p>",
1627+
"smithy.api#httpHeader": "X-Amzn-SageMaker-Session-Id"
1628+
}
16011629
}
16021630
},
16031631
"traits": {
@@ -1718,6 +1746,16 @@
17181746
"smithy.api#error": "client"
17191747
}
17201748
},
1749+
"com.amazonaws.sagemakerruntime#NewSessionResponseHeader": {
1750+
"type": "string",
1751+
"traits": {
1752+
"smithy.api#length": {
1753+
"min": 0,
1754+
"max": 256
1755+
},
1756+
"smithy.api#pattern": "^[a-zA-Z0-9](-*[a-zA-Z0-9])*;\\sExpires=[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$"
1757+
}
1758+
},
17211759
"com.amazonaws.sagemakerruntime#PartBlob": {
17221760
"type": "blob",
17231761
"traits": {
@@ -1788,6 +1826,26 @@
17881826
"smithy.api#httpError": 503
17891827
}
17901828
},
1829+
"com.amazonaws.sagemakerruntime#SessionIdHeader": {
1830+
"type": "string",
1831+
"traits": {
1832+
"smithy.api#length": {
1833+
"min": 0,
1834+
"max": 256
1835+
},
1836+
"smithy.api#pattern": "^[a-zA-Z0-9](-*[a-zA-Z0-9])*$"
1837+
}
1838+
},
1839+
"com.amazonaws.sagemakerruntime#SessionIdOrNewSessionConstantHeader": {
1840+
"type": "string",
1841+
"traits": {
1842+
"smithy.api#length": {
1843+
"min": 0,
1844+
"max": 256
1845+
},
1846+
"smithy.api#pattern": "^(NEW_SESSION)$|^[a-zA-Z0-9](-*[a-zA-Z0-9])*$"
1847+
}
1848+
},
17911849
"com.amazonaws.sagemakerruntime#StatusCode": {
17921850
"type": "integer"
17931851
},

0 commit comments

Comments
 (0)