aws
diff --git a/‎doc/amazon_sagemaker_featurestore.rst
Lines changed: 1 addition & 0 deletions b/‎doc/amazon_sagemaker_featurestore.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/api/training/smp_versions/latest/smd_model_parallel_pytorch.rst
Lines changed: 96 additions & 0 deletions b/‎doc/api/training/smp_versions/latest/smd_model_parallel_pytorch.rst
Lines changed: 96 additions & 0 deletions
diff --git a/‎requirements/tox/doc8_requirements.txt
Lines changed: 1 addition & 1 deletion b/‎requirements/tox/doc8_requirements.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/sagemaker/estimator.py
Lines changed: 29 additions & 9 deletions b/‎src/sagemaker/estimator.py
Lines changed: 29 additions & 9 deletions
diff --git a/‎src/sagemaker/feature_store/feature_group.py
Lines changed: 43 additions & 8 deletions b/‎src/sagemaker/feature_store/feature_group.py
Lines changed: 43 additions & 8 deletions
diff --git a/‎src/sagemaker/feature_store/feature_store.py
Lines changed: 11 additions & 2 deletions b/‎src/sagemaker/feature_store/feature_store.py
Lines changed: 11 additions & 2 deletions
diff --git a/‎src/sagemaker/feature_store/inputs.py
Lines changed: 58 additions & 0 deletions b/‎src/sagemaker/feature_store/inputs.py
Lines changed: 58 additions & 0 deletions
@@ -202,6 +202,7 @@ location of your offline store.
        role_arn = role,
        s3_uri = offline_feature_store_bucket,
        enable_online_store = True,
+       ttl_duration = None,
        online_store_kms_key_id = None,
        offline_store_kms_key_id = None,
        disable_glue_table_creation = False,
 
@@ -494,6 +494,102 @@ smdistributed.modelparallel.torch.DistributedOptimizer
       ``state_dict`` contains elements corresponding to only the current
       partition, or to the entire model.
 
+smdistributed.modelparallel.torch.nn.FlashAttentionLayer
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. function:: smdistributed.modelparallel.torch.nn.FlashAttentionLayer(attention_dropout_prob=0.1, attention_head_size=None, scale_attention_scores=True, scale_attn_by_layer_idx=False, layer_idx=None, scale=None, triton_flash_attention=False, use_alibi=False)
+
+   This class supports
+   `FlashAttention <https://github.com/HazyResearch/flash-attention>`_
+   for PyTorch 2.0.
+   It takes the ``qkv`` matrix as an argument through its ``forward`` method,
+   computes attention scores and probabilities,
+   and then performs the matrix multiplication with the value layers.
+
+   Through this class, the smp library supports
+   custom attention masks such as Attention with
+   Linear Biases (ALiBi), and you can activate them by setting
+   ``triton_flash_attention`` and ``use_alibi`` to ``True``.
+
+   Note that Triton flash attention does not support dropout
+   on the attention probabilities. It uses the standard lower triangular
+   causal mask when causal mode is enabled. It also runs only
+   on P4d and P4de instances, with fp16 or bf16.
+
+   This class computes the scale factor to apply when computing attention.
+   By default, ``scale`` is set to ``None``, and it's automatically calculated.
+   When ``scale_attention_scores`` is ``True`` (which is default), you must pass a value
+   to ``attention_head_size``. When ``scale_attn_by_layer_idx`` is ``True``,
+   you must pass a value to ``layer_idx``. If both factors are used, they are
+   multiplied as follows: ``(1/(sqrt(attention_head_size) * (layer_idx+1)))``.
+   This scale calculation can be bypassed if you specify a custom scaling
+   factor for ``scale``. In other words, if you specify a value for ``scale``, the set of parameters
+   (``scale_attention_scores``, ``attention_head_size``, ``scale_attn_by_layer_idx``, ``layer_idx``)
+   is overridden and ignored.
+
+   **Parameters**
+
+   * ``attention_dropout_prob`` (float): (default: 0.1) specifies dropout probability
+     to apply to attention.
+   * ``attention_head_size`` (int): Required when ``scale_attention_scores`` is ``True``.
+     When ``scale_attention_scores`` is ``True``, this contributes
+     ``1/sqrt(attention_head_size)`` to the scale factor.
+   * ``scale_attention_scores`` (boolean): (default: True) determines whether
+     to multiply the scale factor by ``1/sqrt(attention_head_size)``.
+   * ``layer_idx`` (int): Required when ``scale_attn_by_layer_idx`` is ``True``.
+     The layer id to use for scaling attention by layer id.
+     It contributes 1/(layer_idx + 1) to the scaling factor.
+   * ``scale_attn_by_layer_idx`` (boolean): (default: False) determines whether
+     to multiply the scale factor by ``1/(layer_idx + 1)``.
+   * ``scale`` (float) (default: None): If passed, this scale factor will be
+     applied, bypassing the scale-related arguments listed above.
+   * ``triton_flash_attention`` (bool): (default: False) If ``True``, the Triton
+     implementation of flash attention will be used. This is necessary to support
+     Attention with Linear Biases (ALiBi) (see next arg). Note that this version
+     of the kernel doesn't support dropout.
+   * ``use_alibi`` (bool): (default: False) If ``True``, it enables Attention with
+     Linear Biases (ALiBi) using the mask provided.
+
+   .. method:: forward(self, qkv, attn_mask=None, causal=False)
+
+      Returns a single ``torch.Tensor`` ``(batch_size x num_heads x seq_len x head_size)``,
+      which represents the output of attention computation.
+
+      **Parameters**
+
+      * ``qkv``: ``torch.Tensor`` in the form of ``(batch_size x seqlen x 3 x num_heads x head_size)``.
+      * ``attn_mask``: ``torch.Tensor`` in the form of ``(batch_size x 1 x 1 x seqlen)``.
+        By default it is ``None``, and usage of this mask needs ``triton_flash_attention``
+        and ``use_alibi`` to be set to ``True``. See how to generate the mask in the following code snippet.
+      * ``causal``: When ``True``, it uses the standard lower triangular mask. The default is ``False``.
+
+   When using ALiBi, prepare the attention mask as shown in the following example.
+
+   .. code:: python
+
+      def generate_alibi_attn_mask(attention_mask, batch_size, seq_length,
+         num_attention_heads, alibi_bias_max=8):
+
+         device, dtype = attention_mask.device, attention_mask.dtype
+         alibi_attention_mask = torch.zeros(
+            1, num_attention_heads, 1, seq_length, dtype=dtype, device=device
+         )
+
+         alibi_bias = torch.arange(1 - seq_length, 1, dtype=dtype, device=device).view(
+            1, 1, 1, seq_length
+         )
+         m = torch.arange(1, num_attention_heads + 1, dtype=dtype, device=device)
+         m.mul_(alibi_bias_max / num_attention_heads)
+         alibi_bias = alibi_bias * (1.0 / (2 ** m.view(1, num_attention_heads, 1, 1)))
+
+         alibi_attention_mask.add_(alibi_bias)
+         alibi_attention_mask = alibi_attention_mask[..., :seq_length, :seq_length]
+         if attention_mask is not None and attention_mask.bool().any():
+            alibi_attention_mask = alibi_attention_mask.masked_fill(
+                  attention_mask.bool().view(batch_size, 1, 1, seq_length), float("-inf")
+            )
+
+         return alibi_attention_mask
 
 smdistributed.modelparallel.torch Context Managers and Util Functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -1,2 +1,2 @@
 doc8==0.10.1
-Pygments==2.11.2
+Pygments==2.15.0
@@ -1737,21 +1737,41 @@ def register(
 
     @property
     def model_data(self):
-        """str: The model location in S3. Only set if Estimator has been ``fit()``."""
+        """str or dict: The model location in S3. Only set if Estimator has been ``fit()``."""
         if self.latest_training_job is not None and not isinstance(
             self.sagemaker_session, PipelineSession
         ):
-            model_uri = self.sagemaker_session.sagemaker_client.describe_training_job(
+            job_details = self.sagemaker_session.sagemaker_client.describe_training_job(
                 TrainingJobName=self.latest_training_job.name
-            )["ModelArtifacts"]["S3ModelArtifacts"]
-        else:
-            logger.warning(
-                "No finished training job found associated with this estimator. Please make sure "
-                "this estimator is only used for building workflow config"
             )
-            model_uri = os.path.join(
-                self.output_path, self._current_job_name, "output", "model.tar.gz"
+            model_uri = job_details["ModelArtifacts"]["S3ModelArtifacts"]
+            compression_type = job_details.get("OutputDataConfig", {}).get(
+                "CompressionType", "GZIP"
             )
+            if compression_type == "GZIP":
+                return model_uri
+            # fail fast if we don't recognize training output compression type
+            if compression_type not in {"GZIP", "NONE"}:
+                raise ValueError(
+                    f'Unrecognized training job output data compression type "{compression_type}"'
+                )
+            # Model data is in uncompressed form. NOTE: SageMaker Hosting mandates presence of a
+            # trailing forward slash in the S3 model data URI, so append one if necessary.
+            if not model_uri.endswith("/"):
+                model_uri += "/"
+            return {
+                "S3DataSource": {
+                    "S3Uri": model_uri,
+                    "S3DataType": "S3Prefix",
+                    "CompressionType": "None",
+                }
+            }
+
+        logger.warning(
+            "No finished training job found associated with this estimator. Please make sure "
+            "this estimator is only used for building workflow config"
+        )
+        model_uri = os.path.join(self.output_path, self._current_job_name, "output", "model.tar.gz")
         return model_uri
 
     @abstractmethod
 
@@ -61,6 +61,8 @@
     FeatureParameter,
     TableFormatEnum,
     DeletionModeEnum,
+    TtlDuration,
+    OnlineStoreConfigUpdate,
 )
 from sagemaker.utils import resolve_value_from_config
 
@@ -523,6 +525,7 @@ def create(
         role_arn: str = None,
         online_store_kms_key_id: str = None,
         enable_online_store: bool = False,
+        ttl_duration: TtlDuration = None,
         offline_store_kms_key_id: str = None,
         disable_glue_table_creation: bool = False,
         data_catalog_config: DataCatalogConfig = None,
@@ -539,6 +542,7 @@ def create(
             event_time_feature_name (str): name of the event time feature.
             role_arn (str): ARN of the role used to call CreateFeatureGroup.
             online_store_kms_key_id (str): KMS key ARN for online store (default: None).
+            ttl_duration (TtlDuration): Default time to live duration for records (default: None).
             enable_online_store (bool): whether to enable online store or not (default: False).
             offline_store_kms_key_id (str): KMS key ARN for offline store (default: None).
                 If a KMS encryption key is not specified, SageMaker encrypts all data at
@@ -592,7 +596,10 @@ def create(
 
         # online store configuration
         if enable_online_store:
-            online_store_config = OnlineStoreConfig(enable_online_store=enable_online_store)
+            online_store_config = OnlineStoreConfig(
+                enable_online_store=enable_online_store,
+                ttl_duration=ttl_duration,
+            )
             if online_store_kms_key_id is not None:
                 online_store_config.online_store_security_config = OnlineStoreSecurityConfig(
                     kms_key_id=online_store_kms_key_id
@@ -633,21 +640,37 @@ def describe(self, next_token: str = None) -> Dict[str, Any]:
             feature_group_name=self.name, next_token=next_token
         )
 
-    def update(self, feature_additions: Sequence[FeatureDefinition]) -> Dict[str, Any]:
+    def update(
+        self,
+        feature_additions: Sequence[FeatureDefinition] = None,
+        online_store_config: OnlineStoreConfigUpdate = None,
+    ) -> Dict[str, Any]:
         """Update a FeatureGroup and add new features from the given feature definitions.
 
         Args:
             feature_additions (Sequence[Dict[str, str]): list of feature definitions to be updated.
+            online_store_config (OnlineStoreConfigUpdate): online store config to be updated.
 
         Returns:
             Response dict from service.
         """
 
+        if feature_additions is None:
+            feature_additions_parameter = None
+        else:
+            feature_additions_parameter = [
+                feature_addition.to_dict() for feature_addition in feature_additions
+            ]
+
+        if online_store_config is None:
+            online_store_config_parameter = None
+        else:
+            online_store_config_parameter = online_store_config.to_dict()
+
         return self.sagemaker_session.update_feature_group(
             feature_group_name=self.name,
-            feature_additions=[
-                feature_addition.to_dict() for feature_addition in feature_additions
-            ],
+            feature_additions=feature_additions_parameter,
+            online_store_config=online_store_config_parameter,
         )
 
     def update_feature_metadata(
@@ -756,7 +779,9 @@ def load_feature_definitions(
         return self.feature_definitions
 
     def get_record(
-        self, record_identifier_value_as_string: str, feature_names: Sequence[str] = None
+        self,
+        record_identifier_value_as_string: str,
+        feature_names: Sequence[str] = None,
     ) -> Sequence[Dict[str, str]]:
         """Get a single record in a FeatureGroup
 
@@ -772,14 +797,24 @@ def get_record(
             feature_names=feature_names,
         ).get("Record")
 
-    def put_record(self, record: Sequence[FeatureValue]):
+    def put_record(self, record: Sequence[FeatureValue], ttl_duration: TtlDuration = None):
         """Put a single record in the FeatureGroup.
 
         Args:
             record (Sequence[FeatureValue]): a list contains feature values.
+            ttl_duration (TtlDuration): TTL duration for this record (default: None).
         """
+
+        if ttl_duration is not None:
+            return self.sagemaker_session.put_record(
+                feature_group_name=self.name,
+                record=[value.to_dict() for value in record],
+                ttl_duration=ttl_duration.to_dict(),
+            )
+
         return self.sagemaker_session.put_record(
-            feature_group_name=self.name, record=[value.to_dict() for value in record]
+            feature_group_name=self.name,
+            record=[value.to_dict() for value in record],
         )
 
     def delete_record(
 
@@ -137,18 +137,27 @@ def list_feature_groups(
             next_token=next_token,
         )
 
-    def batch_get_record(self, identifiers: Sequence[Identifier]) -> Dict[str, Any]:
+    def batch_get_record(
+        self,
+        identifiers: Sequence[Identifier],
+        expiration_time_response: str = None,
+    ) -> Dict[str, Any]:
         """Get record in batch from FeatureStore
 
         Args:
             identifiers (Sequence[Identifier]): A list of identifiers to uniquely identify records
                 in FeatureStore.
+            expiration_time_response (str): toggles whether ``ExpiresAt`` is returned in the
+                response (default: None).
 
         Returns:
             Response dict from service.
         """
         batch_get_record_identifiers = [identifier.to_dict() for identifier in identifiers]
-        return self.sagemaker_session.batch_get_record(identifiers=batch_get_record_identifiers)
+        return self.sagemaker_session.batch_get_record(
+            identifiers=batch_get_record_identifiers,
+            expiration_time_response=expiration_time_response,
+        )
 
     def search(
         self,
 
@@ -84,17 +84,43 @@ def to_dict(self) -> Dict[str, Any]:
         return Config.construct_dict(KmsKeyId=self.kms_key_id)
 
 
+@attr.s
+class TtlDuration(Config):
+    """TtlDuration for records in online FeatureStore.
+
+    Attributes:
+        unit (str): time unit.
+        value (int): time value.
+    """
+
+    unit: str = attr.ib()
+    value: int = attr.ib()
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Construct a dictionary based on the attributes.
+
+        Returns:
+            dict represents the attributes.
+        """
+        return Config.construct_dict(
+            Unit=self.unit,
+            Value=self.value,
+        )
+
+
 @attr.s
 class OnlineStoreConfig(Config):
     """OnlineStoreConfig for FeatureStore.
 
     Attributes:
         enable_online_store (bool): whether to enable the online store.
         online_store_security_config (OnlineStoreSecurityConfig): configuration of security setting.
+        ttl_duration (TtlDuration): Default time to live duration for records.
     """
 
     enable_online_store: bool = attr.ib(default=True)
     online_store_security_config: OnlineStoreSecurityConfig = attr.ib(default=None)
+    ttl_duration: TtlDuration = attr.ib(default=None)
 
     def to_dict(self) -> Dict[str, Any]:
         """Construct a dictionary based on the attributes.
@@ -105,6 +131,28 @@ def to_dict(self) -> Dict[str, Any]:
         return Config.construct_dict(
             EnableOnlineStore=self.enable_online_store,
             SecurityConfig=self.online_store_security_config,
+            TtlDuration=self.ttl_duration,
+        )
+
+
+@attr.s
+class OnlineStoreConfigUpdate(Config):
+    """OnlineStoreConfigUpdate for FeatureStore.
+
+    Attributes:
+        ttl_duration (TtlDuration): Default time to live duration for records.
+    """
+
+    ttl_duration: TtlDuration = attr.ib(default=None)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Construct a dictionary based on the attributes.
+
+        Returns:
+            dict represents the attributes.
+        """
+        return Config.construct_dict(
+            TtlDuration=self.ttl_duration,
         )
 
 
@@ -379,3 +427,13 @@ class DeletionModeEnum(Enum):
 
     SOFT_DELETE = "SoftDelete"
     HARD_DELETE = "HardDelete"
+
+
+class ExpirationTimeResponseEnum(Enum):
+    """Enum for toggling whether ExpiresAt is returned in the response.
+
+    The ExpirationTimeResponse can be Disabled or Enabled.
+    """
+
+    DISABLED = "Disabled"
+    ENABLED = "Enabled"
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`doc8==0.10.1`
`2`		`-Pygments==2.11.2`
	`2`	`+Pygments==2.15.0`