Commit 6b8b138

Integ tests for local mode model trainer (#1623)
1 parent 0875efc commit 6b8b138

14 files changed (+548 lines, −15 lines)


src/sagemaker/modules/constants.py

Lines changed: 2 additions & 2 deletions
@@ -16,8 +16,8 @@
 
 DEFAULT_INSTANCE_TYPE = "ml.m5.xlarge"
 
-SM_CODE = "sm_code"
-SM_CODE_CONTAINER_PATH = "/opt/ml/input/data/sm_code"
+SM_CODE = "code"
+SM_CODE_CONTAINER_PATH = "/opt/ml/input/data/code"
 
 SM_DRIVERS = "sm_drivers"
 SM_DRIVERS_CONTAINER_PATH = "/opt/ml/input/data/sm_drivers"
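
The rename works because of a general SageMaker convention: each training input channel is mounted in the container at /opt/ml/input/data/<channel_name>, so renaming the code channel from "sm_code" to "code" moves the mount accordingly. A minimal sketch of that relationship (not part of the commit; the f-string form is illustrative only):

SM_CODE = "code"
SM_CODE_CONTAINER_PATH = f"/opt/ml/input/data/{SM_CODE}"  # resolves to "/opt/ml/input/data/code"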

src/sagemaker/modules/local_core/local_container.py

Lines changed: 6 additions & 3 deletions
@@ -32,9 +32,8 @@
 )
 from sagemaker.local.utils import check_for_studio, recursive_copy
 from sagemaker.model import DIR_PARAM_NAME
-from sagemaker.modules import logger
+from sagemaker.modules import logger, Session
 from sagemaker.modules.configs import Channel
-from sagemaker.session import Session
 from sagemaker.utils import ECR_URI_PATTERN, create_tar_file, _module_import_error, download_folder
 from sagemaker_core.main.utils import Unassigned
 from sagemaker_core.shapes import DataSource
@@ -105,13 +104,17 @@ class _LocalContainer(BaseModel):
     input_data_config: Optional[List[Channel]]
     environment: Optional[Dict[str, str]]
     hyper_parameters: Optional[Dict[str, str]]
-    sagemaker_session: Optional[Session]
+    sagemaker_session: Optional[Session] = None
     container_entrypoint: Optional[List[str]]
     container_arguments: Optional[List[str]]
 
     def model_post_init(self, __context: Any):
         """Post init method to perform custom validation and set default values."""
         self.hosts = [f"algo-{i}" for i in range(1, self.instance_count + 1)]
+        if self.environment is None:
+            self.environment = {}
+        if self.hyper_parameters is None:
+            self.hyper_parameters = {}
 
         for channel in self.input_data_config:
             if channel.data_source and channel.data_source.s3_data_source != Unassigned():
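
The None-to-empty-dict defaulting added above uses the standard pydantic v2 model_post_init hook. A standalone sketch of the same pattern, with an illustrative class name rather than the actual _LocalContainer:

from typing import Any, Dict, Optional

from pydantic import BaseModel


class ContainerConfig(BaseModel):
    # Optional mappings default to None so callers can omit them entirely.
    environment: Optional[Dict[str, str]] = None
    hyper_parameters: Optional[Dict[str, str]] = None

    def model_post_init(self, __context: Any):
        # Normalize None to empty dicts so later code can read and update them safely.
        if self.environment is None:
            self.environment = {}
        if self.hyper_parameters is None:
            self.hyper_parameters = {}


config = ContainerConfig()
assert config.environment == {} and config.hyper_parameters == {}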

src/sagemaker/modules/train/container_drivers/utils.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@
 TrainingJob - {training_job_name}
 """
 
-USER_CODE_PATH = "/opt/ml/input/data/sm_code"
+USER_CODE_PATH = "/opt/ml/input/data/code"
 SOURCE_CODE_JSON = "/opt/ml/input/data/sm_drivers/sourcecode.json"
 DISTRIBUTED_JSON = "/opt/ml/input/data/sm_drivers/distributed.json"
 
src/sagemaker/modules/train/model_trainer.py

Lines changed: 1 addition & 1 deletion
@@ -535,7 +535,7 @@ def train(
             shutil.copytree(SM_DRIVERS_LOCAL_PATH, drivers_dir.name, dirs_exist_ok=True)
 
             # If source code is provided, create a channel for the source code
-            # The source code will be mounted at /opt/ml/input/data/sm_code in the container
+            # The source code will be mounted at /opt/ml/input/data/code in the container
             if self.source_code.source_dir:
                 source_code_channel = self.create_input_data_channel(
                     channel_name=SM_CODE,
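
For reference, the source-code channel that train() builds here is conceptually a Channel named "code" backed by an S3 data source. A rough illustration using the Channel and DataSource shapes this code base already imports, plus an assumed S3DataSource shape and a hypothetical S3 URI (the real object comes from create_input_data_channel, not from this snippet):

from sagemaker.modules.configs import Channel
from sagemaker_core.shapes import DataSource, S3DataSource

code_channel = Channel(
    channel_name="code",  # SM_CODE -> mounted at /opt/ml/input/data/code
    data_source=DataSource(
        s3_data_source=S3DataSource(
            s3_data_type="S3Prefix",
            s3_uri="s3://example-bucket/example-job/code/",  # hypothetical URI
            s3_data_distribution_type="FullyReplicated",
        )
    ),
)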
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
# flake8: noqa
import argparse
import numpy as np
import os
import sys
import logging
import json
import shutil
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from pytorch_model_def import get_model


logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))
current_dir = os.path.dirname(os.path.abspath(__file__))
data_dir = "/opt/ml/input/data"


def get_train_data(train_dir):
    """
    Get the training data and convert to tensors
    """

    x_train = np.load(os.path.join(train_dir, "x_train.npy"))
    y_train = np.load(os.path.join(train_dir, "y_train.npy"))
    logger.info(f"x train: {x_train.shape}, y train: {y_train.shape}")

    return torch.from_numpy(x_train), torch.from_numpy(y_train)


def get_test_data(test_dir):
    """
    Get the testing data and convert to tensors
    """

    x_test = np.load(os.path.join(test_dir, "x_test.npy"))
    y_test = np.load(os.path.join(test_dir, "y_test.npy"))
    logger.info(f"x test: {x_test.shape}, y test: {y_test.shape}")

    return torch.from_numpy(x_test), torch.from_numpy(y_test)


def model_fn(model_dir):
    """
    Load the model for inference
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = get_model()
    model.load_state_dict(torch.load(model_dir + "/model.pth"))
    model.eval()
    return model.to(device)


def input_fn(request_body, request_content_type):
    """
    Deserialize and prepare the prediction input
    """

    if request_content_type == "application/json":
        request = json.loads(request_body)
        train_inputs = torch.tensor(request)
        return train_inputs


def predict_fn(input_data, model):
    """
    Apply model to the incoming request
    """

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    with torch.no_grad():
        return model(input_data.float()).numpy()[0]


def train():
    """
    Train the PyTorch model
    """
    # Directories: train, test and model
    train_dir = os.path.join(data_dir, "train")
    test_dir = os.path.join(data_dir, "test")
    model_dir = os.environ.get("SM_MODEL_DIR", os.path.join(current_dir, "data/model"))

    # Load the training and testing data
    x_train, y_train = get_train_data(train_dir)
    x_test, y_test = get_test_data(test_dir)
    train_ds = TensorDataset(x_train, y_train)

    # Training parameters - used to configure the training loop
    batch_size = 64
    epochs = 1
    learning_rate = 0.1
    logger.info(
        "batch_size = {}, epochs = {}, learning rate = {}".format(batch_size, epochs, learning_rate)
    )

    train_dl = DataLoader(train_ds, batch_size, shuffle=True)

    # Define the model, loss function and optimizer
    model = get_model()
    model = model.to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(epochs):
        for x_train_batch, y_train_batch in train_dl:
            y = model(x_train_batch.float())
            loss = criterion(y.flatten(), y_train_batch.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        epoch += 1
        logger.info(f"epoch: {epoch} -> loss: {loss}")

    # Test the model
    with torch.no_grad():
        y = model(x_test.float()).flatten()
        mse = ((y - y_test) ** 2).sum() / y_test.shape[0]
        print("\nTest MSE:", mse.numpy())

    # Save the model
    os.makedirs(model_dir, exist_ok=True)
    torch.save(model.state_dict(), model_dir + "/model.pth")
    inference_code_path = model_dir + "/code/"

    if not os.path.exists(inference_code_path):
        os.mkdir(inference_code_path)
        logger.info("Created a folder at {}!".format(inference_code_path))

    shutil.copy("local_training_script.py", inference_code_path)
    shutil.copy("pytorch_model_def.py", inference_code_path)
    logger.info("Saving models files to {}".format(inference_code_path))


if __name__ == "__main__":
    print("Running the training job ...\n")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train()
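
This new script is the training entry point the local-mode integ test exercises. A hypothetical sketch of how it might be launched with the local-mode ModelTrainer follows; the class and parameter names reflect my understanding of the sagemaker.modules ModelTrainer API and should be treated as assumptions, and the paths and image URI are placeholders, not the actual test values:

from sagemaker.modules.train import ModelTrainer
from sagemaker.modules.train.model_trainer import Mode
from sagemaker.modules.configs import SourceCode, InputData

# Hypothetical location for the script above and its .npy fixtures.
script_dir = "tests/data/modules/local_script_mode"

source_code = SourceCode(
    source_dir=script_dir,
    entry_script="local_training_script.py",
)

trainer = ModelTrainer(
    training_image="763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:2.3.0-cpu-py311",  # hypothetical DLC image
    source_code=source_code,
    training_mode=Mode.LOCAL_CONTAINER,  # run in a local Docker container instead of a SageMaker job
)

trainer.train(
    input_data_config=[
        InputData(channel_name="train", data_source=f"{script_dir}/data/train"),
        InputData(channel_name="test", data_source=f"{script_dir}/data/test"),
    ],
    wait=True,
)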
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
# flake8: noqa
import torch
import torch.nn as nn


class NeuralNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(8, 8)
        self.fc2 = nn.Linear(8, 6)
        self.fc3 = nn.Linear(6, 1)

    def forward(self, x):
        x = torch.tanh(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        x = self.fc3(x)
        return x


def get_model():

    model = NeuralNet()
    return model
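
The train and test channels that local_training_script.py reads must contain x_*.npy/y_*.npy arrays with 8 features per row to match NeuralNet's first layer. A hypothetical helper for generating such fixtures; the function name, sample counts, and synthetic target are illustrative only and not part of the commit:

import os
import numpy as np


def generate_fixtures(out_dir: str, n_train: int = 256, n_test: int = 64) -> None:
    rng = np.random.default_rng(0)
    for split, n in (("train", n_train), ("test", n_test)):
        split_dir = os.path.join(out_dir, split)
        os.makedirs(split_dir, exist_ok=True)
        x = rng.normal(size=(n, 8)).astype(np.float32)  # 8 features, matching nn.Linear(8, 8)
        y = x.sum(axis=1).astype(np.float32)  # simple synthetic regression target
        np.save(os.path.join(split_dir, f"x_{split}.npy"), x)
        np.save(os.path.join(split_dir, f"y_{split}.npy"), y)


generate_fixtures("data")  # writes data/train/x_train.npy, data/train/y_train.npy, etc.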

tests/data/modules/script_mode/custom_script.py

Lines changed: 2 additions & 3 deletions
@@ -132,9 +132,8 @@ def train():
         os.mkdir(inference_code_path)
         logger.info("Created a folder at {}!".format(inference_code_path))
 
-    code_dir = os.environ.get("SM_CHANNEL_CODE", current_dir)
-    shutil.copy(os.path.join(code_dir, "custom_script.py"), inference_code_path)
-    shutil.copy(os.path.join(code_dir, "pytorch_model_def.py"), inference_code_path)
+    shutil.copy("custom_script.py", inference_code_path)
+    shutil.copy("pytorch_model_def.py", inference_code_path)
     logger.info("Saving models files to {}".format(inference_code_path))
 
 

0 commit comments
