Skip to content

change: add modifier for s3_input class #1699

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 13, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions src/sagemaker/cli/compatibility/v2/modifiers/training_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Classes to modify Predictor code to be compatible
with version 2.0 and later of the SageMaker Python SDK.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't forget to update this

"""
from __future__ import absolute_import

from sagemaker.cli.compatibility.v2.modifiers import matching
from sagemaker.cli.compatibility.v2.modifiers.modifier import Modifier

# Name of the legacy class that the modifiers in this module search for and rename.
BASE_S3_INPUT = "s3_input"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe S3_INPUT_NAME instead? I don't think "base" gives much info here.

# NOTE(review): no code visible in this module references this constant — confirm it is
# needed before relying on it.
SESSION = "session"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this looks to be unused

# Maps the legacy class name to the namespaces it may be referenced through. The mapping
# is handed to ``matching.matches_any`` for constructor calls, and its tuple is also used
# to check the module of ``from ... import s3_input`` statements. ``sagemaker.inputs`` is
# listed so that references made through that module are recognised as well.
S3_INPUT = {"s3_input": ("sagemaker", "sagemaker.inputs", "sagemaker.session")}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also include sagemaker.inputs?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the namespaces and added a test case.



class TrainingInputConstructorRefactor(Modifier):
"""A class to refactor *s3_input class."""

def node_should_be_modified(self, node):
"""Checks if the ``ast.Call`` node instantiates a class of interest.

This looks for the following calls:

- ``sagemaker.s3_input``
- ``sagemaker.session.s3_input``
- ``s3_input``
Comment on lines +35 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- ``sagemaker.s3_input``
- ``sagemaker.session.s3_input``
- ``s3_input``
- ``sagemaker.s3_input``
- ``sagemaker.inputs.s3_input``
- ``sagemaker.session.s3_input``
- ``s3_input``


Args:
node (ast.Call): a node that represents a function call. For more,
see https://docs.python.org/3/library/ast.html#abstract-grammar.

Returns:
bool: If the ``ast.Call`` instantiates a class of interest.
"""
return matching.matches_any(node, S3_INPUT)

def modify_node(self, node):
    """Points the matched ``ast.Call`` at ``TrainingInput`` instead of ``s3_input``.

    When the call is matched by name, the called name itself is replaced. When it is
    matched as an attribute access, only the final attribute is replaced and the rest
    of the dotted path is left untouched. A node matching neither form is not changed.

    Args:
        node (ast.Call): a node that represents an ``s3_input`` constructor call.
    """
    if matching.matches_name(node, BASE_S3_INPUT):
        node.func.id = "TrainingInput"
    elif matching.matches_attr(node, BASE_S3_INPUT):
        node.func.attr = "TrainingInput"


def _rename_class(node):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if you need to make this a standalone function, unless maybe if you're refactoring it so that it can be used elsewhere too

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, there's no need to make it a function. I've moved the logic to modify_node

"""Renames the s3_input class to TrainingInput"""
if matching.matches_name(node, BASE_S3_INPUT):
node.func.id = "TrainingInput"
elif matching.matches_attr(node, BASE_S3_INPUT):
node.func.attr = "TrainingInput"


class TrainingInputImportFromRenamer(Modifier):
"""A class to update import statements of ``s3_input``."""

def node_should_be_modified(self, node):
"""Checks if the import statement imports ``RealTimePredictor`` from the correct module.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't forget to update the docstring


Args:
node (ast.ImportFrom): a node that represents a ``from ... import ... `` statement.
For more, see https://docs.python.org/3/library/ast.html#abstract-grammar.

Returns:
bool: If the import statement imports ``s3_input`` from the correct module.
"""
return node.module in S3_INPUT[BASE_S3_INPUT] and any(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seeing this, I think it might be better to define the list of namespaces separately. S3_INPUT[BASE_S3_INPUT] is a little unintuitive to read on its own here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I got rid of the dict and used namespaces to check the module.

name.name == BASE_S3_INPUT for name in node.names
)

def modify_node(self, node):
    """Changes the imported name ``s3_input`` to ``TrainingInput``.

    Only the imported name is rewritten, so an ``as`` alias on the import is preserved.
    Every other name imported by the same statement is left untouched. In particular, a
    co-imported ``session`` is not renamed to ``inputs``: nothing visible in this module
    rewrites the code that uses that name, so renaming the import alone would leave
    dangling references in the migrated file.

    Args:
        node (ast.ImportFrom): a node that represents a ``from ... import ... `` statement.
            For more, see https://docs.python.org/3/library/ast.html#abstract-grammar.
    """
    for alias in node.names:
        if alias.name == BASE_S3_INPUT:
            alias.name = "TrainingInput"
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import pasta
import pytest

from sagemaker.cli.compatibility.v2.modifiers import training_input
from tests.unit.sagemaker.cli.compatibility.v2.modifiers.ast_converter import ast_call, ast_import


@pytest.fixture
def constructors():
    """Source snippets calling ``s3_input`` through each spelling the tests expect to match."""
    call_args = "(s3_data='s3://a')"
    callees = ("sagemaker.session.s3_input", "sagemaker.s3_input", "s3_input")
    return tuple(callee + call_args for callee in callees)


@pytest.fixture
def import_statements():
    """``from ... import s3_input`` statements the tests expect the renamer to match."""
    modules = ("sagemaker.session", "sagemaker")
    return tuple("from {} import s3_input".format(module) for module in modules)


def test_constructor_node_should_be_modified(constructors):
    """Every ``s3_input`` call spelling from the fixture is flagged for modification."""
    refactor = training_input.TrainingInputConstructorRefactor()
    call_nodes = [ast_call(source) for source in constructors]
    for call_node in call_nodes:
        assert refactor.node_should_be_modified(call_node)


def test_constructor_node_should_be_modified_random_call():
    """A call to some other class (``FileSystemInput``) is not flagged."""
    unrelated_call = ast_call("FileSystemInput()")
    refactor = training_input.TrainingInputConstructorRefactor()
    assert not refactor.node_should_be_modified(unrelated_call)


def test_constructor_modify_node():
    """Bare and ``sagemaker.``-qualified calls are both rewritten to ``TrainingInput``."""
    refactor = training_input.TrainingInputConstructorRefactor()

    # (source before modification, expected source after modification)
    cases = (
        ("s3_input(s3_data='s3://a')", "TrainingInput(s3_data='s3://a')"),
        ("sagemaker.s3_input(s3_data='s3://a')", "sagemaker.TrainingInput(s3_data='s3://a')"),
    )
    for source, expected in cases:
        call_node = ast_call(source)
        refactor.modify_node(call_node)
        assert expected == pasta.dump(call_node)


def test_import_from_node_should_be_modified_training_input(import_statements):
    """Every ``s3_input`` import statement from the fixture is flagged for modification."""
    renamer = training_input.TrainingInputImportFromRenamer()
    import_nodes = [ast_import(statement) for statement in import_statements]
    for import_node in import_nodes:
        assert renamer.node_should_be_modified(import_node)


def test_import_from_node_should_be_modified_random_import():
    """An import from ``sagemaker`` that does not name ``s3_input`` is not flagged."""
    unrelated_import = ast_import("from sagemaker import Session")
    renamer = training_input.TrainingInputImportFromRenamer()
    assert not renamer.node_should_be_modified(unrelated_import)


def test_import_from_modify_node():
    """``s3_input`` is renamed in the import statement, with any ``as`` alias kept."""
    renamer = training_input.TrainingInputImportFromRenamer()

    # (source before modification, expected source after modification)
    cases = (
        ("from sagemaker import s3_input", "from sagemaker import TrainingInput"),
        (
            "from sagemaker.inputs import s3_input as training_input",
            "from sagemaker.inputs import TrainingInput as training_input",
        ),
    )
    for source, expected_result in cases:
        import_node = ast_import(source)
        renamer.modify_node(import_node)
        assert expected_result == pasta.dump(import_node)