Skip to content

Commit 3e9ba1f

Browse files
yzhu0, ahsan-z-khan, metrizable
authored and committed
feat: friendly names for short URIs (#2087)
* friendly names for short URIs
* friendly names for short URIs
* friendly name for short uri fix

Co-authored-by: Ahsan Khan <[email protected]>
Co-authored-by: Eric Johnson <[email protected]>
1 parent 77a94e5 commit 3e9ba1f

File tree

2 files changed

+29
-8
lines changed

2 files changed

+29
-8
lines changed

src/sagemaker/lineage/visualizer.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
"""This module contains functionality to display lineage data."""
1414
from __future__ import absolute_import
1515
import logging
16-
import os
17-
from urllib.parse import urlparse
1816
import pandas as pd
1917

2018
from sagemaker.lineage.association import Association
@@ -303,13 +301,12 @@ def _get_friendly_name(self, name, arn, entity_type):
303301
if entity_type == "artifact":
304302
artifact = self._session.sagemaker_client.describe_artifact(ArtifactArn=arn)
305303
uri = artifact["Source"]["SourceUri"]
306-
# try to get file name from url
307-
uri_parsed = urlparse(uri)
308-
name = os.path.basename(uri_parsed.path)
309304

310-
# directory?
311-
ext = os.path.splitext(name)[1]
312-
if not ext or len(ext) > 3:
305+
# shorten the uri if the length is more than 48 (5-char prefix + "..." + last 40 chars),
306+
# e.g. s3://flintstone-end-to-end-tests-gamma-us-west-2-069083975568/results/
307+
# canary-auto-1608761252626/preprocessed-data/tuning_data/train.txt
308+
# becomes s3://.../preprocessed-data/tuning_data/train.txt
309+
if len(uri) > 48:
313310
name = uri[:5] + "..." + uri[len(uri) - 40 :]
314311

315312
# if not then use the full uri

tests/unit/sagemaker/lineage/test_visualizer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,30 @@ def vizualizer(sagemaker_session):
3030
return visualizer.LineageTableVisualizer(sagemaker_session)
3131

3232

33+
def test_friendly_name_short_uri(vizualizer, sagemaker_session):
34+
uri = "s3://f-069083975568/train.txt"
35+
arn = "test_arn"
36+
sagemaker_session.sagemaker_client.describe_artifact.return_value = {
37+
"Source": {"SourceUri": uri, "SourceTypes": ""}
38+
}
39+
actual_name = vizualizer._get_friendly_name(name=None, arn=arn, entity_type="artifact")
40+
assert uri == actual_name
41+
42+
43+
def test_friendly_name_long_uri(vizualizer, sagemaker_session):
44+
uri = (
45+
"s3://flintstone-end-to-end-tests-gamma-us-west-2-069083975568/results/canary-auto-1608761252626/"
46+
"preprocessed-data/tuning_data/train.txt"
47+
)
48+
arn = "test_arn"
49+
sagemaker_session.sagemaker_client.describe_artifact.return_value = {
50+
"Source": {"SourceUri": uri, "SourceTypes": ""}
51+
}
52+
actual_name = vizualizer._get_friendly_name(name=None, arn=arn, entity_type="artifact")
53+
expected_name = "s3://.../preprocessed-data/tuning_data/train.txt"
54+
assert expected_name == actual_name
55+
56+
3357
def test_trial_component_name(sagemaker_session, vizualizer):
3458
name = "tc-name"
3559

0 commit comments

Comments
 (0)