@@ -98,25 +98,42 @@ def __init__(
98
98
):
99
99
"""Construct a `Run` instance.
100
100
101
- NOTE: It is not recommended to initialize a Run object (using the constructor)
102
- in a Sagemaker job (e.g. training job, etc.) script.
103
- Instead please follow the example below to 1). initialize a Run object in a notebook,
104
- 2). create a SageMaker job inside the Run object's context (i.e. the `with` statement),
105
- and 3). load the same Run object in a job script via `load_run()`.
101
+ SageMaker Experiments automatically tracks the inputs, parameters, configurations,
102
+ and results of your iterations as runs.
103
+ You can assign, group, and organize these runs into experiments.
104
+ You can also create, compare, and evaluate runs.
105
+
106
+ The code sample below shows how to initialize a run, log parameters to the Run object
107
+ and invoke a training job under the context of this Run object, which automatically
108
+ passes the run's ``experiment_config`` (including the experiment name, run name etc.)
109
+ to the training job.
110
+
111
+ Note:
112
+ All log methods (e.g. ``log_parameter``, ``log_metric``, etc.) have to be called within
113
+ the run context (i.e. the ``with`` statement). Otherwise, a ``RuntimeError`` is raised.
106
114
107
115
.. code:: python
108
116
109
- # In a notebook
110
117
with Run(experiment_name="my-exp", run_name="my-run", ...) as run:
111
118
run.log_parameter(...)
112
119
...
113
120
estimator.fit(job_name="my-job") # Create a training job
114
121
122
+ In order to reuse an existing run to log extra data, ``load_run`` is recommended.
123
+ The code snippet below displays how to load the run initialized above
124
+ in a custom training job script, where no ``run_name`` or ``experiment_name``
125
+ is passed in, as they are automatically retrieved from the experiment config
126
+ in the job environment.
127
+
128
+ Note:
129
+ Instead of the ``Run`` constructor, it is recommended to use ``load_run``
130
+ in a job script to load the existing run created before the job launch.
131
+ Otherwise, a new run may be created each time you launch a job.
132
+
115
133
.. code:: python
116
134
117
- # In a job script
118
135
with load_run() as run:
119
- run.log_parameters (...)
136
+ run.log_metric (...)
120
137
...
121
138
122
139
Args:
@@ -171,9 +188,9 @@ def __init__(
171
188
)
172
189
if is_existed :
173
190
logger .info (
174
- "The Run (%s) under experiment (%s) already exists. Loading it. "
191
+ "The run (%s) under experiment (%s) already exists. Loading it. "
175
192
"Note: sagemaker.experiments.load_run is recommended to use when "
176
- "the desired Run already exists." ,
193
+ "the desired run already exists." ,
177
194
self .run_name ,
178
195
self .experiment_name ,
179
196
)
@@ -197,7 +214,7 @@ def __init__(
197
214
198
215
@property
199
216
def experiment_config (self ) -> dict :
200
- """Get experiment config from Run attributes."""
217
+ """Get experiment config from run attributes."""
201
218
return {
202
219
EXPERIMENT_NAME : self .experiment_name ,
203
220
TRIAL_NAME : self .run_group_name ,
@@ -242,10 +259,8 @@ def log_metric(
242
259
"""Record a custom scalar metric value for this run.
243
260
244
261
Note:
245
- 1. This method is for manual custom metrics, for automatic metrics see the
246
- `enable_sagemaker_metrics` parameter on the `estimator` class.
247
- 2. Metrics logged with this method will only appear in SageMaker when this method
248
- is called from a training job host.
262
+ This method is for manual custom metrics, for automatic metrics see the
263
+ ``enable_sagemaker_metrics`` parameter on the ``estimator`` class.
249
264
250
265
Args:
251
266
name (str): The name of the metric.
@@ -466,7 +481,7 @@ def log_file(
466
481
name (str): The name of the artifact (default: None).
467
482
media_type (str): The MediaType (MIME type) of the file.
468
483
If not specified, this library will attempt to infer the media type
469
- from the file extension of `file_path`.
484
+ from the file extension of ``file_path``.
470
485
is_output (bool): Determines direction of association to the
471
486
run. Defaults to True (output artifact).
472
487
If set to False then represented as input association.
@@ -510,7 +525,7 @@ def _is_input_valid(input_type, field_name, field_value) -> bool:
510
525
"""Check if the input is valid or not
511
526
512
527
Args:
513
- input_type (str): The type of the input, one of `parameter`, `metric`.
528
+ input_type (str): The type of the input, one of ``parameter``, ``metric``.
514
529
field_name (str): The name of the field to be checked.
515
530
field_value (str or int or float): The value of the field to be checked.
516
531
"""
@@ -611,7 +626,7 @@ def _extract_run_name_from_tc_name(trial_component_name: str, experiment_name: s
611
626
"""Extract the user supplied run name from a trial component name.
612
627
613
628
Args:
614
- trial_component_name (str): The name of a Run trial component.
629
+ trial_component_name (str): The name of a run trial component.
615
630
experiment_name (str): The experiment_name supplied by the user,
616
631
which was prepended to the run_name to generate the trial_component_name.
617
632
@@ -622,13 +637,13 @@ def _extract_run_name_from_tc_name(trial_component_name: str, experiment_name: s
622
637
623
638
@staticmethod
624
639
def _append_run_tc_label_to_tags (tags : Optional [List [Dict [str , str ]]] = None ) -> list :
625
- """Append the Run TrialComponent label to tags used to create a trial component.
640
+ """Append the run trial component label to tags used to create a trial component.
626
641
627
642
Args:
628
643
tags (List[Dict[str, str]]): The tags supplied by users to initialize a Run object.
629
644
630
645
Returns:
631
- list: The updated tags with the appended Run TrialComponent label.
646
+ list: The updated tags with the appended run trial component label.
632
647
"""
633
648
if not tags :
634
649
tags = []
@@ -659,7 +674,7 @@ def __enter__(self):
659
674
self ._trial_component .start_time = start_time
660
675
self ._trial_component .status = _api_types .TrialComponentStatus (
661
676
primary_status = _TrialComponentStatusType .InProgress .value ,
662
- message = "Within a Run context" ,
677
+ message = "Within a run context" ,
663
678
)
664
679
# Save the start_time and status changes to backend
665
680
self ._trial_component .save ()
@@ -699,13 +714,61 @@ def load_run(
699
714
experiment_name : Optional [str ] = None ,
700
715
sagemaker_session : Optional ["Session" ] = None ,
701
716
) -> Run :
702
- """Load a Run by the run name or from the job environment.
717
+ """Load an existing run.
718
+
719
+ In order to reuse an existing run to log extra data, ``load_run`` is recommended.
720
+ It can be used in several ways:
721
+
722
+ 1. Use ``load_run`` by explicitly passing in ``run_name`` and ``experiment_name``.
723
+
724
+ If ``run_name`` and ``experiment_name`` are passed in, they are honored over
725
+ the default experiment config in the job environment or the run context
726
+ (i.e. within the ``with`` block).
727
+
728
+ Note:
729
+ Both ``run_name`` and ``experiment_name`` should be supplied to make this usage work.
730
+ Otherwise, you may get a ``ValueError``.
731
+
732
+ .. code:: python
733
+
734
+ with load_run(experiment_name="my-exp", run_name="my-run") as run:
735
+ run.log_metric(...)
736
+ ...
737
+
738
+ 2. Use the ``load_run`` in a job script without supplying ``run_name`` and ``experiment_name``.
739
+
740
+ In this case, the default experiment config (specified when creating the job) is fetched
741
+ from the job environment to load the run.
742
+
743
+ .. code:: python
744
+
745
+ # In a job script
746
+ with load_run() as run:
747
+ run.log_metric(...)
748
+ ...
749
+
750
+ 3. Use the ``load_run`` in a notebook within a run context (i.e. the ``with`` block)
751
+ but without supplying ``run_name`` and ``experiment_name``.
752
+
753
+ Every time we call ``with Run(...) as run1:``, the initialized ``run1`` is tracked
754
+ in the run context. Then when we call ``load_run()`` under this with statement, the ``run1``
755
+ in the context is loaded by default.
756
+
757
+ .. code:: python
758
+
759
+ # In a notebook
760
+ with Run(experiment_name="my-exp", run_name="my-run", ...) as run1:
761
+ run1.log_parameter(...)
762
+
763
+ with load_run() as run2: # run2 is the same object as run1
764
+ run2.log_metric(...)
765
+ ...
703
766
704
767
Args:
705
- run_name (str): The name of the Run to be loaded (default: None).
706
- If it is None, the `RunName` in the `ExperimentConfig` of the job will be
707
- fetched to load the Run .
708
- experiment_name (str): The name of the Experiment that the to be loaded Run
768
+ run_name (str): The name of the run to be loaded (default: None).
769
+ If it is None, the ``RunName`` in the ``ExperimentConfig`` of the job will be
770
+ fetched to load the run.
771
+ experiment_name (str): The name of the Experiment that the run to be loaded
709
772
is associated with (default: None).
710
773
Note: the experiment_name must be supplied along with a valid run_name.
711
774
Otherwise, it will be ignored.
@@ -728,7 +791,7 @@ def load_run(
728
791
"run_name is explicitly supplied in load_run, "
729
792
"which will be prioritized to load the Run object. "
730
793
"In other words, the run name in the experiment config, fetched from the "
731
- "job environment or the current Run context, will be ignored."
794
+ "job environment or the current run context, will be ignored."
732
795
)
733
796
else :
734
797
exp_config = get_tc_and_exp_config_from_job_env (
@@ -767,7 +830,7 @@ def list_runs(
767
830
sort_by : SortByType = SortByType .CREATION_TIME ,
768
831
sort_order : SortOrderType = SortOrderType .DESCENDING ,
769
832
) -> list :
770
- """Return a list of `Run` objects matching the given criteria.
833
+ """Return a list of ``Run`` objects matching the given criteria.
771
834
772
835
Args:
773
836
experiment_name (str): Only Run objects related to the specified experiment
@@ -787,7 +850,7 @@ def list_runs(
787
850
sort_order (SortOrderType): One of ASCENDING, or DESCENDING (default: DESCENDING).
788
851
789
852
Returns:
790
- list: A list of `Run` objects.
853
+ list: A list of ``Run`` objects.
791
854
"""
792
855
tc_summaries = _TrialComponent .list (
793
856
experiment_name = experiment_name ,
0 commit comments