Skip to content
This repository was archived by the owner on Jul 19, 2023. It is now read-only.

Polish Helm charts #76

Merged
merged 13 commits into from
Jan 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions hack/charts/README.md

This file was deleted.

10 changes: 7 additions & 3 deletions hack/charts/batch-transform-jobs/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
apiVersion: v1
name: sagemaker-k8s-trainingjob
version: 0.1.0
description: A Helm chart for deploying the SageMaker Batch Transform Job for Kubernetes.
name: amazon-sagemaker-batchtransformjob
version: 1.0.0
description: A Helm chart for deploying a SageMaker Batch Transform Job from Kubernetes.
maintainers:
- name: Gautam Kumar
email: [email protected]
- name: Cade Daniel
email: [email protected]
- name: Nicholas Thomson
email: [email protected]
- name: Meghna Baijal
email: [email protected]
8 changes: 2 additions & 6 deletions hack/charts/batch-transform-jobs/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
Thanks for installing the {{ .Chart.Name }}.
Successfully deployed a new batch transform job chart.

Your release is named {{ .Release.Name }}.

To learn more about the release, try:

$ helm status {{ .Release.Name }}
$ helm get {{ .Release.Name }}

To use the latest features, try adding attributes to yaml by generating it:

$ helm install <directory of charts> --debug --dry-run
$ helm get all {{ .Release.Name }}
26 changes: 15 additions & 11 deletions hack/charts/batch-transform-jobs/templates/batch-transform-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@ metadata:
name: {{ .Values.name }}
spec:
region: {{ .Values.spec.region }}
modelName: {{ .Values.spec.modelname | quote }}
modelName: {{ .Values.spec.modelName | quote }}
transformInput:
dataSource:
s3DataSource:
s3DataType: {{ .Values.spec.inputdataconfig.s3DataType | default "S3Prefix" }}
s3Uri: {{ .Values.spec.inputdataconfig.s3uri }}
contentType: {{ .contenttype | default "text/csv" }}
dataSource:
s3DataSource:
s3DataType: {{ .Values.spec.inputDataConfig.s3DataType | default "S3Prefix" }}
s3Uri: {{ .Values.spec.inputDataConfig.s3Uri }}
contentType: {{ .contentType | default "text/csv" }}
transformOutput:
s3OutputPath: {{ .Values.spec.outputpath }}
s3OutputPath: {{ .Values.spec.outputPath }}
transformResources:
instanceCount: {{ .Values.spec.instancecount }}
instanceType: {{ .Values.spec.instancetype }}
instanceCount: {{ .Values.spec.instanceCount }}
instanceType: {{ .Values.spec.instanceType }}
{{- if .Values.spec.tags }}
tags:
- key: test-key
value: test-value
{{- range $key, $value := .Values.spec.tags }}
- name: {{ $key }}
value: {{ $value | quote }}
{{- end }}
{{- end }}
16 changes: 8 additions & 8 deletions hack/charts/batch-transform-jobs/values.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
name: xgboost-mnist-batch-transform
spec:
instancecount: 1
instancetype: ml.m4.xlarge
region: us-west-2
modelname: xgboost-mnist-model
inputdataconfig:
s3uri: s3://sagemaker-sample-data-us-west-2/batch-transform/mnist-1000-samples
s3datatype: S3Prefix
outputpath: s3://my-bucket/batch_transform/output
instanceCount: 1
instanceType: ml.m4.xlarge
region: us-west-2
modelName: xgboost-mnist-model
inputDataConfig:
s3Uri: s3://sagemaker-sample-data-us-west-2/batch-transform/mnist-1000-samples
s3DataType: S3Prefix
outputPath: s3://my-bucket/batch_transform/output

10 changes: 7 additions & 3 deletions hack/charts/hyperparameter-tuning-jobs/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
apiVersion: v1
name: sagemaker-k8s-trainingjob
version: 0.1.0
description: A Helm chart for deploying the SageMaker Hyperparameter tuning job for Kubernetes.
name: amazon-sagemaker-hyperparametertuningjob
version: 1.0.0
description: A Helm chart for deploying a SageMaker HyperParameter tuning job from Kubernetes.
maintainers:
- name: Gautam Kumar
email: [email protected]
- name: Cade Daniel
email: [email protected]
- name: Nicholas Thomson
email: [email protected]
- name: Meghna Baijal
email: [email protected]
8 changes: 2 additions & 6 deletions hack/charts/hyperparameter-tuning-jobs/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
Thanks for installing the {{ .Chart.Name }}.
Successfully deployed a new hyperparameter tuning job chart.

Your release is named {{ .Release.Name }}.

To learn more about the release, try:

$ helm status {{ .Release.Name }}
$ helm get {{ .Release.Name }}

To use the latest features, try adding attributes to yaml by generating it:

$ helm install <directory of charts> --debug --dry-run
$ helm get all {{ .Release.Name }}
140 changes: 72 additions & 68 deletions hack/charts/hyperparameter-tuning-jobs/templates/hpo-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,72 +3,76 @@ kind: HyperparameterTuningJob
metadata:
name: {{ .Values.name }}
spec:
hyperParameterTuningJobConfig:
resourceLimits:
maxNumberOfTrainingJobs: {{ .Values.spec.hyperparametertuningjobconfig.maxnumberoftrainingjobs}}
maxParallelTrainingJobs: {{ .Values.spec.hyperparametertuningjobconfig.maxparalleltrainingjobs }}
strategy: {{ .Values.spec.hyperparametertuningjobconfig.strategy | quote }}
region: {{ .Values.spec.region }}
tags:
- key: test-key
value: test-value
hyperParameterTuningJobConfig:
strategy: {{ .Values.spec.hyperparametertuningjobconfig.strategy | quote }}
hyperParameterTuningJobObjective:
type: {{ .Values.spec.hyperparametertuningjobconfig.hyperparametertuningjobobjective.type }}
metricName: {{ .Values.spec.hyperparametertuningjobconfig.hyperparametertuningjobobjective.metricname }}
resourceLimits:
maxNumberOfTrainingJobs: {{ .Values.spec.hyperparametertuningjobconfig.maxnumberoftrainingjobs}}
maxParallelTrainingJobs: {{ .Values.spec.hyperparametertuningjobconfig.maxparalleltrainingjobs }}
parameterRanges:
integerParameterRanges:
- name: {{ .Values.spec.hyperparametertuningjobconfig.integerparameterranges.name }}
minValue: {{ .Values.spec.hyperparametertuningjobconfig.integerparameterranges.minvalue | quote }}
maxValue: {{ .Values.spec.hyperparametertuningjobconfig.integerparameterranges.maxvalue | quote }}
scalingType: {{ .Values.spec.hyperparametertuningjobconfig.integerparameterranges.scalingtype }}
# Add support for next two ranges in values.yaml
continuousParameterRanges: []
categoricalParameterRanges: []
trainingJobEarlyStoppingType: Auto
trainingJobDefinition:
staticHyperParameters:
{{- range $key, $value := .Values.spec.trainingjobconfig.hyperparameters }}
- name: {{ $key }}
value: {{ $value | quote }}
{{- end }}
algorithmSpecification:
trainingImage: {{ .Values.spec.image }}
trainingInputMode: {{ .Values.spec.traininginputmode }}
roleArn: {{ .Values.spec.rolearn }}
inputDataConfig:
{{- range .Values.spec.inputdataconfig }}
- channelName: {{ .channel }}
dataSource:
{{- if .s3 }}
s3DataSource:
s3DataType: {{ .s3.s3datatype | default "S3Prefix" }}
s3Uri: {{ .s3.s3uri }}
s3DataDistributionType: {{ .s3.s3datadistributiontype | default "FullyReplicated" }}
{{- end }}
{{- if .filesystem }}
fileSystemDataSource:
fileSystemId: {{ .filesystem.filesystemid }}
fileSystemAccessMode: {{ .filesystemaccessmode | default "ro" }}
fileSystemType: {{ .filesystem.filesystemtype }}
directoryPath: {{ .filesystem.directorypath }}
{{- end }}
contentType: {{ .contenttype | default "text/csv" }}
compressionType: {{ .compressiontype | default "None" }}
recordWrapperType: {{ .recordwrappertype | default "None" }}
inputMode: {{ .inputmode | default "File" }}
hyperParameterTuningJobConfig:
resourceLimits:
maxNumberOfTrainingJobs: {{ .Values.spec.hyperparameterTuningJobConfig.maxNumberOfTrainingJobs}}
maxParallelTrainingJobs: {{ .Values.spec.hyperparameterTuningJobConfig.maxParallelTrainingJobs }}
strategy: {{ .Values.spec.hyperparameterTuningJobConfig.strategy | quote }}
region: {{ .Values.spec.region }}
hyperParameterTuningJobConfig:
strategy: {{ .Values.spec.hyperparameterTuningJobConfig.strategy | quote }}
hyperParameterTuningJobObjective:
type: {{ .Values.spec.hyperparameterTuningJobConfig.hyperparameterTuningJobObjective.type }}
metricName: {{ .Values.spec.hyperparameterTuningJobConfig.hyperparameterTuningJobObjective.metricName }}
resourceLimits:
maxNumberOfTrainingJobs: {{ .Values.spec.hyperparameterTuningJobConfig.maxNumberOfTrainingJobs}}
maxParallelTrainingJobs: {{ .Values.spec.hyperparameterTuningJobConfig.maxParallelTrainingJobs }}
parameterRanges:
integerParameterRanges:
- name: {{ .Values.spec.hyperparameterTuningJobConfig.integerParameterRanges.name }}
minValue: {{ .Values.spec.hyperparameterTuningJobConfig.integerParameterRanges.minValue | quote }}
maxValue: {{ .Values.spec.hyperparameterTuningJobConfig.integerParameterRanges.maxValue | quote }}
scalingType: {{ .Values.spec.hyperparameterTuningJobConfig.integerParameterRanges.scalingType }}
# Add support for next two ranges in values.yaml
continuousParameterRanges: []
categoricalParameterRanges: []
Comment on lines +27 to +28
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a TODO? @cadedaniel, can you confirm that the spec itself will work and that only the Helm chart needs polishing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, Nick made a task for these: https://issues.amazon.com/issues/P32170625

trainingJobEarlyStoppingType: Auto
trainingJobDefinition:
staticHyperParameters:
{{- range $key, $value := .Values.spec.trainingJobConfig.hyperparameters }}
- name: {{ $key }}
value: {{ $value | quote }}
{{- end }}
algorithmSpecification:
trainingImage: {{ .Values.spec.image }}
trainingInputMode: {{ .Values.spec.trainingInputMode }}
roleArn: {{ .Values.spec.roleArn }}
inputDataConfig:
{{- range .Values.spec.inputDataConfig }}
- channelName: {{ .channel }}
dataSource:
{{- if .s3 }}
s3DataSource:
s3DataType: {{ .s3.s3DataType | default "S3Prefix" }}
s3Uri: {{ .s3.s3Uri }}
s3DataDistributionType: {{ .s3.s3DataDistributionType | default "FullyReplicated" }}
{{- end }}
outputDataConfig:
s3OutputPath: {{ .Values.spec.outputpath }}
resourceConfig:
instanceType: {{ .Values.spec.instancetype }}
instanceCount: {{ .Values.spec.instancecount }}
volumeSizeInGB: {{ .Values.spec.volumesize }}
stoppingCondition:
maxRuntimeInSeconds: {{ .Values.spec.maxruntimeinseconds }}
enableNetworkIsolation: {{ .Values.spec.enablenetworkisolation }}
enableInterContainerTrafficEncryption: {{ .Values.spec.enableintercontainertrafficencryption }}
{{- if .fileSystem }}
fileSystemDataSource:
fileSystemId: {{ .fileSystem.fileSystemID }}
fileSystemAccessMode: {{ .fileSystemAccessMode | default "ro" }}
fileSystemType: {{ .fileSystem.fileSystemType }}
directoryPath: {{ .fileSystem.directoryPath }}
{{- end }}
contentType: {{ .contentType | default "text/csv" }}
compressionType: {{ .compressionType | default "None" }}
recordWrapperType: {{ .recordWrapperType | default "None" }}
inputMode: {{ .inputMode | default "File" }}
{{- end }}
outputDataConfig:
s3OutputPath: {{ .Values.spec.outputPath }}
resourceConfig:
instanceType: {{ .Values.spec.instanceType }}
instanceCount: {{ .Values.spec.instanceCount }}
volumeSizeInGB: {{ .Values.spec.volumeSizeInGB }}
stoppingCondition:
maxRuntimeInSeconds: {{ .Values.spec.maxRuntimeInSeconds }}
enableNetworkIsolation: {{ .Values.spec.enableNetworkIsolation }}
enableInterContainerTrafficEncryption: {{ .Values.spec.enableInterContainerTrafficEncryption }}
{{- if .Values.spec.tags }}
tags:
{{- range $key, $value := .Values.spec.tags }}
- name: {{ $key }}
value: {{ $value | quote }}
{{- end }}
{{- end }}
110 changes: 55 additions & 55 deletions hack/charts/hyperparameter-tuning-jobs/values.yaml
Original file line number Diff line number Diff line change
@@ -1,57 +1,57 @@
name: xgboost-mnist-hpo
spec:
instancecount: 1
instancetype: ml.m4.xlarge
volumesize: 35
maxruntimeinseconds: 86400
rolearn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole
region: us-west-2
image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1
enablenetworkisolation: true
enableintercontainertrafficencryption: false
traininginputmode: File
hyperparametertuningjobconfig:
maxnumberoftrainingjobs: 10
maxparalleltrainingjobs: 10
strategy: Bayesian
hyperparametertuningjobobjective:
type: Minimize
metricname: validation:error
integerparameterranges:
name: num_round
minvalue: 10
maxvalue: 20
scalingtype: Linear
trainingjobconfig:
hyperparameters:
base_score: '0.5'
booster: gbtree
csv_weights: '0'
dsplit: row
grow_policy: depthwise
lambda_bias: '0.0'
max_bin: '256'
max_leaves: '0'
normalize_type: tree
objective: reg:linear
one_drop: '0'
prob_buffer_row: '1.0'
process_type: default
rate_drop: '0.0'
refresh_leaf: '1'
sample_type: uniform
scale_pos_weight: '1.0'
silent: '0'
sketch_eps: '0.03'
skip_drop: '0.0'
tree_method: auto
tweedie_variance_power: '1.5'
updater: grow_colmaker,prune
inputdataconfig:
- channel: train
s3:
s3uri: s3://my-bucket/xgboost-mnist/train/
- channel: validation
s3:
s3uri: s3://my-bucket/xgboost-mnist/validation/
outputpath: s3://my-bucket/xgboost-mnist/xgboost/
instanceCount: 1
instanceType: ml.m4.xlarge
volumeSizeInGB: 35
maxRuntimeInSeconds: 86400
roleArn: arn:aws:iam::123456789012:role/service-role/AmazonSageMaker-ExecutionRole
region: us-west-2
image: 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:1
enableNetworkIsolation: true
enableInterContainerTrafficEncryption: false
trainingInputMode: File
hyperparameterTuningJobConfig:
maxNumberOfTrainingJobs: 10
maxParallelTrainingJobs: 10
strategy: Bayesian
hyperparameterTuningJobObjective:
type: Minimize
metricName: validation:error
integerParameterRanges:
name: num_round
minValue: 10
maxValue: 20
scalingType: Linear
trainingJobConfig:
hyperparameters:
base_score: '0.5'
booster: gbtree
csv_weights: '0'
dsplit: row
grow_policy: depthwise
lambda_bias: '0.0'
max_bin: '256'
max_leaves: '0'
normalize_type: tree
objective: reg:linear
one_drop: '0'
prob_buffer_row: '1.0'
process_type: default
rate_drop: '0.0'
refresh_leaf: '1'
sample_type: uniform
scale_pos_weight: '1.0'
silent: '0'
sketch_eps: '0.03'
skip_drop: '0.0'
tree_method: auto
tweedie_variance_power: '1.5'
updater: grow_colmaker,prune
inputDataConfig:
- channel: train
s3:
s3Uri: s3://my-bucket/xgboost-mnist/train/
- channel: validation
s3:
s3Uri: s3://my-bucket/xgboost-mnist/validation/
outputPath: s3://my-bucket/xgboost-mnist/xgboost/
Loading