Skip to content

Commit

Permalink
fix katib trial experiment templates
Browse files Browse the repository at this point in the history
  • Loading branch information
streamnsight committed Sep 6, 2022
1 parent 154450c commit bd3a9c4
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 0 deletions.
3 changes: 3 additions & 0 deletions deployments/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,6 @@ resources:

components:
- ../../oci/common/istio-1-11/istio-install/overlays/oci-load-balancer

# Component that patches Katib's trial-templates ConfigMap so the trial pod
# templates carry the sidecar.istio.io/inject: "false" annotation
# (i.e. no Istio sidecar is injected into Katib trial pods).
- ../../oci/apps/katib/trial-experiment-fix
7 changes: 7 additions & 0 deletions oci/apps/katib/trial-experiment-fix/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Kustomize Component that patches Katib's `trial-templates` ConfigMap.
# The strategic-merge patch listed below overrides the trial template entries
# so that their pod templates are annotated with
# `sidecar.istio.io/inject: "false"`.
apiVersion: kustomize.config.k8s.io/v1alpha1
kind: Component

patchesStrategicMerge:
- trial-templates.ConfigMap.yaml
87 changes: 87 additions & 0 deletions oci/apps/katib/trial-experiment-fix/trial-templates.ConfigMap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Strategic-merge patch for Katib's `trial-templates` ConfigMap.
# Every pod template defined below is annotated with
# `sidecar.istio.io/inject: "false"` so that Istio does not inject a sidecar
# proxy into Katib trial pods.
apiVersion: v1
kind: ConfigMap
metadata:
  name: trial-templates
  namespace: kubeflow
  labels:
    katib.kubeflow.org/component: trial-templates
data:
  # Default trial template: a batch Job running the MXNet MNIST example.
  # The ${trialParameters.*} placeholders are left verbatim for Katib.
  defaultTrialTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    spec:
      template:
        metadata:
          annotations:
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            - name: training-container
              image: docker.io/kubeflowkatib/mxnet-mnist:v0.13.0
              command:
                - "python3"
                - "/opt/mxnet-mnist/mnist.py"
                - "--batch-size=64"
                - "--lr=${trialParameters.learningRate}"
                - "--num-layers=${trialParameters.numberLayers}"
                - "--optimizer=${trialParameters.optimizer}"
          restartPolicy: Never
  # ENAS (CPU) trial template.
  # For ConfigMap templates, the JSON-valued arguments (e.g. nn_config,
  # architecture) must be wrapped in escaped double quotes inside the command
  # so that they are parsed correctly in the Trial Template.
  enasCPUTemplate: |-
    apiVersion: batch/v1
    kind: Job
    spec:
      template:
        metadata:
          annotations:
            sidecar.istio.io/inject: "false"
        spec:
          containers:
            - name: training-container
              image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.13.0
              command:
                - python3
                - -u
                - RunTrial.py
                - --num_epochs=1
                - "--architecture=\"${trialParameters.neuralNetworkArchitecture}\""
                - "--nn_config=\"${trialParameters.neuralNetworkConfig}\""
          restartPolicy: Never
  # PyTorchJob trial template with one Master and two Worker replicas.
  # Both replica pod templates carry the sidecar-injection opt-out annotation;
  # the Worker template previously lacked it, so Worker pods were not opted
  # out even though Master pods were.
  pytorchJobTemplate: |-
    apiVersion: kubeflow.org/v1
    kind: PyTorchJob
    spec:
      pytorchReplicaSpecs:
        Master:
          replicas: 1
          restartPolicy: OnFailure
          template:
            metadata:
              annotations:
                sidecar.istio.io/inject: "false"
            spec:
              containers:
                - name: pytorch
                  image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0
                  imagePullPolicy: Always
                  command:
                    - "python3"
                    - "/opt/pytorch-mnist/mnist.py"
                    - "--epochs=1"
                    - "--lr=${trialParameters.learningRate}"
                    - "--momentum=${trialParameters.momentum}"
        Worker:
          replicas: 2
          restartPolicy: OnFailure
          template:
            metadata:
              annotations:
                sidecar.istio.io/inject: "false"
            spec:
              containers:
                - name: pytorch
                  image: docker.io/kubeflowkatib/pytorch-mnist:v0.13.0
                  imagePullPolicy: Always
                  command:
                    - "python3"
                    - "/opt/pytorch-mnist/mnist.py"
                    - "--epochs=1"
                    - "--lr=${trialParameters.learningRate}"
                    - "--momentum=${trialParameters.momentum}"

0 comments on commit bd3a9c4

Please sign in to comment.