Skip to content

Commit

Permalink
Merge pull request opendatahub-io#235 from opendatahub-io/kfp_included
Browse files: browse the repository at this point in the history
No longer use the image pull secret; use the stage registry image instead
  • Loading branch information
MichaelClifford authored Dec 11, 2024
2 parents 116ead6 + 28b0611 commit a7de1b0
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 25 deletions.
2 changes: 1 addition & 1 deletion importer-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ deploymentSpec:
env:
- name: REGISTRY_AUTH_FILE
value: /mnt/containers/.dockerconfigjson
image: quay.io/redhat-et/ilab:1.3
image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
pipelineInfo:
description: Helper pipeline to the InstructLab pipeline which allows users to seed/import
a new base model
Expand Down
8 changes: 0 additions & 8 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DeletePVC,
mount_pvc,
set_image_pull_policy,
set_image_pull_secrets,
use_config_map_as_env,
use_secret_as_env,
use_secret_as_volume,
Expand All @@ -23,7 +22,6 @@
MOCKED_STAGES = ["sdg", "train", "eval"]
PIPELINE_FILE_NAME = "pipeline.yaml"
IMPORTER_PIPELINE_FILE_NAME = "importer-pipeline.yaml"
IMAGE_PULL_SECRET = "redhat-et-ilab-botty-pull-secret"
STANDALONE_TEMPLATE_FILE_NAME = "standalone.tpl"
GENERATED_STANDALONE_FILE_NAME = "standalone.py"
DEFAULT_REPO_URL = "https://github.com/instructlab/taxonomy.git"
Expand Down Expand Up @@ -213,8 +211,6 @@ def pipeline(
mount_path="/data",
)

set_image_pull_secrets(sdg_task, [IMAGE_PULL_SECRET])

# uncomment if updating image with same tag
# set_image_pull_policy(sdg_task, "Always")

Expand Down Expand Up @@ -252,8 +248,6 @@ def pipeline(
data_processing_task.set_caching_options(False)
data_processing_task.set_env_variable("XDG_CACHE_HOME", "/tmp")

set_image_pull_secrets(data_processing_task, [IMAGE_PULL_SECRET])

# Upload "skills_processed_data" and "knowledge_processed_data" artifacts to S3 without blocking the rest of the workflow
skills_processed_data_to_artifact_task = skills_processed_data_to_artifact_op()
skills_processed_data_to_artifact_task.after(data_processing_task)
Expand Down Expand Up @@ -353,7 +347,6 @@ def pipeline(
JUDGE_CONFIG_MAP,
dict(endpoint="JUDGE_ENDPOINT", model="JUDGE_NAME"),
)
set_image_pull_secrets(run_mt_bench_task, [IMAGE_PULL_SECRET])
use_secret_as_env(run_mt_bench_task, JUDGE_SECRET, {"api_key": "JUDGE_API_KEY"})

# uncomment if updating image with same tag
Expand Down Expand Up @@ -392,7 +385,6 @@ def pipeline(

final_eval_task.set_env_variable("HOME", "/tmp")
final_eval_task.set_env_variable("HF_HOME", "/tmp")
set_image_pull_secrets(final_eval_task, [IMAGE_PULL_SECRET])

# uncomment if updating image with same tag
# set_image_pull_policy(final_eval_task, "Always")
Expand Down
20 changes: 6 additions & 14 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ deploymentSpec:
env:
- name: XDG_CACHE_HOME
value: /tmp
image: quay.io/redhat-et/ilab:1.3
image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
exec-deletepvc:
container:
image: argostub/deletepvc
Expand Down Expand Up @@ -747,7 +747,7 @@ deploymentSpec:
\ path_to_data = \"/input_data/knowledge/data.jsonl\"\n elif phase_num\
\ == 2:\n path_to_model = list_phase1_final_model()\n path_to_data\
\ = \"/input_data/skills/data.jsonl\"\n else:\n raise RuntimeError(f\"\
Unsupported value of {phase_num=}\")\n\n image = \"quay.io/redhat-et/ilab:1.3\"\
Unsupported value of {phase_num=}\")\n\n image = \"registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1\"\
\n\n manifest = inspect.cleandoc(\n f\"\"\"\n apiVersion:\
\ kubeflow.org/v1\n kind: PyTorchJob\n metadata:\n \
\ name: {name}\n spec:\n nprocPerNode: \\\"{nproc_per_node}\\\
Expand Down Expand Up @@ -951,7 +951,7 @@ deploymentSpec:
\ path_to_data = \"/input_data/knowledge/data.jsonl\"\n elif phase_num\
\ == 2:\n path_to_model = list_phase1_final_model()\n path_to_data\
\ = \"/input_data/skills/data.jsonl\"\n else:\n raise RuntimeError(f\"\
Unsupported value of {phase_num=}\")\n\n image = \"quay.io/redhat-et/ilab:1.3\"\
Unsupported value of {phase_num=}\")\n\n image = \"registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1\"\
\n\n manifest = inspect.cleandoc(\n f\"\"\"\n apiVersion:\
\ kubeflow.org/v1\n kind: PyTorchJob\n metadata:\n \
\ name: {name}\n spec:\n nprocPerNode: \\\"{nproc_per_node}\\\
Expand Down Expand Up @@ -1373,7 +1373,7 @@ deploymentSpec:
value: /tmp
- name: HF_HOME
value: /tmp
image: quay.io/redhat-et/ilab:1.3
image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
resources:
accelerator:
count: '1'
Expand Down Expand Up @@ -1509,7 +1509,7 @@ deploymentSpec:
value: /tmp
- name: HF_HOME
value: /tmp
image: quay.io/redhat-et/ilab:1.3
image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
resources:
accelerator:
count: '1'
Expand Down Expand Up @@ -1619,7 +1619,7 @@ deploymentSpec:
value: /tmp
- name: HF_HOME
value: /tmp
image: quay.io/redhat-et/ilab:1.3
image: registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1
exec-sdg-to-artifact-op:
container:
args:
Expand Down Expand Up @@ -2241,8 +2241,6 @@ platforms:
deploymentSpec:
executors:
exec-data-processing-op:
imagePullSecret:
- secretName: redhat-et-ilab-botty-pull-secret
pvcMount:
- mountPath: /model
taskOutputParameter:
Expand Down Expand Up @@ -2296,8 +2294,6 @@ platforms:
envVar: JUDGE_ENDPOINT
- configMapKey: model
envVar: JUDGE_NAME
imagePullSecret:
- secretName: redhat-et-ilab-botty-pull-secret
pvcMount:
- mountPath: /output
taskOutputParameter:
Expand All @@ -2324,8 +2320,6 @@ platforms:
envVar: JUDGE_ENDPOINT
- configMapKey: model
envVar: JUDGE_NAME
imagePullSecret:
- secretName: redhat-et-ilab-botty-pull-secret
pvcMount:
- mountPath: /output
taskOutputParameter:
Expand All @@ -2344,8 +2338,6 @@ platforms:
envVar: endpoint
- configMapKey: model
envVar: model
imagePullSecret:
- secretName: redhat-et-ilab-botty-pull-secret
pvcMount:
- mountPath: /data
taskOutputParameter:
Expand Down
2 changes: 1 addition & 1 deletion training/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def list_phase1_final_model():
else:
raise RuntimeError(f"Unsupported value of {phase_num=}")

image = "quay.io/redhat-et/ilab:1.3"
image = "registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1"

manifest = inspect.cleandoc(
f"""
Expand Down
2 changes: 1 addition & 1 deletion utils/consts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
PYTHON_IMAGE = "quay.io/modh/odh-generic-data-science-notebook:v3-2024b-20241111"
TOOLBOX_IMAGE = "registry.access.redhat.com/ubi9/toolbox"
OC_IMAGE = "registry.redhat.io/openshift4/ose-cli"
RHELAI_IMAGE = "quay.io/redhat-et/ilab:1.3"
RHELAI_IMAGE = "registry.stage.redhat.io/rhelai1/instructlab-nvidia-rhel9:1.3.1"

0 comments on commit a7de1b0

Please sign in to comment.