Commit
Merge pull request opendatahub-io#163 from tumido/cleanup
chore: clean up mmlu dead code
MichaelClifford authored Nov 19, 2024
2 parents fc8e70d + bd56364 commit d9f2416
Showing 5 changed files with 0 additions and 231 deletions.
5 changes: 0 additions & 5 deletions eval/mmlu/__init__.py

This file was deleted.

108 changes: 0 additions & 108 deletions eval/mmlu/components.py

This file was deleted.

48 changes: 0 additions & 48 deletions pipeline.py
@@ -102,7 +102,6 @@ def pipeline_wrapper(mock: List[Literal[MOCKED_STAGES]]):

# Imports for evaluation
from eval.final import run_final_eval_op
from eval.mmlu import load_mmlu_results_op, run_mmlu_op

## from eval.mmlu import run_mmlu_op, load_mmlu_results_op
from eval.mt_bench import run_mt_bench_op
@@ -339,49 +338,6 @@ def pipeline(
kubectl_wait_task.after(kubectl_apply_task)
kubectl_wait_task.set_caching_options(False)

# # MMLU Evaluation of models

# models_list_task = list_models_in_directory_op(
# models_folder="/output/model/model/hf_format",
# )
# models_list_task.set_caching_options(False)

# models_list_task.after(kubectl_wait_task)

# mount_pvc(
# task=models_list_task,
# pvc_name=output_pvc_task.output,
# mount_path="/output/model",
# )

# run_mmlu_task = run_mmlu_op(
# models_list=models_list_task.output,
# models_path_prefix="/output/model/hf_format",
# mmlu_tasks_list=mmlu_tasks_list,
# model_dtype=model_dtype,
# few_shots=few_shots,
# batch_size=batch_size,
# device=device,
# )

# run_mmlu_task.set_caching_options(False)

# mount_pvc(
# task=run_mmlu_task, pvc_name=output_pvc_task.output, mount_path="/output"
# )

# load_mmlu_results_task = load_mmlu_results_op(
# mmlu_output=run_mmlu_task.outputs["mmlu_output"],
# )

# run_mmlu_task.set_accelerator_type("nvidia.com/gpu")
# run_mmlu_task.set_accelerator_limit(1)

# # Run training on MMLU best-model
# # Run final eval on best scored mt_bench candidate
# # For now, running mt_bench on same output models as training phase 1
# # TODO: Another training phase, using the best-model from MMLU as base

#### Train 2

pytorchjob_manifest_2_task = pytorchjob_manifest_op(
@@ -423,8 +379,6 @@ def pipeline(
kubectl_wait_2_task.after(kubectl_apply_2_task)
kubectl_wait_2_task.set_caching_options(False)

###

models_list_2_task = list_models_in_directory_op(
models_folder="/output/phase_2/model/hf_format",
)
@@ -436,8 +390,6 @@ def pipeline(
mount_path="/output",
)

###

# MT_Bench Evaluation of models

run_mt_bench_task = run_mt_bench_op(
35 changes: 0 additions & 35 deletions standalone/standalone.py
@@ -83,7 +83,6 @@
)
CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore

# TRAINING
PYTORCH_NNODES = 2
@@ -988,14 +987,6 @@ def run(
ctx.obj["training_phase"] = "1"
ctx.invoke(train)

# Evaluation of phase 1 with MMLU
# ctx.obj["eval_type"] = "mmlu"
# scores = ctx.invoke(evaluation)
# scores = json.loads(scores)
# best_model = max(scores, key=lambda x: x["average_score"])
# logger.info("Best model: %s", best_model.get("model"))
# ctx.obj["model_to_train"] = best_model.get("model")

# Training Phase 2
ctx.obj["training_phase"] = "2"
ctx.invoke(train)
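
The commented-out block deleted just above sketched a "pick the best model by MMLU average score" step between the two training phases. For context only, a minimal runnable sketch of that selection logic is shown below; the model paths and scores are made-up sample data, since the real JSON would have come from the evaluation step.

import json

# Hypothetical sample of the per-model scores the removed code expected:
# a JSON list where each entry carries a model path and its MMLU average score.
scores = json.loads(
    '[{"model": "/output/model/hf_format/model_a", "average_score": 0.61},'
    ' {"model": "/output/model/hf_format/model_b", "average_score": 0.58}]'
)

# Same selection the deleted comments described: keep the highest-scoring
# model and hand it to training phase 2 as the base model.
best_model = max(scores, key=lambda x: x["average_score"])
print("Best model:", best_model.get("model"))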
@@ -1476,32 +1467,6 @@ def create_eval_job(

job_name = f"eval-{eval_type}"

# if eval_type == "mmlu":
# init_containers = [
# kubernetes.client.V1Container(
# name=f"run-eval-{eval_type}",
# image="",
# command=,
# args=,
# volume_mounts=[
# kubernetes.client.V1VolumeMount(
# name=TRAINING_VOLUME_NAME, mount_path=TRAINING_PVC_MOUNT_PATH
# ),
# ],
# )
# ]
# container = kubernetes.client.V1Container(
# name=f"output-eval-{eval_type}-scores",
# image="",
# command=["/bin/sh", "-c"],
# args=[f"cat {MMLU_SCORES_PATH}"],
# volume_mounts=[
# kubernetes.client.V1VolumeMount(
# name=TRAINING_VOLUME_NAME, mount_path=TRAINING_PVC_MOUNT_PATH
# ),
# ],
# )

exec_run_mt_bench_op_command = """
from typing import *
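
For reference, the commented-out MMLU eval job removed above (it had an empty image and malformed command/args, which is part of why it was dead) followed the standard kubernetes Python client pattern for a score-reporting container. A minimal, hypothetical sketch follows; the image, volume name, and mount path are placeholders rather than values from this repository, and only the scores path is taken from the deleted MMLU_SCORES_PATH constant.

import kubernetes.client

TRAINING_VOLUME_NAME = "training-data"  # placeholder; the real constant lives elsewhere in standalone.py
TRAINING_PVC_MOUNT_PATH = "/output"     # placeholder mount path

# Container that would have printed the MMLU scores file after evaluation,
# mirroring the deleted "output-eval-mmlu-scores" stanza.
container = kubernetes.client.V1Container(
    name="output-eval-mmlu-scores",
    image="quay.io/example/eval:latest",    # placeholder; the deleted code left this empty
    command=["/bin/sh", "-c"],
    args=["cat /output/mmlu-results.txt"],  # path from the removed MMLU_SCORES_PATH constant
    volume_mounts=[
        kubernetes.client.V1VolumeMount(
            name=TRAINING_VOLUME_NAME, mount_path=TRAINING_PVC_MOUNT_PATH
        )
    ],
)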
35 changes: 0 additions & 35 deletions standalone/standalone.tpl
@@ -83,7 +83,6 @@ CANDIDATE_MODEL_PATH_PREFIX = path.join(
)
CANDIDATE_MODEL_PATH = path.join(CANDIDATE_MODEL_PATH_PREFIX, "candidate_model")
TAXONOMY_DATA_PATH = path.join(DATA_PVC_MOUNT_PATH, "taxonomy")
# MMLU_SCORES_PATH = "/output/mmlu-results.txt" - after training phase 1 is done MMLU is not performed anymore

# TRAINING
PYTORCH_NNODES = 2
@@ -988,14 +987,6 @@ def run(
ctx.obj["training_phase"] = "1"
ctx.invoke(train)

# Evaluation of phase 1 with MMLU
# ctx.obj["eval_type"] = "mmlu"
# scores = ctx.invoke(evaluation)
# scores = json.loads(scores)
# best_model = max(scores, key=lambda x: x["average_score"])
# logger.info("Best model: %s", best_model.get("model"))
# ctx.obj["model_to_train"] = best_model.get("model")

# Training Phase 2
ctx.obj["training_phase"] = "2"
ctx.invoke(train)
@@ -1332,32 +1323,6 @@ def create_eval_job(

job_name = f"eval-{eval_type}"

# if eval_type == "mmlu":
# init_containers = [
# kubernetes.client.V1Container(
# name=f"run-eval-{eval_type}",
# image="{{exec_run_mmlu_op_image}}",
# command={{exec_run_mmlu_op_command}},
# args={{exec_run_mmlu_op_args}},
# volume_mounts=[
# kubernetes.client.V1VolumeMount(
# name=TRAINING_VOLUME_NAME, mount_path=TRAINING_PVC_MOUNT_PATH
# ),
# ],
# )
# ]
# container = kubernetes.client.V1Container(
# name=f"output-eval-{eval_type}-scores",
# image="{{exec_run_mmlu_op_image}}",
# command=["/bin/sh", "-c"],
# args=[f"cat {MMLU_SCORES_PATH}"],
# volume_mounts=[
# kubernetes.client.V1VolumeMount(
# name=TRAINING_VOLUME_NAME, mount_path=TRAINING_PVC_MOUNT_PATH
# ),
# ],
# )

exec_run_mt_bench_op_command = """
{{exec_run_mt_bench_op_command}}
"""
