Merge pull request opendatahub-io#116 from MichaelClifford/fix-115
small clean up for mt_bench
cooktheryan authored Oct 18, 2024
2 parents 348c920 + d7833d2 commit c5eea56
Showing 3 changed files with 32 additions and 18 deletions.
13 changes: 9 additions & 4 deletions eval/mt_bench/components.py
@@ -139,6 +139,8 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
             usable_cpu_count = multiprocessing.cpu_count() // 2
         max_workers = usable_cpu_count
 
+    # modify model_list to ignore any jsonl files present in the directory
+    models_list = [model for model in models_list if not model.endswith(".jsonl")]
     for model_name in models_list:
         print(f"Serving candidate model: {model_name}")
         model_path = f"{models_path_prefix}/{model_name}"
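Note on the new filter: models_list is evidently built from a directory listing, so MT-Bench answer files (*.jsonl) written next to the checkpoint folders would otherwise be passed to vLLM as model names. A minimal sketch of the same idea, assuming the list comes from os.listdir (the helper name is invented for illustration, not part of the repo):

import os

def list_candidate_models(models_path_prefix: str) -> list[str]:
    # Keep directory entries that look like model checkpoints; skip any
    # .jsonl files (e.g. evaluation answer dumps) sitting beside them.
    return [
        entry
        for entry in os.listdir(models_path_prefix)
        if not entry.endswith(".jsonl")
    ]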
@@ -193,10 +195,13 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
 
     # Rename the best model directory to "candidate_model" for the next step
     # So we know which model to use for the final evaluation
-    os.rename(
-        os.path.join(models_path_prefix, best_model),
-        os.path.join(models_path_prefix, "candidate_model"),
-    )
+    if os.path.exists(os.path.join(models_path_prefix, "candidate_model")):
+        print("candidate_model already exists. Skipping renaming")
+    else:
+        os.rename(
+            os.path.join(models_path_prefix, best_model),
+            os.path.join(models_path_prefix, "candidate_model"),
+        )
 
     return outputs(best_model=best_model, best_score=best_score)
 
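Why the new guard matters: os.rename raises an OSError when the destination already exists as a non-empty directory, so a retried run of this step would previously crash once candidate_model had been created. A standalone sketch of the guarded pattern, using the same names as the diff (the wrapper function is invented for illustration):

import os

def promote_best_model(models_path_prefix: str, best_model: str) -> str:
    # Rename the winning checkpoint dir to "candidate_model", skipping
    # the rename when a previous (e.g. retried) run already created it.
    candidate = os.path.join(models_path_prefix, "candidate_model")
    if os.path.exists(candidate):
        print("candidate_model already exists. Skipping renaming")
    else:
        os.rename(os.path.join(models_path_prefix, best_model), candidate)
    return candidate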
24 changes: 14 additions & 10 deletions pipeline.yaml
@@ -1362,13 +1362,15 @@ deploymentSpec:
 \ if max_workers == \"auto\":\n try:\n usable_cpu_count\
 \ = len(os.sched_getaffinity(0)) // 2\n except AttributeError:\n\
 \ usable_cpu_count = multiprocessing.cpu_count() // 2\n \
-\ max_workers = usable_cpu_count\n\n for model_name in models_list:\n\
-\ print(f\"Serving candidate model: {model_name}\")\n model_path\
-\ = f\"{models_path_prefix}/{model_name}\"\n\n vllm_process, vllm_server\
-\ = launch_vllm(model_path, gpu_count)\n\n # model ID is the model_path\
-\ value in vLLM\n evaluator = MTBenchEvaluator(\n model_name=model_path,\n\
-\ judge_model_name=judge_model_name,\n output_dir=\"\
-/tmp/eval_output\",\n merge_system_user_message=merge_system_user_message,\n\
+\ max_workers = usable_cpu_count\n\n # modify model_list to ignore\
+\ any jsonl files present in the directory\n models_list = [model for\
+\ model in models_list if not model.endswith(\".jsonl\")]\n for model_name\
+\ in models_list:\n print(f\"Serving candidate model: {model_name}\"\
+)\n model_path = f\"{models_path_prefix}/{model_name}\"\n\n \
+\ vllm_process, vllm_server = launch_vllm(model_path, gpu_count)\n\n \
+\ # model ID is the model_path value in vLLM\n evaluator =\
+\ MTBenchEvaluator(\n model_name=model_path,\n judge_model_name=judge_model_name,\n\
+\ output_dir=\"/tmp/eval_output\",\n merge_system_user_message=merge_system_user_message,\n\
 \ )\n\n evaluator.gen_answers(\n server_url=vllm_server,\n\
 \ serving_gpus=gpu_count,\n max_workers=max_workers,\n\
 \ )\n\n shutdown_vllm(vllm_process)\n\n overall_score,\
@@ -1389,9 +1391,11 @@ deploymentSpec:
 utf-8\") as f:\n json.dump({\"best_model\": best_model, \"best_score\"\
 : best_score}, f, indent=4)\n\n # Rename the best model directory to\
 \ \"candidate_model\" for the next step\n # So we know which model to\
-\ use for the final evaluation\n os.rename(\n os.path.join(models_path_prefix,\
-\ best_model),\n os.path.join(models_path_prefix, \"candidate_model\"\
-),\n )\n\n return outputs(best_model=best_model, best_score=best_score)\n\
+\ use for the final evaluation\n if os.path.exists(os.path.join(models_path_prefix,\
+\ \"candidate_model\")):\n print(\"candidate_model already exists.\
+\ Skipping renaming\")\n else:\n os.rename(\n os.path.join(models_path_prefix,\
+\ best_model),\n os.path.join(models_path_prefix, \"candidate_model\"\
+),\n )\n\n return outputs(best_model=best_model, best_score=best_score)\n\
 \n"
 image: quay.io/sallyom/instructlab-ocp:eval-10-8
 resources:
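The pipeline.yaml hunks mirror the components.py change because KFP embeds each component's Python source in the compiled pipeline spec as an escaped string, so the two must be regenerated together or they drift apart. A hedged sketch of the recompile step, assuming a KFP v2 pipeline function named pipeline in a module pipeline.py (the repo's actual build command may differ):

from kfp import compiler

import pipeline  # hypothetical module defining the @dsl.pipeline function

compiler.Compiler().compile(
    pipeline_func=pipeline.pipeline,
    package_path="pipeline.yaml",
)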
13 changes: 9 additions & 4 deletions standalone/standalone.py
@@ -1728,6 +1728,8 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
             usable_cpu_count = multiprocessing.cpu_count() // 2
         max_workers = usable_cpu_count
 
+    # modify model_list to ignore any jsonl files present in the directory
+    models_list = [model for model in models_list if not model.endswith(".jsonl")]
     for model_name in models_list:
         print(f"Serving candidate model: {model_name}")
         model_path = f"{models_path_prefix}/{model_name}"
@@ -1782,10 +1784,13 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
 
     # Rename the best model directory to "candidate_model" for the next step
     # So we know which model to use for the final evaluation
-    os.rename(
-        os.path.join(models_path_prefix, best_model),
-        os.path.join(models_path_prefix, "candidate_model"),
-    )
+    if os.path.exists(os.path.join(models_path_prefix, "candidate_model")):
+        print("candidate_model already exists. Skipping renaming")
+    else:
+        os.rename(
+            os.path.join(models_path_prefix, best_model),
+            os.path.join(models_path_prefix, "candidate_model"),
+        )
 
     return outputs(best_model=best_model, best_score=best_score)
 """
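Since the same filter now lives in both components.py and standalone.py, a small pytest-style regression test (hypothetical; the checkpoint directory names are invented) pins down the intended behavior:

import os

def test_jsonl_files_are_skipped(tmp_path):
    # Two checkpoint dirs plus a stray answers file in the models dir.
    (tmp_path / "samples_0").mkdir()
    (tmp_path / "samples_1").mkdir()
    (tmp_path / "mt_bench_answers.jsonl").write_text("{}")

    models_list = [m for m in os.listdir(tmp_path) if not m.endswith(".jsonl")]

    assert sorted(models_list) == ["samples_0", "samples_1"]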
