Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenAIBatch backend and refactor RequestProcessor to be compatible #28

Merged
merged 19 commits on Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 26 additions & 21 deletions build.py → build_pkg.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,55 @@
import os
import subprocess
import shutil
import subprocess
import sys
from pathlib import Path


def run_command(command, cwd=None):
    """Execute *command* through the shell and return the CompletedProcess.

    Args:
        command: Shell command string to execute.
        cwd: Optional working directory for the command.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero
            (because check=True).
    """
    return subprocess.run(command, shell=True, cwd=cwd, check=True)


def npm_install():
    """Install Node.js dependencies for the dataset-viewer frontend.

    Raises:
        subprocess.CalledProcessError: If ``npm install`` exits non-zero.
    """
    print("Running npm install")
    run_command("npm install", cwd="bespoke-dataset-viewer")


def nextjs_build():
print("Running Next.js build")
run_command("npm run build", cwd="bespoke-dataset-viewer")
print("Copying build artifacts to static folder")

# Source and target directories
source_base = Path("bespoke-dataset-viewer")
target_base = Path("src/bespokelabs/curator/viewer/static")

# Ensure target directory exists
if target_base.exists():
shutil.rmtree(target_base)
target_base.mkdir(parents=True, exist_ok=True)

# Copy only the necessary files, excluding node_modules
files_to_copy = [
'.next',
'app',
'components',
'lib',
'public',
'types',
'package.json',
'package-lock.json',
'next.config.ts',
'next-env.d.ts',
'tsconfig.json',
'postcss.config.mjs',
'tailwind.config.ts',
'components.json'
".next",
"app",
"components",
"lib",
"public",
"types",
"package.json",
"package-lock.json",
"next.config.ts",
"next-env.d.ts",
"tsconfig.json",
"postcss.config.mjs",
"tailwind.config.ts",
"components.json",
]

for item in files_to_copy:
source = source_base / item
target = target_base / item

if source.exists():
if source.is_file():
shutil.copy2(source, target)
Expand All @@ -60,6 +62,7 @@ def nextjs_build():
else:
print(f"Warning: {source} not found")


def run_pytest():
print("Running pytest")
try:
Expand All @@ -68,11 +71,13 @@ def run_pytest():
print("Pytest failed. Aborting build.")
sys.exit(1)


def main():
    """Full build pipeline: install JS deps, build the viewer, then run tests.

    run_pytest() aborts the process (sys.exit) on test failure, so the final
    success message only prints when every stage succeeded.
    """
    npm_install()
    nextjs_build()
    run_pytest()
    print("Build completed successfully.")


# Diff residue showed main() duplicated (old EOF line + new line); the script
# entry point should invoke the build pipeline exactly once.
if __name__ == "__main__":
    main()
18 changes: 5 additions & 13 deletions examples/camel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List

import pandas as pd
from pydantic import BaseModel, Field

from bespokelabs import curator
Expand All @@ -24,18 +23,14 @@ class QAs(BaseModel):


subject_prompter = curator.Prompter(
prompt_func=lambda: {
"user_prompt": f"Generate a diverse list of 3 subjects. Keep it high-level (e.g. Math, Science)."
},
prompt_func=lambda: f"Generate a diverse list of 3 subjects. Keep it high-level (e.g. Math, Science).",
parse_func=lambda _, subjects: [subject for subject in subjects.subjects],
model_name="gpt-4o-mini",
response_format=Subjects,
)
subject_dataset = subject_prompter()
subsubject_prompter = curator.Prompter(
prompt_func=lambda subject: {
"user_prompt": f"For the given subject {subject}. Generate 3 diverse subsubjects. No explanation."
},
prompt_func=lambda subject: f"For the given subject {subject}. Generate 3 diverse subsubjects. No explanation.",
parse_func=lambda subject, subsubjects: [
{"subject": subject["subject"], "subsubject": subsubject.subject}
for subsubject in subsubjects.subjects
Expand All @@ -46,9 +41,7 @@ class QAs(BaseModel):
subsubject_dataset = subsubject_prompter(subject_dataset)

qa_prompter = curator.Prompter(
prompt_func=lambda subsubject: {
"user_prompt": f"For the given subsubject {subsubject}. Generate 3 diverse questions and answers. No explanation."
},
prompt_func=lambda subsubject: f"For the given subsubject {subsubject}. Generate 3 diverse questions and answers. No explanation.",
model_name="gpt-4o-mini",
response_format=QAs,
parse_func=lambda subsubject, qas: [
Expand All @@ -63,6 +56,5 @@ class QAs(BaseModel):
)
qa_dataset = qa_prompter(subsubject_dataset)

qa_hf_dataset = qa_dataset.to_huggingface()
qa_hf_dataset.map(lambda row: {"answer": row["answer"].strip()}, num_proc=2)
print(qa_hf_dataset.to_pandas())
qa_dataset.map(lambda row: {"answer": row["answer"].strip()}, num_proc=2)
print(qa_dataset.to_pandas())
26 changes: 26 additions & 0 deletions examples/distill.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from bespokelabs import curator
from datasets import load_dataset
import logging

# Show progress/info logs from the curator pipeline.
logging.basicConfig(level=logging.INFO)

# Take the first 3,000 WildChat conversations to keep the distillation run small.
dataset = load_dataset("allenai/WildChat", split="train")
dataset = dataset.select(range(3_000))


def prompt_func(row):
    """Use the first turn of the row's conversation as the prompt text."""
    first_turn = row["conversation"][0]
    return first_turn["content"]


def parse_func(row, response):
    """Pair the original first-turn instruction with the model's new response."""
    conversation = row["conversation"]
    return {
        "instruction": conversation[0]["content"],
        "new_response": response,
    }


# Prompter that re-answers each sampled instruction with gpt-4o-mini.
# batch=True selects the batch backend added in this PR — presumably the
# OpenAI Batch API; confirm against the Prompter/backend implementation.
distill_prompter = curator.Prompter(
    prompt_func=prompt_func, parse_func=parse_func, model_name="gpt-4o-mini", batch=True
)

# Run the prompter over the dataset and print a sanity-check sample.
distilled_dataset = distill_prompter(dataset)
print(distilled_dataset)
print(distilled_dataset[0])
6 changes: 2 additions & 4 deletions examples/poetry.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from bespokelabs import curator

# Diff residue left both the old dict-style prompt_func (duplicate keyword
# argument — a SyntaxError) and both old/new print lines; keep only the
# post-merge version, where prompt_func returns a plain string.
poet = curator.Prompter(
    prompt_func=lambda: "Write a poem about the beauty of computer science",
    model_name="gpt-4o-mini",
)

poem = poet()
print(poem["response"][0])
Loading