Minor refactoring and cleanups. #6

Merged (1 commit) on Nov 2, 2024
bella.py: 12 changes (6 additions & 6 deletions)
@@ -1,15 +1,15 @@
"""Bella: Bespoke Labs Synthetic Data Generation Library."""

import asyncio
from typing import Optional
from prompt import Prompter
from datasets import Dataset
import logging
import json
import logging
import os
from datasets.arrow_writer import ArrowWriter
from typing import Optional

from api_request_parallel_processor import process_api_requests_from_file
import tiktoken
from datasets import Dataset
from datasets.arrow_writer import ArrowWriter
from prompt import Prompter


def _create_requests_file(
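For reference, the net effect of this hunk appears to be a standard-library-first, alphabetized import block with local modules last, and `import tiktoken` dropped. A reconstruction of the resulting top of bella.py (assembled from the added lines above, not copied from the merged file):

"""Bella: Bespoke Labs Synthetic Data Generation Library."""

import asyncio
import json
import logging
import os
from typing import Optional

from api_request_parallel_processor import process_api_requests_from_file
from datasets import Dataset
from datasets.arrow_writer import ArrowWriter
from prompt import Prompter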
prompt.py: 12 changes (7 additions & 5 deletions)
@@ -1,4 +1,3 @@
-from re import M
 from jinja2 import Template
 from pydantic import BaseModel
 from typing import Any, Dict, Optional, Type
@@ -9,8 +8,8 @@ class Prompter:

     def __init__(
         self,
-        model_name,
-        user_prompt,
+        model_name: str,
+        user_prompt: str,
         system_prompt: Optional[str] = None,
         response_format: Optional[Type[BaseModel]] = None,
     ):
@@ -32,14 +31,17 @@ def get_request_object(self, row: Dict[str, Any], idx: int) -> Dict[str, Any]:
         messages.append({"role": "user", "content": user_template.render(**row)})
 
         if self.response_format:
-            # OpenAI API https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
+            # OpenAI API
+            # https://platform.openai.com/docs/api-reference/chat/create#chat-create-response_format
             request = {
                 "model": self.model_name,
                 "messages": messages,
                 "response_format": {
                     "type": "json_schema",
                     "json_schema": {
-                        "name": "output_schema",  # not sure if this should be something else. Also not sure if we should use strict: True
+                        # TODO(ryan): not sure if this should be something else.
+                        # TODO(ryan): also not sure if we should use strict: True
+                        "name": "output_schema",
                         "schema": self.response_format.model_json_schema(),
                     },
                 },
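To make the TODO about strict concrete, here is a minimal sketch of how a structured-output request would be built with this code. The QAPair model, the gpt-4o-mini model name, the prompt, and the row are illustrative assumptions, not part of this PR; OpenAI's json_schema response format does accept an optional "strict": true field, which is the open question in the TODO.

from pydantic import BaseModel

from prompt import Prompter


# Hypothetical response model, only for illustration.
class QAPair(BaseModel):
    question: str
    answer: str


prompter = Prompter(
    model_name="gpt-4o-mini",  # illustrative model choice
    user_prompt="Write a question and answer about {{ topic }}.",  # Jinja2 template, rendered with the row
    response_format=QAPair,
)

request = prompter.get_request_object({"topic": "prime numbers"}, idx=0)
# The "response_format" entry of the request should resemble:
# {
#     "type": "json_schema",
#     "json_schema": {
#         "name": "output_schema",
#         "schema": QAPair.model_json_schema(),
#         # "strict": True,  <- the option the TODO above is asking about
#     },
# }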