From 109ce0a237ecadabb96b1e93b84a1511cb3b3c6e Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 00:55:42 +0000 Subject: [PATCH 1/9] Update README and the poem example. --- README.md | 38 +++++++++++++++++++++++++++++------- examples/poem.py | 50 +----------------------------------------------- 2 files changed, 32 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 69be2d0e..8d5448b7 100644 --- a/README.md +++ b/README.md @@ -37,22 +37,46 @@ pip install bespokelabs-curator ```python from bespokelabs import curator -import os +from datasets import Dataset +from pydantic import BaseModel, Field +from typing import List -os.environ['OPENAI_API_KEY'] = 'sk-...' # Set your OpenAI API key here +# Create a dataset object for the topics you want to create the poems. +topics = Dataset.from_list([{"topic": "Dreams vs. reality"}, + {"topic": "Urban loneliness in a bustling city"}, + {"topic": "Beauty of Bespoke Labs's Curator library"}]) +# Define a class to encapsulate a list of poems. +class Poems(BaseModel): + poems_list: List[str] = Field(description="A list of poems.") + + +# We define a prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - prompt_func=lambda: "Write a poem about the beauty of computer science", + # The prompt_func takes a row of the dataset as input. + # The row is a dictionary with a single key 'topic' in this case. + prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", + response_format=Poems, + # `row` is the input row, and `poems` is the Poems class which + # is parsed from the structured output from the LLM. + parse_func=lambda row, poems: [ + {"topic": row["topic"], "poem": p} for p in poems.poems_list + ], ) -poem = poet() -print(poem["response"][0]) +# We apply the prompter to the topics dataset. +poems = poet(topics) ``` +Note that `topics` can be created with `curator.Prompter` as well, +and we can scale this up to create tens of thousands of diverse poems. +You can see a more detailed example in the [examples/poem.py](examples/poem.py) file, +and other examples in the [examples](examples) directory. -You can see more examples in the [examples](examples) directory. +To run the examples, make sure to set your OpenAI API key in +the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. -To run the examples, make sure to set your OpenAI API key in the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. +See the [docs](https://docs.bespokelabs.ai/) for more details as well as troubleshooting. ## Bespoke Curator Viewer diff --git a/examples/poem.py b/examples/poem.py index 46ad3b7f..5697e5e2 100644 --- a/examples/poem.py +++ b/examples/poem.py @@ -1,54 +1,6 @@ """Example of using the curator library to generate diverse poems. -We generate 10 diverse topics and then generate 2 poems for each topic. - -You can do this in a loop, but that is inefficient and breaks when requests fail. -When you need to do this thousands of times (or more), you need a better abstraction. - -curator.Prompter takes care of this heavy lifting. - -# Key Components of Prompter - -## prompt_func - -Calls an LLM on each row of the input dataset in parallel. - -1. Takes a dataset row as input -2. Returns the prompt for the LLM - -## parse_func - -Converts LLM output into structured data by adding it back to the dataset. - -1. Takes two arguments: - - Input row - - LLM response (in response_format) -2. Returns new rows (in list of dictionaries) - - -# Data Flow Example -Input Dataset: - Row A - Row B -Processing by Prompter: - Row A → prompt_func(A) → Response R1 → parse_func(A, R1) → [C, D] - Row B → prompt_func(B) → Response R2 → parse_func(B, R2) → [E, F] - -Output Dataset: - Row C - Row D - Row E - Row F - -In this example: - -- The two input rows (A and B) are processed in parallel to prompt the LLM -- Each generates a response (R1 and R2) -- The parse function converts each response into (multiple) new rows (C, D, E, F) -- The final dataset contains all generated rows - -You can chain prompters together to iteratively build up a dataset. -""" +We generate 10 diverse topics and then generate 2 poems for each topic.""" from bespokelabs import curator from datasets import Dataset From a626ec7414d478da64184eedb0bcdde5767422e0 Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 00:55:42 +0000 Subject: [PATCH 2/9] Update README and the poem example. --- README.md | 38 +++++++++++++++++++++++++++++------- examples/poem.py | 50 +----------------------------------------------- 2 files changed, 32 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 9d0665c8..2d08f057 100644 --- a/README.md +++ b/README.md @@ -37,22 +37,46 @@ pip install bespokelabs-curator ```python from bespokelabs import curator -import os +from datasets import Dataset +from pydantic import BaseModel, Field +from typing import List -os.environ['OPENAI_API_KEY'] = 'sk-...' # Set your OpenAI API key here +# Create a dataset object for the topics you want to create the poems. +topics = Dataset.from_list([{"topic": "Dreams vs. reality"}, + {"topic": "Urban loneliness in a bustling city"}, + {"topic": "Beauty of Bespoke Labs's Curator library"}]) +# Define a class to encapsulate a list of poems. +class Poems(BaseModel): + poems_list: List[str] = Field(description="A list of poems.") + + +# We define a prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - prompt_func=lambda: "Write a poem about the beauty of computer science", + # The prompt_func takes a row of the dataset as input. + # The row is a dictionary with a single key 'topic' in this case. + prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", + response_format=Poems, + # `row` is the input row, and `poems` is the Poems class which + # is parsed from the structured output from the LLM. + parse_func=lambda row, poems: [ + {"topic": row["topic"], "poem": p} for p in poems.poems_list + ], ) -poem = poet() -print(poem["response"][0]) +# We apply the prompter to the topics dataset. +poems = poet(topics) ``` +Note that `topics` can be created with `curator.Prompter` as well, +and we can scale this up to create tens of thousands of diverse poems. +You can see a more detailed example in the [examples/poem.py](examples/poem.py) file, +and other examples in the [examples](examples) directory. -You can see more examples in the [examples](examples) directory. +To run the examples, make sure to set your OpenAI API key in +the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. -To run the examples, make sure to set your OpenAI API key in the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. +See the [docs](https://docs.bespokelabs.ai/) for more details as well as troubleshooting. ## Bespoke Curator Viewer diff --git a/examples/poem.py b/examples/poem.py index 46ad3b7f..5697e5e2 100644 --- a/examples/poem.py +++ b/examples/poem.py @@ -1,54 +1,6 @@ """Example of using the curator library to generate diverse poems. -We generate 10 diverse topics and then generate 2 poems for each topic. - -You can do this in a loop, but that is inefficient and breaks when requests fail. -When you need to do this thousands of times (or more), you need a better abstraction. - -curator.Prompter takes care of this heavy lifting. - -# Key Components of Prompter - -## prompt_func - -Calls an LLM on each row of the input dataset in parallel. - -1. Takes a dataset row as input -2. Returns the prompt for the LLM - -## parse_func - -Converts LLM output into structured data by adding it back to the dataset. - -1. Takes two arguments: - - Input row - - LLM response (in response_format) -2. Returns new rows (in list of dictionaries) - - -# Data Flow Example -Input Dataset: - Row A - Row B -Processing by Prompter: - Row A → prompt_func(A) → Response R1 → parse_func(A, R1) → [C, D] - Row B → prompt_func(B) → Response R2 → parse_func(B, R2) → [E, F] - -Output Dataset: - Row C - Row D - Row E - Row F - -In this example: - -- The two input rows (A and B) are processed in parallel to prompt the LLM -- Each generates a response (R1 and R2) -- The parse function converts each response into (multiple) new rows (C, D, E, F) -- The final dataset contains all generated rows - -You can chain prompters together to iteratively build up a dataset. -""" +We generate 10 diverse topics and then generate 2 poems for each topic.""" from bespokelabs import curator from datasets import Dataset From 322c63c8562b194d2629a182e276b587999f21a8 Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 03:48:17 +0000 Subject: [PATCH 3/9] Also update the starting example. --- README.md | 1 + .../components/dataset-viewer/RunsTable.tsx | 35 +++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2d08f057..b11313af 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ poet = curator.Prompter( # We apply the prompter to the topics dataset. poems = poet(topics) +print(poems.to_pandas()) ``` Note that `topics` can be created with `curator.Prompter` as well, and we can scale this up to create tens of thousands of diverse poems. diff --git a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx index 66bdcb6f..b21d2bb6 100644 --- a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx +++ b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx @@ -21,22 +21,37 @@ const COLUMNS: Column[] = [ ] const EXAMPLE_CODE = `from bespokelabs import curator -import os +from datasets import Dataset +from pydantic import BaseModel, Field +from typing import List -# Set your OpenAI API key here -os.environ['OPENAI_API_KEY'] = 'sk-...' +# Create a dataset object for the topics you want to create the poems. +topics = Dataset.from_list([{"topic": "Dreams vs. reality"}, + {"topic": "Urban loneliness in a bustling city"}, + {"topic": "Beauty of Bespoke Labs's Curator library"}]) -# Create a prompter instance +# Define a class to encapsulate a list of poems. +class Poems(BaseModel): + poems_list: List[str] = Field(description="A list of poems.") + + +# We define a prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - prompt_func=lambda: { - "user_prompt": "Write a poem about the beauty of computer science" - }, + # The prompt_func takes a row of the dataset as input. + # The row is a dictionary with a single key 'topic' in this case. + prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", + response_format=Poems, + # `row` is the input row, and `poems` is the Poems class which + # is parsed from the structured output from the LLM. + parse_func=lambda row, poems: [ + {"topic": row["topic"], "poem": p} for p in poems.poems_list + ], ) -# Generate and print the poem -poem = poet() -print(poem.to_list()[0])` +# We apply the prompter to the topics dataset. +poems = poet(topics) +print(poems.to_pandas())` export function RunsTable() { const [runs, setRuns] = useState([]) From e485bb78ea0e35837f44cba327a38dc13e22880f Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 03:52:02 +0000 Subject: [PATCH 4/9] Minor updates to readme. --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0b687f54..5c15dfdd 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,14 @@ class Poems(BaseModel): # We define a prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - # The prompt_func takes a row of the dataset as input. - # The row is a dictionary with a single key 'topic' in this case. - prompt_func=lambda row: f"Write two poems about {row['topic']}.", # The prompt_func takes a row of the dataset as input. # The row is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", ) -poem = poet() -print(poem["response"][0]) +poem = poet(topics) +print(poem.to_pandas()) ``` Note that `topics` can be created with `curator.Prompter` as well, and we can scale this up to create tens of thousands of diverse poems. @@ -73,7 +70,8 @@ and other examples in the [examples](examples) directory. To run the examples, make sure to set your OpenAI API key in the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. -See the [docs](https://docs.bespokelabs.ai/) for more details as well as troubleshooting. +See the [docs](https://docs.bespokelabs.ai/) for more details as well as +for troubleshooting information. ## Bespoke Curator Viewer From 1f16a4c84dcbf6e87c8db40e8225139c806dc73b Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 04:01:10 +0000 Subject: [PATCH 5/9] Fix the links so they are not broken in pypi page. --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5c15dfdd..9f660340 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@

- - Bespoke Labs Logo + + Bespoke Labs Logo

@@ -64,8 +64,8 @@ print(poem.to_pandas()) ``` Note that `topics` can be created with `curator.Prompter` as well, and we can scale this up to create tens of thousands of diverse poems. -You can see a more detailed example in the [examples/poem.py](examples/poem.py) file, -and other examples in the [examples](examples) directory. +You can see a more detailed example in the [examples/poem.py](https://github.com/bespokelabsai/curator/blob/mahesh/update_doc/examples/poem.py) file, +and other examples in the [examples](https://github.com/bespokelabsai/curator/blob/mahesh/update_doc/examples) directory. To run the examples, make sure to set your OpenAI API key in the environment variable `OPENAI_API_KEY` by running `export OPENAI_API_KEY=sk-...` in your terminal. From 4b483866b72595ee0407a042a0f19561ae49ea3b Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 05:50:59 +0000 Subject: [PATCH 6/9] Address Charlie's comments. --- README.md | 22 +++++++++++++++---- .../components/dataset-viewer/RunsTable.tsx | 20 ++++++++++++----- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 9f660340..f8204672 100644 --- a/README.md +++ b/README.md @@ -42,25 +42,39 @@ from pydantic import BaseModel, Field from typing import List # Create a dataset object for the topics you want to create the poems. -topics = Dataset.from_list([{"topic": "Dreams vs. reality"}, - {"topic": "Urban loneliness in a bustling city"}, - {"topic": "Beauty of Bespoke Labs's Curator library"}]) +topics = Dataset.from_dict({"topic": [ + "Dreams vs. reality", + "Urban loneliness in a bustling city", + "Beauty of Bespoke Labs's Curator library" +]}) # Define a class to encapsulate a list of poems. class Poems(BaseModel): poems_list: List[str] = Field(description="A list of poems.") -# We define a prompter that generates poems which gets applied to the topics dataset. +# We define a Prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( # The prompt_func takes a row of the dataset as input. # The row is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", + response_format=Poems, + # row is the input row, and poems is the Poems class which + # is parsed from the structured output from the LLM. + parse_func=lambda row, poems: [ + {"topic": row["topic"], "poem": p} for p in poems.poems_list + ], ) poem = poet(topics) print(poem.to_pandas()) +# Example output: +# topic poem +# 0 Urban loneliness in a bustling city **In the Crowd**\nBeneath the neon glow of cit... +# 1 Beauty of Bespoke Labs's Curator library **Whispers of Curated Dreams**\nIn a sanctuary... +# 2 Beauty of Bespoke Labs's Curator library **Library of Tailored Thoughts**\nNestled with... +# 3 Dreams vs. reality **Whispers of Dreams** \nIn the silent folds ... ``` Note that `topics` can be created with `curator.Prompter` as well, and we can scale this up to create tens of thousands of diverse poems. diff --git a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx index b21d2bb6..06fc1a4b 100644 --- a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx +++ b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx @@ -26,9 +26,11 @@ from pydantic import BaseModel, Field from typing import List # Create a dataset object for the topics you want to create the poems. -topics = Dataset.from_list([{"topic": "Dreams vs. reality"}, - {"topic": "Urban loneliness in a bustling city"}, - {"topic": "Beauty of Bespoke Labs's Curator library"}]) +topics = Dataset.from_dict({"topic": [ + "Dreams vs. reality", + "Urban loneliness in a bustling city", + "Beauty of Bespoke Labs's Curator library" +]}) # Define a class to encapsulate a list of poems. class Poems(BaseModel): @@ -42,7 +44,7 @@ poet = curator.Prompter( prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", response_format=Poems, - # `row` is the input row, and `poems` is the Poems class which + # row is the input row, and poems is the Poems class which # is parsed from the structured output from the LLM. parse_func=lambda row, poems: [ {"topic": row["topic"], "poem": p} for p in poems.poems_list @@ -51,7 +53,15 @@ poet = curator.Prompter( # We apply the prompter to the topics dataset. poems = poet(topics) -print(poems.to_pandas())` +print(poems.to_pandas()) + +# Example output: +# topic poem +# 0 Urban loneliness in a bustling city **In the Crowd**\nBeneath the neon glow of cit... +# 1 Beauty of Bespoke Labs's Curator library **Whispers of Curated Dreams**\nIn a sanctuary... +# 2 Beauty of Bespoke Labs's Curator library **Library of Tailored Thoughts**\nNestled with... +# 3 Dreams vs. reality **Whispers of Dreams** \nIn the silent folds ... +` export function RunsTable() { const [runs, setRuns] = useState([]) From 94bc1635d399003c00b0a4d62b60588458c89652 Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 06:11:29 +0000 Subject: [PATCH 7/9] Fix issues with only single poems being parsed. --- README.md | 22 +++---- .../components/dataset-viewer/RunsTable.tsx | 28 ++++----- examples/poem.py | 58 +++++++------------ 3 files changed, 46 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index f8204672..92c080df 100644 --- a/README.md +++ b/README.md @@ -43,27 +43,29 @@ from typing import List # Create a dataset object for the topics you want to create the poems. topics = Dataset.from_dict({"topic": [ - "Dreams vs. reality", "Urban loneliness in a bustling city", "Beauty of Bespoke Labs's Curator library" ]}) # Define a class to encapsulate a list of poems. +class Poem(BaseModel): + poem: str = Field(description="A poem.") + class Poems(BaseModel): - poems_list: List[str] = Field(description="A list of poems.") + poems_list: List[Poem] = Field(description="A list of poems.") # We define a Prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - # The prompt_func takes a row of the dataset as input. - # The row is a dictionary with a single key 'topic' in this case. + # `prompt_func` takes a row of the dataset as input. + # `row` is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", response_format=Poems, - # row is the input row, and poems is the Poems class which + # `row` is the input row, and `poems` is the `Poems` class which # is parsed from the structured output from the LLM. parse_func=lambda row, poems: [ - {"topic": row["topic"], "poem": p} for p in poems.poems_list + {"topic": row["topic"], "poem": p.poem} for p in poems.poems_list ], ) @@ -71,10 +73,10 @@ poem = poet(topics) print(poem.to_pandas()) # Example output: # topic poem -# 0 Urban loneliness in a bustling city **In the Crowd**\nBeneath the neon glow of cit... -# 1 Beauty of Bespoke Labs's Curator library **Whispers of Curated Dreams**\nIn a sanctuary... -# 2 Beauty of Bespoke Labs's Curator library **Library of Tailored Thoughts**\nNestled with... -# 3 Dreams vs. reality **Whispers of Dreams** \nIn the silent folds ... +# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\nA... +# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\nHor... +# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\nBesp... +# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\nBe... ``` Note that `topics` can be created with `curator.Prompter` as well, and we can scale this up to create tens of thousands of diverse poems. diff --git a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx index 06fc1a4b..5b9b9076 100644 --- a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx +++ b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx @@ -27,40 +27,40 @@ from typing import List # Create a dataset object for the topics you want to create the poems. topics = Dataset.from_dict({"topic": [ - "Dreams vs. reality", "Urban loneliness in a bustling city", "Beauty of Bespoke Labs's Curator library" ]}) # Define a class to encapsulate a list of poems. +class Poem(BaseModel): + poem: str = Field(description="A poem.") + class Poems(BaseModel): - poems_list: List[str] = Field(description="A list of poems.") + poems_list: List[Poem] = Field(description="A list of poems.") -# We define a prompter that generates poems which gets applied to the topics dataset. +# We define a Prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( - # The prompt_func takes a row of the dataset as input. - # The row is a dictionary with a single key 'topic' in this case. + # prompt_func takes a row of the dataset as input. + # row is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", response_format=Poems, # row is the input row, and poems is the Poems class which # is parsed from the structured output from the LLM. parse_func=lambda row, poems: [ - {"topic": row["topic"], "poem": p} for p in poems.poems_list + {"topic": row["topic"], "poem": p.poem} for p in poems.poems_list ], ) -# We apply the prompter to the topics dataset. -poems = poet(topics) -print(poems.to_pandas()) - +poem = poet(topics) +print(poem.to_pandas()) # Example output: # topic poem -# 0 Urban loneliness in a bustling city **In the Crowd**\nBeneath the neon glow of cit... -# 1 Beauty of Bespoke Labs's Curator library **Whispers of Curated Dreams**\nIn a sanctuary... -# 2 Beauty of Bespoke Labs's Curator library **Library of Tailored Thoughts**\nNestled with... -# 3 Dreams vs. reality **Whispers of Dreams** \nIn the silent folds ... +# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\nA... +# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\nHor... +# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\nBesp... +# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\nBe... ` export function RunsTable() { diff --git a/examples/poem.py b/examples/poem.py index 5697e5e2..8ef8c618 100644 --- a/examples/poem.py +++ b/examples/poem.py @@ -1,58 +1,40 @@ -"""Example of using the curator library to generate diverse poems. - -We generate 10 diverse topics and then generate 2 poems for each topic.""" - from bespokelabs import curator from datasets import Dataset from pydantic import BaseModel, Field from typing import List +# Create a dataset object for the topics you want to create the poems. +topics = Dataset.from_dict({"topic": [ + "Urban loneliness in a bustling city", + "Beauty of Bespoke Labs's Curator library" +]}) -# We use Pydantic and structured outputs to define the format of the response. -# This defines a list of topics, which is the response format for the topic generator. -class Topics(BaseModel): - topics_list: List[str] = Field(description="A list of topics.") - +# Define a class to encapsulate a list of poems. +class Poem(BaseModel): + poem: str = Field(description="A poem.") -# We define a prompter that generates topics. -topic_generator = curator.Prompter( - prompt_func=lambda: f"Generate 10 diverse topics that are suitable for writing poems about.", - model_name="gpt-4o-mini", - response_format=Topics, - parse_func=lambda _, topics: [{"topic": t} for t in topics.topics_list], -) - -# We call the prompter to generate the dataset. -# When no input dataset is provided, an "empty" dataset with a single row is used as a starting point. -topics: Dataset = topic_generator() -print(topics["topic"]) - - -# Define a list of poems. class Poems(BaseModel): - poems_list: List[str] = Field(description="A list of poems.") + poems_list: List[Poem] = Field(description="A list of poems.") -# We define a prompter that generates poems which gets applied to the topics dataset. +# We define a Prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( # The prompt_func takes a row of the dataset as input. # The row is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", response_format=Poems, - # `row` is the input row, and `poems` is the Poems class which is parsed from the structured output from the LLM. + # row is the input row, and poems is the Poems class which + # is parsed from the structured output from the LLM. parse_func=lambda row, poems: [ - {"topic": row["topic"], "poem": p} for p in poems.poems_list + {"topic": row["topic"], "poem": p.poem} for p in poems.poems_list ], ) -# We apply the prompter to the topics dataset. -poems = poet(topics) -print(poems.to_pandas()) - -# Expected output: -# topic poem -# 0 Dreams vs. reality In the realm where dreams take flight,\nWhere ... -# 1 Dreams vs. reality Reality stands with open eyes,\nA weighty thro... -# 2 Urban loneliness in a bustling city In the city's heart where shadows blend,\nAmon... -# 3 Urban loneliness in a bustling city Among the crowds, I walk alone,\nA sea of face... +poem = poet(topics) +print(poem.to_pandas()) +# topic poem +# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\nA... +# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\nHor... +# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\nBesp... +# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\nBe... \ No newline at end of file From c8391275bbcc2bfe3f997c9a0927a7a1d1071e83 Mon Sep 17 00:00:00 2001 From: Mahesh Sathiamoorthy Date: Fri, 15 Nov 2024 06:12:55 +0000 Subject: [PATCH 8/9] undo changes to poem.py --- examples/poem.py | 58 +++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/examples/poem.py b/examples/poem.py index 8ef8c618..f7cfe704 100644 --- a/examples/poem.py +++ b/examples/poem.py @@ -1,40 +1,58 @@ +"""Example of using the curator library to generate diverse poems. + +We generate 10 diverse topics and then generate 2 poems for each topic.""" + from bespokelabs import curator from datasets import Dataset from pydantic import BaseModel, Field from typing import List -# Create a dataset object for the topics you want to create the poems. -topics = Dataset.from_dict({"topic": [ - "Urban loneliness in a bustling city", - "Beauty of Bespoke Labs's Curator library" -]}) -# Define a class to encapsulate a list of poems. -class Poem(BaseModel): - poem: str = Field(description="A poem.") +# We use Pydantic and structured outputs to define the format of the response. +# This defines a list of topics, which is the response format for the topic generator. +class Topics(BaseModel): + topics_list: List[str] = Field(description="A list of topics.") + +# We define a prompter that generates topics. +topic_generator = curator.Prompter( + prompt_func=lambda: f"Generate 10 diverse topics that are suitable for writing poems about.", + model_name="gpt-4o-mini", + response_format=Topics, + parse_func=lambda _, topics: [{"topic": t} for t in topics.topics_list], +) + +# We call the prompter to generate the dataset. +# When no input dataset is provided, an "empty" dataset with a single row is used as a starting point. +topics: Dataset = topic_generator() +print(topics["topic"]) + + +# Define a list of poems. class Poems(BaseModel): - poems_list: List[Poem] = Field(description="A list of poems.") + poems_list: List[str] = Field(description="A list of poems.") -# We define a Prompter that generates poems which gets applied to the topics dataset. +# We define a prompter that generates poems which gets applied to the topics dataset. poet = curator.Prompter( # The prompt_func takes a row of the dataset as input. # The row is a dictionary with a single key 'topic' in this case. prompt_func=lambda row: f"Write two poems about {row['topic']}.", model_name="gpt-4o-mini", response_format=Poems, - # row is the input row, and poems is the Poems class which - # is parsed from the structured output from the LLM. + # `row` is the input row, and `poems` is the Poems class which is parsed from the structured output from the LLM. parse_func=lambda row, poems: [ - {"topic": row["topic"], "poem": p.poem} for p in poems.poems_list + {"topic": row["topic"], "poem": p} for p in poems.poems_list ], ) -poem = poet(topics) -print(poem.to_pandas()) -# topic poem -# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\nA... -# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\nHor... -# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\nBesp... -# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\nBe... \ No newline at end of file +# We apply the prompter to the topics dataset. +poems = poet(topics) +print(poems.to_pandas()) + +# Expected output: +# topic poem +# 0 Dreams vs. reality In the realm where dreams take flight,\nWhere ... +# 1 Dreams vs. reality Reality stands with open eyes,\nA weighty thro... +# 2 Urban loneliness in a bustling city In the city's heart where shadows blend,\nAmon... +# 3 Urban loneliness in a bustling city Among the crowds, I walk alone,\nA sea of face... \ No newline at end of file From 1ae2173ba57cd62e23d5ebd06b4a3f3f4652d8ba Mon Sep 17 00:00:00 2001 From: Charlie Cheng-Jie Ji Date: Fri, 15 Nov 2024 22:32:52 +0000 Subject: [PATCH 9/9] fixed string escape and make code font size slightly smaller --- .../components/dataset-viewer/RunsTable.tsx | 9 ++++----- .../components/ui/python-highlighter.tsx | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx index 5b9b9076..d85a42be 100644 --- a/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx +++ b/bespoke-dataset-viewer/components/dataset-viewer/RunsTable.tsx @@ -57,11 +57,10 @@ poem = poet(topics) print(poem.to_pandas()) # Example output: # topic poem -# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\nA... -# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\nHor... -# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\nBesp... -# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\nBe... -` +# 0 Urban loneliness in a bustling city In the city's heart, where the sirens wail,\\nA... +# 1 Urban loneliness in a bustling city City streets hum with a bittersweet song,\\nHor... +# 2 Beauty of Bespoke Labs's Curator library In whispers of design and crafted grace,\\nBesp... +# 3 Beauty of Bespoke Labs's Curator library In the hushed breath of parchment and ink,\\nBe...` export function RunsTable() { const [runs, setRuns] = useState([]) diff --git a/bespoke-dataset-viewer/components/ui/python-highlighter.tsx b/bespoke-dataset-viewer/components/ui/python-highlighter.tsx index 3435915c..46889ba6 100644 --- a/bespoke-dataset-viewer/components/ui/python-highlighter.tsx +++ b/bespoke-dataset-viewer/components/ui/python-highlighter.tsx @@ -1,9 +1,9 @@ +import { Button } from "@/components/ui/button"; +import { Check, Copy } from "lucide-react"; import Prism from 'prismjs'; import 'prismjs/components/prism-python'; import 'prismjs/themes/prism-tomorrow.css'; import React from 'react'; -import { Button } from "@/components/ui/button" -import { Check, Copy } from "lucide-react" interface PythonHighlighterProps { code: string; @@ -38,7 +38,7 @@ export const PythonHighlighter: React.FC = ({ code }) => )} -
+            
                 
                     {code}