Skip to content

Commit

Permalink
Merge pull request #41 from normand1/dn/refactor-and-sqlite-plugin
Browse files Browse the repository at this point in the history
Refactor and sqlite plugin
  • Loading branch information
normand1 authored Dec 7, 2024
2 parents 24d5cb9 + f9649a0 commit d93a0d5
Show file tree
Hide file tree
Showing 81 changed files with 1,623 additions and 505 deletions.
76 changes: 22 additions & 54 deletions .config.env
Original file line number Diff line number Diff line change
@@ -1,59 +1,27 @@

#! This is the configuration file for the podcast generator
#! Ensure no passwords or API keys are stored in this file
#! Passwords and API keys should be stored in the .auth.env file, which is ignored by git

## Environment variables
#articlesRSSFeedPlugin,podcastFeedPlugin
PODCAST_DATA_SOURCE_PLUGINS=redditAPIPlugin
#testerDataSourcePlugin
SUBREDDIT=programming
NUMBER_OF_POSTS_TO_FETCH=1
#testerDataSourcePlugin

PODCAST_DATA_SOURCE_PLUGINS="sqliteTokenPlugin"

PODCAST_INTRO_PLUGINS=standardIntroPlugin
#testerIntroPlugin
PODCAST_SCRAPER_PLUGINS=podcastRssAudioTranscriptScraper,newsStoryScraperPlugin
#testerScraperPlugin
PODCAST_SCRAPER_PLUGINS=tokenResearchScraperPlugin
PODCAST_SEGMENT_WRITER_PLUGINS=topTenSegmentWriterPlugin
#testerSegmentWriter
PODCAST_OUTRO_PLUGINS=outroWriterPlugin
#testerOutroPlugin
PODCAST_PRODUCER_PLUGINS=producerPlugin
PODCAST_FEEDS=https://feed.syntax.fm/
#https://podnews.net/rss
# https://www.spreaker.com/show/6060119/episodes/feed
ARTICLES_RSS_FEEDS=https://rss.app/feeds/_c2pBTHi1b82IDjwT.xml
#,https://feed.syntax.fm/
NUMBER_OF_ITEMS_TO_FETCH=1
SHOULD_PAUSE_AND_VALIDATE_STORIES_BEFORE_SCRAPING=true

# The following environment variables are for configuring the podcast for your specific needs
# required
PODCAST_NAME="Autonomous Tech Podcast"
PODCAST_TYPE=tech
PODCAST_DESCRIPTION="An autonomous tech podcast"

# Langchain
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
# LANGCHAIN_PROJECT=pr-ajar-archives-11

# OpenAI
OPENAI_MODEL_SUMMARY=gpt-4o-mini
OPENAI_MAX_TOKENS_SUMMARY=4096
OPENAI_TEMPERATURE_SUMMARY=0.2

# Anthropic
ANTHROPIC_MODEL=claude-3-5-sonnet-20240620
ANTHROPIC_MAX_TOKENS=8192

# The following environment variables are required to publish the podcast
UPLOAD_TIMEOUT=60 * 5 * 1000
AUDIO_FILE_FORMAT='mp3'
AUDIO_FILE_TEMPLATE='episode.%(ext)s'
#PUPETEER_HEADLESS=false
# SAVE_AS_DRAFT=0

# TTS Script Selection
# TTS_SCRIPT=ttsLocalScript.sh
# Uncomment the line below and comment out the line above to use ttsScript.sh instead
TTS_SCRIPT=ttsLocalScript.sh
PODCAST_RESEARCHER_PLUGINS=warpcastCastsResearcherPlugin,warpcastUserResearcherPlugin
TOKEN_STORIES_DB_PATH=/Users/davidnorman/clanker-launch-bot/tokens.db
TOKEN_STORIES_COUNT_LIMIT=1

SHOULD_PAUSE_AND_VALIDATE_STORIES_BEFORE_SCRAPING=false

PODCAST_NAME="Slop Pod"
PODCAST_TYPE="Memes and internet culture"
PODCAST_DESCRIPTION="This is a podcast about the internet, memes and the people and stories behind it all"

LLM_MODEL_PROVIDER=anthropic
LLM_MODEL_VERSION_NAME=claude-3-5-sonnet-latest
OPENAI_MAX_TOKENS_SUMMARY=2048
OPENAI_TEMPERATURE_SUMMARY=0.7

TTS_SCRIPT=ttsLocalScript.sh

65 changes: 65 additions & 0 deletions .config.env.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#! This is the configuration file for the podcast generator
#! Ensure no passwords or API keys are stored in this file
#! Passwords and API keys should be stored in the .auth.env file, which is ignored by git

## Environment variables
#articlesRSSFeedPlugin,podcastFeedPlugin
PODCAST_DATA_SOURCE_PLUGINS=sqliteTokenPlugin
#testerDataSourcePlugin
SUBREDDIT=programming
NUMBER_OF_POSTS_TO_FETCH=1
#testerDataSourcePlugin
PODCAST_INTRO_PLUGINS=standardIntroPlugin
#testerIntroPlugin
PODCAST_SCRAPER_PLUGINS=tokenResearchScraperPlugin
#testerScraperPlugin
PODCAST_SEGMENT_WRITER_PLUGINS=topTenSegmentWriterPlugin
#testerSegmentWriter
PODCAST_OUTRO_PLUGINS=outroWriterPlugin
#testerOutroPlugin
PODCAST_PRODUCER_PLUGINS=producerPlugin
PODCAST_FEEDS=https://feed.syntax.fm/
#https://podnews.net/rss
# https://www.spreaker.com/show/6060119/episodes/feed
ARTICLES_RSS_FEEDS=https://rss.app/feeds/_c2pBTHi1b82IDjwT.xml
#,https://feed.syntax.fm/
NUMBER_OF_ITEMS_TO_FETCH=1
SHOULD_PAUSE_AND_VALIDATE_STORIES_BEFORE_SCRAPING=true

# The following environment variables are for configuring the podcast for your specific needs
# required
PODCAST_NAME="Slop Cast"
PODCAST_TYPE="crypto, tech, memes"
PODCAST_DESCRIPTION="A podcast about the latest memecoins on farcaster and other platforms"

# Token Stories
TOKEN_STORIES_DB_PATH=/Users/davidnorman/clanker-launch-bot/tokens.db

# Langchain
# LANGCHAIN_TRACING_V2=true
# LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
# LANGCHAIN_PROJECT=pr-ajar-archives-11

# anthropic or openai
LLM_MODEL_PROVIDER=anthropic

# OpenAI
LLM_MODEL_VERSION_NAME=gpt-4o-mini
OPENAI_MAX_TOKENS_SUMMARY=4096
OPENAI_TEMPERATURE_SUMMARY=0.2

# Anthropic
ANTHROPIC_MODEL=claude-3-5-sonnet-latest
ANTHROPIC_MAX_TOKENS=8192

# The following environment variables are required to publish the podcast
UPLOAD_TIMEOUT=60 * 5 * 1000
AUDIO_FILE_FORMAT='mp3'
AUDIO_FILE_TEMPLATE='episode.%(ext)s'
#PUPETEER_HEADLESS=false
# SAVE_AS_DRAFT=0

# TTS Script Selection
# TTS_SCRIPT=ttsLocalScript.sh
# Uncomment the line below and comment out the line above to use ttsScript.sh instead
TTS_SCRIPT=ttsLocalScript.sh
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Python unit tests
on:
push:
branches: [ master ]
pull_request_target:
pull_request:
branches: [ master ]

jobs:
Expand Down
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[MAIN]
# https://pylint.pycqa.org/en/latest/user_guide/checkers/features.html
disable=C0114,C0116,E0401,C0115,R0902
disable=C0114,C0116,E0401,C0115,R0902,E1131,W0719

# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
Expand Down
12 changes: 6 additions & 6 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
},
{
"name": "Python: Debug App",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/podcastTextGenerationApp/app.py",
"console": "integratedTerminal",
Expand All @@ -34,7 +34,7 @@
},
{
"name": "Run Current File",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
Expand All @@ -43,16 +43,16 @@
},
{
"name": "Generate Podcast Text Files",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/podcastTextGenerationApp/generatePodcast.py",
"console": "integratedTerminal",
// "args": ["Podcast-Jul24-2024-05PM"],
"args": ["-f", "Fomo-Cast-Dec03-2024-10PM"],
"justMyCode": true
},
{
"name": "Generate Chapter Files",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/podcastTextGenerationApp/generatePodcastChapterFile.py",
// "args": ["output/Podcast-Jul23-2024-09AM"],
Expand All @@ -61,7 +61,7 @@
},
{
"name": "run generatePodcast.py",
"type": "python",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/podcastTextGenerationApp/generatePodcast.py",
"console": "integratedTerminal",
Expand Down
26 changes: 22 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,32 @@ Run the setup script:
./setup_script.sh
```

## Install Build Dependencies

```bash
brew install helmfile
```

## Configure Plugins

HyperFeeder is made to be easily configurable and extensible with plugins. You can easily use existing plugins in different configurations by either modifying the plugins used in each step of the podcast generation process manually in the `.config.env` file or you can run the `configurePlugins.sh` script to use preset plugin configurations for generating a podcast based on either `news` or `podcasts`.
#TODO: UPDATE THIS SECTION!

HyperFeeder is made to be easily configurable and extensible with plugins. You can use existing plugins in different configurations either by manually modifying the plugins used in each step of the podcast generation process in the `.config.env` file, or by running the `configurePlugins.sh` script to use preset plugin configurations for generating a podcast based on any of the available plugins.

<!-- TODO: UPDATE EXAMPLE GIF FOR NEW WORKFLOW ![Config Demo GIF](./config_demo.gif) -->

Different plugins require specific data sources and configuration options to be set in `.config.env` to work properly.
Check the plugin directories for details on what each plugin requires in the `.config.env` file to be run.

We use Helm to configure these values and to ensure that the requirements for each plugin are met when modifying the script.

To update general publication settings modify:
[podcastTextGenerationApp/charts/values/base.yaml](podcastTextGenerationApp/charts/values/base.yaml)

![Config Demo GIF](./config_demo.gif)
To update which plugins are active modify:
[podcastTextGenerationApp/charts/helmfile.yaml](podcastTextGenerationApp/charts/helmfile.yaml)

Different plugins require specific data sources and configuration options to be set in the `.config.env` to work properly. These must be updated manually (for now).
Check the plugin directories for details on what each plugin requires in the `.config.env` file to be run. You can also check your active plugins by looking at the very top of the .config.env file. The current Plugin Types are: `NEW_PODCAST_DATA_SOURCE_PLUGINS`, `NEW_PODCAST_INTRO_PLUGINS`, `NEW_PODCAST_SCRAPER_PLUGINS`, `NEW_PODCAST_SUMMARY_PLUGINS` (DEPRECATED), `NEW_PODCAST_SEGMENT_WRITER_PLUGINS`.
When you have made your changes, run `./configurePlugins.sh`.

## Dependencies

Expand Down
1 change: 1 addition & 0 deletions audioScripts/ttsLocalScript.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/zsh
echo "ttsLocalScript.sh started"
set -e # Exit immediately if a command exits with a non-zero status
set -x # Print commands and their arguments as they are executed

# Function for logging
log() {
Expand Down
80 changes: 12 additions & 68 deletions configurePlugins.sh
Original file line number Diff line number Diff line change
@@ -1,73 +1,17 @@
#!/bin/zsh

# Prompt user for configuration choice
echo -n "Enter configuration choice\n (n) news | (p) podcasts | (nl) newsletter): "
read choice
cd podcastTextGenerationApp/charts

# Define the new values based on the chosen configuration
if [[ "$choice" == "news" || "$choice" == "n" ]]; then
NEW_PODCAST_DATA_SOURCE_PLUGINS=hackerNewsAPIPlugin
NEW_PODCAST_INTRO_PLUGINS=standardIntroPlugin
NEW_PODCAST_SCRAPER_PLUGINS=newsStoryScraperPlugin
NEW_PODCAST_SUMMARY_PLUGINS=storySummaryPlugin
NEW_PODCAST_SEGMENT_WRITER_PLUGINS=topTenSegmentWriterPlugin
NEW_PODCAST_OUTRO_PLUGINS=outroWriterPlugin
NEW_PODCAST_PRODUCER_PLUGINS=producerPlugin
elif [[ "$choice" == "podcasts" || "$choice" == "p" ]]; then
echo -n "Enter the podcast RSS Feed: "
read podcast_feed
if grep -q "PODCAST_FEEDS=" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource; then
# If the variable exists in the file, replace it
sed -i '' "s#^PODCAST_FEEDS=.*#PODCAST_FEEDS=$podcast_feed#" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
else
# If the variable doesn't exist in the file, add it
echo "PODCAST_FEEDS=$podcast_feed" >> podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
fi
echo -n "Enter the number of items to fetch: "
read number_of_items
if grep -q "NUMBER_OF_ITEMS_TO_FETCH=" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource; then
# If the variable exists in the file, replace it
sed -i '' "s#^NUMBER_OF_ITEMS_TO_FETCH=.*#NUMBER_OF_ITEMS_TO_FETCH=$number_of_items#" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
else
# If the variable doesn't exist in the file, add it
echo "NUMBER_OF_ITEMS_TO_FETCH=$number_of_items" >> podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
fi
NEW_PODCAST_DATA_SOURCE_PLUGINS=podcastFeedPlugin
NEW_PODCAST_INTRO_PLUGINS=standardIntroPlugin
NEW_PODCAST_SCRAPER_PLUGINS=rawScraperPlugin
NEW_PODCAST_SUMMARY_PLUGINS=storySummaryPlugin
NEW_PODCAST_SEGMENT_WRITER_PLUGINS=topTenSegmentWriterPlugin
NEW_PODCAST_OUTRO_PLUGINS=outroWriterPlugin
NEW_PODCAST_PRODUCER_PLUGINS=producerPlugin
elif [[ "$choice" == "newsletter" || "$choice" == "nl" ]]; then
echo -n "Enter the newsletter RSS Feed: "
read newsletter_feed
if grep -q "NEWSLETTER_RSS_FEEDS=" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource; then
# If the variable exists in the file, replace it
sed -i '' "s#^NEWSLETTER_RSS_FEEDS=.*#NEWSLETTER_RSS_FEEDS=$newsletter_feed#" podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
else
# If the variable doesn't exist in the file, add it
echo "NEWSLETTER_RSS_FEEDS=$newsletter_feed" >> podcastTextGenerationApp/podcastDataSourcePlugins/.env.datasource
fi
NEW_PODCAST_DATA_SOURCE_PLUGINS=articlesRSSFeedPlugin
NEW_PODCAST_INTRO_PLUGINS=standardIntroPlugin
NEW_PODCAST_SCRAPER_PLUGINS=rssItemScraperPlugin
NEW_PODCAST_SUMMARY_PLUGINS=storySummaryPlugin
NEW_PODCAST_SEGMENT_WRITER_PLUGINS=topTenSegmentWriterPlugin
NEW_PODCAST_OUTRO_PLUGINS=outroWriterPlugin
NEW_PODCAST_PRODUCER_PLUGINS=producerPlugin
# Generate templates
echo "Generating templates..."
if helmfile template > output.yaml 2>/dev/tty; then
# Extract environment variables for validation
grep -A 1000 "config.env:" output.yaml | sed 's/^ *//' | sed '1d' > ../../.config.env
cd ../../

# Source the updated environment variables
source .config.env
echo "Plugins configured! 🚀🚀🚀"
else
echo "Invalid choice. Exiting."
exit 1
echo "Failed to generate templates. Please check for errors."
fi

# Update the .config.env file
sed -i '' "s/^PODCAST_DATA_SOURCE_PLUGINS=.*/PODCAST_DATA_SOURCE_PLUGINS=$NEW_PODCAST_DATA_SOURCE_PLUGINS/" .env
sed -i '' "s/^PODCAST_INTRO_PLUGINS=.*/PODCAST_INTRO_PLUGINS=$NEW_PODCAST_INTRO_PLUGINS/" .env
sed -i '' "s/^PODCAST_SCRAPER_PLUGINS=.*/PODCAST_SCRAPER_PLUGINS=$NEW_PODCAST_SCRAPER_PLUGINS/" .env
sed -i '' "s/^PODCAST_SUMMARY_PLUGINS=.*/PODCAST_SUMMARY_PLUGINS=$NEW_PODCAST_SUMMARY_PLUGINS/" .env
sed -i '' "s/^PODCAST_SEGMENT_WRITER_PLUGINS=.*/PODCAST_SEGMENT_WRITER_PLUGINS=$NEW_PODCAST_SEGMENT_WRITER_PLUGINS/" .env
sed -i '' "s/^PODCAST_OUTRO_PLUGINS=.*/PODCAST_OUTRO_PLUGINS=$NEW_PODCAST_OUTRO_PLUGINS/" .env
sed -i '' "s/^PODCAST_PRODUCER_PLUGINS=.*/PODCAST_PRODUCER_PLUGINS=$NEW_PODCAST_PRODUCER_PLUGINS/" .env

echo "Configuration updated successfully."
4 changes: 2 additions & 2 deletions js.config.env
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
LANGCHAIN_PROJECT=pr-ajar-archives-11

# OpenAI
OPENAI_MODEL_SUMMARY=gpt-4-turbo
LLM_MODEL_VERSION_NAME=gpt-4-turbo
OPENAI_MAX_TOKENS_SUMMARY=4096
OPENAI_TEMPERATURE_SUMMARY=0.2

# Anthropic
ANTHROPIC_MODEL=claude-3-5-sonnet-20240620
ANTHROPIC_MODEL=claude-3-5-sonnet-latest
ANTHROPIC_MAX_TOKENS=8192

# TTS Script Selection
Expand Down
Binary file added neynar_cache.sqlite
Binary file not shown.
6 changes: 6 additions & 0 deletions podcastTextGenerationApp/charts/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
apiVersion: v2
name: mypodcast
description: A helm chart to generate a .config.env for a publication
type: application
version: 0.1.0
appVersion: "1.0"
19 changes: 19 additions & 0 deletions podcastTextGenerationApp/charts/helmfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Uncomment the plugins you want to use and update their values in the associated yaml file as needed
releases:
- name: hyperfeeder
namespace: default
chart: ./
values:
- values/base.yaml
- values/researchers.yaml
- values/podcast.yaml
- values/llm.yaml
- values/tts.yaml
- values/sqliteToken.yaml
# - values/reddit.yaml
# - values/podcastFeed.yaml
# - values/articlesRSS.yaml
# - values/newsletterRSS.yaml
# - values/hackerNewsAPI.yaml
# - values/arxivApi.yaml

Loading

0 comments on commit d93a0d5

Please sign in to comment.