Skip to content

Commit

Permalink
Merge pull request #940 from ElishaKay/custom-logs-handler
Browse files Browse the repository at this point in the history
Docs upgrades
  • Loading branch information
assafelovic authored Oct 23, 2024
2 parents b0aa661 + b9d2cea commit cadb2ce
Show file tree
Hide file tree
Showing 8 changed files with 313 additions and 1 deletion.
64 changes: 64 additions & 0 deletions docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Handling Logs

Here is a snippet of code to help you handle the streaming logs of your Research tasks.

```python
from typing import Dict, Any
import asyncio
from gpt_researcher import GPTResearcher

class CustomLogsHandler:
    """In-memory recorder for the research log stream.

    Instances are passed to ``GPTResearcher`` via its ``websocket``
    argument (see ``run`` below); only a ``send_json`` coroutine is needed
    for that role, so every streamed event is simply kept in ``self.logs``.
    """

    def __init__(self) -> None:
        # Received log events (JSON-serializable dicts), in arrival order.
        self.logs: list = []

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Record one streamed log event and echo it to stdout."""
        self.logs.append(data)
        print(f"My custom Log: {data}")

async def run():
    """Run one research task end to end, streaming logs to a custom handler."""
    handler = CustomLogsHandler()

    # The handler stands in for a real WebSocket: GPTResearcher is given it
    # via the ``websocket`` keyword and the streamed events land in
    # ``handler.logs``.
    researcher = GPTResearcher(
        query="What happened in the latest burning man floods?",
        report_type="research_report",  # Type of report to generate
        report_source="online",         # Could be 'online', 'books', etc.
        tone="informative",             # Tone of the report
        config_path=None,               # Path to a config file, if needed
        websocket=handler,
    )

    await researcher.conduct_research()   # Gather sources and context
    return await researcher.write_report()  # Produce the final report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
```

The data from the research process will be logged and stored in the `CustomLogsHandler` instance. You can customize the logging behavior as needed for your application.

Here's a sample of the output:

```
{
"type": "logs",
"content": "added_source_url",
"output": "✅ Added source url to research: https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery\n",
"metadata": "https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery"
}
```

The `metadata` field will include whatever metadata is relevant to the log entry. Let the script above run to completion for the full logs output of a given research task.
30 changes: 30 additions & 0 deletions docs/docs/gpt-researcher/llms/testing-your-llm.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Testing your LLM

Here is a snippet of code to help you verify that your LLM-related environment variables are set up correctly.

```python
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv
load_dotenv()

async def main():
    """Smoke-test the configured smart LLM with a single chat completion.

    Reads the model/provider/token settings from ``Config`` (i.e. from the
    environment) and sends one trivial message; any failure is printed
    rather than raised so this doubles as a connectivity check.
    """
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs,
        )
    except Exception as e:
        print(f"Error in calling LLM: {e}")
    else:
        # Previously the response was assigned and silently discarded;
        # print it so a successful call is actually visible.
        print("LLM response:", report)

# Run the async function only when executed as a script (not on import).
if __name__ == "__main__":
    asyncio.run(main())
```
68 changes: 68 additions & 0 deletions docs/docs/gpt-researcher/search-engines/test-your-retriever.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Testing your Retriever

To test your retriever, you can use the following code snippet. The script will search for a sub-query and display the search results.

```python
import asyncio
from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor
import pprint
# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    """Run one sample sub-query through every configured retriever.

    Prints each retriever's raw search results so the developer can verify
    that the retriever-related environment variables are set up correctly.
    """
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Minimal stand-in exposing the attributes ResearchConductor expects.
    # BUG FIX: this method was named ``init`` (missing the dunder
    # underscores), so it was never called and the mock had none of these
    # attributes.
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # retriever.search is synchronous, so run it in a worker thread to
        # keep the event loop responsive.
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())
```

The output of the search results will include the title, body, and href of each search result. For example:

```json
[{
"body": "Jun 5, 2024 ... Three AI Design Patterns of Autonomous "
"Agents. Overview of the Three Patterns. Three notable AI "
"design patterns for autonomous agents include:.",
"href": "https://accredianpublication.medium.com/building-smarter-systems-the-role-of-agentic-design-patterns-in-genai-13617492f5df",
"title": "Building Smarter Systems: The Role of Agentic Design "
"Patterns in ..."},
...]
```
5 changes: 4 additions & 1 deletion docs/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
'gpt-researcher/gptr/example',
'gpt-researcher/gptr/config',
'gpt-researcher/gptr/scraping',
'gpt-researcher/gptr/handling-logs-as-they-stream',
'gpt-researcher/gptr/querying-the-backend',
'gpt-researcher/gptr/automated-tests',
'gpt-researcher/gptr/troubleshooting',
Expand Down Expand Up @@ -70,7 +71,8 @@
collapsed: true,
items: [
'gpt-researcher/llms/llms',
'gpt-researcher/llms/running-with-ollama'
'gpt-researcher/llms/running-with-ollama',
'gpt-researcher/llms/testing-your-llm'
]
},
{
Expand All @@ -80,6 +82,7 @@
collapsed: true,
items: [
'gpt-researcher/search-engines/retrievers',
'gpt-researcher/search-engines/test-your-retriever'
]
},
{
Expand Down
43 changes: 43 additions & 0 deletions tests/gptr-logs-handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import Dict, Any
import asyncio
from gpt_researcher import GPTResearcher

class CustomLogsHandler:
    """A custom Logs handler class to handle JSON data.

    Duck-types the one method GPTResearcher needs on its ``websocket``
    argument (``send_json``), keeping every streamed event in memory so the
    caller can inspect the full research log afterwards.
    """

    def __init__(self) -> None:
        # All received log events, in arrival order.
        self.logs: list = []

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Store one JSON-serializable log event and echo it to stdout."""
        self.logs.append(data)
        print(f"My custom Log: {data}")

async def run():
    """Execute a sample research task, streaming its logs to CustomLogsHandler."""
    handler = CustomLogsHandler()

    # GPTResearcher receives the handler in place of a real WebSocket, so
    # streamed log events accumulate in ``handler.logs``.
    researcher = GPTResearcher(
        query="What happened in the latest burning man floods?",
        report_type="research_report",  # Type of report to generate
        report_source="online",         # Could be 'online', 'books', etc.
        tone="informative",             # Tone of the report
        config_path=None,               # Path to a config file, if needed
        websocket=handler,
    )

    await researcher.conduct_research()   # Gather sources and context
    return await researcher.write_report()  # Produce the final report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
31 changes: 31 additions & 0 deletions tests/test-openai-llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import asyncio
from gpt_researcher.utils.llm import get_llm
from gpt_researcher import GPTResearcher
from dotenv import load_dotenv
load_dotenv()

async def main():
    """Demonstrate get_llm(): build an OpenAI chat model and smoke-test it."""
    # Sample configuration for the provider under test.
    llm_provider = "openai"
    model = "gpt-3.5-turbo"
    temperature = 0.7
    max_tokens = 1000

    llm = get_llm(
        llm_provider,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    print(f"LLM Provider: {llm_provider}, Model: {model}, Temperature: {temperature}, Max Tokens: {max_tokens}")
    print('llm: ', llm)
    await test_llm(llm=llm)


async def test_llm(llm):
# Test the connection with a simple query
messages = [{"role": "user", "content": "sup?"}]
try:
response = await llm.get_chat_response(messages, stream=False)
print("LLM response:", response)
except Exception as e:
print(f"Error: {e}")

# Run the async function only when executed as a script.  Without the
# guard this file performed a live LLM call on mere import, which also
# broke pytest collection of the tests/ directory.
if __name__ == "__main__":
    asyncio.run(main())
24 changes: 24 additions & 0 deletions tests/test-your-llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv
load_dotenv()

async def main():
    """Smoke-test the configured smart LLM with a single chat completion.

    Reads the model/provider/token settings from ``Config`` (i.e. from the
    environment) and sends one trivial message; any failure is printed
    rather than raised so this doubles as a connectivity check.
    """
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs,
        )
    except Exception as e:
        print(f"Error in calling LLM: {e}")
    else:
        # Previously the response was assigned and silently discarded;
        # print it so a successful call is actually visible.
        print("LLM response:", report)

# Run the async function only when executed as a script.  Without the
# guard this file performed a live LLM call on mere import, which also
# broke pytest collection of the tests/ directory.
if __name__ == "__main__":
    asyncio.run(main())
49 changes: 49 additions & 0 deletions tests/test-your-retriever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import asyncio
from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor
import pprint
# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    """Run one sample sub-query through every configured retriever.

    Prints each retriever's raw search results so the developer can verify
    that the retriever-related environment variables are set up correctly.
    """
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Minimal stand-in exposing the attributes ResearchConductor expects.
    # BUG FIX: this method was named ``init`` (missing the dunder
    # underscores), so it was never called and the mock had none of these
    # attributes.
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # retriever.search is synchronous, so run it in a worker thread to
        # keep the event loop responsive.
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())

0 comments on commit cadb2ce

Please sign in to comment.