
Docs upgrades #940

Merged
merged 12 commits into from
Oct 23, 2024
64 changes: 64 additions & 0 deletions docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md
@@ -0,0 +1,64 @@
# Handling Logs

Here is a snippet of code to help you handle the streaming logs of your research tasks.

```python
from typing import Dict, Any
import asyncio
from gpt_researcher import GPTResearcher

class CustomLogsHandler:
    """A custom Logs handler class to handle JSON data."""
    def __init__(self):
        self.logs = []  # Initialize logs to store data

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Send JSON data and log it."""
        self.logs.append(data)  # Append data to logs
        print(f"My custom Log: {data}")  # For demonstration, print the log

async def run():
    # Define the necessary parameters with sample values
    query = "What happened in the latest burning man floods?"
    report_type = "research_report"  # Type of report to generate
    report_source = "online"  # Could specify source like 'online', 'books', etc.
    tone = "informative"  # Tone of the report ('informative', 'casual', etc.)
    config_path = None  # Path to a config file, if needed

    # Initialize researcher with a custom WebSocket
    custom_logs_handler = CustomLogsHandler()

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        report_source=report_source,
        tone=tone,
        config_path=config_path,
        websocket=custom_logs_handler
    )

    await researcher.conduct_research()  # Conduct the research
    report = await researcher.write_report()  # Write the research report

    return report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
```

The data from the research process will be logged and stored in the `CustomLogsHandler` instance. You can customize the logging behavior as needed for your application.
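If you need more than in-memory collection, the same pattern extends naturally. Below is a minimal sketch of a handler that also appends each entry to a JSONL file; the `FileLogsHandler` name and file path are illustrative, and it reuses the `CustomLogsHandler` class and imports from the snippet above. Pass an instance of it as `websocket=` exactly as shown there.

```python
import json

class FileLogsHandler(CustomLogsHandler):
    """Hypothetical handler that also persists each log entry to a JSONL file."""
    def __init__(self, path: str = "research_logs.jsonl"):
        super().__init__()
        self.path = path  # Illustrative default path

    async def send_json(self, data: Dict[str, Any]) -> None:
        await super().send_json(data)  # Keep the in-memory log and the print
        with open(self.path, "a", encoding="utf-8") as f:
            f.write(json.dumps(data, default=str) + "\n")  # One JSON object per line
```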

Here's a sample of the output:

```
{
"type": "logs",
"content": "added_source_url",
"output": "✅ Added source url to research: https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery\n",
"metadata": "https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery"
}
```

The `metadata` field contains whatever metadata is relevant to that log entry. Run the script above to completion to see the full log output of a research task.
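Because every entry stays in the handler's `logs` list, you can also post-process them once the task completes. A rough sketch, assuming you kept a reference to the `custom_logs_handler` used above and that source URLs arrive in entries shaped like the sample shown:

```python
# Collect the source URLs that were added during the research task.
source_urls = [
    entry.get("metadata")
    for entry in custom_logs_handler.logs
    if entry.get("type") == "logs" and entry.get("content") == "added_source_url"
]
print(f"Collected {len(source_urls)} source URLs")
```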
30 changes: 30 additions & 0 deletions docs/docs/gpt-researcher/llms/testing-your-llm.md
@@ -0,0 +1,30 @@
# Testing your LLM

Here is a snippet of code to help you verify that your LLM-related environment variables are set up correctly.

```python
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv
load_dotenv()

async def main():
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs
        )
        print("LLM response:", report)  # Print the returned completion
    except Exception as e:
        print(f"Error in calling LLM: {e}")

# Run the async function
asyncio.run(main())
```
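If the call above fails, a quick first check is to print what your environment actually resolved to. This small sketch uses only the same `Config` attributes as the snippet above:

```python
from dotenv import load_dotenv
from gpt_researcher.config.config import Config

load_dotenv()
cfg = Config()

# If any of these print unexpected values, revisit your environment variables.
print("Smart LLM provider:", cfg.smart_llm_provider)
print("Smart LLM model:", cfg.smart_llm_model)
print("Smart token limit:", cfg.smart_token_limit)
```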
68 changes: 68 additions & 0 deletions docs/docs/gpt-researcher/search-engines/test-your-retriever.md
@@ -0,0 +1,68 @@
# Testing your Retriever

To test your retriever, you can use the following code snippet. The script will search for a sub-query and display the search results.

```python
import asyncio
from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor
import pprint
# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Create a mock researcher object with the attributes ResearchConductor expects
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    # Instantiating ResearchConductor confirms the mock exposes what it needs
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # Perform the search using the current retriever (run the blocking call in a thread)
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())
```

The search results include the title, body, and href of each result, printed with `pprint` (so long strings wrap across lines). For example:

```
[{
"body": "Jun 5, 2024 ... Three AI Design Patterns of Autonomous "
"Agents. Overview of the Three Patterns. Three notable AI "
"design patterns for autonomous agents include:.",
"href": "https://accredianpublication.medium.com/building-smarter-systems-the-role-of-agentic-design-patterns-in-genai-13617492f5df",
"title": "Building Smarter Systems: The Role of Agentic Design "
"Patterns in ..."},
...]
```
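Since each result is a dictionary, a handy follow-up check is to verify that results carry usable URLs. A small sketch that could sit inside the retriever loop above; it assumes the `href` key shown in the sample output (individual retrievers may name fields differently):

```python
# Inside the loop above, after search_results is returned:
urls = [r.get("href") for r in (search_results or []) if r.get("href")]
print(f"{retriever_class.__name__}: {len(urls)} results with URLs")
for url in urls[:3]:
    print("  -", url)
```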
5 changes: 4 additions & 1 deletion docs/sidebars.js
@@ -36,6 +36,7 @@
'gpt-researcher/gptr/example',
'gpt-researcher/gptr/config',
'gpt-researcher/gptr/scraping',
'gpt-researcher/gptr/handling-logs-as-they-stream',
'gpt-researcher/gptr/querying-the-backend',
'gpt-researcher/gptr/automated-tests',
'gpt-researcher/gptr/troubleshooting',
@@ -70,7 +71,8 @@
collapsed: true,
items: [
'gpt-researcher/llms/llms',
'gpt-researcher/llms/running-with-ollama'
'gpt-researcher/llms/running-with-ollama',
'gpt-researcher/llms/testing-your-llm'
]
},
{
@@ -80,6 +82,7 @@
collapsed: true,
items: [
'gpt-researcher/search-engines/retrievers',
'gpt-researcher/search-engines/test-your-retriever'
]
},
{
43 changes: 43 additions & 0 deletions tests/gptr-logs-handler.py
@@ -0,0 +1,43 @@
from typing import Dict, Any
import asyncio
from gpt_researcher import GPTResearcher

class CustomLogsHandler:
    """A custom Logs handler class to handle JSON data."""
    def __init__(self):
        self.logs = []  # Initialize logs to store data

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Send JSON data and log it."""
        self.logs.append(data)  # Append data to logs
        print(f"My custom Log: {data}")  # For demonstration, print the log

async def run():
    # Define the necessary parameters with sample values
    query = "What happened in the latest burning man floods?"
    report_type = "research_report"  # Type of report to generate
    report_source = "online"  # Could specify source like 'online', 'books', etc.
    tone = "informative"  # Tone of the report ('informative', 'casual', etc.)
    config_path = None  # Path to a config file, if needed

    # Initialize researcher with a custom WebSocket
    custom_logs_handler = CustomLogsHandler()

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        report_source=report_source,
        tone=tone,
        config_path=config_path,
        websocket=custom_logs_handler
    )

    await researcher.conduct_research()  # Conduct the research
    report = await researcher.write_report()  # Write the research report

    return report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
31 changes: 31 additions & 0 deletions tests/test-openai-llm.py
@@ -0,0 +1,31 @@
import asyncio
from gpt_researcher.utils.llm import get_llm
from gpt_researcher import GPTResearcher
from dotenv import load_dotenv
load_dotenv()

async def main():
    # Example usage of get_llm function
    llm_provider = "openai"
    model = "gpt-3.5-turbo"
    temperature = 0.7
    max_tokens = 1000

    llm = get_llm(llm_provider, model=model, temperature=temperature, max_tokens=max_tokens)
    print(f"LLM Provider: {llm_provider}, Model: {model}, Temperature: {temperature}, Max Tokens: {max_tokens}")
    print("llm:", llm)
    await test_llm(llm=llm)


async def test_llm(llm):
    # Test the connection with a simple query
    messages = [{"role": "user", "content": "sup?"}]
    try:
        response = await llm.get_chat_response(messages, stream=False)
        print("LLM response:", response)
    except Exception as e:
        print(f"Error: {e}")

# Run the async function
asyncio.run(main())
24 changes: 24 additions & 0 deletions tests/test-your-llm.py
@@ -0,0 +1,24 @@
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv
load_dotenv()

async def main():
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs
        )
        print("LLM response:", report)  # Print the returned completion
    except Exception as e:
        print(f"Error in calling LLM: {e}")

# Run the async function
asyncio.run(main())
49 changes: 49 additions & 0 deletions tests/test-your-retriever.py
@@ -0,0 +1,49 @@
import asyncio
from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor
import pprint
# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Create a mock researcher object with the attributes ResearchConductor expects
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    # Instantiating ResearchConductor confirms the mock exposes what it needs
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # Perform the search using the current retriever (run the blocking call in a thread)
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())