diff --git a/docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md b/docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md
new file mode 100644
index 000000000..22a9964d1
--- /dev/null
+++ b/docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md
@@ -0,0 +1,64 @@
# Handling Logs

Here is a snippet of code to help you handle the streaming logs of your research tasks.

```python
from typing import Dict, Any
import asyncio
from gpt_researcher import GPTResearcher

class CustomLogsHandler:
    """A custom logs handler class to handle JSON data."""
    def __init__(self):
        self.logs = []  # Initialize logs to store data

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Log JSON data as it streams in."""
        self.logs.append(data)  # Append data to logs
        print(f"My custom log: {data}")  # For demonstration, print the log

async def run():
    # Define the necessary parameters with sample values
    query = "What happened in the latest Burning Man floods?"
    report_type = "research_report"  # Type of report to generate
    report_source = "online"  # Could specify a source like 'online', 'books', etc.
    tone = "informative"  # Tone of the report ('informative', 'casual', etc.)
    config_path = None  # Path to a config file, if needed

    # Initialize the researcher, passing the handler in place of a WebSocket
    custom_logs_handler = CustomLogsHandler()

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        report_source=report_source,
        tone=tone,
        config_path=config_path,
        websocket=custom_logs_handler
    )

    await researcher.conduct_research()  # Conduct the research
    report = await researcher.write_report()  # Write the research report

    return report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
```

The data from the research process is logged and stored in the `CustomLogsHandler` instance; you can customize the logging behavior as needed for your application.

Here's a sample of the output:

```json
{
    "type": "logs",
    "content": "added_source_url",
    "output": "✅ Added source url to research: https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery\n",
    "metadata": "https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery"
}
```

The `metadata` field contains whatever metadata is relevant to the log entry. Let the script run to completion to see the full log output of a research task.
diff --git a/docs/docs/gpt-researcher/llms/testing-your-llm.md b/docs/docs/gpt-researcher/llms/testing-your-llm.md
new file mode 100644
index 000000000..2980be953
--- /dev/null
+++ b/docs/docs/gpt-researcher/llms/testing-your-llm.md
@@ -0,0 +1,30 @@
# Testing your LLM

This page helps you verify that your LLM-related environment variables are set up correctly.
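Before running the main script, you can sanity-check that the expected variables are actually present. Below is a minimal sketch; it assumes OpenAI as the provider, so `OPENAI_API_KEY` is only an illustrative name — substitute whichever variables your provider and retriever require:

```python
import os
from dotenv import load_dotenv

load_dotenv()

# Hypothetical variable names -- adjust for your provider and retriever.
required = ["OPENAI_API_KEY"]
missing = [name for name in required if not os.getenv(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {missing}")
print("All required environment variables are set.")
```

With the environment in place, the following script asks your configured smart LLM for a short completion: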
```python
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv

load_dotenv()

async def main():
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs
        )
        print("LLM response:", report)
    except Exception as e:
        print(f"Error in calling LLM: {e}")

# Run the async function
asyncio.run(main())
```
diff --git a/docs/docs/gpt-researcher/search-engines/test-your-retriever.md b/docs/docs/gpt-researcher/search-engines/test-your-retriever.md
new file mode 100644
index 000000000..432888208
--- /dev/null
+++ b/docs/docs/gpt-researcher/search-engines/test-your-retriever.md
@@ -0,0 +1,68 @@
# Testing your Retriever

To test your retriever, use the following code snippet. The script runs a sample sub-query through every retriever in your current configuration and displays the search results.

```python
import asyncio
import pprint

from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor

# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Create a mock researcher object with the attributes ResearchConductor expects
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    # Constructing a ResearchConductor verifies the mock exposes what it needs
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # Run the blocking search call off the event loop
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())
```

Each search result includes a title, body, and href. For example:

```json
[{  "body": "Jun 5, 2024 ... Three AI Design Patterns of Autonomous "
            "Agents. Overview of the Three Patterns. Three notable AI "
            "design patterns for autonomous agents include:.",
    "href": "https://accredianpublication.medium.com/building-smarter-systems-the-role-of-agentic-design-patterns-in-genai-13617492f5df",
    "title": "Building Smarter Systems: The Role of Agentic Design "
             "Patterns in ..."},
 ...]
```
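The script exercises every retriever returned for your configuration. To test just one, you can pin it before `Config()` is constructed — a minimal sketch, assuming your installed version reads the `RETRIEVER` environment variable (check your config for the exact name and accepted values):

```python
import os

# Hypothetical: pin a single retriever, e.g. Tavily, before Config() runs.
os.environ["RETRIEVER"] = "tavily"
```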
+``` \ No newline at end of file diff --git a/docs/sidebars.js b/docs/sidebars.js index 9c5377c11..17747a9b5 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -36,6 +36,7 @@ 'gpt-researcher/gptr/example', 'gpt-researcher/gptr/config', 'gpt-researcher/gptr/scraping', + 'gpt-researcher/gptr/handling-logs-as-they-stream', 'gpt-researcher/gptr/querying-the-backend', 'gpt-researcher/gptr/automated-tests', 'gpt-researcher/gptr/troubleshooting', @@ -70,7 +71,8 @@ collapsed: true, items: [ 'gpt-researcher/llms/llms', - 'gpt-researcher/llms/running-with-ollama' + 'gpt-researcher/llms/running-with-ollama', + 'gpt-researcher/llms/testing-your-llm' ] }, { @@ -80,6 +82,7 @@ collapsed: true, items: [ 'gpt-researcher/search-engines/retrievers', + 'gpt-researcher/search-engines/test-your-retriever' ] }, { diff --git a/tests/gptr-logs-handler.py b/tests/gptr-logs-handler.py new file mode 100644 index 000000000..db84af0a1 --- /dev/null +++ b/tests/gptr-logs-handler.py @@ -0,0 +1,43 @@ +from typing import Dict, Any +import asyncio +from gpt_researcher import GPTResearcher + +class CustomLogsHandler: + """A custom Logs handler class to handle JSON data.""" + def __init__(self): + self.logs = [] # Initialize logs to store data + + async def send_json(self, data: Dict[str, Any]) -> None: + """Send JSON data and log it.""" + self.logs.append(data) # Append data to logs + print(f"My custom Log: {data}") # For demonstration, print the log + +async def run(): + # Define the necessary parameters with sample values + + query = "What happened in the latest burning man floods?" + report_type = "research_report" # Type of report to generate + report_source = "online" # Could specify source like 'online', 'books', etc. + tone = "informative" # Tone of the report ('informative', 'casual', etc.) 
    config_path = None  # Path to a config file, if needed

    # Initialize the researcher, passing the handler in place of a WebSocket
    custom_logs_handler = CustomLogsHandler()

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        report_source=report_source,
        tone=tone,
        config_path=config_path,
        websocket=custom_logs_handler
    )

    await researcher.conduct_research()  # Conduct the research
    report = await researcher.write_report()  # Write the research report

    return report

# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
diff --git a/tests/test-openai-llm.py b/tests/test-openai-llm.py
new file mode 100644
index 000000000..9cc0164ef
--- /dev/null
+++ b/tests/test-openai-llm.py
@@ -0,0 +1,31 @@
import asyncio
from gpt_researcher.utils.llm import get_llm
from dotenv import load_dotenv

load_dotenv()

async def main():
    # Example usage of the get_llm function
    llm_provider = "openai"
    model = "gpt-3.5-turbo"
    temperature = 0.7
    max_tokens = 1000

    llm = get_llm(llm_provider, model=model, temperature=temperature, max_tokens=max_tokens)
    print(f"LLM Provider: {llm_provider}, Model: {model}, Temperature: {temperature}, Max Tokens: {max_tokens}")
    print("llm:", llm)
    await test_llm(llm=llm)

async def test_llm(llm):
    # Test the connection with a simple query
    messages = [{"role": "user", "content": "sup?"}]
    try:
        response = await llm.get_chat_response(messages, stream=False)
        print("LLM response:", response)
    except Exception as e:
        print(f"Error: {e}")

# Run the async function
asyncio.run(main())
diff --git a/tests/test-your-llm.py b/tests/test-your-llm.py
new file mode 100644
index 000000000..02a153747
--- /dev/null
+++ b/tests/test-your-llm.py
@@ -0,0 +1,24 @@
from gpt_researcher.config.config import Config
from gpt_researcher.utils.llm import create_chat_completion
import asyncio
from dotenv import load_dotenv

load_dotenv()

async def main():
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs
        )
        print("LLM response:", report)
    except Exception as e:
        print(f"Error in calling LLM: {e}")

# Run the async function
asyncio.run(main())
diff --git a/tests/test-your-retriever.py b/tests/test-your-retriever.py
new file mode 100644
index 000000000..3e0c85ce0
--- /dev/null
+++ b/tests/test-your-retriever.py
@@ -0,0 +1,49 @@
import asyncio
import pprint

from dotenv import load_dotenv
from gpt_researcher.config.config import Config
from gpt_researcher.actions.retriever import get_retrievers
from gpt_researcher.skills.researcher import ResearchConductor

# Load environment variables from .env file
load_dotenv()

async def test_scrape_data_by_query():
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Create a mock researcher object with the attributes ResearchConductor expects
    class MockResearcher:
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
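    # Constructing a ResearchConductor checks that the mock exposes the
    # attributes the conductor expects; it is not used further below.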
    research_conductor = ResearchConductor(researcher)

    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # Run the blocking search call off the event loop
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)

if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())