-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #940 from ElishaKay/custom-logs-handler
Docs upgrades
- Loading branch information
Showing
8 changed files
with
313 additions
and
1 deletion.
There are no files selected for viewing
64 changes: 64 additions & 0 deletions
64
docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Handling Logs | ||
|
||
Here is a snippet of code to help you handle the streaming logs of your Research tasks. | ||
|
||
```python | ||
from typing import Dict, Any | ||
import asyncio | ||
from gpt_researcher import GPTResearcher | ||
|
||
class CustomLogsHandler: | ||
"""A custom Logs handler class to handle JSON data.""" | ||
def __init__(self): | ||
self.logs = [] # Initialize logs to store data | ||
|
||
async def send_json(self, data: Dict[str, Any]) -> None: | ||
"""Send JSON data and log it.""" | ||
self.logs.append(data) # Append data to logs | ||
print(f"My custom Log: {data}") # For demonstration, print the log | ||
|
||
async def run(): | ||
# Define the necessary parameters with sample values | ||
|
||
query = "What happened in the latest burning man floods?" | ||
report_type = "research_report" # Type of report to generate | ||
report_source = "online" # Could specify source like 'online', 'books', etc. | ||
tone = "informative" # Tone of the report ('informative', 'casual', etc.) | ||
config_path = None # Path to a config file, if needed | ||
|
||
# Initialize researcher with a custom WebSocket | ||
custom_logs_handler = CustomLogsHandler() | ||
|
||
researcher = GPTResearcher( | ||
query=query, | ||
report_type=report_type, | ||
report_source=report_source, | ||
tone=tone, | ||
config_path=config_path, | ||
websocket=custom_logs_handler | ||
) | ||
|
||
await researcher.conduct_research() # Conduct the research | ||
report = await researcher.write_report() # Write the research report | ||
|
||
return report | ||
|
||
# Run the asynchronous function using asyncio | ||
if __name__ == "__main__": | ||
asyncio.run(run()) | ||
``` | ||
|
||
The data from the research process will be logged and stored in the `CustomLogsHandler` instance. You can customize the logging behavior as needed for your application. | ||
|
||
Here's a sample of the output: | ||
|
||
``` | ||
{ | ||
"type": "logs", | ||
"content": "added_source_url", | ||
"output": "✅ Added source url to research: https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery\n", | ||
"metadata": "https://www.npr.org/2023/09/28/1202110410/how-rumors-and-conspiracy-theories-got-in-the-way-of-mauis-fire-recovery" | ||
} | ||
``` | ||
|
||
The `metadata` field will include whatever metadata is relevant to the log entry. Let the script above run to completion for the full logs output of a given research task. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Testing your LLM | ||
|
||
Here is a snippet of code to help you verify that your LLM-related environment variables are set up correctly. | ||
|
||
```python | ||
from gpt_researcher.config.config import Config | ||
from gpt_researcher.utils.llm import create_chat_completion | ||
import asyncio | ||
from dotenv import load_dotenv | ||
load_dotenv() | ||
|
||
async def main(): | ||
cfg = Config() | ||
|
||
try: | ||
report = await create_chat_completion( | ||
model=cfg.smart_llm_model, | ||
messages = [{"role": "user", "content": "sup?"}], | ||
temperature=0.35, | ||
llm_provider=cfg.smart_llm_provider, | ||
stream=True, | ||
max_tokens=cfg.smart_token_limit, | ||
llm_kwargs=cfg.llm_kwargs | ||
) | ||
except Exception as e: | ||
print(f"Error in calling LLM: {e}") | ||
|
||
# Run the async function | ||
asyncio.run(main()) | ||
``` |
68 changes: 68 additions & 0 deletions
68
docs/docs/gpt-researcher/search-engines/test-your-retriever.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# Testing your Retriever | ||
|
||
To test your retriever, you can use the following code snippet. The script will search for a sub-query and display the search results. | ||
|
||
```python | ||
import asyncio | ||
from dotenv import load_dotenv | ||
from gpt_researcher.config.config import Config | ||
from gpt_researcher.actions.retriever import get_retrievers | ||
from gpt_researcher.skills.researcher import ResearchConductor | ||
import pprint | ||
# Load environment variables from .env file | ||
load_dotenv() | ||
|
||
async def test_scrape_data_by_query(): | ||
# Initialize the Config object | ||
config = Config() | ||
|
||
# Retrieve the retrievers based on the current configuration | ||
retrievers = get_retrievers({}, config) | ||
print("Retrievers:", retrievers) | ||
|
||
# Create a mock researcher object with necessary attributes | ||
class MockResearcher: | ||
def __init__(self): | ||
self.retrievers = retrievers | ||
self.cfg = config | ||
self.verbose = True | ||
self.websocket = None | ||
self.scraper_manager = None # Mock or implement scraper manager | ||
self.vector_store = None # Mock or implement vector store | ||
|
||
researcher = MockResearcher() | ||
research_conductor = ResearchConductor(researcher) | ||
# print('research_conductor',dir(research_conductor)) | ||
# print('MockResearcher',dir(researcher)) | ||
# Define a sub-query to test | ||
sub_query = "design patterns for autonomous ai agents" | ||
|
||
# Iterate through all retrievers | ||
for retriever_class in retrievers: | ||
# Instantiate the retriever with the sub-query | ||
retriever = retriever_class(sub_query) | ||
|
||
# Perform the search using the current retriever | ||
search_results = await asyncio.to_thread( | ||
retriever.search, max_results=10 | ||
) | ||
|
||
print("\033[35mSearch results:\033[0m") | ||
pprint.pprint(search_results, indent=4, width=80) | ||
|
||
if __name__ == "__main__": | ||
asyncio.run(test_scrape_data_by_query()) | ||
``` | ||
|
||
The output of the search results will include the title, body, and href of each search result. For example: | ||
|
||
```json | ||
[{ | ||
"body": "Jun 5, 2024 ... Three AI Design Patterns of Autonomous " | ||
"Agents. Overview of the Three Patterns. Three notable AI " | ||
"design patterns for autonomous agents include:.", | ||
"href": "https://accredianpublication.medium.com/building-smarter-systems-the-role-of-agentic-design-patterns-in-genai-13617492f5df", | ||
"title": "Building Smarter Systems: The Role of Agentic Design " | ||
"Patterns in ..."}, | ||
...] | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from typing import Dict, Any | ||
import asyncio | ||
from gpt_researcher import GPTResearcher | ||
|
||
class CustomLogsHandler:
    """Collects streamed research logs by mimicking a WebSocket interface.

    GPT Researcher only requires the target to expose an async ``send_json``
    method, so this plain class can be passed as the ``websocket`` argument.
    """

    def __init__(self):
        # Every JSON payload received during the research run is kept here.
        self.logs = []

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Store *data* and echo it to stdout."""
        self.logs.append(data)
        print(f"My custom Log: {data}")  # For demonstration, print the log
|
||
async def run():
    """Launch a sample research task and return the written report."""
    # Sample parameters for the research task.
    query = "What happened in the latest burning man floods?"
    report_type = "research_report"  # Type of report to generate
    report_source = "online"  # Could specify source like 'online', 'books', etc.
    tone = "informative"  # Tone of the report ('informative', 'casual', etc.)
    config_path = None  # Path to a config file, if needed

    # The handler stands in for a WebSocket so streamed logs reach us.
    logs_handler = CustomLogsHandler()

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        report_source=report_source,
        tone=tone,
        config_path=config_path,
        websocket=logs_handler,
    )

    await researcher.conduct_research()  # Gather sources and context
    return await researcher.write_report()  # Produce the final report


# Run the asynchronous function using asyncio
if __name__ == "__main__":
    asyncio.run(run())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import asyncio | ||
from gpt_researcher.utils.llm import get_llm | ||
from gpt_researcher import GPTResearcher | ||
from dotenv import load_dotenv | ||
load_dotenv() | ||
|
||
async def main():
    """Build an LLM client from sample settings and smoke-test it."""
    # Example usage of get_llm function
    llm_provider = "openai"
    model = "gpt-3.5-turbo"
    temperature = 0.7
    max_tokens = 1000

    llm = get_llm(
        llm_provider,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    print(f"LLM Provider: {llm_provider}, Model: {model}, Temperature: {temperature}, Max Tokens: {max_tokens}")
    print('llm: ', llm)
    await test_llm(llm=llm)
|
||
|
||
async def test_llm(llm):
    """Send a trivial chat message through *llm* and print the outcome."""
    # A minimal probe message to confirm connectivity.
    messages = [{"role": "user", "content": "sup?"}]
    try:
        response = await llm.get_chat_response(messages, stream=False)
        print("LLM response:", response)
    except Exception as e:
        # Any failure (auth, network, bad model name) surfaces here.
        print(f"Error: {e}")


# Run the async function
asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from gpt_researcher.config.config import Config | ||
from gpt_researcher.utils.llm import create_chat_completion | ||
import asyncio | ||
from dotenv import load_dotenv | ||
load_dotenv() | ||
|
||
async def main():
    """Verify LLM environment configuration by issuing a minimal chat completion.

    Reads the smart-LLM settings from ``Config`` and sends one tiny message.
    Prints the completion on success, or the error on failure, so the script
    always gives visible feedback either way.
    """
    cfg = Config()

    try:
        report = await create_chat_completion(
            model=cfg.smart_llm_model,
            messages=[{"role": "user", "content": "sup?"}],
            temperature=0.35,
            llm_provider=cfg.smart_llm_provider,
            stream=True,
            max_tokens=cfg.smart_token_limit,
            llm_kwargs=cfg.llm_kwargs,
        )
    except Exception as e:
        print(f"Error in calling LLM: {e}")
    else:
        # FIX: the original discarded the result, so a *successful* setup
        # produced no confirmation. Surface it explicitly.
        print("LLM setup OK:", report)


# Run the async function
asyncio.run(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import asyncio | ||
from dotenv import load_dotenv | ||
from gpt_researcher.config.config import Config | ||
from gpt_researcher.actions.retriever import get_retrievers | ||
from gpt_researcher.skills.researcher import ResearchConductor | ||
import pprint | ||
# Load environment variables from .env file | ||
load_dotenv() | ||
|
||
async def test_scrape_data_by_query():
    """Run every configured retriever against a sample sub-query and print results.

    Builds a minimal mock researcher so ``ResearchConductor`` can be
    constructed, then exercises each retriever's synchronous ``search`` in a
    worker thread.
    """
    # Initialize the Config object
    config = Config()

    # Retrieve the retrievers based on the current configuration
    retrievers = get_retrievers({}, config)
    print("Retrievers:", retrievers)

    # Create a mock researcher object with necessary attributes
    class MockResearcher:
        # BUG FIX: the original defined a plain `init` method, which Python
        # never calls, so instances were created without any of these
        # attributes and ResearchConductor/search would fail on them.
        def __init__(self):
            self.retrievers = retrievers
            self.cfg = config
            self.verbose = True
            self.websocket = None
            self.scraper_manager = None  # Mock or implement scraper manager
            self.vector_store = None  # Mock or implement vector store

    researcher = MockResearcher()
    research_conductor = ResearchConductor(researcher)
    # Define a sub-query to test
    sub_query = "design patterns for autonomous ai agents"

    # Iterate through all retrievers
    for retriever_class in retrievers:
        # Instantiate the retriever with the sub-query
        retriever = retriever_class(sub_query)

        # `search` is synchronous, so run it in a worker thread to avoid
        # blocking the event loop.
        search_results = await asyncio.to_thread(
            retriever.search, max_results=10
        )

        print("\033[35mSearch results:\033[0m")
        pprint.pprint(search_results, indent=4, width=80)


if __name__ == "__main__":
    asyncio.run(test_scrape_data_by_query())