Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/improved detailed report #829

Merged
merged 2 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions backend/report_type/detailed_report/detailed_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from fastapi import WebSocket

from gpt_researcher.master.actions import (
add_source_urls,
add_references,
extract_headers,
extract_sections,
table_of_contents,
Expand All @@ -17,10 +17,10 @@ def __init__(
query: str,
report_type: str,
report_source: str,
source_urls: List[str],
config_path: str,
tone: Tone,
websocket: WebSocket,
source_urls: List[str] = [],
config_path: str = None,
tone: Tone = Tone.Formal,
websocket: WebSocket = None,
subtopics: List[Dict] = [],
headers: Optional[Dict] = None
):
Expand Down Expand Up @@ -121,5 +121,6 @@ async def _get_subtopic_report(self, subtopic: Dict) -> Dict[str, str]:

async def _construct_detailed_report(self, introduction: str, report_body: str) -> str:
toc = table_of_contents(report_body)
report_with_references = add_source_urls(report_body, self.main_task_assistant.visited_urls)
return f"{introduction}\n\n{toc}\n\n{report_with_references}"
conclusion = await self.main_task_assistant.write_report_conclusion(report_body)
conclusion_with_references = add_references(conclusion, self.main_task_assistant.visited_urls)
return f"{introduction}\n\n{toc}\n\n{report_body}\n\n{conclusion_with_references}"
25 changes: 18 additions & 7 deletions cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from gpt_researcher import GPTResearcher
from gpt_researcher.utils.enum import ReportType
from backend.report_type import DetailedReport

# =============================================================================
# CLI
Expand Down Expand Up @@ -45,6 +46,7 @@

report_type_descriptions = {
ReportType.ResearchReport.value: "Summary - Short and fast (~2 min)",
ReportType.DetailedReport.value: "Detailed - In depth and longer (~5 min)",
ReportType.ResourceReport.value: "",
ReportType.OutlineReport.value: "",
ReportType.CustomReport.value: "",
Expand All @@ -71,13 +73,22 @@ async def main(args):
Conduct research on the given query, generate the report, and write
it as a markdown file to the output directory.
"""
researcher = GPTResearcher(
query=args.query,
report_type=args.report_type)

await researcher.conduct_research()

report = await researcher.write_report()
if args.report_type == 'detailed_report':
detailed_report = DetailedReport(
query=args.query,
report_type="research_report",
report_source="web_search",
)

report = await detailed_report.run()
else:
researcher = GPTResearcher(
query=args.query,
report_type=args.report_type)

await researcher.conduct_research()

report = await researcher.write_report()

# Write the report to a file
artifact_filepath = f"outputs/{uuid4()}.md"
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/examples/detailed_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Here's how you can use the `DetailedReport` class in your project:
import asyncio
from fastapi import WebSocket
from gpt_researcher.utils.enum import Tone
from backend.report_types.detailed_report import DetailedReport
from backend.report_type import DetailedReport

async def generate_report(websocket: WebSocket):
detailed_report = DetailedReport(
Expand Down
38 changes: 31 additions & 7 deletions gpt_researcher/master/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ async def choose_agent(
{"role": "system", "content": f"{auto_agent_instructions()}"},
{"role": "user", "content": f"task: {query}"},
],
temperature=0,
temperature=0.15,
llm_provider=cfg.llm_provider,
llm_kwargs=cfg.llm_kwargs,
cost_callback=cost_callback,
Expand Down Expand Up @@ -223,7 +223,7 @@ async def get_sub_queries(
),
},
],
temperature=0,
temperature=0.1,
llm_provider=cfg.llm_provider,
llm_kwargs=cfg.llm_kwargs,
cost_callback=cost_callback,
Expand Down Expand Up @@ -318,6 +318,30 @@ def chunk_content(raw_content, chunk_size=10000):

return concatenated_summaries

async def write_conclusion(
    report, agent_role_prompt, cfg, cost_callback: callable = None
):
    """Ask the configured fast LLM to write a conclusion for *report*.

    Args:
        report: The full report body the conclusion should summarize.
        agent_role_prompt: System-role text framing the LLM's persona.
        cfg: Config object providing fast_llm_model / llm_provider / llm_kwargs.
        cost_callback: Optional callable invoked with the LLM call's cost.

    Returns:
        The generated conclusion text, or an empty string if the call fails
        (best-effort: errors are printed, not raised).
    """
    messages = [
        {"role": "system", "content": f"{agent_role_prompt}"},
        {
            "role": "user",
            "content": f"{generate_report_conclusion(report_content=report)}",
        },
    ]
    try:
        return await create_chat_completion(
            model=cfg.fast_llm_model,
            messages=messages,
            temperature=0.35,
            llm_provider=cfg.llm_provider,
            llm_kwargs=cfg.llm_kwargs,
            cost_callback=cost_callback,
        )
    except Exception as e:
        print(f"{Fore.RED}Error in generating report conclusion: {e}{Style.RESET_ALL}")
        return ""


async def summarize_url(
query, raw_data, agent_role_prompt, cfg, cost_callback: callable = None
Expand Down Expand Up @@ -346,7 +370,7 @@ async def summarize_url(
"content": f"{generate_summary_prompt(query, raw_data)}",
},
],
temperature=0,
temperature=0.35,
llm_provider=cfg.llm_provider,
llm_kwargs=cfg.llm_kwargs,
cost_callback=cost_callback,
Expand Down Expand Up @@ -375,7 +399,7 @@ async def generate_draft_section_titles(
{"role": "system", "content": f"{agent_role_prompt}"},
{"role": "user", "content": content},
],
temperature=0,
temperature=0.15,
llm_provider=cfg.llm_provider,
llm_kwargs=cfg.llm_kwargs,
cost_callback=cost_callback,
Expand Down Expand Up @@ -433,7 +457,7 @@ async def generate_report(
{"role": "system", "content": f"{agent_role_prompt}"},
{"role": "user", "content": content},
],
temperature=0,
temperature=0.35,
llm_provider=cfg.llm_provider,
stream=True,
websocket=websocket,
Expand Down Expand Up @@ -486,7 +510,7 @@ async def get_report_introduction(
"content": generate_report_introduction(query, context),
},
],
temperature=0,
temperature=0.25,
llm_provider=config.llm_provider,
stream=True,
websocket=websocket,
Expand Down Expand Up @@ -600,7 +624,7 @@ def generate_table_of_contents(headers, indent_level=0):
return markdown_text # Return original markdown text if an exception occurs


def add_source_urls(report_markdown: str, visited_urls: set):
def add_references(report_markdown: str, visited_urls: set):
"""
This function takes a Markdown report and a set of visited URLs as input parameters.

Expand Down
41 changes: 41 additions & 0 deletions gpt_researcher/master/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,40 @@ def get_costs(self) -> float:
def set_verbose(self, verbose: bool):
self.verbose = verbose

async def write_report_conclusion(self, report_body: str) -> str:
    """Generate and return a conclusion section for the given report body.

    Args:
        report_body (str): The body of the report to conclude.

    Returns:
        str: The generated conclusion text.
    """
    verbose = self.verbose
    if verbose:
        await stream_output(
            "logs",
            "writing_conclusion",
            f"🙇️ Concluding report for research task: {self.query}...",
            self.websocket,
        )

    # NOTE(review): no cost_callback is forwarded here, so the cost of the
    # conclusion LLM call is not tracked — confirm whether self.add_costs
    # should be wired through like other LLM calls in this module.
    result = await write_conclusion(
        report=report_body,
        agent_role_prompt=self.role,
        cfg=self.cfg,
    )

    if verbose:
        await stream_output(
            "logs",
            "report_conclusion",
            f"✍️ Writing final conclusion: {result}...",
            self.websocket,
        )

    return result

def add_costs(self, cost: int) -> None:
if not isinstance(cost, float) and not isinstance(cost, int):
raise ValueError("Cost must be an integer or float")
Expand All @@ -492,6 +526,13 @@ def add_costs(self, cost: int) -> None:
# DETAILED REPORT

async def write_introduction(self):
if self.verbose:
await stream_output(
"logs",
"generating_conclusion",
f"🤔 Generating subtopics...",
self.websocket,
)
# Construct Report Introduction from main topic research
introduction = await get_report_introduction(
self.query,
Expand Down
20 changes: 20 additions & 0 deletions gpt_researcher/master/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,26 @@ def generate_report_introduction(question: str, research_summary: str = "") -> s
Assume that the current date is {datetime.now(timezone.utc).strftime('%B %d, %Y')} if required.
"""

def generate_report_conclusion(report_content: str) -> str:
    """Build the LLM prompt that requests a conclusion for *report_content*.

    Args:
        report_content (str): The full research report to be summarized.

    Returns:
        str: A prompt instructing the model to write a 2-3 paragraph
        conclusion with markdown hyperlinks and a "## Conclusion" heading.
    """
    return f"""
    Based on the following research report, please write a concise conclusion that summarizes the main findings and their implications:

    {report_content}

    Your conclusion should:
    1. Recap the main points of the research
    2. Highlight the most important findings
    3. Discuss any implications or next steps
    4. Be approximately 2-3 paragraphs long

    If there is no "## Conclusion" section title written at the end of the report, please add it to the top of your conclusion.
    You must include hyperlinks with markdown syntax ([url website](url)) related to the sentences wherever necessary.

    Write the conclusion:
    """


report_type_mapping = {
ReportType.ResearchReport.value: generate_report_prompt,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from setuptools import find_packages, setup

LATEST_VERSION = "0.9.4"
LATEST_VERSION = "0.9.5"

exclude_packages = [
"selenium",
Expand Down