Skip to content

Commit fe46141

Browse files
authored
Export NLP community reports prompt (microsoft#1697)
* Properly export the NLP community reports prompt
* Semver
* Fix verb tests
1 parent b94290e commit fe46141

File tree

9 files changed

+32
-13
lines changed

9 files changed

+32
-13
lines changed
Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
{
2+
"type": "patch",
3+
"description": "Export NLP community reports prompt."
4+
}

graphrag/cli/initialize.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,9 @@
1010
from graphrag.prompts.index.community_report import (
1111
COMMUNITY_REPORT_PROMPT,
1212
)
13+
from graphrag.prompts.index.community_report_text_units import (
14+
COMMUNITY_REPORT_TEXT_PROMPT,
15+
)
1316
from graphrag.prompts.index.extract_claims import EXTRACT_CLAIMS_PROMPT
1417
from graphrag.prompts.index.extract_graph import GRAPH_EXTRACTION_PROMPT
1518
from graphrag.prompts.index.summarize_descriptions import SUMMARIZE_PROMPT
@@ -72,7 +75,8 @@ def initialize_project_at(path: Path, force: bool) -> None:
7275
"extract_graph": GRAPH_EXTRACTION_PROMPT,
7376
"summarize_descriptions": SUMMARIZE_PROMPT,
7477
"extract_claims": EXTRACT_CLAIMS_PROMPT,
75-
"community_report": COMMUNITY_REPORT_PROMPT,
78+
"community_report_graph": COMMUNITY_REPORT_PROMPT,
79+
"community_report_text": COMMUNITY_REPORT_TEXT_PROMPT,
7680
"drift_search_system_prompt": DRIFT_LOCAL_SYSTEM_PROMPT,
7781
"drift_reduce_prompt": DRIFT_REDUCE_PROMPT,
7882
"global_search_map_system_prompt": MAP_SYSTEM_PROMPT,

graphrag/config/init_content.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,8 @@
114114
115115
community_reports:
116116
model_id: {defs.COMMUNITY_REPORT_MODEL_ID}
117-
prompt: "prompts/community_report.txt"
117+
graph_prompt: "prompts/community_report_graph.txt"
118+
text_prompt: "prompts/community_report_text.txt"
118119
max_length: {defs.COMMUNITY_REPORT_MAX_LENGTH}
119120
max_input_length: {defs.COMMUNITY_REPORT_MAX_INPUT_LENGTH}
120121

graphrag/config/models/community_reports_config.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,13 @@
1414
class CommunityReportsConfig(BaseModel):
1515
"""Configuration section for community reports."""
1616

17-
prompt: str | None = Field(
18-
description="The community report extraction prompt to use.", default=None
17+
graph_prompt: str | None = Field(
18+
description="The community report extraction prompt to use for graph-based summarization.",
19+
default=None,
20+
)
21+
text_prompt: str | None = Field(
22+
description="The community report extraction prompt to use for text-based summarization.",
23+
default=None,
1924
)
2025
max_length: int = Field(
2126
description="The community report maximum length in tokens.",
@@ -46,10 +51,15 @@ def resolved_strategy(
4651
"llm": model_config.model_dump(),
4752
"stagger": model_config.parallelization_stagger,
4853
"num_threads": model_config.parallelization_num_threads,
49-
"extraction_prompt": (Path(root_dir) / self.prompt).read_text(
54+
"graph_prompt": (Path(root_dir) / self.graph_prompt).read_text(
55+
encoding="utf-8"
56+
)
57+
if self.graph_prompt
58+
else None,
59+
"text_prompt": (Path(root_dir) / self.text_prompt).read_text(
5060
encoding="utf-8"
5161
)
52-
if self.prompt
62+
if self.text_prompt
5363
else None,
5464
"max_report_length": self.max_length,
5565
"max_input_length": self.max_input_length,

graphrag/index/flows/create_community_reports.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,8 @@ async def create_community_reports(
4646
if claims_input is not None:
4747
claims = _prep_claims(claims_input)
4848

49+
summarization_strategy["extraction_prompt"] = summarization_strategy["graph_prompt"]
50+
4951
max_input_length = summarization_strategy.get(
5052
"max_input_length", defaults.COMMUNITY_REPORT_MAX_INPUT_LENGTH
5153
)

graphrag/index/flows/create_community_reports_text.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,6 @@
2424
build_level_context,
2525
build_local_context,
2626
)
27-
from graphrag.prompts.index.community_report_text_units import (
28-
COMMUNITY_REPORT_PROMPT,
29-
)
3027

3128
log = logging.getLogger(__name__)
3229

@@ -44,8 +41,7 @@ async def create_community_reports_text(
4441
"""All the steps to transform community reports."""
4542
nodes = explode_communities(communities, entities)
4643

47-
# TEMP: forcing override of the prompt until we can put it into config
48-
summarization_strategy["extraction_prompt"] = COMMUNITY_REPORT_PROMPT
44+
summarization_strategy["extraction_prompt"] = summarization_strategy["text_prompt"]
4945

5046
max_input_length = summarization_strategy.get(
5147
"max_input_length", defaults.COMMUNITY_REPORT_MAX_INPUT_LENGTH

graphrag/prompts/index/community_report_text_units.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33

44
"""A file containing prompts definition."""
55

6-
COMMUNITY_REPORT_PROMPT = """
6+
COMMUNITY_REPORT_TEXT_PROMPT = """
77
You are an AI assistant that helps a human analyst to perform general information discovery.
88
Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network.
99

tests/unit/config/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -418,7 +418,8 @@ def assert_summarize_descriptions_configs(
418418
def assert_community_reports_configs(
419419
actual: CommunityReportsConfig, expected: CommunityReportsConfig
420420
) -> None:
421-
assert actual.prompt == expected.prompt
421+
assert actual.graph_prompt == expected.graph_prompt
422+
assert actual.text_prompt == expected.text_prompt
422423
assert actual.max_length == expected.max_length
423424
assert actual.max_input_length == expected.max_input_length
424425
assert actual.strategy == expected.strategy

tests/verbs/test_create_community_reports.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@ async def test_create_community_reports():
6161
config.community_reports.strategy = {
6262
"type": "graph_intelligence",
6363
"llm": llm_settings,
64+
"graph_prompt": "",
6465
}
6566

6667
await run_workflow(

0 commit comments

Comments
 (0)