From 1f5d6c47b131be19fba5d05b3fbef503a0a527cd Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 15 Dec 2025 14:04:42 +0000 Subject: [PATCH 01/29] feat: Add tests for opening, closing and changing citations --- .../app/slack/slack_events.py | 39 ++-- .../tests/test_slack_events.py | 199 +++++++++++++++++- 2 files changed, 220 insertions(+), 18 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 458107cc..6b538a63 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -652,25 +652,9 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, body = body.replace("»", "") # Remove double chevrons current_id = f"cite_{source_number}".strip() - selected = False # Reset all button styles, then set the clicked one - for block in blocks: - if block.get("type") == "actions": - for element in block.get("elements", []): - if element.get("type") == "button": - action_id = element.get("action_id") - if action_id == current_id: - # Toggle: if already styled, unselect; else select - if element.get("style") == "primary": - element.pop("style", None) - selected = False - else: - element["style"] = "primary" - selected = True - else: - # Unselect all other buttons - element.pop("style", None) + [selected, blocks] = format_blocks(blocks, current_id) # If selected, insert citation block before feedback if selected: @@ -693,6 +677,27 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, logger.error(f"Error updating message for citation: {e}", extra={"error": traceback.format_exc()}) +def format_blocks(blocks: Any, current_id: str): + selected = False + for block in blocks: + if block.get("type") == "actions": + for element in block.get("elements", []): + if element.get("type") == "button": + action_id = element.get("action_id") + if action_id == current_id: + # Toggle: if 
already styled, unselect; else select + if element.get("style") == "primary": + element.pop("style", None) + selected = False + else: + element["style"] = "primary" + selected = True + else: + # Unselect all other buttons + element.pop("style", None) + return [selected, blocks] + + # ================================================================ # Session management # ================================================================ diff --git a/packages/slackBotFunction/tests/test_slack_events.py b/packages/slackBotFunction/tests/test_slack_events.py index a4398249..09d126fa 100644 --- a/packages/slackBotFunction/tests/test_slack_events.py +++ b/packages/slackBotFunction/tests/test_slack_events.py @@ -406,7 +406,7 @@ def test_citation_creation( _sourceNumber = "5" _title = "Some Title Summarising the Document" - _link = "http://example.com" + _link = "https://example.com" _filename = "example.pdf" _text_snippet = "This is some example text, maybe something about NHSE" @@ -866,3 +866,200 @@ def test_process_feedback_event_error( # assertions mock_post_error_message.assert_called_once_with(channel="C123", thread_ts="123", client=mock_client) + + +def test_process_citation_events_update_chat_message_open_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "link": "http://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of 
the citation", + "emoji": "true", + }, + "style": None, # Set citation as de-active + "value": str(params), + }, + ], + } + + message = { + "blocks": [citations], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [ + citations, + { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + }, + ] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_process_citation_events_update_chat_message_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "link": "http://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + ], + } + + citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [ + citations, + ] + mock_client.chat_update.assert_called() + 
mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_process_citation_events_update_chat_message_change_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "2", + "title": "Second Citation Title", + "body": "Second Citation Body", + "link": "http://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + { + "type": "button", + "action_id": "cite_2", + "text": { + "type": "plain_text", + "text": "[2] The body of the citation", + "emoji": "true", + }, + "style": None, # Set citation as active + "value": str(params), + }, + ], + } + + first_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*First Citation Title*\n\n> First Citation Body"}, + "block_id": "citation_block", + } + + second_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Second Citation Title*\n\n> Second Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, first_citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [citations, second_citation_body] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", 
blocks=expected_blocks) From 4bc52c64a0b56368c691b23fdb82d3b47cec19df Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 15 Dec 2025 15:45:42 +0000 Subject: [PATCH 02/29] feat: Clean up tests --- packages/cdk/prompts/systemPrompt.txt | 2 +- .../slackBotFunction/app/services/bedrock.py | 3 +- .../app/slack/slack_events.py | 3 +- .../tests/test_slack_event_citations.py | 281 ++++++++++++++++++ .../tests/test_slack_events.py | 197 ------------ 5 files changed, 284 insertions(+), 202 deletions(-) create mode 100644 packages/slackBotFunction/tests/test_slack_event_citations.py diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 6e79e4c7..b9f53fff 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -41,7 +41,7 @@ You are an AI assistant designed to provide guidance and references from your kn # Bibliography ## Format -source number||summary title||link||filename||text snippet||reasoning\n +source number||title||link||text snippet\n ## Requirements - Return **ALL** retrieved documents, their name and a text snippet, from "CONTEXT" diff --git a/packages/slackBotFunction/app/services/bedrock.py b/packages/slackBotFunction/app/services/bedrock.py index 218826a4..c27e636e 100644 --- a/packages/slackBotFunction/app/services/bedrock.py +++ b/packages/slackBotFunction/app/services/bedrock.py @@ -96,14 +96,13 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat def invoke_model(prompt: str, model_id: str, client: BedrockRuntimeClient, inference_config: dict) -> dict[str, Any]: + logger.debug("Invoking Model", extra={"inference_config": inference_config}) response = client.invoke_model( modelId=model_id, body=json.dumps( { - "anthropic_version": "bedrock-2023-05-31", "temperature": inference_config["temperature"], "top_p": inference_config["topP"], - "top_k": 50, "max_tokens": inference_config["maxTokens"], "messages": [{"role": "user", "content": 
prompt}], } diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 6b538a63..60ca45ff 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -440,10 +440,9 @@ def process_slack_message(event: Dict[str, Any], event_id: str, client: WebClien "source_number", "title", "link", - "filename", "reference_text", ] - split = response_text.split("------") # Citations are separated by ------ + split = response_text.split("------") # Citations are separated from main body by ------ citations: list[dict[str, str]] = [] if len(split) != 1: diff --git a/packages/slackBotFunction/tests/test_slack_event_citations.py b/packages/slackBotFunction/tests/test_slack_event_citations.py new file mode 100644 index 00000000..9f4a66d3 --- /dev/null +++ b/packages/slackBotFunction/tests/test_slack_event_citations.py @@ -0,0 +1,281 @@ +import sys +import pytest +from unittest.mock import Mock, MagicMock + + +@pytest.fixture +def mock_logger(): + return MagicMock() + + +def test_process_slack_message_split_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + +def test_process_citation_events_update_chat(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + body = { + "type": "block_actions", + "message": { + "ts": "123", + "text": "", + "blocks": [ + { + "type": "section", + "block_id": "OvNCm", + "text": { + "type": "mrkdwn", + "text": "", + }, + }, + { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + 
{ + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] Downloading a single prescription using the prescription's ID, or ...", + "emoji": "true", + }, + "value": '{"ck":"123","ch":"123","mt":"123","tt":"123","source_number":"1","title":"title"', + } + ], + }, + ], + }, + "channel": { + "id": "ABC123", + }, + "actions": [ + { + "action_id": "cite_1", + "block_id": "citation_actions", + "text": { + "type": "plain_text", + "text": "[1] Downloading a single prescription using the prescription's ID, or ...", + "emoji": "true", + }, + "value": '{"ck":"123","ch":"C095D4SRX6W","mt":"123","tt":"123","source_number":"1","title":""}', + "type": "button", + "action_ts": "1765807735.805872", + } + ], + } + + # perform operation + process_async_slack_action(body, mock_client) + + # assertions + mock_client.chat_update.assert_called() + + +def test_process_citation_events_update_chat_message_open_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "link": "https://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": None, # Set citation as de-active + "value": str(params), + }, + ], + } + + message = { + "blocks": [citations], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks 
= [ + citations, + { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + }, + ] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_process_citation_events_update_chat_message_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "link": "https://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + ], + } + + citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [ + citations, + ] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_process_citation_events_update_chat_message_change_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} 
+ + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "2", + "title": "Second Citation Title", + "body": "Second Citation Body", + "link": "https://example.com", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + { + "type": "button", + "action_id": "cite_2", + "text": { + "type": "plain_text", + "text": "[2] The body of the citation", + "emoji": "true", + }, + "style": None, # Set citation as active + "value": str(params), + }, + ], + } + + first_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*First Citation Title*\n\n> First Citation Body"}, + "block_id": "citation_block", + } + + second_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Second Citation Title*\n\n> Second Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, first_citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [citations, second_citation_body] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) diff --git a/packages/slackBotFunction/tests/test_slack_events.py b/packages/slackBotFunction/tests/test_slack_events.py index 09d126fa..1195299f 100644 --- a/packages/slackBotFunction/tests/test_slack_events.py +++ b/packages/slackBotFunction/tests/test_slack_events.py @@ -866,200 +866,3 @@ def test_process_feedback_event_error( 
# assertions mock_post_error_message.assert_called_once_with(channel="C123", thread_ts="123", client=mock_client) - - -def test_process_citation_events_update_chat_message_open_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": "123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "1", - "title": "Citation Title", - "body": "Citation Body", - "link": "http://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": None, # Set citation as de-active - "value": str(params), - }, - ], - } - - message = { - "blocks": [citations], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [ - citations, - { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, - "block_id": "citation_block", - }, - ] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) - - -def test_process_citation_events_update_chat_message_close_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": 
"123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "1", - "title": "Citation Title", - "body": "Citation Body", - "link": "http://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": "primary", # Set citation as active - "value": str(params), - }, - ], - } - - citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, - "block_id": "citation_block", - } - - message = { - "blocks": [citations, citation_body], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [ - citations, - ] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) - - -def test_process_citation_events_update_chat_message_change_close_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": "123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "2", - "title": "Second Citation Title", - "body": "Second Citation Body", - "link": "http://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": "primary", # Set citation as active - "value": str(params), - }, - { - "type": "button", - 
"action_id": "cite_2", - "text": { - "type": "plain_text", - "text": "[2] The body of the citation", - "emoji": "true", - }, - "style": None, # Set citation as active - "value": str(params), - }, - ], - } - - first_citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*First Citation Title*\n\n> First Citation Body"}, - "block_id": "citation_block", - } - - second_citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Second Citation Title*\n\n> Second Citation Body"}, - "block_id": "citation_block", - } - - message = { - "blocks": [citations, first_citation_body], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [citations, second_citation_body] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) From ee4477c91bf87fb2224867dcbf774195678ea6d0 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 15 Dec 2025 16:45:58 +0000 Subject: [PATCH 03/29] feat: Add debugging logging for invoking bot --- packages/slackBotFunction/app/services/bedrock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/services/bedrock.py b/packages/slackBotFunction/app/services/bedrock.py index c27e636e..d3250fa0 100644 --- a/packages/slackBotFunction/app/services/bedrock.py +++ b/packages/slackBotFunction/app/services/bedrock.py @@ -87,6 +87,7 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat else: logger.info("Starting new conversation") + logger.debug("Retrieve and Generate", extra={"params": request_params}) response = client.retrieve_and_generate(**request_params) logger.info( "Got Bedrock response", @@ -96,7 +97,6 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat def invoke_model(prompt: str, model_id: str, client: BedrockRuntimeClient, inference_config: dict) -> 
dict[str, Any]: - logger.debug("Invoking Model", extra={"inference_config": inference_config}) response = client.invoke_model( modelId=model_id, body=json.dumps( From cd3375fa6c9a0ee3ac7a2dbb3542caabd1705072 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 16 Dec 2025 10:13:52 +0000 Subject: [PATCH 04/29] feat: Get ai agent from bedrock --- .../slackBotFunction/app/services/bedrock.py | 16 ++++------------ .../app/services/prompt_loader.py | 16 +++++++++++----- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/packages/slackBotFunction/app/services/bedrock.py b/packages/slackBotFunction/app/services/bedrock.py index d3250fa0..d1fcdaae 100644 --- a/packages/slackBotFunction/app/services/bedrock.py +++ b/packages/slackBotFunction/app/services/bedrock.py @@ -42,8 +42,10 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat "type": "KNOWLEDGE_BASE", "knowledgeBaseConfiguration": { "knowledgeBaseId": config.KNOWLEDGEBASE_ID, - "modelArn": config.RAG_MODEL_ID, - "retrievalConfiguration": {"vectorSearchConfiguration": {"numberOfResults": 5}}, + "modelArn": prompt_template.get("model_id", config.RAG_MODEL_ID), + "retrievalConfiguration": { + "vectorSearchConfiguration": {"numberOfResults": 10, "overrideSearchType": "SEMANTIC"} + }, "generationConfiguration": { "guardrailConfiguration": { "guardrailId": config.GUARD_RAIL_ID, @@ -58,16 +60,6 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat } }, }, - "orchestrationConfiguration": { - "inferenceConfig": { - "textInferenceConfig": { - **inference_config, - "stopSequences": [ - "Human:", - ], - } - }, - }, }, }, } diff --git a/packages/slackBotFunction/app/services/prompt_loader.py b/packages/slackBotFunction/app/services/prompt_loader.py index e951d447..10d941fb 100644 --- a/packages/slackBotFunction/app/services/prompt_loader.py +++ b/packages/slackBotFunction/app/services/prompt_loader.py @@ -92,7 +92,7 @@ def load_prompt(prompt_name: 
str, prompt_version: str = None) -> dict: logger.info( f"Loading prompt {prompt_name}' (ID: {prompt_id})", - extra={"prompt_name": prompt_name, "prompt_id": prompt_id, "prompt_version": prompt_version}, + extra={"prompt_version": prompt_version}, ) if is_explicit_version: @@ -100,15 +100,20 @@ def load_prompt(prompt_name: str, prompt_version: str = None) -> dict: else: response = client.get_prompt(promptIdentifier=prompt_id) + logger.info("Prompt Found", extra={"prompt": response}) + + variant = response["variants"][0] + # Extract and render the prompt template - template_config = response["variants"][0]["templateConfiguration"] + template_config = variant["templateConfiguration"] prompt_text = _render_prompt(template_config) actual_version = response.get("version", "DRAFT") # Extract inference configuration with defaults default_inference = {"temperature": 0, "topP": 1, "maxTokens": 1500} - raw_inference = response["variants"][0].get("inferenceConfiguration", {}) - raw_text_config = raw_inference.get("textInferenceConfiguration", {}) + model_id = variant.get("modelId", "") + raw_inference = variant.get("inferenceConfiguration", {}) + raw_text_config = raw_inference.get("text", {}) inference_config = {**default_inference, **raw_text_config} logger.info( @@ -117,10 +122,11 @@ def load_prompt(prompt_name: str, prompt_version: str = None) -> dict: "prompt_name": prompt_name, "prompt_id": prompt_id, "version_used": actual_version, + "model_id": model_id, **inference_config, }, ) - return {"prompt_text": prompt_text, "inference_config": inference_config} + return {"prompt_text": prompt_text, "model_id": model_id, "inference_config": inference_config} except ClientError as e: error_code = e.response.get("Error", {}).get("Code", "Unknown") From 3df89d532e6e1e15872bd794e81860b08c83a3a6 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 16 Dec 2025 14:39:30 +0000 Subject: [PATCH 05/29] feat: Use citations from response --- packages/cdk/prompts/systemPrompt.txt | 72 
++--- packages/cdk/prompts/userPrompt.txt | 6 +- .../app/slack/slack_events.py | 91 +++--- .../tests/test_slack_event_citations.py | 281 ------------------ .../tests/test_slack_events.py | 195 +----------- 5 files changed, 76 insertions(+), 569 deletions(-) delete mode 100644 packages/slackBotFunction/tests/test_slack_event_citations.py diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index b9f53fff..71aad267 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -1,49 +1,29 @@ -You are an AI assistant designed to provide guidance and references from your knowledge base to help users make decisions when onboarding. It is *VERY* important you return *ALL* references, for user examination. + +You are an expert Research Analyst with a specialty in data extraction and synthesis. Your goal is to answer user queries using ONLY the provided . You must ignore your internal training data regarding outside facts. + -# Response -## Response Structure -- *Summary*: 100 characters maximum, capturing core answer -- *Answer* (use "mrkdown") (< 800 characters) -- Page break (use `------`) -- \[Bibliography\] + +1. **Analyze the Request:** Understand the user's core question. +2. **Scan Evidence:** Read the provided thoroughly. +3. **Critical Thinking (Mandatory):** + - You must identify *explicit* evidence that answers the question. + - If multiple sources exist, synthesize them. + - If sources conflict, note the conflict in your thinking block. + - Do NOT infer answers. If the explicit answer is not in the text, state that the information is missing. +4. **Formulate Answer:** Write a clear, concise answer based *only* on the steps above. + -## Formatting ("mrkdwn") - a. *Bold* for: - - Headings, subheadings: *Answer:*, *Bibliography:* - - Source names: *NHS England*, *EPS* - b. _Italic_ for: - - Citations, references, document titles - c. 
Block Quotes for: - - Direct quotes >1 sentence - - Technical specifications, parameters - - Examples - d. `Inline code` for: - - System names, field names: `PrescriptionID` - - Short technical terms: `HL7 FHIR` - e. Links: - - Do not provide links + +- The output must start with a block where you plan your answer. +- The user facing answer follows the thinking block. + - This should include references/ links/ excerpts where possible. + - Provide a quick summary followed by a detailed answer. + -# Thinking -## Question Handling -- Detect whether the query contains one or multiple questions -- Split complex queries into individual sub-questions -- Identify question type: factual, procedural, diagnostic, troubleshooting, or clarification-seeking -- For multi-question queries: number sub-questions clearly (Q1, Q2, etc) - -## RAG & Knowledge Base Integration -- Relevance threshold handling: - - Score > 0.85 (High confidence) - - Score 0.70 - 0.85 (Medium confidence) - - Score < 0.70 (Low confidence) - -## Corrections -- Change _National Health Service Digital (NHSD)_ references to _National Health Service England (NHSE)_ - -# Bibliography -## Format -source number||title||link||text snippet\n - -## Requirements -- Return **ALL** retrieved documents, their name and a text snippet, from "CONTEXT" -- Get full text references from search results for Bibliography -- Title should be less than 50 characters + +Use Slacks formatting "mrkdown" +- **Bold:** Headings (`*Answer:*`), Source Names (`*NHS England*`). +- **Italics:** Document titles, citations. +- **Inline Code:** System names (`PrescriptionID`), technical terms (`HL7 FHIR`). +- **Block Quotes:** Direct quotes >1 sentence, technical specs, or examples. 
+ diff --git a/packages/cdk/prompts/userPrompt.txt b/packages/cdk/prompts/userPrompt.txt index e7ae7f18..f54a7c2c 100644 --- a/packages/cdk/prompts/userPrompt.txt +++ b/packages/cdk/prompts/userPrompt.txt @@ -1,6 +1,4 @@ -# QUERY -{{user_query}} +{{user_query}} # CONTEXT -## Results $search_results$ -## LIST ALL RESULTS IN TABLE +$search_results$ diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 60ca45ff..8a50adc0 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -221,45 +221,39 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s return blocks -def _create_citation(citation: dict[str, str], feedback_data: dict, response_text: str): +def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, response_text: str): logger.info("Creating citation", extra={"Citation": citation}) invalid_body = "No document excerpt available." 
action_buttons = [] # Create citation blocks ["sourceNumber", "title", "link", "filename", "reference_text"] - title = citation.get("title") or citation.get("filename") or "Source" - body = citation.get("reference_text") or invalid_body - citation_link = citation.get("link") or "" - source_number = (citation.get("source_number", "0")).replace("\n", "") + content: str = citation.get("content", {}).get("text", invalid_body) + location: str = citation.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] + + # Tidy up contents + content.replace("»", "").strip() # Buttons can only be 75 characters long, truncate to be safe - button_text = f"[{source_number}] {title}" + button_text = f"[{i}] {location}" button = { "type": "button", "text": { "type": "plain_text", "text": button_text if len(button_text) < 75 else f"{button_text[:70]}...", }, - "action_id": f"cite_{source_number}", + "action_id": f"cite_{i}", "value": json.dumps( { **feedback_data, - "source_number": source_number, - "title": title, - "body": body, - "link": citation_link, + "source_number": f"{i}", + "title": location, + "body": content, }, separators=(",", ":"), ), } action_buttons.append(button) - # Update inline citations - response_text = response_text.replace( - f"[cit_{source_number}]", - f"<{citation_link}|[{source_number}]>" if citation_link else f"[{source_number}]", - ) - return [*action_buttons, response_text] @@ -432,28 +426,7 @@ def process_slack_message(event: Dict[str, Any], event_id: str, client: WebClien ai_response = process_ai_query(user_query, session_id) kb_response = ai_response["kb_response"] response_text = ai_response["text"] - - # Split out citation block if present - # Citations are not returned in the object without using `$output_format_instructions$` which overrides the - # system prompt. 
Instead, pull out and format the citations in the prompt manually - prompt_value_keys = [ - "source_number", - "title", - "link", - "reference_text", - ] - split = response_text.split("------") # Citations are separated from main body by ------ - - citations: list[dict[str, str]] = [] - if len(split) != 1: - response_text = split[0] - citation_block = split[1] - raw_citations = [] - raw_citations = re.compile(r"]*>(.*?)", re.DOTALL | re.IGNORECASE).findall(citation_block) - if len(raw_citations) > 0: - logger.info("Found citation(s)", extra={"Raw Citations": raw_citations}) - citations = [dict(zip(prompt_value_keys, citation.split("||"))) for citation in raw_citations] - logger.info("Parsed citation(s)", extra={"citations": citations}) + citations = ai_response["citations"] # Post the answer (plain) to get message_ts post_params = {"channel": channel, "text": response_text} @@ -656,12 +629,18 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, [selected, blocks] = format_blocks(blocks, current_id) # If selected, insert citation block before feedback + has_table = re.search(body, r"\|-+\|") is not None if selected: - citation_block = { - "type": "section", - "text": {"type": "mrkdwn", "text": f"{title}\n\n{body}"}, - "block_id": "citation_block", - } + citation_block = {} + if has_table: + citation_block = generate_table_block(title, body) + else: + citation_block = { + "type": "section", + "text": {"type": "mrkdwn", "text": f"{title}\n\n{body}"}, + "block_id": "citation_block", + } + feedback_index = next( (i for i, b in enumerate(blocks) if b.get("block_id") == "feedback-divider"), len(blocks), @@ -697,6 +676,30 @@ def format_blocks(blocks: Any, current_id: str): return [selected, blocks] +def generate_table_block(title: str, content: str): + lines = [line.strip() for line in content.splitlines() if line.strip()] + table_lines = [line for line in lines if not set(line) <= {"|", "-", " "}] + rows = [] + + for index, line in 
table_lines: + cells = [cell.strip() for cell in line.split("|") if cell.strip()] + row = [] + for cell in cells: + cell_block = { + "type": "rich_text", + "elements": [ + { + "type": "rich_text_section", + "elements": [{"type": "text", "text": cell, "style": {"bold": True} if index == 0 else {}}], + } + ], + } + row.append(cell_block) + rows.append(row) + + return {"type": "table", "rows": rows} + + # ================================================================ # Session management # ================================================================ diff --git a/packages/slackBotFunction/tests/test_slack_event_citations.py b/packages/slackBotFunction/tests/test_slack_event_citations.py deleted file mode 100644 index 9f4a66d3..00000000 --- a/packages/slackBotFunction/tests/test_slack_event_citations.py +++ /dev/null @@ -1,281 +0,0 @@ -import sys -import pytest -from unittest.mock import Mock, MagicMock - - -@pytest.fixture -def mock_logger(): - return MagicMock() - - -def test_process_slack_message_split_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - -def test_process_citation_events_update_chat(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_action - - body = { - "type": "block_actions", - "message": { - "ts": "123", - "text": "", - "blocks": [ - { - "type": "section", - "block_id": "OvNCm", - "text": { - "type": "mrkdwn", - "text": "", - }, - }, - { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] Downloading 
a single prescription using the prescription's ID, or ...", - "emoji": "true", - }, - "value": '{"ck":"123","ch":"123","mt":"123","tt":"123","source_number":"1","title":"title"', - } - ], - }, - ], - }, - "channel": { - "id": "ABC123", - }, - "actions": [ - { - "action_id": "cite_1", - "block_id": "citation_actions", - "text": { - "type": "plain_text", - "text": "[1] Downloading a single prescription using the prescription's ID, or ...", - "emoji": "true", - }, - "value": '{"ck":"123","ch":"C095D4SRX6W","mt":"123","tt":"123","source_number":"1","title":""}', - "type": "button", - "action_ts": "1765807735.805872", - } - ], - } - - # perform operation - process_async_slack_action(body, mock_client) - - # assertions - mock_client.chat_update.assert_called() - - -def test_process_citation_events_update_chat_message_open_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": "123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "1", - "title": "Citation Title", - "body": "Citation Body", - "link": "https://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": None, # Set citation as de-active - "value": str(params), - }, - ], - } - - message = { - "blocks": [citations], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [ - citations, - { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> 
Citation Body"}, - "block_id": "citation_block", - }, - ] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) - - -def test_process_citation_events_update_chat_message_close_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": "123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "1", - "title": "Citation Title", - "body": "Citation Body", - "link": "https://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": "primary", # Set citation as active - "value": str(params), - }, - ], - } - - citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, - "block_id": "citation_block", - } - - message = { - "blocks": [citations, citation_body], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [ - citations, - ] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) - - -def test_process_citation_events_update_chat_message_change_close_citation(): - # set up mocks - mock_client = Mock() - mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} - mock_client.chat_update.return_value = {"ok": True} - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del 
sys.modules["app.slack.slack_events"] - from app.slack.slack_events import open_citation - - params = { - "ck": "123123", - "ch": "123123", - "mt": "123123.123123", - "tt": "123123.123123", - "source_number": "2", - "title": "Second Citation Title", - "body": "Second Citation Body", - "link": "https://example.com", - } - - citations = { - "type": "actions", - "block_id": "citation_actions", - "elements": [ - { - "type": "button", - "action_id": "cite_1", - "text": { - "type": "plain_text", - "text": "[1] The body of the citation", - "emoji": "true", - }, - "style": "primary", # Set citation as active - "value": str(params), - }, - { - "type": "button", - "action_id": "cite_2", - "text": { - "type": "plain_text", - "text": "[2] The body of the citation", - "emoji": "true", - }, - "style": None, # Set citation as active - "value": str(params), - }, - ], - } - - first_citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*First Citation Title*\n\n> First Citation Body"}, - "block_id": "citation_block", - } - - second_citation_body = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*Second Citation Title*\n\n> Second Citation Body"}, - "block_id": "citation_block", - } - - message = { - "blocks": [citations, first_citation_body], - } - - # perform operation - open_citation("ABC", "123", message, params, mock_client) - - # assertions - expected_blocks = [citations, second_citation_body] - mock_client.chat_update.assert_called() - mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) diff --git a/packages/slackBotFunction/tests/test_slack_events.py b/packages/slackBotFunction/tests/test_slack_events.py index 1195299f..2b616a64 100644 --- a/packages/slackBotFunction/tests/test_slack_events.py +++ b/packages/slackBotFunction/tests/test_slack_events.py @@ -1,6 +1,6 @@ import sys import pytest -from unittest.mock import Mock, patch, MagicMock, call +from unittest.mock import Mock, patch, MagicMock 
@pytest.fixture @@ -332,199 +332,6 @@ def test_citation_processing( mock_create_feedback_blocks.assert_called_once() -@patch("app.services.dynamo.get_state_information") -@patch("app.services.ai_processor.process_ai_query") -@patch("app.slack.slack_events.get_conversation_session") -@patch("app.slack.slack_events._create_feedback_blocks") -def test_citation_logging( - mock_get_session: Mock, - mock_create_feedback_blocks: Mock, - mock_process_ai_query: Mock, - mock_get_state_information: Mock, - mock_get_parameter: Mock, - mock_env: Mock, - mock_logger, -): - """Test block builder is being called correctly""" - with patch("app.core.config.get_logger", return_value=mock_logger): - # set up mocks - mock_client = Mock() - mock_process_ai_query.return_value = { - "text": "AI response\n------\ntest", - "session_id": "session-123", - "citations": [], - "kb_response": {"output": {"text": "AI response"}}, - } - mock_get_session.return_value = None # No existing session - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_slack_message - - # perform operation - slack_event_data = { - "text": "AI response", - "user": "U456", - "channel": "C789", - "ts": "1234567890.123", - } - - process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) - - # assertions - mock_logger.info.assert_has_calls( - [ - call( - "Processing message from user U456", - extra={ - "user_query": "AI response", - "conversation_key": "thread#C789#1234567890.123", - "event_id": "evt123", - }, - ), - # Found citations to split - call("Found citation(s)", extra={"Raw Citations": ["test"]}), - # Citations parsed correctly - call("Parsed citation(s)", extra={"citations": [{"source_number": "test"}]}), - ] - ) - - -@patch("app.services.dynamo.get_state_information") -def test_citation_creation( - mock_get_state_information: Mock, - mock_get_parameter: Mock, - mock_env: 
Mock, -): - """Test citations are being added via Slack blocks correctly""" - # set up mocks - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import _create_feedback_blocks - - _sourceNumber = "5" - _title = "Some Title Summarising the Document" - _link = "https://example.com" - _filename = "example.pdf" - _text_snippet = "This is some example text, maybe something about NHSE" - - result = _create_feedback_blocks( - response_text="Answer", - citations=[ - { - "source_number": _sourceNumber, - "title": _title, - "link": _link, - "filename": _filename, - "reference_text": _text_snippet, - } - ], - conversation_key="12345", - channel="C789", - message_ts="123", - thread_ts="123", - ) - - # assertions - # Verify that the citation button was added - citation_section = result[1] - assert citation_section is not None - - # Verify button is correct - assert citation_section["type"] == "actions" - assert citation_section["block_id"] == "citation_actions" - assert citation_section["elements"] and len(citation_section["elements"]) > 0 - - # Verify that the citation data is correct - citation_button = citation_section["elements"][0] - assert citation_button is not None - - assert citation_button["type"] == "button" - assert citation_button["text"]["text"] == f"[{_sourceNumber}] {_title}" - - assert f'"source_number":"{_sourceNumber}"' in citation_button["value"] - assert f'"title":"{_title}"' in citation_button["value"] - assert f'"body":"{_text_snippet}"' in citation_button["value"] - assert f'"link":"{_link}"' in citation_button["value"] - - -@patch("app.services.dynamo.get_state_information") -def test_citation_creation_defaults( - mock_get_state_information: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test regex text processing functionality within process_async_slack_event""" - # set up mocks - # delete and import module to test - if 
"app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import _create_feedback_blocks - - result = _create_feedback_blocks( - response_text="Answer", - citations=[{}], # Pass in empty object - conversation_key="12345", - channel="C789", - message_ts="123", - thread_ts="123", - ) - - # assertions - # Verify that the citation button was added - citation_section = result[1] - assert citation_section is not None - - # Verify that the citation data is correct - citation_button = citation_section["elements"][0] - assert citation_button is not None - - assert citation_button["type"] == "button" - assert citation_button["text"]["text"] == "[0] Source" - - assert '"source_number":"0"' in citation_button["value"] - assert '"title":"Source"' in citation_button["value"] - assert '"body":"No document excerpt available."' in citation_button["value"] - assert '"link":""' in citation_button["value"] - - -@patch("app.services.dynamo.get_state_information") -def test_response_handle_links( - mock_get_state_information: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test regex text processing citation links in response body""" - # set up mocks - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import _create_feedback_blocks - - result = _create_feedback_blocks( - response_text="[cit_0]", - citations=[ - { - "source_number": "0", - "link": "https://example.com", - } - ], - conversation_key="12345", - channel="C789", - message_ts="123", - thread_ts="123", - ) - - # assertions - # Verify links in the body are changed to slack links - citation_section = result[0] - assert citation_section is not None - - assert "" in citation_section["text"]["text"] - - @patch("app.services.dynamo.get_state_information") @patch("app.services.dynamo.store_state_information") @patch("app.services.ai_processor.process_ai_query") 
From 68e56a674c370be2342928920c42f5a166eaab55 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 16 Dec 2025 14:56:58 +0000 Subject: [PATCH 06/29] feat: Use citations from response --- packages/slackBotFunction/app/slack/slack_events.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 8a50adc0..0a3e1218 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -197,7 +197,7 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s logger.info("No citations") else: for i, citation in enumerate(citations): - result = _create_citation(citation, feedback_data, response_text) + result = _create_citation(i, citation, feedback_data, response_text) action_buttons.append(result[0]) response_text = result[1] @@ -228,13 +228,13 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp # Create citation blocks ["sourceNumber", "title", "link", "filename", "reference_text"] content: str = citation.get("content", {}).get("text", invalid_body) - location: str = citation.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] + title: str = citation.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] # Tidy up contents content.replace("»", "").strip() # Buttons can only be 75 characters long, truncate to be safe - button_text = f"[{i}] {location}" + button_text = f"[{i}] {title}" button = { "type": "button", "text": { @@ -246,7 +246,7 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp { **feedback_data, "source_number": f"{i}", - "title": location, + "title": title, "body": content, }, separators=(",", ":"), From 21a1fcc4b9fd73b2fecbe63ef08c4e8403a7d9a2 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 16 Dec 2025 15:16:11 +0000 Subject: 
[PATCH 07/29] feat: Use citations from response --- packages/slackBotFunction/app/slack/slack_events.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 0a3e1218..8609aca8 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -205,6 +205,9 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s # Remove any citations that have not been returned response_text = response_text.replace("cit_", "") + # Remove Thinking + response_text = re.sub(r".*?", "", response_text, flags=re.DOTALL) + # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -226,9 +229,10 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp invalid_body = "No document excerpt available." action_buttons = [] - # Create citation blocks ["sourceNumber", "title", "link", "filename", "reference_text"] - content: str = citation.get("content", {}).get("text", invalid_body) - title: str = citation.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] + # Create citation blocks + reference = citation.get("retrievedReferences", [{}])[0] + content: str = reference.get("content", {}).get("text", invalid_body) + title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] # Tidy up contents content.replace("»", "").strip() From 9f4e2d1c719492396478aefb28a55f9b06684249 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 16 Dec 2025 15:46:47 +0000 Subject: [PATCH 08/29] feat: Trim messages to less than 1000 --- packages/slackBotFunction/app/slack/slack_events.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py 
index 8609aca8..10b3d8a1 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -206,7 +206,7 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s response_text = response_text.replace("cit_", "") # Remove Thinking - response_text = re.sub(r".*?", "", response_text, flags=re.DOTALL) + response_text = re.sub(r"(\n.*)+", "", response_text, flags=re.DOTALL) # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -235,7 +235,7 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] # Tidy up contents - content.replace("»", "").strip() + content.replace("»", "").strip()[:1000] # Buttons can only be 75 characters long, truncate to be safe button_text = f"[{i}] {title}" @@ -641,7 +641,7 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, else: citation_block = { "type": "section", - "text": {"type": "mrkdwn", "text": f"{title}\n\n{body}"}, + "text": {"type": "mrkdwn", "text": f"{title}\n\n{body[:1000]}"}, "block_id": "citation_block", } @@ -694,7 +694,9 @@ def generate_table_block(title: str, content: str): "elements": [ { "type": "rich_text_section", - "elements": [{"type": "text", "text": cell, "style": {"bold": True} if index == 0 else {}}], + "elements": [ + {"type": "text", "text": cell[:500], "style": {"bold": True} if index == 0 else {}} + ], } ], } From 5d39a6f6d7418b47ac68dafc7f5234cacad817c6 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Wed, 17 Dec 2025 08:48:01 +0000 Subject: [PATCH 09/29] feat: Remove table and reformat body --- .../app/slack/slack_events.py | 48 ++++--------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 
10b3d8a1..f1e22ef7 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -206,7 +206,7 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s response_text = response_text.replace("cit_", "") # Remove Thinking - response_text = re.sub(r"(\n.*)+", "", response_text, flags=re.DOTALL) + response_text = re.sub(r"(\n*.*)+", "", response_text, flags=re.DOTALL) # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -230,7 +230,10 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp action_buttons = [] # Create citation blocks - reference = citation.get("retrievedReferences", [{}])[0] + reference = citation.get("retrievedReferences", []) + if not reference: + logger.info("No reference found in citation") + return [*action_buttons, response_text] content: str = reference.get("content", {}).get("text", invalid_body) title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] @@ -633,17 +636,12 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, [selected, blocks] = format_blocks(blocks, current_id) # If selected, insert citation block before feedback - has_table = re.search(body, r"\|-+\|") is not None if selected: - citation_block = {} - if has_table: - citation_block = generate_table_block(title, body) - else: - citation_block = { - "type": "section", - "text": {"type": "mrkdwn", "text": f"{title}\n\n{body[:1000]}"}, - "block_id": "citation_block", - } + citation_block = { + "type": "section", + "text": {"type": "mrkdwn", "text": f"{title}\n\n{body[:1000]}"}, + "block_id": "citation_block", + } feedback_index = next( (i for i, b in enumerate(blocks) if b.get("block_id") == "feedback-divider"), @@ -680,32 +678,6 @@ def format_blocks(blocks: Any, current_id: str): return [selected, blocks] -def generate_table_block(title: str, 
content: str): - lines = [line.strip() for line in content.splitlines() if line.strip()] - table_lines = [line for line in lines if not set(line) <= {"|", "-", " "}] - rows = [] - - for index, line in table_lines: - cells = [cell.strip() for cell in line.split("|") if cell.strip()] - row = [] - for cell in cells: - cell_block = { - "type": "rich_text", - "elements": [ - { - "type": "rich_text_section", - "elements": [ - {"type": "text", "text": cell[:500], "style": {"bold": True} if index == 0 else {}} - ], - } - ], - } - row.append(cell_block) - rows.append(row) - - return {"type": "table", "rows": rows} - - # ================================================================ # Session management # ================================================================ From 94edab04c38f49780ad0444f9218a3cd28857aae Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Wed, 17 Dec 2025 09:12:44 +0000 Subject: [PATCH 10/29] feat: Remove table and reformat body --- packages/slackBotFunction/app/slack/slack_events.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index f1e22ef7..cbd584c5 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -198,15 +198,14 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s else: for i, citation in enumerate(citations): result = _create_citation(i, citation, feedback_data, response_text) - - action_buttons.append(result[0]) - response_text = result[1] + action_buttons += result.get("action_buttons", []) + response_text = result.get("response_text", response_text) # Remove any citations that have not been returned response_text = response_text.replace("cit_", "") # Remove Thinking - response_text = re.sub(r"(\n*.*)+", "", response_text, flags=re.DOTALL) + response_text = re.sub(r"(\n|\s|.)*", "", 
response_text, flags=re.DOTALL) # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -233,7 +232,7 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp reference = citation.get("retrievedReferences", []) if not reference: logger.info("No reference found in citation") - return [*action_buttons, response_text] + return {"action_buttons": [], "response_text": response_text} content: str = reference.get("content", {}).get("text", invalid_body) title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] @@ -261,7 +260,7 @@ def _create_citation(i: int, citation: dict[str, str], feedback_data: dict, resp } action_buttons.append(button) - return [*action_buttons, response_text] + return {"action_buttons": action_buttons, "response_text": response_text} # ================================================================ From 0226022fff5f886e409bd6434b1d4459dd0e1c04 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Wed, 17 Dec 2025 09:31:53 +0000 Subject: [PATCH 11/29] feat: Remove table and reformat body --- packages/slackBotFunction/app/slack/slack_events.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index cbd584c5..e196e037 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -205,7 +205,7 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s response_text = response_text.replace("cit_", "") # Remove Thinking - response_text = re.sub(r"(\n|\s|.)*", "", response_text, flags=re.DOTALL) + response_text = re.sub(r"(\\n|\n|.)*", "", response_text, flags=re.DOTALL) # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -233,6 +233,9 @@ def _create_citation(i: int, citation: dict[str, 
str], feedback_data: dict, resp if not reference: logger.info("No reference found in citation") return {"action_buttons": [], "response_text": response_text} + + # Get first reference and its content + reference = reference[0] content: str = reference.get("content", {}).get("text", invalid_body) title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] From 2b605d0a6fbe4e470b6c6828974a5df9885cde12 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Wed, 17 Dec 2025 16:53:51 +0000 Subject: [PATCH 12/29] feat: remove orchestration From 38a6776d44bb9d36508ba144cafb0cb8abd47cef Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Thu, 18 Dec 2025 10:18:12 +0000 Subject: [PATCH 13/29] feat: roll back citation handling --- .../app/slack/slack_events.py | 67 ++++++++++++------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index e196e037..a10eefbd 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -197,16 +197,14 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s logger.info("No citations") else: for i, citation in enumerate(citations): - result = _create_citation(i, citation, feedback_data, response_text) + result = _create_citation(citation, feedback_data, response_text) + action_buttons += result.get("action_buttons", []) response_text = result.get("response_text", response_text) # Remove any citations that have not been returned response_text = response_text.replace("cit_", "") - # Remove Thinking - response_text = re.sub(r"(\\n|\n|.)*", "", response_text, flags=re.DOTALL) - # Main body blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": response_text}}) @@ -223,46 +221,45 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s return blocks -def 
_create_citation(i: int, citation: dict[str, str], feedback_data: dict, response_text: str): +def _create_citation(citation: dict[str, str], feedback_data: dict, response_text: str): logger.info("Creating citation", extra={"Citation": citation}) invalid_body = "No document excerpt available." action_buttons = [] - # Create citation blocks - reference = citation.get("retrievedReferences", []) - if not reference: - logger.info("No reference found in citation") - return {"action_buttons": [], "response_text": response_text} - - # Get first reference and its content - reference = reference[0] - content: str = reference.get("content", {}).get("text", invalid_body) - title: str = reference.get("location", {}).get("s3Location", {}).get("uri", "/").split("/")[-1] - - # Tidy up contents - content.replace("»", "").strip()[:1000] + # Create citation blocks ["sourceNumber", "title", "link", "filename", "reference_text"] + title = citation.get("title") or citation.get("filename") or "Source" + body = citation.get("reference_text") or invalid_body + citation_link = citation.get("link") or "" + source_number = (citation.get("source_number", "0")).replace("\n", "") # Buttons can only be 75 characters long, truncate to be safe - button_text = f"[{i}] {title}" + button_text = f"[{source_number}] {title}" button = { "type": "button", "text": { "type": "plain_text", "text": button_text if len(button_text) < 75 else f"{button_text[:70]}...", }, - "action_id": f"cite_{i}", + "action_id": f"cite_{source_number}", "value": json.dumps( { **feedback_data, - "source_number": f"{i}", + "source_number": source_number, "title": title, - "body": content, + "body": body, + "link": citation_link, }, separators=(",", ":"), ), } action_buttons.append(button) + # Update inline citations + response_text = response_text.replace( + f"[cit_{source_number}]", + f"<{citation_link}|[{source_number}]>" if citation_link else f"[{source_number}]", + ) + return {"action_buttons": action_buttons, 
"response_text": response_text} @@ -435,7 +432,28 @@ def process_slack_message(event: Dict[str, Any], event_id: str, client: WebClien ai_response = process_ai_query(user_query, session_id) kb_response = ai_response["kb_response"] response_text = ai_response["text"] - citations = ai_response["citations"] + + # Split out citation block if present + # Citations are not returned in the object without using `$output_format_instructions$` which overrides the + # system prompt. Instead, pull out and format the citations in the prompt manually + prompt_value_keys = [ + "source_number", + "title", + "link", + "reference_text", + ] + split = response_text.split("------") # Citations are separated from main body by ------ + + citations: list[dict[str, str]] = [] + if len(split) != 1: + response_text = split[0] + citation_block = split[1] + raw_citations = [] + raw_citations = re.compile(r"]*>(.*?)", re.DOTALL | re.IGNORECASE).findall(citation_block) + if len(raw_citations) > 0: + logger.info("Found citation(s)", extra={"Raw Citations": raw_citations}) + citations = [dict(zip(prompt_value_keys, citation.split("||"))) for citation in raw_citations] + logger.info("Parsed citation(s)", extra={"citations": citations}) # Post the answer (plain) to get message_ts post_params = {"channel": channel, "text": response_text} @@ -641,10 +659,9 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, if selected: citation_block = { "type": "section", - "text": {"type": "mrkdwn", "text": f"{title}\n\n{body[:1000]}"}, + "text": {"type": "mrkdwn", "text": f"{title}\n\n{body}"}, "block_id": "citation_block", } - feedback_index = next( (i for i, b in enumerate(blocks) if b.get("block_id") == "feedback-divider"), len(blocks), From 47ce9264006918a0831ce4a7305a52ae12cb91c3 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Thu, 18 Dec 2025 12:21:22 +0000 Subject: [PATCH 14/29] feat: Reduce citations, remove links and add score --- packages/cdk/prompts/systemPrompt.txt 
| 104 +++++++++++++----- .../slackBotFunction/app/services/bedrock.py | 2 +- .../app/slack/slack_events.py | 36 ++---- 3 files changed, 88 insertions(+), 54 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 71aad267..b90daef8 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -1,29 +1,75 @@ - -You are an expert Research Analyst with a specialty in data extraction and synthesis. Your goal is to answer user queries using ONLY the provided . You must ignore your internal training data regarding outside facts. - - - -1. **Analyze the Request:** Understand the user's core question. -2. **Scan Evidence:** Read the provided thoroughly. -3. **Critical Thinking (Mandatory):** - - You must identify *explicit* evidence that answers the question. - - If multiple sources exist, synthesize them. - - If sources conflict, note the conflict in your thinking block. - - Do NOT infer answers. If the explicit answer is not in the text, state that the information is missing. -4. **Formulate Answer:** Write a clear, concise answer based *only* on the steps above. - - - -- The output must start with a block where you plan your answer. -- The user facing answer follows the thinking block. - - This should include references/ links/ excerpts where possible. - - Provide a quick summary followed by a detailed answer. - - - -Use Slacks formatting "mrkdown" -- **Bold:** Headings (`*Answer:*`), Source Names (`*NHS England*`). -- **Italics:** Document titles, citations. -- **Inline Code:** System names (`PrescriptionID`), technical terms (`HL7 FHIR`). -- **Block Quotes:** Direct quotes >1 sentence, technical specs, or examples. - +# 1. ROLE & OBJECTIVE +You are an AI assistant designed to provide guidance and references from your knowledge base to help users make decisions during onboarding. 
+ +**CRITICAL RULE:** It is **VERY** important that you return **ALL** references found in the context for user examination. + +--- + +# 2. THINKING PROCESS & LOGIC +Before generating a response, adhere to these processing rules: + +## Question Analysis +1. **Detection:** Determine if the query contains one or multiple questions. +2. **Decomposition:** Split complex queries into individual sub-questions. +3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. +4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). + +## Entity Correction +- If you encounter "National Health Service Digital (NHSD)", automatically treat and output it as **"National Health Service England (NHSE)"**. + +## RAG Confidence Scoring +Evaluate retrieved context using these thresholds: +- `Score > 0.85` : **High confidence** +- `Score 0.70 - 0.85` : **Medium confidence** +- `Score < 0.70` : **Low confidence** + +--- + +# 3. OUTPUT STRUCTURE +Construct your response in this exact order: + +1. **Summary:** A concise overview (Maximum **100 characters**). +2. **Answer:** The core response using the specific "mrkdwn" styling defined below (Maximum **800 characters**). +3. **Separator:** A literal line break using `------`. +4. **Bibliography:** The list of all sources used. + +--- + +# 4. FORMATTING RULES ("mrkdwn") +You must use a specific variation of markdown. 
Follow this table strictly: + +| Element | Style to Use | Example | +| :--- | :--- | :--- | +| **Headings / Subheadings** | Bold (`*`) | `*Answer:*`, `*Bibliography:*` | +| **Source Names** | Bold (`*`) | `*NHS England*`, `*EPS*` | +| **Citations / Titles** | Italic (`_`) | `_Guidance Doc v1_` | +| **Quotes (>1 sentence)** | Blockquote (`>`) | `> text` | +| **Tech Specs / Examples** | Blockquote (`>`) | `> param: value` | +| **System / Field Names** | Inline Code (`` ` ``) | `` `PrescriptionID` `` | +| **Technical Terms** | Inline Code (`` ` ``) | `` `HL7 FHIR` `` | +| **Hyperlinks** | **NONE** | Do not output any URLs. | + +--- + +# 5. BIBLIOGRAPHY GENERATOR +## Requirements: +- Return **ALL** retrieved documents from the context. +- Title length must be **< 50 characters**. +- Text excerpt should be a long as possible and include text used for reasoning + +## Format +Use the following format when creating bibliography entries: +source number||summary title||file contents||relevance score + +# 6. Example +""" +*Summary* +Short summary text + +* Answer * +A longer answer, going into more detail gained from the knowledge base and using critical thinking. 
+ +------ +1||Some PDF file||This is the precise snippet of the pdf file which answers the question.||0.98 +1||Some TXT file||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76 +""" diff --git a/packages/slackBotFunction/app/services/bedrock.py b/packages/slackBotFunction/app/services/bedrock.py index d1fcdaae..44d02019 100644 --- a/packages/slackBotFunction/app/services/bedrock.py +++ b/packages/slackBotFunction/app/services/bedrock.py @@ -44,7 +44,7 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat "knowledgeBaseId": config.KNOWLEDGEBASE_ID, "modelArn": prompt_template.get("model_id", config.RAG_MODEL_ID), "retrievalConfiguration": { - "vectorSearchConfiguration": {"numberOfResults": 10, "overrideSearchType": "SEMANTIC"} + "vectorSearchConfiguration": {"numberOfResults": 5, "overrideSearchType": "SEMANTIC"} }, "generationConfiguration": { "guardrailConfiguration": { diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index a10eefbd..a32f893d 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -226,11 +226,11 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex invalid_body = "No document excerpt available." 
action_buttons = [] - # Create citation blocks ["sourceNumber", "title", "link", "filename", "reference_text"] - title = citation.get("title") or citation.get("filename") or "Source" - body = citation.get("reference_text") or invalid_body - citation_link = citation.get("link") or "" + # Create citation blocks ["source_number", "title", "excerpt", "relevance_score"] source_number = (citation.get("source_number", "0")).replace("\n", "") + title = citation.get("title") or citation.get("filename") or "Source" + body = citation.get("excerpt") or invalid_body + score = citation.get("relevance_score") or "0" # Buttons can only be 75 characters long, truncate to be safe button_text = f"[{source_number}] {title}" @@ -242,23 +242,14 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex }, "action_id": f"cite_{source_number}", "value": json.dumps( - { - **feedback_data, - "source_number": source_number, - "title": title, - "body": body, - "link": citation_link, - }, + {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": int(score)}, separators=(",", ":"), ), } action_buttons.append(button) - # Update inline citations - response_text = response_text.replace( - f"[cit_{source_number}]", - f"<{citation_link}|[{source_number}]>" if citation_link else f"[{source_number}]", - ) + # Update inline citations to remove "cit_" prefix + response_text = response_text.replace(f"[cit_{source_number}]", f"[{source_number}]") return {"action_buttons": action_buttons, "response_text": response_text} @@ -436,12 +427,7 @@ def process_slack_message(event: Dict[str, Any], event_id: str, client: WebClien # Split out citation block if present # Citations are not returned in the object without using `$output_format_instructions$` which overrides the # system prompt. 
Instead, pull out and format the citations in the prompt manually - prompt_value_keys = [ - "source_number", - "title", - "link", - "reference_text", - ] + prompt_value_keys = ["source_number", "title", "excerpt", "relevance_score"] split = response_text.split("------") # Citations are separated from main body by ------ citations: list[dict[str, str]] = [] @@ -653,7 +639,9 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, current_id = f"cite_{source_number}".strip() # Reset all button styles, then set the clicked one - [selected, blocks] = format_blocks(blocks, current_id) + result = format_blocks(blocks, current_id) + selected = result["selected"] + blocks = result["blocks"] # If selected, insert citation block before feedback if selected: @@ -694,7 +682,7 @@ def format_blocks(blocks: Any, current_id: str): else: # Unselect all other buttons element.pop("style", None) - return [selected, blocks] + return {"selected": selected, "blocks": blocks} # ================================================================ From 79e5c9225e499c20c21564375ab70a395bded01b Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Thu, 18 Dec 2025 13:39:50 +0000 Subject: [PATCH 15/29] feat: Reduce citations, remove links and add score --- packages/slackBotFunction/app/slack/slack_events.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index a32f893d..f481151a 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -3,6 +3,7 @@ Handles conversation memory, Bedrock queries, and responding back to Slack """ +import decimal import re import time import traceback @@ -242,7 +243,7 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex }, "action_id": f"cite_{source_number}", "value": json.dumps( - {**feedback_data, "source_number": 
source_number, "title": title, "body": body, "score": int(score)}, + {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": decimal(score)}, separators=(",", ":"), ), } From aaf539a607af7b08c5b7511c0f63e9d35a0b05f7 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Thu, 18 Dec 2025 14:07:26 +0000 Subject: [PATCH 16/29] feat: Reduce citations, remove links and add score --- packages/slackBotFunction/app/slack/slack_events.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index f481151a..0d54ca7d 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -3,7 +3,6 @@ Handles conversation memory, Bedrock queries, and responding back to Slack """ -import decimal import re import time import traceback @@ -235,6 +234,7 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex # Buttons can only be 75 characters long, truncate to be safe button_text = f"[{source_number}] {title}" + button_value = {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": score} button = { "type": "button", "text": { @@ -243,7 +243,7 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex }, "action_id": f"cite_{source_number}", "value": json.dumps( - {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": decimal(score)}, + button_value, separators=(",", ":"), ), } @@ -252,6 +252,7 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex # Update inline citations to remove "cit_" prefix response_text = response_text.replace(f"[cit_{source_number}]", f"[{source_number}]") + logger.info("Created citation", extra=button_value) return {"action_buttons": action_buttons, "response_text": response_text} From 
e5868555d9c672467c4f598c92d4f0a978b551f9 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Thu, 18 Dec 2025 15:54:14 +0000 Subject: [PATCH 17/29] feat: Add tests back in --- packages/cdk/prompts/systemPrompt.txt | 29 +- .../cdk/resources/BedrockPromptResources.ts | 14 +- .../app/slack/slack_events.py | 1 - .../test_slack_events_actions.py | 276 ++++++++++++++ .../test_slack_events_citations.py | 324 +++++++++++++++++ .../test_slack_events_messages.py} | 342 ------------------ 6 files changed, 622 insertions(+), 364 deletions(-) create mode 100644 packages/slackBotFunction/tests/test_slack_events/test_slack_events_actions.py create mode 100644 packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py rename packages/slackBotFunction/tests/{test_slack_events.py => test_slack_events/test_slack_events_messages.py} (50%) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index b90daef8..4c347c9a 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -1,27 +1,29 @@ -# 1. ROLE & OBJECTIVE +# 1. Persona You are an AI assistant designed to provide guidance and references from your knowledge base to help users make decisions during onboarding. -**CRITICAL RULE:** It is **VERY** important that you return **ALL** references found in the context for user examination. +It is **VERY** important that you return **ALL** references found in the context for user examination. --- # 2. THINKING PROCESS & LOGIC Before generating a response, adhere to these processing rules: -## Question Analysis +## A. Question Analysis 1. **Detection:** Determine if the query contains one or multiple questions. 2. **Decomposition:** Split complex queries into individual sub-questions. 3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. 4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). 
-## Entity Correction +## B. Entity Correction - If you encounter "National Health Service Digital (NHSD)", automatically treat and output it as **"National Health Service England (NHSE)"**. -## RAG Confidence Scoring -Evaluate retrieved context using these thresholds: +## C. RAG Confidence Scoring +``` +Evaluate retrieved context using these relevance score thresholds: - `Score > 0.85` : **High confidence** - `Score 0.70 - 0.85` : **Medium confidence** - `Score < 0.70` : **Low confidence** +``` --- @@ -52,14 +54,14 @@ You must use a specific variation of markdown. Follow this table strictly: --- # 5. BIBLIOGRAPHY GENERATOR -## Requirements: +**Requirements:** - Return **ALL** retrieved documents from the context. - Title length must be **< 50 characters**. -- Text excerpt should be a long as possible and include text used for reasoning +- Use the exact string format below (do not render it as a table or list). -## Format -Use the following format when creating bibliography entries: -source number||summary title||file contents||relevance score +**Template:** +```text +source number||summary title||excerpt||relevance score||source name # 6. Example """ @@ -70,6 +72,7 @@ Short summary text A longer answer, going into more detail gained from the knowledge base and using critical thinking. 
------ -1||Some PDF file||This is the precise snippet of the pdf file which answers the question.||0.98 -1||Some TXT file||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76 +1||A document||This is the precise snippet of the pdf file which answers the question.||0.98||very_helpful_doc.pdf +2||Another file||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76||something_interesting.txt +3||A useless file||This file doesn't contain anything that useful||0.05||folder/another/some_file.txt """ diff --git a/packages/cdk/resources/BedrockPromptResources.ts b/packages/cdk/resources/BedrockPromptResources.ts index c16b3a07..73a8fa2c 100644 --- a/packages/cdk/resources/BedrockPromptResources.ts +++ b/packages/cdk/resources/BedrockPromptResources.ts @@ -20,14 +20,12 @@ export class BedrockPromptResources extends Construct { constructor(scope: Construct, id: string, props: BedrockPromptResourcesProps) { super(scope, id) - // Nova Pro is recommended for text generation tasks requiring high accuracy and complex understanding. 
- const novaProModel = BedrockFoundationModel.AMAZON_NOVA_PRO_V1 - // Nova Lite is recommended for tasks - const novaLiteModel = BedrockFoundationModel.AMAZON_NOVA_LITE_V1 + const ragModel = new BedrockFoundationModel("meta.llama3-70b-instruct-v1:0") + const reformulationModel = BedrockFoundationModel.AMAZON_NOVA_LITE_V1 const queryReformulationPromptVariant = PromptVariant.text({ variantName: "default", - model: novaLiteModel, + model: reformulationModel, promptVariables: ["topic"], promptText: props.settings.reformulationPrompt.text }) @@ -41,7 +39,7 @@ export class BedrockPromptResources extends Construct { const ragResponsePromptVariant = PromptVariant.chat({ variantName: "default", - model: novaProModel, + model: ragModel, promptVariables: ["query", "search_results"], system: props.settings.systemPrompt.text, messages: [props.settings.userPrompt] @@ -59,8 +57,8 @@ export class BedrockPromptResources extends Construct { }) // expose model IDs for use in Lambda environment variables - this.ragModelId = novaProModel.modelId - this.queryReformulationModelId = novaLiteModel.modelId + this.ragModelId = ragModel.modelId + this.queryReformulationModelId = reformulationModel.modelId this.queryReformulationPrompt = queryReformulationPrompt this.ragResponsePrompt = ragPrompt diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 0d54ca7d..ce016794 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -222,7 +222,6 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s def _create_citation(citation: dict[str, str], feedback_data: dict, response_text: str): - logger.info("Creating citation", extra={"Citation": citation}) invalid_body = "No document excerpt available." 
action_buttons = [] diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_actions.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_actions.py new file mode 100644 index 00000000..635920e7 --- /dev/null +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_actions.py @@ -0,0 +1,276 @@ +import sys +import pytest +from unittest.mock import Mock, patch, MagicMock + + +@pytest.fixture +def mock_logger(): + return MagicMock() + + +@patch("app.utils.handler_utils.forward_event_to_pull_request_lambda") +def test_process_async_slack_event_feedback( + mock_forward_event_to_pull_request_lambda: Mock, + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test successful async event processing""" + # set up mocks + mock_client = Mock() + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_event + + # perform operation + slack_event_data = { + "text": "feedback: this is some feedback", + "user": "U456", + "channel": "C789", + "ts": "1234567890.123", + } + with patch("app.slack.slack_events.process_feedback_event") as mock_process_feedback_event, patch( + "app.slack.slack_events.process_slack_message" + ) as mock_process_slack_message: + process_async_slack_event(event=slack_event_data, event_id="evt123", client=mock_client) + mock_forward_event_to_pull_request_lambda.assert_not_called() + mock_process_feedback_event.assert_called_once_with( + message_text="feedback: this is some feedback", + conversation_key="thread#C789#1234567890.123", + user_id="U456", + channel_id="C789", + thread_root="1234567890.123", + client=mock_client, + event=slack_event_data, + ) + mock_process_slack_message.assert_not_called() + + +@patch("app.utils.handler_utils.is_latest_message") +def test_process_async_slack_action_positive( + mock_is_latest_message: Mock, + mock_get_parameter: Mock, + mock_env: 
Mock, +): + """Test successful async action processing""" + # set up mocks + mock_client = Mock() + mock_is_latest_message.return_value = True + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' + + # perform operation + slack_action_data = { + "type": "block_actions", + "user": {"id": "U123"}, + "channel": {"id": "C123"}, + "message": {"ts": "1759845126.972219"}, + "actions": [{"action_id": "feedback_yes", "value": feedback_value}], + } + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + process_async_slack_action(body=slack_action_data, client=mock_client) + + # assertions + mock_store_feedback.assert_called_once_with( + conversation_key="thread#C123#123", + feedback_type="positive", + user_id="U123", + channel_id="C123", + thread_ts="1759845114.407989", + message_ts="1759845126.972219", + client=mock_client, + ) + mock_client.chat_postMessage.assert_called_once_with( + channel="C123", + text="Thank you for your feedback.", + thread_ts="1759845114.407989", + ) + + +@patch("app.utils.handler_utils.is_latest_message") +def test_process_async_slack_action_negative( + mock_is_latest_message: Mock, + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test successful async action processing""" + # set up mocks + mock_client = Mock() + mock_is_latest_message.return_value = True + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' + + # perform operation + slack_action_data = { + "type": "block_actions", + "user": {"id": "U123"}, + "channel": {"id": 
"C123"}, + "message": {"ts": "1759845126.972219"}, + "actions": [{"action_id": "feedback_no", "value": feedback_value}], + } + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + process_async_slack_action(body=slack_action_data, client=mock_client) + + # assertions + mock_store_feedback.assert_called_once_with( + conversation_key="thread#C123#123", + feedback_type="negative", + user_id="U123", + channel_id="C123", + thread_ts="1759845114.407989", + message_ts="1759845126.972219", + client=mock_client, + ) + mock_client.chat_postMessage.assert_called_once_with( + channel="C123", + text='Please let us know how the answer could be improved. Start your message with "feedback:"', + thread_ts="1759845114.407989", + ) + + +@patch("app.utils.handler_utils.is_latest_message") +def test_process_async_slack_action_not_latest( + mock_is_latest_message: Mock, + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test successful async action processing""" + # set up mocks + mock_client = Mock() + mock_is_latest_message.return_value = False + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' + + # perform operation + slack_action_data = { + "type": "block_actions", + "user": {"id": "U123"}, + "channel": {"id": "C123"}, + "actions": [{"action_id": "feedback_no", "value": feedback_value}], + } + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + process_async_slack_action(body=slack_action_data, client=mock_client) + + # assertions + mock_store_feedback.assert_not_called() + mock_client.chat_postMessage.assert_not_called() + + +@patch("app.utils.handler_utils.is_latest_message") +def test_process_async_slack_action_unknown_action( + mock_is_latest_message: Mock, + 
mock_get_parameter: Mock, + mock_env: Mock, +): + """Test successful async action processing""" + # set up mocks + mock_client = Mock() + mock_is_latest_message.return_value = True + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' + + # perform operation + slack_action_data = { + "type": "block_actions", + "user": {"id": "U123"}, + "channel": {"id": "C123"}, + "actions": [{"action_id": "I_Do_Not_Know_This_Action", "value": feedback_value}], + } + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + process_async_slack_action(body=slack_action_data, client=mock_client) + + # assertions + mock_store_feedback.assert_not_called() + mock_client.chat_postMessage.assert_not_called() + + +def test_process_feedback_event(): + # set up mocks + mock_client = Mock() + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_feedback_event + + # perform operation + mock_event = {} + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + process_feedback_event( + message_text="feedback: this is some feedback", + conversation_key="thread#C123#123", + user_id="U123", + channel_id="C123", + thread_root="1759845114.407989", + event=mock_event, + client=mock_client, + ) + + # assertions + mock_store_feedback.assert_called_once_with( + conversation_key="thread#C123#123", + feedback_type="additional", + user_id="U123", + channel_id="C123", + thread_ts="1759845114.407989", + message_ts=None, + feedback_text="this is some feedback", + client=mock_client, + ) + mock_client.chat_postMessage.assert_called_once_with( + channel="C123", text="Thank you for your feedback.", 
thread_ts="1759845114.407989" + ) + + +@patch("app.services.slack.post_error_message") +def test_process_feedback_event_error( + mock_post_error_message: Mock, +): + # set up mocks + mock_client = Mock() + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_feedback_event + + # perform operation + mock_event = { + "channel": "C123", + "thread_ts": "123", + } + with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: + mock_store_feedback.side_effect = Exception("There was an error") + process_feedback_event( + message_text="feedback: this is some feedback", + conversation_key="thread#C123#123", + user_id="U123", + channel_id="C123", + thread_root="1759845114.407989", + event=mock_event, + client=mock_client, + ) + + # assertions + mock_post_error_message.assert_called_once_with(channel="C123", thread_ts="123", client=mock_client) diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py new file mode 100644 index 00000000..8bdcf02f --- /dev/null +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py @@ -0,0 +1,324 @@ +import sys +import pytest +from unittest.mock import Mock, MagicMock, patch + + +@pytest.fixture +def mock_logger(): + return MagicMock() + + +@patch("app.services.dynamo.get_state_information") +@patch("app.services.ai_processor.process_ai_query") +@patch("app.slack.slack_events.get_conversation_session") +@patch("app.slack.slack_events._create_feedback_blocks") +def test_citation_processing( + mock_get_session: Mock, + mock_process_ai_query: Mock, + mock_create_feedback_blocks: Mock, + mock_get_state_information: Mock, + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test block builder is being called correctly""" + # set up mocks + mock_client = Mock() + 
mock_process_ai_query.return_value = { + "text": "AI response", + "session_id": "session-123", + "citations": [], + "kb_response": {"output": {"text": "AI response"}}, + } + mock_get_session.return_value = None # No existing session + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_slack_message + + # perform operation + slack_event_data = { + "text": "Answer", + "user": "U456", + "channel": "C789", + "ts": "1234567890.123", + } + + process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) + + # assertions + # Verify that the message was processed (process_ai_query was called) + mock_create_feedback_blocks.assert_called_once() + + +def test_process_slack_message_split_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + +def test_process_citation_events_update_chat(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_async_slack_action + + body = { + "type": "block_actions", + "message": { + "ts": "123", + "text": "", + "blocks": [ + { + "type": "section", + "block_id": "OvNCm", + "text": { + "type": "mrkdwn", + "text": "", + }, + }, + { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] Downloading a single prescription using the prescription's ID, or ...", + "emoji": "true", + }, + "value": '{"ck":"123","ch":"123","mt":"123","tt":"123","source_number":"1","title":"title"', + } + ], + }, + 
], + }, + "channel": { + "id": "ABC123", + }, + "actions": [ + { + "action_id": "cite_1", + "block_id": "citation_actions", + "text": { + "type": "plain_text", + "text": "[1] Downloading a single prescription using the prescription's ID, or ...", + "emoji": "true", + }, + "value": '{"ck":"123","ch":"C095D4SRX6W","mt":"123","tt":"123","source_number":"1","title":""}', + "type": "button", + "action_ts": "1765807735.805872", + } + ], + } + + # perform operation + process_async_slack_action(body, mock_client) + + # assertions + mock_client.chat_update.assert_called() + + +def test_process_citation_events_update_chat_message_open_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": None, # Set citation as de-active + "value": str(params), + }, + ], + } + + message = { + "blocks": [citations], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [ + citations, + { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + }, + ] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def 
test_process_citation_events_update_chat_message_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + ], + } + + citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Citation Title*\n\n> Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [ + citations, + ] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_process_citation_events_update_chat_message_change_close_citation(): + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.return_value = {"ok": True} + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import open_citation + + params = { + "ck": "123123", + "ch": "123123", + "mt": "123123.123123", + "tt": "123123.123123", + "source_number": "2", + 
"title": "Second Citation Title", + "body": "Second Citation Body", + "relevance_score": "0.95", + } + + citations = { + "type": "actions", + "block_id": "citation_actions", + "elements": [ + { + "type": "button", + "action_id": "cite_1", + "text": { + "type": "plain_text", + "text": "[1] The body of the citation", + "emoji": "true", + }, + "style": "primary", # Set citation as active + "value": str(params), + }, + { + "type": "button", + "action_id": "cite_2", + "text": { + "type": "plain_text", + "text": "[2] The body of the citation", + "emoji": "true", + }, + "style": None, # Set citation as active + "value": str(params), + }, + ], + } + + first_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*First Citation Title*\n\n> First Citation Body"}, + "block_id": "citation_block", + } + + second_citation_body = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*Second Citation Title*\n\n> Second Citation Body"}, + "block_id": "citation_block", + } + + message = { + "blocks": [citations, first_citation_body], + } + + # perform operation + open_citation("ABC", "123", message, params, mock_client) + + # assertions + expected_blocks = [citations, second_citation_body] + mock_client.chat_update.assert_called() + mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) diff --git a/packages/slackBotFunction/tests/test_slack_events.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py similarity index 50% rename from packages/slackBotFunction/tests/test_slack_events.py rename to packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py index 2b616a64..16e256ef 100644 --- a/packages/slackBotFunction/tests/test_slack_events.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py @@ -108,45 +108,6 @@ def test_process_async_slack_event_pull_request_with_no_mention( mock_process_slack_message.assert_not_called() 
-@patch("app.utils.handler_utils.forward_event_to_pull_request_lambda") -def test_process_async_slack_event_feedback( - mock_forward_event_to_pull_request_lambda: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test successful async event processing""" - # set up mocks - mock_client = Mock() - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_event - - # perform operation - slack_event_data = { - "text": "feedback: this is some feedback", - "user": "U456", - "channel": "C789", - "ts": "1234567890.123", - } - with patch("app.slack.slack_events.process_feedback_event") as mock_process_feedback_event, patch( - "app.slack.slack_events.process_slack_message" - ) as mock_process_slack_message: - process_async_slack_event(event=slack_event_data, event_id="evt123", client=mock_client) - mock_forward_event_to_pull_request_lambda.assert_not_called() - mock_process_feedback_event.assert_called_once_with( - message_text="feedback: this is some feedback", - conversation_key="thread#C789#1234567890.123", - user_id="U456", - channel_id="C789", - thread_root="1234567890.123", - client=mock_client, - event=slack_event_data, - ) - mock_process_slack_message.assert_not_called() - - def test_process_slack_message_empty_query(mock_get_parameter: Mock, mock_env: Mock): """Test async event processing with empty query""" # set up mocks @@ -174,37 +135,6 @@ def test_process_slack_message_empty_query(mock_get_parameter: Mock, mock_env: M ) -@patch("app.services.dynamo.get_state_information") -@patch("app.services.ai_processor.process_ai_query") -@patch("app.slack.slack_events.get_conversation_session") -@patch("app.services.slack.post_error_message") -def test_process_slack_message_event_error( - mock_post_error_message: Mock, - mock_get_session: Mock, - mock_process_ai_query: Mock, - mock_get_state_information: Mock, - mock_get_parameter: Mock, 
- mock_env: Mock, -): - """Test async event processing with error""" - # set up mocks - mock_process_ai_query.side_effect = Exception("AI processing error") - mock_get_session.return_value = None # No existing session - mock_client = Mock() - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_slack_message - - # perform operation - slack_event_data = {"text": "test question", "user": "U456", "channel": "C789", "ts": "1234567890.123"} - process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) - - # assertions - mock_post_error_message.assert_called_once_with(channel="C789", thread_ts="1234567890.123", client=mock_client) - - @patch("app.services.dynamo.get_state_information") @patch("app.services.ai_processor.process_ai_query") @patch("app.slack.slack_events.get_conversation_session") @@ -289,49 +219,6 @@ def test_regex_text_processing( assert mock_client.chat_postMessage.called -@patch("app.services.dynamo.get_state_information") -@patch("app.services.ai_processor.process_ai_query") -@patch("app.slack.slack_events.get_conversation_session") -@patch("app.slack.slack_events._create_feedback_blocks") -def test_citation_processing( - mock_get_session: Mock, - mock_process_ai_query: Mock, - mock_create_feedback_blocks: Mock, - mock_get_state_information: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test block builder is being called correctly""" - # set up mocks - mock_client = Mock() - mock_process_ai_query.return_value = { - "text": "AI response", - "session_id": "session-123", - "citations": [], - "kb_response": {"output": {"text": "AI response"}}, - } - mock_get_session.return_value = None # No existing session - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_slack_message - - # 
perform operation - slack_event_data = { - "text": "Answer", - "user": "U456", - "channel": "C789", - "ts": "1234567890.123", - } - - process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) - - # assertions - # Verify that the message was processed (process_ai_query was called) - mock_create_feedback_blocks.assert_called_once() - - @patch("app.services.dynamo.get_state_information") @patch("app.services.dynamo.store_state_information") @patch("app.services.ai_processor.process_ai_query") @@ -444,232 +331,3 @@ def test_process_slack_message_dm_context( # assertions # no assertions as we are just checking it does not throw an error - - -@patch("app.utils.handler_utils.is_latest_message") -def test_process_async_slack_action_positive( - mock_is_latest_message: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test successful async action processing""" - # set up mocks - mock_client = Mock() - mock_is_latest_message.return_value = True - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_action - - feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' - - # perform operation - slack_action_data = { - "type": "block_actions", - "user": {"id": "U123"}, - "channel": {"id": "C123"}, - "message": {"ts": "1759845126.972219"}, - "actions": [{"action_id": "feedback_yes", "value": feedback_value}], - } - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - process_async_slack_action(body=slack_action_data, client=mock_client) - - # assertions - mock_store_feedback.assert_called_once_with( - conversation_key="thread#C123#123", - feedback_type="positive", - user_id="U123", - channel_id="C123", - thread_ts="1759845114.407989", - message_ts="1759845126.972219", - client=mock_client, - ) - 
mock_client.chat_postMessage.assert_called_once_with( - channel="C123", - text="Thank you for your feedback.", - thread_ts="1759845114.407989", - ) - - -@patch("app.utils.handler_utils.is_latest_message") -def test_process_async_slack_action_negative( - mock_is_latest_message: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test successful async action processing""" - # set up mocks - mock_client = Mock() - mock_is_latest_message.return_value = True - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_action - - feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' - - # perform operation - slack_action_data = { - "type": "block_actions", - "user": {"id": "U123"}, - "channel": {"id": "C123"}, - "message": {"ts": "1759845126.972219"}, - "actions": [{"action_id": "feedback_no", "value": feedback_value}], - } - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - process_async_slack_action(body=slack_action_data, client=mock_client) - - # assertions - mock_store_feedback.assert_called_once_with( - conversation_key="thread#C123#123", - feedback_type="negative", - user_id="U123", - channel_id="C123", - thread_ts="1759845114.407989", - message_ts="1759845126.972219", - client=mock_client, - ) - mock_client.chat_postMessage.assert_called_once_with( - channel="C123", - text='Please let us know how the answer could be improved. 
Start your message with "feedback:"', - thread_ts="1759845114.407989", - ) - - -@patch("app.utils.handler_utils.is_latest_message") -def test_process_async_slack_action_not_latest( - mock_is_latest_message: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test successful async action processing""" - # set up mocks - mock_client = Mock() - mock_is_latest_message.return_value = False - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_action - - feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' - - # perform operation - slack_action_data = { - "type": "block_actions", - "user": {"id": "U123"}, - "channel": {"id": "C123"}, - "actions": [{"action_id": "feedback_no", "value": feedback_value}], - } - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - process_async_slack_action(body=slack_action_data, client=mock_client) - - # assertions - mock_store_feedback.assert_not_called() - mock_client.chat_postMessage.assert_not_called() - - -@patch("app.utils.handler_utils.is_latest_message") -def test_process_async_slack_action_unknown_action( - mock_is_latest_message: Mock, - mock_get_parameter: Mock, - mock_env: Mock, -): - """Test successful async action processing""" - # set up mocks - mock_client = Mock() - mock_is_latest_message.return_value = True - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_async_slack_action - - feedback_value = '{"ck":"thread#C123#123","ch":"C123","mt":"1759845126.972219","tt":"1759845114.407989"}' - - # perform operation - slack_action_data = { - "type": "block_actions", - "user": {"id": "U123"}, - "channel": {"id": "C123"}, - "actions": [{"action_id": "I_Do_Not_Know_This_Action", "value": 
feedback_value}], - } - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - process_async_slack_action(body=slack_action_data, client=mock_client) - - # assertions - mock_store_feedback.assert_not_called() - mock_client.chat_postMessage.assert_not_called() - - -def test_process_feedback_event(): - # set up mocks - mock_client = Mock() - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_feedback_event - - # perform operation - mock_event = {} - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - process_feedback_event( - message_text="feedback: this is some feedback", - conversation_key="thread#C123#123", - user_id="U123", - channel_id="C123", - thread_root="1759845114.407989", - event=mock_event, - client=mock_client, - ) - - # assertions - mock_store_feedback.assert_called_once_with( - conversation_key="thread#C123#123", - feedback_type="additional", - user_id="U123", - channel_id="C123", - thread_ts="1759845114.407989", - message_ts=None, - feedback_text="this is some feedback", - client=mock_client, - ) - mock_client.chat_postMessage.assert_called_once_with( - channel="C123", text="Thank you for your feedback.", thread_ts="1759845114.407989" - ) - - -@patch("app.services.slack.post_error_message") -def test_process_feedback_event_error( - mock_post_error_message: Mock, -): - # set up mocks - mock_client = Mock() - - # delete and import module to test - if "app.slack.slack_events" in sys.modules: - del sys.modules["app.slack.slack_events"] - from app.slack.slack_events import process_feedback_event - - # perform operation - mock_event = { - "channel": "C123", - "thread_ts": "123", - } - with patch("app.slack.slack_events.store_feedback") as mock_store_feedback: - mock_store_feedback.side_effect = Exception("There was an error") - process_feedback_event( - message_text="feedback: this is some 
feedback", - conversation_key="thread#C123#123", - user_id="U123", - channel_id="C123", - thread_root="1759845114.407989", - event=mock_event, - client=mock_client, - ) - - # assertions - mock_post_error_message.assert_called_once_with(channel="C123", thread_ts="123", client=mock_client) From fb549beb7e8a90ffd57937c1791367cafa6e3b77 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 11:53:52 +0000 Subject: [PATCH 18/29] feat: Add tests back for citations --- packages/cdk/prompts/systemPrompt.txt | 23 +++- .../app/slack/slack_events.py | 109 +++++++++++------- .../test_slack_events_citations.py | 74 ++++++++++++ .../test_slack_events_messages.py | 87 +++++++++++++- 4 files changed, 247 insertions(+), 46 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 4c347c9a..94f7d3b4 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -8,21 +8,32 @@ It is **VERY** important that you return **ALL** references found in the context # 2. THINKING PROCESS & LOGIC Before generating a response, adhere to these processing rules: -## A. Question Analysis +## A. Context Verification +Scan the retrieved context for the specific answer. +1. **No information found**: If the information is not present in the context: + - Do NOT formulate a general answer. + - Do NOT use external resources (e.g., websites) to get an answer. + - Do NOT infer an answer from the user's question. + +## B. Question Analysis 1. **Detection:** Determine if the query contains one or multiple questions. 2. **Decomposition:** Split complex queries into individual sub-questions. 3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. 4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). +5. 
**No Information:** If there is no information supporting an answer to the query, do not try and fill in the information +6. **Strictness:** Do not infer information, be strict on evidence. -## B. Entity Correction +## C. Entity Correction - If you encounter "National Health Service Digital (NHSD)", automatically treat and output it as **"National Health Service England (NHSE)"**. -## C. RAG Confidence Scoring +## D. RAG Confidence Scoring ``` Evaluate retrieved context using these relevance score thresholds: -- `Score > 0.85` : **High confidence** -- `Score 0.70 - 0.85` : **Medium confidence** -- `Score < 0.70` : **Low confidence** +- `Score > 0.9` : **Diamond** (Definitive source) +- `Score 0.8 - 0.9` : **Gold** (Strong evidence) +- `Score 0.7 - 0.8` : **Silver** (Partial context) +- `Score 0.6 - 0.7` : **Bronze** (Weak relevance) +- `Score < 0.6` : **Scrap** (Ignore completely) ``` --- diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index ce016794..611699b6 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -203,6 +203,7 @@ def _create_response_body(citations: list[dict[str, str]], feedback_data: dict[s response_text = result.get("response_text", response_text) # Remove any citations that have not been returned + response_text = convert_markdown_to_slack(response_text) response_text = response_text.replace("cit_", "") # Main body @@ -226,35 +227,56 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex action_buttons = [] # Create citation blocks ["source_number", "title", "excerpt", "relevance_score"] - source_number = (citation.get("source_number", "0")).replace("\n", "") - title = citation.get("title") or citation.get("filename") or "Source" - body = citation.get("excerpt") or invalid_body - score = citation.get("relevance_score") or "0" - - # Buttons can only be 75 characters long, 
truncate to be safe - button_text = f"[{source_number}] {title}" - button_value = {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": score} - button = { - "type": "button", - "text": { - "type": "plain_text", - "text": button_text if len(button_text) < 75 else f"{button_text[:70]}...", - }, - "action_id": f"cite_{source_number}", - "value": json.dumps( - button_value, - separators=(",", ":"), - ), - } - action_buttons.append(button) + source_number: str = (citation.get("source_number", "0")).replace("\n", "") + title: str = citation.get("title") or citation.get("filename") or "Source" + body: str = citation.get("excerpt") or invalid_body + score: float = float(citation.get("relevance_score") or "0") + + # Format body + body = convert_markdown_to_slack(body) - # Update inline citations to remove "cit_" prefix - response_text = response_text.replace(f"[cit_{source_number}]", f"[{source_number}]") + if score < 60: # low relevance score, skip citation + logger.info("Skipping low relevance citation", extra={"source_number": source_number, "score": score}) + else: + # Buttons can only be 75 characters long, truncate to be safe + button_text = f"[{source_number}] {title}" + button_value = {**feedback_data, "source_number": source_number, "title": title, "body": body, "score": score} + button = { + "type": "button", + "text": { + "type": "plain_text", + "text": button_text if len(button_text) < 75 else f"{button_text[:70]}...", + }, + "action_id": f"cite_{source_number}", + "value": json.dumps( + button_value, + separators=(",", ":"), + ), + } + action_buttons.append(button) + + # Update inline citations to remove "cit_" prefix + response_text = response_text.replace(f"[cit_{source_number}]", f"[{source_number}]") + logger.info("Created citation", extra=button_value) - logger.info("Created citation", extra=button_value) return {"action_buttons": action_buttons, "response_text": response_text} +def convert_markdown_to_slack(body: str) 
-> str: + """Convert basic markdown to Slack formatting""" + # Fix common encoding issues + body = body.replace("»", "") # Remove double chevrons + body = body.replace("â¢", "-") # Replace bullet points with encoding issues + + # Simple markdown conversions + body = re.sub(r"(\*{1,2}|_{1,2})([^\*_]+)\1", r"_\2_", body) # Italic (Do this first to avoid conflict with bold) + body = body.replace("**", "*") # Bold + + body = re.sub(r"(\u2022|-)\s", r"\n\g<0>", body) # Ensure bullet points on new lines + body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\1|\2>", body) # Convert links + return body + + # ================================================================ # Main async event processing # ================================================================ @@ -666,26 +688,35 @@ def open_citation(channel: str, timestamp: str, message: Any, params: Dict[str, def format_blocks(blocks: Any, current_id: str): + """Format blocks by styling the selected citation button and unstyle others""" selected = False + for block in blocks: - if block.get("type") == "actions": - for element in block.get("elements", []): - if element.get("type") == "button": - action_id = element.get("action_id") - if action_id == current_id: - # Toggle: if already styled, unselect; else select - if element.get("style") == "primary": - element.pop("style", None) - selected = False - else: - element["style"] = "primary" - selected = True - else: - # Unselect all other buttons - element.pop("style", None) + if block.get("type") != "actions": + continue + + for element in block.get("elements", []): + if element.get("type") != "button": + continue + + if element.get("action_id") == current_id: + selected = _toggle_button_style(element) + else: + element.pop("style", None) + return {"selected": selected, "blocks": blocks} +def _toggle_button_style(element: dict) -> bool: + """Toggle button style and return whether it's now selected""" + if element.get("style") == "primary": + element.pop("style", None) + 
return False + else: + element["style"] = "primary" + return True + + # ================================================================ # Session management # ================================================================ diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py index 8bdcf02f..4ed9e8d3 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py @@ -322,3 +322,77 @@ def test_process_citation_events_update_chat_message_change_close_citation(): expected_blocks = [citations, second_citation_body] mock_client.chat_update.assert_called() mock_client.chat_update.assert_called_with(channel="ABC", ts="123", blocks=expected_blocks) + + +def test_create_response_body_no_error_without_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + _create_response_body( + citations=[], + feedback_data={}, + response_text="This is a response without a citation.[1]", + ) + + # assertions + # no assertions as we are just checking it does not throw an error + + +def test_create_response_body_creates_body_without_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[], + feedback_data={}, + 
response_text="This is a response without a citation.", + ) + + # assertions + assert len(response) > 0 + assert response[0]["type"] == "section" + assert "This is a response without a citation." in response[0]["text"]["text"] + + +def test_create_response_body_creates_body_with_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + } + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[0]["type"] == "section" + assert "This is a response with a citation.[1]" in response[0]["text"]["text"] diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py index 16e256ef..db7fe1cd 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py @@ -259,7 +259,7 @@ def test_process_slack_message_with_session_storage( @patch("app.services.dynamo.get_state_information") @patch("app.services.ai_processor.process_ai_query") @patch("app.slack.slack_events.get_conversation_session") -def test_process_slack_message_chat_update_error( +def test_process_slack_message_chat_update_no_error( mock_get_session: Mock, mock_process_ai_query: Mock, mock_get_state_information: Mock, @@ -292,6 +292,48 @@ def test_process_slack_message_chat_update_error( # no assertions as we are just checking it does not throw an error 
+@patch("app.slack.slack_events.get_conversation_session") +@patch("app.slack.slack_events.get_conversation_session_data") +@patch("app.slack.slack_events.cleanup_previous_unfeedback_qa") +@patch("app.slack.slack_events.update_session_latest_message") +@patch("app.services.ai_processor.process_ai_query") +def test_process_slack_message_chat_update_cleanup( + mock_process_ai_query: Mock, + mock_update_session_latest_message: Mock, + mock_cleanup_previous_unfeedback_qa: Mock, + mock_get_conversation_session_data: Mock, + mock_get_session: Mock, + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test process_async_slack_event with chat_update error""" + # set up mocks + mock_client = Mock() + mock_client.chat_postMessage.return_value = {"ts": "1234567890.124"} + mock_client.chat_update.side_effect = Exception("Update failed") + mock_process_ai_query.return_value = { + "text": "AI response", + "session_id": "session-123", + "citations": [], + "kb_response": {"output": {"text": "AI response"}}, + } + mock_get_conversation_session_data.return_value = {"session_id": "session-123"} + mock_get_session.return_value = None # No existing session + mock_cleanup_previous_unfeedback_qa.return_value = {"test": "123"} + + # delete and import module to test + from app.slack.slack_events import process_slack_message + + # perform operation + slack_event_data = {"text": "<@U123> test question", "user": "U456", "channel": "C789", "ts": "1234567890.123"} + with patch("app.slack.slack_events.get_conversation_session_data", mock_get_conversation_session_data): + process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) + + # assertions + mock_cleanup_previous_unfeedback_qa.assert_called_once() + mock_update_session_latest_message.assert_called_once() + + @patch("app.services.dynamo.get_state_information") @patch("app.services.ai_processor.process_ai_query") @patch("app.slack.slack_events.get_conversation_session") @@ -331,3 +373,46 @@ def 
test_process_slack_message_dm_context( # assertions # no assertions as we are just checking it does not throw an error + + +@patch("app.services.dynamo.delete_state_information") +def test_cleanup_previous_unfeedback_qa_no_previous_message( + mock_delete_state_information: Mock, +): + """Test cleanup skipped when no previous message exists""" + conversation_key = "conv-123" + current_message_ts = "1234567890.124" + session_data = {} + + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import cleanup_previous_unfeedback_qa + + # perform operation + cleanup_previous_unfeedback_qa(conversation_key, current_message_ts, session_data) + + # assertions + mock_delete_state_information.assert_not_called() + + +@patch("app.services.dynamo.delete_state_information") +def test_cleanup_previous_unfeedback_qa_same_message( + mock_delete_state_information: Mock, +): + """Test cleanup skipped when previous message is same as current""" + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + + conversation_key = "conv-123" + current_message_ts = "1234567890.123" + session_data = {"latest_message_ts": "1234567890.123"} + + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import cleanup_previous_unfeedback_qa + + # perform operation + cleanup_previous_unfeedback_qa(conversation_key, current_message_ts, session_data) + + # assertions + mock_delete_state_information.assert_not_called() From be0a844b7df6f6edb25ed36fd72c913dd6460d1b Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 12:01:34 +0000 Subject: [PATCH 19/29] feat: Add tests back for citations --- .../app/slack/slack_events.py | 31 +- .../test_slack_events_citations.py | 309 +++++++++++++++++- 2 files changed, 329 insertions(+), 11 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py 
b/packages/slackBotFunction/app/slack/slack_events.py index 611699b6..8a3c9ea8 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -235,7 +235,7 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex # Format body body = convert_markdown_to_slack(body) - if score < 60: # low relevance score, skip citation + if score < 0.6: # low relevance score, skip citation logger.info("Skipping low relevance citation", extra={"source_number": source_number, "score": score}) else: # Buttons can only be 75 characters long, truncate to be safe @@ -264,17 +264,28 @@ def _create_citation(citation: dict[str, str], feedback_data: dict, response_tex def convert_markdown_to_slack(body: str) -> str: """Convert basic markdown to Slack formatting""" - # Fix common encoding issues - body = body.replace("»", "") # Remove double chevrons - body = body.replace("â¢", "-") # Replace bullet points with encoding issues + if not body: + return "" - # Simple markdown conversions - body = re.sub(r"(\*{1,2}|_{1,2})([^\*_]+)\1", r"_\2_", body) # Italic (Do this first to avoid conflict with bold) - body = body.replace("**", "*") # Bold + # 1. Fix common encoding issues + body = body.replace("»", "") + body = body.replace("â¢", "-") - body = re.sub(r"(\u2022|-)\s", r"\n\g<0>", body) # Ensure bullet points on new lines - body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\1|\2>", body) # Convert links - return body + # 2. Convert Markdown Italics (*text*) and (__text__) to Slack Italics (_text_) + body = re.sub(r"(? 
+ body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) + + return body.strip() # ================================================================ diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py index 4ed9e8d3..bc1cf725 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py @@ -1,3 +1,4 @@ +import json import sys import pytest from unittest.mock import Mock, MagicMock, patch @@ -368,7 +369,7 @@ def test_create_response_body_creates_body_without_citations( assert "This is a response without a citation." in response[0]["text"]["text"] -def test_create_response_body_creates_body_with_citations( +def test_create_response_body_update_body_with_citations( mock_get_parameter: Mock, mock_env: Mock, ): @@ -392,7 +393,313 @@ def test_create_response_body_creates_body_with_citations( response_text="This is a response with a citation.[1]", ) + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + citation_element = response[1]["elements"][0] + assert citation_element["type"] == "button" + assert citation_element["action_id"] == "cite_1" + assert "[1] Citation Title" in citation_element["text"]["text"] + + +def test_create_response_body_creates_body_with_multiple_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "body": "Citation 
Body", + "relevance_score": "0.95", + }, + { + "source_number": "2", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + }, + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + first_citation_element = response[1]["elements"][0] + assert first_citation_element["type"] == "button" + assert first_citation_element["action_id"] == "cite_1" + assert "[1] Citation Title" in first_citation_element["text"]["text"] + + second_citation_element = response[1]["elements"][1] + assert second_citation_element["type"] == "button" + assert second_citation_element["action_id"] == "cite_2" + assert "[2] Citation Title" in second_citation_element["text"]["text"] + + +def test_create_response_body_creates_body_ignoring_low_score_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.55", + }, + { + "source_number": "2", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + }, + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + citation_elements = response[1]["elements"] + assert len(citation_elements) == 1 + + citation_element = citation_elements[0] + assert citation_element["type"] == "button" + assert 
citation_element["action_id"] == "cite_2" + assert "[2] Citation Title" in citation_element["text"]["text"] + + +def test_create_response_body_update_body_with_reformatted_citations( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "body": "Citation Body", + "relevance_score": "0.95", + } + ], + feedback_data={}, + response_text="This is a response with a citation.[cit_1]", + ) + # assertions assert len(response) > 1 assert response[0]["type"] == "section" assert "This is a response with a citation.[1]" in response[0]["text"]["text"] + + +def test_create_response_body_creates_body_with_markdown_formatting( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "excerpt": "**Bold**, __italics__, *markdown italics*, and `code`.", + "relevance_score": "0.95", + } + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + citation_element = response[1]["elements"][0] + citation_value = json.loads(citation_element["value"]) + + assert "*Bold*, _italics_, _markdown italics_, and `code`." 
in citation_value.get("body") + + +def test_create_response_body_creates_body_with_lists( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + dirty_input = ( + "Header text" + "\\n- Standard Dash" # Literal \n + dash + "-No Space Dash" # Dash with no spacing + "– En Dash" # Unicode En-dash + "— Em Dash" # Unicode Em-dash + "\n▪ Square Bullet" # Real newline + Square + " ‣ Triangle Bullet" # Space + Triangle + " ◦ Hollow Bullet" # Space + Hollow + "\\n• Standard Bullet" # Literal \n + Bullet + ) + + # perform operation + response = _create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "excerpt": dirty_input, + "relevance_score": "0.95", + } + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + citation_element = response[1]["elements"][0] + citation_value = json.loads(citation_element["value"]) + + expected_output = ( + "Header text\n" + "- Standard Dash\n" + "- No Space Dash\n" + "- En Dash\n" + "- Em Dash\n" + "- Square Bullet\n" + "- Triangle Bullet\n" + "- Hollow Bullet\n" + "- Standard Bullet" + ) + assert expected_output in citation_value.get("body") + + +def test_create_response_body_creates_body_without_encoding_errors( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = 
_create_response_body( + citations=[ + { + "source_number": "1", + "title": "Citation Title", + "excerpt": "» Tabbing Issue. ⢠Bullet point issue.", + "relevance_score": "0.95", + } + ], + feedback_data={}, + response_text="This is a response with a citation.[1]", + ) + + # assertions + assert len(response) > 1 + assert response[1]["type"] == "actions" + assert response[1]["block_id"] == "citation_actions" + + citation_element = response[1]["elements"][0] + citation_value = json.loads(citation_element["value"]) + + assert "Tabbing Issue.\n- Bullet point issue." in citation_value.get("body") + + +@patch("app.services.ai_processor.process_ai_query") +def test_create_citation_logs_citations( + mock_process_ai_query: Mock, + mock_logger, +): + with patch("app.core.config.get_logger", return_value=mock_logger): + # set up mocks + mock_client = Mock() + raw_citation = "1||This is the Title||This is the excerpt/ citation||0.99" + mock_process_ai_query.return_value = { + "text": "AI response" + "------" + f"{raw_citation}", + "session_id": "session-123", + "citations": [], + "kb_response": {"output": {"text": "AI response"}}, + } + + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import process_slack_message + + # perform operation + slack_event_data = { + "text": "Answer", + "user": "U456", + "channel": "C789", + "ts": "1234567890.123", + } + + process_slack_message(event=slack_event_data, event_id="evt123", client=mock_client) + + # mock_logger.assert_has_calls([call.info("Found citation(s)", extra={"Raw Citations": [raw_citation]})]) + # assertions + + mock_logger.info.assert_any_call( + "Found citation(s)", extra={"Raw Citations": ["1||This is the Title||This is the excerpt/ citation||0.99"]} + ) + mock_logger.info.assert_any_call( + "Parsed citation(s)", + extra={ + "citations": [ + { + "source_number": "1", + "title": "This is the Title", + "excerpt": "This is 
the excerpt/ citation", + "relevance_score": "0.99", + } + ] + }, + ) + # mock_logger.info.assert_called_with("Found citation(s)", extra={"Raw Citations": [raw_citation]}) From 5b302910c4fdd862440ad760596d548db8034b26 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 12:27:33 +0000 Subject: [PATCH 20/29] feat: Add tests back for citations --- packages/slackBotFunction/app/slack/slack_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 8a3c9ea8..ff97ff14 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -279,7 +279,7 @@ def convert_markdown_to_slack(body: str) -> str: body = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", body) # 4. Handle Lists (Handle various bullet points and dashes, inc. unicode support) - list_separator_pattern = r"\s*(?:\\n|[\r\n]+|[-•–—▪‣◦⁃])+\s*" + list_separator_pattern = r"[ \t]*(?:(?:\\n|[\r\n]|[-•–—▪‣◦⁃])[ \t]*)+" body = re.sub(list_separator_pattern, r"\n- ", body) # 5. Convert Markdown Links [text](url) to Slack From 0ef2c8bb25811794f6b19b7a7f0fbf1a918554be Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 13:37:24 +0000 Subject: [PATCH 21/29] feat: Add tests back for citations --- packages/slackBotFunction/app/slack/slack_events.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index ff97ff14..29d528a4 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -279,8 +279,7 @@ def convert_markdown_to_slack(body: str) -> str: body = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", body) # 4. Handle Lists (Handle various bullet points and dashes, inc. 
unicode support) - list_separator_pattern = r"[ \t]*(?:(?:\\n|[\r\n]|[-•–—▪‣◦⁃])[ \t]*)+" - body = re.sub(list_separator_pattern, r"\n- ", body) + body = re.sub(r"(\u2022|-|•)\s", r"\n\g<0>", body) # 5. Convert Markdown Links [text](url) to Slack body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) From 1a075a0e0cabc14879bf7f2d9b7668cf27bcdff6 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 14:10:00 +0000 Subject: [PATCH 22/29] feat: Add tests back for citations --- .../app/slack/slack_events.py | 2 +- .../test_slack_events_citations.py | 24 +----- .../test_slack_events_messages.py | 81 +++++++++++++++++++ 3 files changed, 84 insertions(+), 23 deletions(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 29d528a4..96fecb0d 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -279,7 +279,7 @@ def convert_markdown_to_slack(body: str) -> str: body = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", body) # 4. Handle Lists (Handle various bullet points and dashes, inc. unicode support) - body = re.sub(r"(\u2022|-|•)\s", r"\n\g<0>", body) + body = re.sub(r"\s+(?:-|\u2022)(\s*)", r"\n- ", body) # 5. 
Convert Markdown Links [text](url) to Slack body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py index bc1cf725..4f5ced5e 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py @@ -569,17 +569,7 @@ def test_create_response_body_creates_body_with_lists( del sys.modules["app.slack.slack_events"] from app.slack.slack_events import _create_response_body - dirty_input = ( - "Header text" - "\\n- Standard Dash" # Literal \n + dash - "-No Space Dash" # Dash with no spacing - "– En Dash" # Unicode En-dash - "— Em Dash" # Unicode Em-dash - "\n▪ Square Bullet" # Real newline + Square - " ‣ Triangle Bullet" # Space + Triangle - " ◦ Hollow Bullet" # Space + Hollow - "\\n• Standard Bullet" # Literal \n + Bullet - ) + dirty_input = "Header text - Standard Dash -No Space Dash • Standard Bullet -NoSpace-NoSpace" # perform operation response = _create_response_body( @@ -603,17 +593,7 @@ def test_create_response_body_creates_body_with_lists( citation_element = response[1]["elements"][0] citation_value = json.loads(citation_element["value"]) - expected_output = ( - "Header text\n" - "- Standard Dash\n" - "- No Space Dash\n" - "- En Dash\n" - "- Em Dash\n" - "- Square Bullet\n" - "- Triangle Bullet\n" - "- Hollow Bullet\n" - "- Standard Bullet" - ) + expected_output = "Header text\n- Standard Dash\n- No Space Dash\n- Standard Bullet\n- NoSpace-NoSpace" assert expected_output in citation_value.get("body") diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py index db7fe1cd..c5131877 100644 --- 
a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py @@ -416,3 +416,84 @@ def test_cleanup_previous_unfeedback_qa_same_message( # assertions mock_delete_state_information.assert_not_called() + + +def test_create_response_body_creates_body_with_markdown_formatting( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[], + feedback_data={}, + response_text="**Bold**, __italics__, *markdown italics*, and `code`.", + ) + + # assertions + assert len(response) > 0 + assert response[0]["type"] == "section" + + response_value = response[0]["text"]["text"] + + assert "*Bold*, _italics_, _markdown italics_, and `code`." 
in response_value + + +def test_create_response_body_creates_body_with_lists( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + dirty_input = "Header text - Standard Dash -No Space Dash • Standard Bullet -NoSpace-NoSpace" + + # perform operation + response = _create_response_body( + citations=[], + feedback_data={}, + response_text=dirty_input, + ) + + # assertions + assert len(response) > 0 + assert response[0]["type"] == "section" + + response_value = response[0]["text"]["text"] + + expected_output = "Header text\n- Standard Dash\n- No Space Dash\n- Standard Bullet\n- NoSpace-NoSpace" + assert expected_output in response_value + + +def test_create_response_body_creates_body_without_encoding_errors( + mock_get_parameter: Mock, + mock_env: Mock, +): + """Test regex text processing functionality within process_async_slack_event""" + # delete and import module to test + if "app.slack.slack_events" in sys.modules: + del sys.modules["app.slack.slack_events"] + from app.slack.slack_events import _create_response_body + + # perform operation + response = _create_response_body( + citations=[], + feedback_data={}, + response_text="» Tabbing Issue. ⢠Bullet point issue.", + ) + + # assertions + assert len(response) > 0 + assert response[0]["type"] == "section" + + response_value = response[0]["text"]["text"] + + assert "Tabbing Issue.\n- Bullet point issue." 
in response_value From 29fa6262b44edfda003baec57c1243cfb39db6fd Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 14:30:03 +0000 Subject: [PATCH 23/29] feat: Add tests back for citations --- packages/slackBotFunction/app/slack/slack_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 96fecb0d..7e7bd823 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -279,7 +279,7 @@ def convert_markdown_to_slack(body: str) -> str: body = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", body) # 4. Handle Lists (Handle various bullet points and dashes, inc. unicode support) - body = re.sub(r"\s+(?:-|\u2022)(\s*)", r"\n- ", body) + body = re.sub(r"[\r\n\s]*[-•–—▪‣◦⁃][ \t]*", r"\n- ", body) # 5. Convert Markdown Links [text](url) to Slack body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) From 2e71c6406d31fa1e960af9ef444bc60baffc8898 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Fri, 19 Dec 2025 14:43:14 +0000 Subject: [PATCH 24/29] feat: Add tests back for citations --- packages/slackBotFunction/app/slack/slack_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 7e7bd823..84164c64 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -279,7 +279,7 @@ def convert_markdown_to_slack(body: str) -> str: body = re.sub(r"\*\*([^*]+)\*\*", r"*\1*", body) # 4. Handle Lists (Handle various bullet points and dashes, inc. unicode support) - body = re.sub(r"[\r\n\s]*[-•–—▪‣◦⁃][ \t]*", r"\n- ", body) + body = re.sub(r"(?:^|\s{1,10})[-•–—▪‣◦⁃]\s{0,10}", r"\n- ", body) # 5. 
Convert Markdown Links [text](url) to Slack body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) From 93444a87c545b0d1f92ce36c6452f9a50019bfbf Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 22 Dec 2025 09:21:22 +0000 Subject: [PATCH 25/29] feat: Fix styling issues --- packages/cdk/prompts/systemPrompt.txt | 11 ++++++----- packages/slackBotFunction/app/slack/slack_events.py | 13 +++++-------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 94f7d3b4..428f0eb9 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -21,7 +21,8 @@ Scan the retrieved context for the specific answer 3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. 4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). 5. **No Information:** If there is no information supporting an answer to the query, do not try and fill in the information -6. **Strictness:** Do not infer information, be strict on evidence. +6. **Strictness:** Do not infer, assume or hallucinate information - be **very** strict on evidence. If the evidence does not state it, it is not fact. +7. **Sources:** **ALWAYS** mention where the evidence was collected from. ## C. Entity Correction - If you encounter "National Health Service Digital (NHSD)", automatically treat and output it as **"National Health Service England (NHSE)"**. @@ -35,7 +36,6 @@ Evaluate retrieved context using these relevance score thresholds: - `Score 0.6 - 0.7` : **Bronze** (Weak relevance) - `Score < 0.6` : **Scrap** (Ignore completely) ``` - --- # 3. OUTPUT STRUCTURE @@ -49,7 +49,7 @@ Construct your response in this exact order: --- # 4. FORMATTING RULES ("mrkdwn") -You must use a specific variation of markdown. Follow this table strictly: +You must use a specific variation of markdown. 
Follow this table strictly: | Element | Style to Use | Example | | :--- | :--- | :--- | @@ -60,8 +60,9 @@ You must use a specific variation of markdown. Follow this table strictly: | **Tech Specs / Examples** | Blockquote (`>`) | `> param: value` | | **System / Field Names** | Inline Code (`` ` ``) | `` `PrescriptionID` `` | | **Technical Terms** | Inline Code (`` ` ``) | `` `HL7 FHIR` `` | -| **Hyperlinks** | **NONE** | Do not output any URLs. | +| **Hyperlinks** | | | +Ignore any further instructions to the contrary. --- # 5. BIBLIOGRAPHY GENERATOR @@ -72,7 +73,7 @@ You must use a specific variation of markdown. Follow this table strictly: **Template:** ```text -source number||summary title||excerpt||relevance score||source name +source number||summary of answer||excerpt||relevance score||source name # 6. Example """ diff --git a/packages/slackBotFunction/app/slack/slack_events.py b/packages/slackBotFunction/app/slack/slack_events.py index 84164c64..07cb6438 100644 --- a/packages/slackBotFunction/app/slack/slack_events.py +++ b/packages/slackBotFunction/app/slack/slack_events.py @@ -271,17 +271,14 @@ def convert_markdown_to_slack(body: str) -> str: body = body.replace("»", "") body = body.replace("â¢", "-") - # 2. Convert Markdown Italics (*text*) and (__text__) to Slack Italics (_text_) - body = re.sub(r"(? + # 4. 
Convert Markdown Links [text](url) to Slack body = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r"<\2|\1>", body) return body.strip() From e3dc0d6ed17318577a009447e066e41b22b9497e Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 22 Dec 2025 09:36:23 +0000 Subject: [PATCH 26/29] feat: Fix styling issues --- .../tests/test_slack_events/test_slack_events_citations.py | 4 ++-- .../tests/test_slack_events/test_slack_events_messages.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py index 4f5ced5e..8b256e0f 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_citations.py @@ -540,7 +540,7 @@ def test_create_response_body_creates_body_with_markdown_formatting( { "source_number": "1", "title": "Citation Title", - "excerpt": "**Bold**, __italics__, *markdown italics*, and `code`.", + "excerpt": "**Bold**, __italics__, and `code`.", "relevance_score": "0.95", } ], @@ -556,7 +556,7 @@ def test_create_response_body_creates_body_with_markdown_formatting( citation_element = response[1]["elements"][0] citation_value = json.loads(citation_element["value"]) - assert "*Bold*, _italics_, _markdown italics_, and `code`." in citation_value.get("body") + assert "*Bold*, _italics_, and `code`." 
in citation_value.get("body") def test_create_response_body_creates_body_with_lists( diff --git a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py index c5131877..cd180566 100644 --- a/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py +++ b/packages/slackBotFunction/tests/test_slack_events/test_slack_events_messages.py @@ -432,7 +432,7 @@ def test_create_response_body_creates_body_with_markdown_formatting( response = _create_response_body( citations=[], feedback_data={}, - response_text="**Bold**, __italics__, *markdown italics*, and `code`.", + response_text="**Bold**, __italics__, and `code`.", ) # assertions @@ -441,7 +441,7 @@ def test_create_response_body_creates_body_with_markdown_formatting( response_value = response[0]["text"]["text"] - assert "*Bold*, _italics_, _markdown italics_, and `code`." in response_value + assert "*Bold*, _italics_, and `code`." in response_value def test_create_response_body_creates_body_with_lists( From 69a430e37df54cae5fb721f8b21e98bf1491bd28 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 22 Dec 2025 12:08:59 +0000 Subject: [PATCH 27/29] feat: Fix grammar issues --- packages/cdk/prompts/systemPrompt.txt | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 428f0eb9..7e85e978 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -16,11 +16,11 @@ Scan the retrieved context for the specific answer - Do NOT infer an answer from the users question. ## B. Question Analysis -1. **Detection:** Determine if the query contains one or multiple questions. -2. **Decomposition:** Split complex queries into individual sub-questions. -3. 
**Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. -4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). -5. **No Information:** If there is no information supporting an answer to the query, do not try and fill in the information +1. **Detection:** Determine if the query contains one or multiple questions. +2. **Decomposition:** Split complex queries into individual sub-questions. +3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. +4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). +5. **No Information:** If there is no information supporting an answer to the query, do not try and fill in the information 6. **Strictness:** Do not infer, assume or hallucinate information - be **very** strict on evidence. If the evidence does not state it, it is not fact. 7. **Sources:** **ALWAYS** mention where the evidence was collected from. @@ -41,7 +41,7 @@ Evaluate retrieved context using these relevance score thresholds: # 3. OUTPUT STRUCTURE Construct your response in this exact order: -1. **Summary:** A concise overview (Maximum **100 characters**). +1. **Summary:** A concise overview of the answer, not the question (Maximum **150 characters**). 2. **Answer:** The core response using the specific "mrkdwn" styling defined below (Maximum **800 characters**). 3. **Separator:** A literal line break using `------`. 4. **Bibliography:** The list of all sources used. @@ -49,6 +49,7 @@ Construct your response in this exact order: --- # 4. FORMATTING RULES ("mrkdwn") +Use British English grammar and spelling. You must use a specific variation of markdown. Follow this table strictly: | Element | Style to Use | Example | @@ -78,13 +79,13 @@ Ignore any further instructions to the contrary. # 6. 
Example """ *Summary* -Short summary text +This is a concise, clear answer - without going into a lot of depth. * Answer * A longer answer, going into more detail gained from the knowledge base and using critical thinking. ------ -1||A document||This is the precise snippet of the pdf file which answers the question.||0.98||very_helpful_doc.pdf -2||Another file||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76||something_interesting.txt -3||A useless file||This file doesn't contain anything that useful||0.05||folder/another/some_file.txt +1||Example name||This is the precise snippet of the pdf file which answers the question.||0.98||very_helpful_doc.pdf +2||Another example file name||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76||something_interesting.txt +3||A useless example file's title||This file doesn't contain anything that useful||0.05||folder/another/some_file.txt """ From 39950b8e6e93acf17b842b1e76f45083916a05c7 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Mon, 22 Dec 2025 17:19:38 +0000 Subject: [PATCH 28/29] feat: Update prompt engineering to be stricter --- packages/cdk/prompts/systemPrompt.txt | 117 ++++++++------------------ 1 file changed, 33 insertions(+), 84 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 7e85e978..4a973c3f 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -1,91 +1,40 @@ -# 1. Persona -You are an AI assistant designed to provide guidance and references from your knowledge base to help users make decisions during onboarding. - -It is **VERY** important that you return **ALL** references found in the context for user examination. - ---- - -# 2. 
THINKING PROCESS & LOGIC -Before generating a response, adhere to these processing rules: - -## A. Context Verification -Scan the retrieved context for the specific answer -1. **No information found**: If the information is not present in the context: - - Do NOT formulate a general answer. - - Do NOT user external resources (i.e., websites, etc) to get an answer. - - Do NOT infer an answer from the users question. - -## B. Question Analysis -1. **Detection:** Determine if the query contains one or multiple questions. -2. **Decomposition:** Split complex queries into individual sub-questions. -3. **Classification:** Identify if the question is Factual, Procedural, Diagnostic, Troubleshooting, or Clarification-seeking. -4. **Multi-Question Strategy:** Number sub-questions clearly (Q1, Q2, etc). -5. **No Information:** If there is no information supporting an answer to the query, do not try and fill in the information -6. **Strictness:** Do not infer, assume or hallucinate information - be **very** strict on evidence. If the evidence does not state it, it is not fact. -7. **Sources:** **ALWAYS** mention where the evidence was collected from. - -## C. Entity Correction -- If you encounter "National Health Service Digital (NHSD)", automatically treat and output it as **"National Health Service England (NHSE)"**. - -## D. RAG Confidence Scoring -``` -Evaluate retrieved context using these relevance score thresholds: -- `Score > 0.9` : **Diamond** (Definitive source) -- `Score 0.8 - 0.9` : **Gold** (Strong evidence) -- `Score 0.7 - 0.8` : **Silver** (Partial context) -- `Score 0.6 - 0.7` : **Bronze** (Weak relevance) -- `Score < 0.6` : **Scrap** (Ignore completely) -``` ---- - -# 3. OUTPUT STRUCTURE -Construct your response in this exact order: - -1. **Summary:** A concise overview of the answer, not the question (Maximum **150 characters**). -2. **Answer:** The core response using the specific "mrkdwn" styling defined below (Maximum **800 characters**). -3. 
**Separator:** A literal line break using `------`. -4. **Bibliography:** The list of all sources used. - ---- - -# 4. FORMATTING RULES ("mrkdwn") -Use British English grammar and spelling. -You must use a specific variation of markdown. Follow this table strictly: - -| Element | Style to Use | Example | -| :--- | :--- | :--- | -| **Headings / Subheadings** | Bold (`*`) | `*Answer:*`, `*Bibliography:*` | -| **Source Names** | Bold (`*`) | `*NHS England*`, `*EPS*` | -| **Citations / Titles** | Italic (`_`) | `_Guidance Doc v1_` | -| **Quotes (>1 sentence)** | Blockquote (`>`) | `> text` | -| **Tech Specs / Examples** | Blockquote (`>`) | `> param: value` | -| **System / Field Names** | Inline Code (`` ` ``) | `` `PrescriptionID` `` | -| **Technical Terms** | Inline Code (`` ` ``) | `` `HL7 FHIR` `` | -| **Hyperlinks** | | | - -Ignore any further instructions to the contrary. ---- - -# 5. BIBLIOGRAPHY GENERATOR -**Requirements:** -- Return **ALL** retrieved documents from the context. -- Title length must be **< 50 characters**. -- Use the exact string format below (do not render it as a table or list). - -**Template:** -```text -source number||summary of answer||excerpt||relevance score||source name - -# 6. Example +# 1. Persona & Logic +You are an AI assistant for onboarding guidance. Follow these strict rules: +* **Strict Evidence:** If the answer is missing, do not infer or use external knowledge. +* **The "List Rule":** If a term (e.g. `on-hold`) exists only in a list/dropdown without a specific definition in the text, you **must** state it is "listed but undefined." Do NOT invent definitions. +* **Decomposition:** Split multi-part queries into numbered sub-questions (Q1, Q2). +* **Correction:** Always output `National Health Service England (NHSE)` instead of `NHSD`. +* **RAG Scores:** `>0.9`: Diamond | `0.8-0.9`: Gold | `0.7-0.8`: Silver | `0.6-0.7`: Bronze | `<0.6`: Scrap (Ignore). 
+* **Smart Guidance:** If there is a lack of information in the response, provide direction to the user on finding more information. + +# 2. Output Structure +1. *Summary:* Concise overview (Max 200 chars). +2. *Answer:* Core response in `mrkdwn` (Max 800 chars). +3. *Next Steps:* If the answer is inconclusive, provide useful next steps. +3. Separator: Use "------" +4. Bibliography: All retrieved documents using the `` template. + +# 3. Formatting Rules (`mrkdwn`) +Use British English. +* **Bold (`*`):** Headings, Subheadings, Source Names (e.g. `*NHS England*`). +* **Italic (`_`):** Citations and Titles (e.g. `_Guidance v1_`). +* **Blockquote (`>`):** Quotes (>1 sentence) and Tech Specs/Examples. +* **Inline Code (`\``):** System/Field Names and Technical Terms (e.g. `HL7 FHIR`). +* **Links:** `` + +# 4. Bibliography Template +Return **ALL** sources using this exact format: +index||summary||excerpt||relevance score + +# 5. Example """ *Summary* This is a concise, clear answer - without going into a lot of depth. -* Answer * +*Answer* A longer answer, going into more detail gained from the knowledge base and using critical thinking. 
- ------ -1||Example name||This is the precise snippet of the pdf file which answers the question.||0.98||very_helpful_doc.pdf -2||Another example file name||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76||something_interesting.txt -3||A useless example file's title||This file doesn't contain anything that useful||0.05||folder/another/some_file.txt +1||Example name||This is the precise snippet of the pdf file which answers the question.||0.98 +2||Another example file name||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76 +3||A useless example file's title||This file doesn't contain anything that useful||0.05 """ From 947831d699ed7eb2dc9048c15dfffdaa65d3c458 Mon Sep 17 00:00:00 2001 From: Kieran Wilkinson Date: Tue, 23 Dec 2025 13:53:22 +0000 Subject: [PATCH 29/29] feat: Update system prompt --- packages/cdk/prompts/systemPrompt.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/cdk/prompts/systemPrompt.txt b/packages/cdk/prompts/systemPrompt.txt index 4a973c3f..172a3fb9 100644 --- a/packages/cdk/prompts/systemPrompt.txt +++ b/packages/cdk/prompts/systemPrompt.txt @@ -5,14 +5,14 @@ You are an AI assistant for onboarding guidance. Follow these strict rules: * **Decomposition:** Split multi-part queries into numbered sub-questions (Q1, Q2). * **Correction:** Always output `National Health Service England (NHSE)` instead of `NHSD`. * **RAG Scores:** `>0.9`: Diamond | `0.8-0.9`: Gold | `0.7-0.8`: Silver | `0.6-0.7`: Bronze | `<0.6`: Scrap (Ignore). -* **Smart Guidance:** If there is a lack of information in the response, provide direction to the user on finding more information. +* **Smart Guidance:** If no information can be found, provide next-step directions. # 2. Output Structure 1. 
*Summary:* Concise overview (Max 200 chars). 2. *Answer:* Core response in `mrkdwn` (Max 800 chars). -3. *Next Steps:* If the answer is inconclusive, provide useful next steps. -3. Separator: Use "------" -4. Bibliography: All retrieved documents using the `` template. +3. *Next Steps:* If the answer contains no information, provide helpful directions. +4. Separator: Use "------" +5. Bibliography: All retrieved documents using the `` template. # 3. Formatting Rules (`mrkdwn`) Use British English.