Closed
Commits
29 commits
1f5d6c4
feat: Add tests for opening, closing and changing citations
kieran-wilkinson-4 Dec 15, 2025
4bc52c6
feat: Clean up tests
kieran-wilkinson-4 Dec 15, 2025
ee4477c
feat: Add debugging logging for invoking bot
kieran-wilkinson-4 Dec 15, 2025
cd3375f
feat: Get ai agent from bedrock
kieran-wilkinson-4 Dec 16, 2025
3df89d5
feat: Use citations from response
kieran-wilkinson-4 Dec 16, 2025
68e56a6
feat: Use citations from response
kieran-wilkinson-4 Dec 16, 2025
21a1fcc
feat: Use citations from response
kieran-wilkinson-4 Dec 16, 2025
9f4e2d1
feat: Trim messages to less than 1000
kieran-wilkinson-4 Dec 16, 2025
5d39a6f
feat: Remove table and reformat body
kieran-wilkinson-4 Dec 17, 2025
94edab0
feat: Remove table and reformat body
kieran-wilkinson-4 Dec 17, 2025
0226022
feat: Remove table and reformat body
kieran-wilkinson-4 Dec 17, 2025
2b605d0
feat: remove orchestration
kieran-wilkinson-4 Dec 17, 2025
38a6776
feat: roll back citation handling
kieran-wilkinson-4 Dec 18, 2025
47ce926
feat: Reduce citations, remove links and add score
kieran-wilkinson-4 Dec 18, 2025
79e5c92
feat: Reduce citations, remove links and add score
kieran-wilkinson-4 Dec 18, 2025
aaf539a
feat: Reduce citations, remove links and add score
kieran-wilkinson-4 Dec 18, 2025
e586855
feat: Add tests back in
kieran-wilkinson-4 Dec 18, 2025
fb549be
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
be0a844
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
5b30291
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
0ef2c8b
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
1a075a0
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
29fa626
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
2e71c64
feat: Add tests back for citations
kieran-wilkinson-4 Dec 19, 2025
93444a8
feat: Fix styling issues
kieran-wilkinson-4 Dec 22, 2025
e3dc0d6
feat: Fix styling issues
kieran-wilkinson-4 Dec 22, 2025
69a430e
feat: Fix grammar issues
kieran-wilkinson-4 Dec 22, 2025
39950b8
feat: Update prompt engineering to be stricter
kieran-wilkinson-4 Dec 22, 2025
947831d
feat: Update system prompt
kieran-wilkinson-4 Dec 23, 2025
79 changes: 35 additions & 44 deletions packages/cdk/prompts/systemPrompt.txt
@@ -1,49 +1,40 @@
You are an AI assistant designed to provide guidance and references from your knowledge base to help users make decisions when onboarding. It is *VERY* important you return *ALL* references, for user examination.
# 1. Persona & Logic
You are an AI assistant for onboarding guidance. Follow these strict rules:
* **Strict Evidence:** If the answer is missing, do not infer or use external knowledge.
* **The "List Rule":** If a term (e.g. `on-hold`) exists only in a list/dropdown without a specific definition in the text, you **must** state it is "listed but undefined." Do NOT invent definitions.
* **Decomposition:** Split multi-part queries into numbered sub-questions (Q1, Q2).
* **Correction:** Always output `National Health Service England (NHSE)` instead of `NHSD`.
* **RAG Scores:** `>0.9`: Diamond | `0.8-0.9`: Gold | `0.7-0.8`: Silver | `0.6-0.7`: Bronze | `<0.6`: Scrap (Ignore).
* **Smart Guidance:** If no information can be found, provide next step direction.

# Response
## Response Structure
- *Summary*: 100 characters maximum, capturing core answer
- *Answer* (use "mrkdown") (< 800 characters)
- Page break (use `------`)
- \[Bibliography\]
# 2. Output Structure
1. *Summary:* Concise overview (Max 200 chars).
2. *Answer:* Core response in `mrkdwn` (Max 800 chars).
3. *Next Steps:* If no information can be found, provide helpful next-step directions.
4. Separator: Use "------"
5. Bibliography: All retrieved documents using the `<cit>` template.

## Formatting ("mrkdwn")
a. *Bold* for:
- Headings, subheadings: *Answer:*, *Bibliography:*
- Source names: *NHS England*, *EPS*
b. _Italic_ for:
- Citations, references, document titles
c. Block Quotes for:
- Direct quotes >1 sentence
- Technical specifications, parameters
- Examples
d. `Inline code` for:
- System names, field names: `PrescriptionID`
- Short technical terms: `HL7 FHIR`
e. Links:
- Do not provide links
# 3. Formatting Rules (`mrkdwn`)
Use British English.
* **Bold (`*`):** Headings, Subheadings, Source Names (e.g. `*NHS England*`).
* **Italic (`_`):** Citations and Titles (e.g. `_Guidance v1_`).
* **Blockquote (`>`):** Quotes (>1 sentence) and Tech Specs/Examples.
* **Inline Code (`\``):** System/Field Names and Technical Terms (e.g. `HL7 FHIR`).
* **Links:** `<text|link>`

# Thinking
## Question Handling
- Detect whether the query contains one or multiple questions
- Split complex queries into individual sub-questions
- Identify question type: factual, procedural, diagnostic, troubleshooting, or clarification-seeking
- For multi-question queries: number sub-questions clearly (Q1, Q2, etc)
# 4. Bibliography Template
Return **ALL** sources using this exact format:
<cit>index||summary||excerpt||relevance score</cit>

## RAG & Knowledge Base Integration
- Relevance threshold handling:
- Score > 0.85 (High confidence)
- Score 0.70 - 0.85 (Medium confidence)
- Score < 0.70 (Low confidence)
# 5. Example
"""
*Summary*
This is a concise, clear answer - without going into a lot of depth.

## Corrections
- Change _National Health Service Digital (NHSD)_ references to _National Health Service England (NHSE)_

# Bibliography
## Format
<cit>source number||summary title||link||filename||text snippet||reasoning</cit>\n

## Requirements
- Return **ALL** retrieved documents, their name and a text snippet, from "CONTEXT"
- Get full text references from search results for Bibliography
- Title should be less than 50 characters
*Answer*
A longer answer, going into more detail gained from the knowledge base and using critical thinking.
------
<cit>1||Example name||This is the precise snippet of the pdf file which answers the question.||0.98</cit>
<cit>2||Another example file name||A 500 word text excerpt which gives some inference to the answer, but the long citation helps fill in the information for the user, so it's worth the tokens.||0.76</cit>
<cit>3||A useless example file's title||This file doesn't contain anything that useful||0.05</cit>
"""
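To show how the new `||`-delimited `<cit>` bibliography template can be consumed downstream, here is a minimal sketch. `parse_citations` is a hypothetical helper, not part of this PR; the 0.6 cut-off mirrors the prompt's "Scrap (Ignore)" threshold.

```python
import re

# One group per field of the template: index||summary||excerpt||relevance score
CIT_RE = re.compile(r"<cit>(.*?)\|\|(.*?)\|\|(.*?)\|\|(.*?)</cit>", re.DOTALL)

def parse_citations(text: str, min_score: float = 0.6) -> list[dict]:
    """Parse <cit>...</cit> entries from a model response,
    dropping anything below the 'Scrap' relevance threshold."""
    citations = []
    for index, summary, excerpt, score in CIT_RE.findall(text):
        relevance = float(score)
        if relevance >= min_score:
            citations.append({
                "index": int(index),
                "summary": summary.strip(),
                "excerpt": excerpt.strip(),
                "score": relevance,
            })
    return citations
```

With the three example citations from the prompt above, this keeps the 0.98 and 0.76 entries and drops the 0.05 one.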
6 changes: 2 additions & 4 deletions packages/cdk/prompts/userPrompt.txt
@@ -1,6 +1,4 @@
# QUERY
{{user_query}}
<user_query>{{user_query}}</user_query>

# CONTEXT
## Results $search_results$
## LIST ALL RESULTS IN TABLE
<search_results>$search_results$</search_results>
14 changes: 6 additions & 8 deletions packages/cdk/resources/BedrockPromptResources.ts
@@ -20,14 +20,12 @@ export class BedrockPromptResources extends Construct {
constructor(scope: Construct, id: string, props: BedrockPromptResourcesProps) {
super(scope, id)

// Nova Pro is recommended for text generation tasks requiring high accuracy and complex understanding.
const novaProModel = BedrockFoundationModel.AMAZON_NOVA_PRO_V1
// Nova Lite is recommended for tasks
const novaLiteModel = BedrockFoundationModel.AMAZON_NOVA_LITE_V1
const ragModel = new BedrockFoundationModel("meta.llama3-70b-instruct-v1:0")
const reformulationModel = BedrockFoundationModel.AMAZON_NOVA_LITE_V1

const queryReformulationPromptVariant = PromptVariant.text({
variantName: "default",
model: novaLiteModel,
model: reformulationModel,
promptVariables: ["topic"],
promptText: props.settings.reformulationPrompt.text
})
@@ -41,7 +39,7 @@

const ragResponsePromptVariant = PromptVariant.chat({
variantName: "default",
model: novaProModel,
model: ragModel,
promptVariables: ["query", "search_results"],
system: props.settings.systemPrompt.text,
messages: [props.settings.userPrompt]
@@ -59,8 +57,8 @@
})

// expose model IDs for use in Lambda environment variables
this.ragModelId = novaProModel.modelId
this.queryReformulationModelId = novaLiteModel.modelId
this.ragModelId = ragModel.modelId
this.queryReformulationModelId = reformulationModel.modelId

this.queryReformulationPrompt = queryReformulationPrompt
this.ragResponsePrompt = ragPrompt
19 changes: 5 additions & 14 deletions packages/slackBotFunction/app/services/bedrock.py
@@ -42,8 +42,10 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat
"type": "KNOWLEDGE_BASE",
"knowledgeBaseConfiguration": {
"knowledgeBaseId": config.KNOWLEDGEBASE_ID,
"modelArn": config.RAG_MODEL_ID,
"retrievalConfiguration": {"vectorSearchConfiguration": {"numberOfResults": 5}},
"modelArn": prompt_template.get("model_id", config.RAG_MODEL_ID),
"retrievalConfiguration": {
"vectorSearchConfiguration": {"numberOfResults": 5, "overrideSearchType": "SEMANTIC"}
},
"generationConfiguration": {
"guardrailConfiguration": {
"guardrailId": config.GUARD_RAIL_ID,
@@ -58,16 +60,6 @@
}
},
},
"orchestrationConfiguration": {
"inferenceConfig": {
"textInferenceConfig": {
**inference_config,
"stopSequences": [
"Human:",
],
}
},
},
},
},
}
@@ -87,6 +79,7 @@ def query_bedrock(user_query: str, session_id: str = None) -> RetrieveAndGenerat
else:
logger.info("Starting new conversation")

logger.debug("Retrieve and Generate", extra={"params": request_params})
response = client.retrieve_and_generate(**request_params)
logger.info(
"Got Bedrock response",
@@ -100,10 +93,8 @@ def invoke_model(prompt: str, model_id: str, client: BedrockRuntimeClient, infer
modelId=model_id,
body=json.dumps(
{
"anthropic_version": "bedrock-2023-05-31",
"temperature": inference_config["temperature"],
"top_p": inference_config["topP"],
"top_k": 50,
"max_tokens": inference_config["maxTokens"],
"messages": [{"role": "user", "content": prompt}],
}
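For context on the retrieval change in this file, a minimal sketch of the vector-search override added to the request (field names as in the Bedrock `retrieve_and_generate` API; `overrideSearchType` accepts `SEMANTIC` or `HYBRID`):

```python
# Retrieval settings added in this diff: a fixed result count plus a forced
# semantic (pure vector) search, instead of letting Bedrock choose hybrid.
retrieval_configuration = {
    "vectorSearchConfiguration": {
        "numberOfResults": 5,
        "overrideSearchType": "SEMANTIC",  # or "HYBRID"
    }
}
```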
16 changes: 11 additions & 5 deletions packages/slackBotFunction/app/services/prompt_loader.py
@@ -92,23 +92,28 @@ def load_prompt(prompt_name: str, prompt_version: str = None) -> dict:

logger.info(
f"Loading prompt '{prompt_name}' (ID: {prompt_id})",
extra={"prompt_name": prompt_name, "prompt_id": prompt_id, "prompt_version": prompt_version},
extra={"prompt_version": prompt_version},
)

if is_explicit_version:
response = client.get_prompt(promptIdentifier=prompt_id, promptVersion=selected_version)
else:
response = client.get_prompt(promptIdentifier=prompt_id)

logger.info("Prompt Found", extra={"prompt": response})

variant = response["variants"][0]

# Extract and render the prompt template
template_config = response["variants"][0]["templateConfiguration"]
template_config = variant["templateConfiguration"]
prompt_text = _render_prompt(template_config)
actual_version = response.get("version", "DRAFT")

# Extract inference configuration with defaults
default_inference = {"temperature": 0, "topP": 1, "maxTokens": 1500}
raw_inference = response["variants"][0].get("inferenceConfiguration", {})
raw_text_config = raw_inference.get("textInferenceConfiguration", {})
model_id = variant.get("modelId", "")
raw_inference = variant.get("inferenceConfiguration", {})
raw_text_config = raw_inference.get("text", {})
inference_config = {**default_inference, **raw_text_config}

logger.info(
@@ -117,10 +122,11 @@
"prompt_name": prompt_name,
"prompt_id": prompt_id,
"version_used": actual_version,
"model_id": model_id,
**inference_config,
},
)
return {"prompt_text": prompt_text, "inference_config": inference_config}
return {"prompt_text": prompt_text, "model_id": model_id, "inference_config": inference_config}

except ClientError as e:
error_code = e.response.get("Error", {}).get("Code", "Unknown")
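One behaviour of the new wiring worth noting: `load_prompt` defaults `model_id` to an empty string, and `dict.get(key, default)` only falls back when the key is *missing*, so an empty `model_id` would be passed through as the model ARN in `query_bedrock`. A sketch of a guard that treats empty as absent (`resolve_model_arn` is a hypothetical helper, not in this PR):

```python
def resolve_model_arn(prompt_template: dict, default_model_id: str) -> str:
    # `or` treats both a missing key and an empty string as "use the default",
    # unlike dict.get(key, default), which returns "" when the key exists but is empty.
    return prompt_template.get("model_id") or default_model_id
```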