From 5aff0e045a04f6c9c447335cb75b071885a61657 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 24 Jun 2025 17:45:52 -0400
Subject: [PATCH 01/56] fix(test runs): sample the source data lookup DataFrame
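For context on the diff below: the lookup limit is read from an environment variable and applied by sampling the DataFrame before it is rendered into the report. A minimal sketch of the pattern (names mirror the patch; the int() cast is an assumption, since os.getenv returns a string whenever the variable is set):

    import os
    import pandas as pd

    # Assumed coercion; os.getenv returns a string when the env var is present.
    LOOKUP_LIMIT = int(os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20))

    def limit_lookup_rows(df: pd.DataFrame, limit: int | None = None) -> pd.DataFrame:
        # Down-sample the source-data lookup rows before rendering them into the PDF.
        return df.sample(n=limit) if limit else df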
---
testgen/settings.py | 5 +++++
testgen/ui/pdf/test_result_report.py | 13 +++++++++++--
testgen/ui/services/test_results_service.py | 8 ++++++--
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index 2d2c91c7..193aa4df 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -494,3 +494,8 @@
"""
Random key used to sign/verify the authentication token
"""
+
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20)
+"""
+Limit the number of records used to generate the PDF with test results issue report.
+"""
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index 883b0346..54564eaa 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -10,6 +10,7 @@
TableStyle,
)
+from testgen.settings import ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT
from testgen.ui.pdf.dataframe_table import TABLE_STYLE_DATA, DataFrameTableBuilder
from testgen.ui.pdf.style import (
COLOR_GRAY_BG,
@@ -241,9 +242,17 @@ def get_report_content(document, tr_data):
yield build_history_table(document, tr_data)
if tr_data["test_type"] == "CUSTOM":
- sample_data_tuple = do_source_data_lookup_custom(get_schema(), tr_data)
+ sample_data_tuple = do_source_data_lookup_custom(
+ get_schema(),
+ tr_data,
+ limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT,
+ )
else:
- sample_data_tuple = do_source_data_lookup(get_schema(), tr_data)
+ sample_data_tuple = do_source_data_lookup(
+ get_schema(),
+ tr_data,
+ limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT,
+ )
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index 7f2d886b..0623ec34 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -153,7 +153,7 @@ def get_test_result_history(db_schema, tr_data):
return df
-def do_source_data_lookup_custom(db_schema, tr_data):
+def do_source_data_lookup_custom(db_schema, tr_data, limit: int | None = None):
# Define the query
str_sql = f"""
SELECT d.custom_query as lookup_query, tg.table_group_schema,
@@ -193,6 +193,8 @@ def do_source_data_lookup_custom(db_schema, tr_data):
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this test.", None, None
@@ -201,7 +203,7 @@ def do_source_data_lookup_custom(db_schema, tr_data):
return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None
-def do_source_data_lookup(db_schema, tr_data, sql_only=False):
+def do_source_data_lookup(db_schema, tr_data, sql_only=False, limit: int | None = None):
# Define the query
str_sql = f"""
SELECT t.lookup_query, tg.table_group_schema,
@@ -298,6 +300,8 @@ def replace_parms(df_test, str_query):
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None
From d406e7bdf2403b625a448bbfc9f72fe2132eb4b6 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 24 Jun 2025 17:52:35 -0400
Subject: [PATCH 02/56] misc(test runs): limit result history in issue PDF
report
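The history query gains an optional LIMIT appended through string interpolation; roughly, the tail of the generated SQL looks like this (skeleton only, not the full project query):

    limit = 15  # history rows kept for the issue PDF
    sql_tail = f"""
        ORDER BY test_date DESC
        {'LIMIT ' + str(limit) if limit else ''};
    """
    # limit=15   -> ORDER BY test_date DESC ... LIMIT 15;
    # limit=None -> the LIMIT clause is omitted entirely.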
---
testgen/ui/pdf/test_result_report.py | 2 +-
testgen/ui/services/test_results_service.py | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index 54564eaa..dd5e9ed9 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -165,7 +165,7 @@ def build_summary_table(document, tr_data):
def build_history_table(document, tr_data):
- history_data = get_test_result_history(get_schema(), tr_data)
+ history_data = get_test_result_history(get_schema(), tr_data, limit=15)
history_table_style = TableStyle(
(
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index 0623ec34..e8a7452d 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -124,7 +124,7 @@ def get_test_results(
return df
-def get_test_result_history(db_schema, tr_data):
+def get_test_result_history(db_schema, tr_data, limit: int | None = None):
if tr_data["auto_gen"]:
str_where = f"""
WHERE test_suite_id = '{tr_data["test_suite_id"]}'
@@ -143,7 +143,8 @@ def get_test_result_history(db_schema, tr_data):
test_name_short, test_name_long, measure_uom, test_operator,
threshold_value::NUMERIC, result_measure, result_status
FROM {db_schema}.v_test_results {str_where}
- ORDER BY test_date DESC;
+ ORDER BY test_date DESC
+ {'LIMIT ' + str(limit) if limit else ''};
"""
df = db.retrieve_data(str_sql)
From 2d2dc39d0b1bd0d20aee517ca332bd097d3ca6cb Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 25 Jun 2025 08:26:20 -0400
Subject: [PATCH 03/56] misc: apply limit to hygiene issue report
---
testgen/settings.py | 4 ++--
testgen/ui/pdf/hygiene_issue_report.py | 3 ++-
testgen/ui/services/hygiene_issues_service.py | 4 +++-
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index 193aa4df..b9f991ca 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -495,7 +495,7 @@
Random key used to sign/verify the authentication token
"""
-ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20)
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 500)
"""
-Limit the number of records used to generate the PDF with test results issue report.
+Limit the number of records used to generate the PDF with test results and hygiene issue reports.
"""
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index 1e3ddda3..aa5747fd 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -4,6 +4,7 @@
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import CondPageBreak, KeepTogether, Paragraph, Table, TableStyle
+from testgen.settings import ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT
from testgen.ui.pdf.dataframe_table import DataFrameTableBuilder
from testgen.ui.pdf.style import (
COLOR_GRAY_BG,
@@ -185,7 +186,7 @@ def get_report_content(document, hi_data):
yield Paragraph("Suggested Action", style=PARA_STYLE_H1)
yield Paragraph(hi_data["suggested_action"], style=PARA_STYLE_TEXT)
- sample_data_tuple = get_source_data(hi_data)
+ sample_data_tuple = get_source_data(hi_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT)
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py
index 71a24fe7..f4490a23 100644
--- a/testgen/ui/services/hygiene_issues_service.py
+++ b/testgen/ui/services/hygiene_issues_service.py
@@ -4,7 +4,7 @@
from testgen.ui.services import database_service as db
-def get_source_data(hi_data):
+def get_source_data(hi_data, limit: int | None = None):
str_schema = st.session_state["dbschema"]
# Define the query
str_sql = f"""
@@ -83,6 +83,8 @@ def replace_parms(str_query):
if df.empty:
return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this Issue.", None, None
From c48bd3a86e0d932d1862d82aa3ff60ccd3e56117 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 25 Jun 2025 10:58:32 -0400
Subject: [PATCH 04/56] misc: set limit on source data lookups in UI dialogs
---
testgen/settings.py | 2 +-
testgen/ui/views/hygiene_issues.py | 2 +-
testgen/ui/views/test_results.py | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index b9f991ca..528db06a 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -495,7 +495,7 @@
Random key used to sign/verify the authentication token
"""
-ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 500)
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 50)
"""
Limit the number of records used to generate the PDF with test results and hygiene issue reports.
"""
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 3cf0fe3d..45fc1d61 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -529,7 +529,7 @@ def source_data_dialog(selected_row):
fm.render_html_list(selected_row, ["detail"], None, 700, ["Hygiene Issue Detail"])
with st.spinner("Retrieving source data..."):
- bad_data_status, bad_data_msg, _, df_bad = get_source_data(selected_row)
+ bad_data_status, bad_data_msg, _, df_bad = get_source_data(selected_row, limit=500)
if bad_data_status in {"ND", "NA"}:
st.info(bad_data_msg)
elif bad_data_status == "ERR":
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 39373a40..d472e731 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -380,13 +380,13 @@ def get_test_definition(str_test_def_id):
@st.cache_data(show_spinner=False)
def do_source_data_lookup(selected_row):
schema = st.session_state["dbschema"]
- return test_results_service.do_source_data_lookup(schema, selected_row)
+ return test_results_service.do_source_data_lookup(schema, selected_row, limit=500)
@st.cache_data(show_spinner=False)
def do_source_data_lookup_custom(selected_row):
schema = st.session_state["dbschema"]
- return test_results_service.do_source_data_lookup_custom(schema, selected_row)
+ return test_results_service.do_source_data_lookup_custom(schema, selected_row, limit=500)
@st.cache_data(show_spinner=False)
From aff987a79fdf0f27e48d646a2f1f9e0f56521469 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 25 Jun 2025 17:32:14 -0400
Subject: [PATCH 05/56] fix(source-data): bug when sampling dataframe
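Background on the fix: with the default replace=False, pandas refuses to sample more rows than the frame contains, so the requested limit has to be clamped to the frame length. A minimal illustration (not project code):

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3]})
    # df.sample(n=5) raises ValueError: sample larger than the population
    safe = df.sample(n=min(len(df), 5))  # returns all 3 rows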
---
testgen/ui/services/hygiene_issues_service.py | 2 +-
testgen/ui/services/test_results_service.py | 4 ++--
testgen/ui/views/hygiene_issues.py | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py
index f4490a23..53ca43e7 100644
--- a/testgen/ui/services/hygiene_issues_service.py
+++ b/testgen/ui/services/hygiene_issues_service.py
@@ -84,7 +84,7 @@ def replace_parms(str_query):
return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this Issue.", None, None
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index e8a7452d..57de1dc0 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -195,7 +195,7 @@ def do_source_data_lookup_custom(db_schema, tr_data, limit: int | None = None):
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this test.", None, None
@@ -302,7 +302,7 @@ def replace_parms(df_test, str_query):
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 45fc1d61..6b766a26 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -515,8 +515,8 @@ def get_excel_report_data(
@st.cache_data(show_spinner=False)
-def get_source_data(hi_data):
- return get_source_data_uncached(hi_data)
+def get_source_data(hi_data, limit):
+ return get_source_data_uncached(hi_data, limit)
@st.dialog(title="Source Data")
From 3f4861590be4fe31640a399270f66af2cab5d3b7 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 24 Jun 2025 01:36:39 -0400
Subject: [PATCH 06/56] fix(logo): prevent logo resize
---
testgen/ui/assets/style.css | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index a57b453c..791b4079 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -47,10 +47,14 @@ header {
/* ... */
/* Sidebar */
-[data-testid="stSidebarHeader"] {
+[data-testid="stSidebarContent"] [data-testid="stSidebarHeader"] {
padding: 16px 20px;
}
+[data-testid="stSidebarHeader"] [data-testid="stLogo"] {
+ max-width: fit-content;
+}
+
section[data-testid="stSidebar"] {
width: 250px;
z-index: 999;
From 325569b7d867efb14c24746b5da6ecb454dd4faf Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 24 Jun 2025 16:53:43 -0400
Subject: [PATCH 07/56] feat: add help menu, support mailto link, and upgrade
notification
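Among other things, version_service now derives the UI edition label from the Docker Hub repository name. Roughly how that string transformation behaves (repository values below are illustrative):

    def edition_from_repo(repo: str) -> str:
        suffix = (
            repo.replace("datakitchen/dataops-testgen", "")
            .replace("-", " ")
            .strip()
            .title()
            .replace("Qa", "QA")
        )
        return f"TestGen{' ' + suffix if suffix else ''}"

    # edition_from_repo("datakitchen/dataops-testgen")    -> "TestGen"
    # edition_from_repo("datakitchen/dataops-testgen-qa") -> "TestGen QA"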
---
deploy/docker-bake.hcl | 5 +
deploy/testgen.dockerfile | 1 +
testgen/__main__.py | 9 +-
testgen/common/version_service.py | 49 +++++-
testgen/settings.py | 9 +-
testgen/ui/app.py | 8 +-
testgen/ui/assets/style.css | 53 +++++-
testgen/ui/bootstrap.py | 22 +--
testgen/ui/components/frontend/css/shared.css | 2 +-
.../frontend/js/components/help_menu.js | 161 ++++++++++++++++++
.../frontend/js/components/sidebar.js | 140 ++++-----------
testgen/ui/components/frontend/js/main.js | 2 +
testgen/ui/components/widgets/__init__.py | 2 +-
testgen/ui/components/widgets/page.py | 62 +++++--
testgen/ui/components/widgets/sidebar.py | 19 ++-
.../components/widgets/testgen_component.py | 1 +
testgen/ui/navigation/menu.py | 11 --
testgen/ui/session.py | 10 +-
testgen/ui/views/login.py | 2 +-
testgen/ui/views/project_dashboard.py | 1 -
tests/unit/test_version_service.py | 32 ++--
21 files changed, 401 insertions(+), 200 deletions(-)
create mode 100644 testgen/ui/components/frontend/js/components/help_menu.js
diff --git a/deploy/docker-bake.hcl b/deploy/docker-bake.hcl
index 2518cfc4..35efb2b4 100644
--- a/deploy/docker-bake.hcl
+++ b/deploy/docker-bake.hcl
@@ -4,12 +4,16 @@ variable "TESTGEN_VERSION" {}
variable "TESTGEN_DOCKER_HUB_REPO" {
default = "datakitchen/dataops-testgen"
}
+variable "TESTGEN_SUPPORT_EMAIL" {
+ default = "open-source-support@datakitchen.io"
+}
target "testgen-release" {
args = {
TESTGEN_VERSION = "${TESTGEN_VERSION}"
TESTGEN_BASE_LABEL = "${TESTGEN_BASE_LABEL}"
TESTGEN_DOCKER_HUB_REPO = "${TESTGEN_DOCKER_HUB_REPO}"
+ TESTGEN_SUPPORT_EMAIL = "${TESTGEN_SUPPORT_EMAIL}"
}
context = "."
dockerfile = "deploy/testgen.dockerfile"
@@ -31,6 +35,7 @@ target "testgen-qa" {
TESTGEN_VERSION = "${TESTGEN_VERSION}"
TESTGEN_BASE_LABEL = "${TESTGEN_BASE_LABEL}"
TESTGEN_DOCKER_HUB_REPO = "${TESTGEN_DOCKER_HUB_REPO}"
+ TESTGEN_SUPPORT_EMAIL = "${TESTGEN_SUPPORT_EMAIL}"
}
context = "."
dockerfile = "deploy/testgen.dockerfile"
diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile
index 318a3add..0e85c3f6 100644
--- a/deploy/testgen.dockerfile
+++ b/deploy/testgen.dockerfile
@@ -24,6 +24,7 @@ RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/s
ENV TESTGEN_VERSION=${TESTGEN_VERSION}
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}
+ENV TESTGEN_SUPPORT_EMAIL=${TESTGEN_SUPPORT_EMAIL}
ENV TG_RELEASE_CHECK=docker
USER testgen
diff --git a/testgen/__main__.py b/testgen/__main__.py
index 2f09b169..74541d76 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -48,6 +48,7 @@
LOG = logging.getLogger("testgen")
APP_MODULES = ["ui", "scheduler"]
+VERSION_DATA = version_service.get_version()
@dataclass
@@ -69,7 +70,13 @@ def invoke(self, ctx: Context):
@click.group(
cls=CliGroup,
- help=f"This version: {settings.VERSION} \n\nLatest version: {version_service.get_latest_version()} \n\nSchema revision: {get_schema_revision()}"
+ help=f"""
+ {VERSION_DATA.edition} {VERSION_DATA.current or ""}
+
+ {f"New version available! {VERSION_DATA.latest}" if VERSION_DATA.latest != VERSION_DATA.current else ""}
+
+ Schema revision: {get_schema_revision()}
+ """
)
@click.option(
"-v",
diff --git a/testgen/common/version_service.py b/testgen/common/version_service.py
index 8e03cb17..5621d4c6 100644
--- a/testgen/common/version_service.py
+++ b/testgen/common/version_service.py
@@ -1,28 +1,59 @@
import logging
+from dataclasses import dataclass
import requests
from testgen import settings
+from testgen.ui.session import session
LOG = logging.getLogger("testgen")
-def get_latest_version() -> str:
+@dataclass
+class Version:
+ edition: str
+ current: str
+ latest: str
+
+
+def get_version() -> Version:
+ if not session.version:
+ session.version = Version(
+ edition=_get_app_edition(),
+ current=settings.VERSION,
+ latest=_get_latest_version(),
+ )
+ return session.version
+
+
+def _get_app_edition() -> str:
+ edition = (
+ settings.DOCKER_HUB_REPOSITORY
+ .replace("datakitchen/dataops-testgen", "")
+ .replace("-", " ")
+ .strip()
+ .title()
+ .replace("Qa", "QA")
+ )
+ return f"TestGen{' ' + edition if edition else ''}"
+
+
+def _get_latest_version() -> str | None:
try:
return {
"pypi": _get_last_pypi_release,
"docker": _get_last_docker_release,
"yes": _get_last_docker_release, # NOTE: kept for retrocompatibility
- }.get(settings.CHECK_FOR_LATEST_VERSION, lambda: "unknown")()
+ }.get(settings.CHECK_FOR_LATEST_VERSION, lambda: None)()
except:
- return "unknown"
+ return None
-def _get_last_pypi_release() -> str:
+def _get_last_pypi_release() -> str | None:
response = requests.get("https://pypi.org/pypi/dataops-testgen/json", timeout=3)
if response.status_code != 200:
LOG.warning(f"version_service: Failed to fetch PyPi releases. Status code: {response.status_code}")
- return "unknown"
+ return None
package_data = response.json()
package_releases = list((package_data.get("releases") or {}).keys())
@@ -30,7 +61,7 @@ def _get_last_pypi_release() -> str:
return _sorted_tags(package_releases)[0]
-def _get_last_docker_release() -> str:
+def _get_last_docker_release() -> str | None:
headers = {}
if settings.DOCKER_HUB_USERNAME and settings.DOCKER_HUB_PASSWORD:
auth_response = requests.post(
@@ -43,7 +74,7 @@ def _get_last_docker_release() -> str:
"version_service: unable to login against https://hub.docker.com."
f" Status code: {auth_response.status_code}"
)
- return "unknown"
+ return None
headers["Authorization"] = f"Bearer {auth_response.json()['token']}"
response = requests.get(
@@ -55,7 +86,7 @@ def _get_last_docker_release() -> str:
if response.status_code != 200:
LOG.debug(f"version_service: Failed to fetch docker tags. Status code: {response.status_code}")
- return "unknown"
+ return None
tags_to_return = []
tags_data = response.json()
@@ -66,7 +97,7 @@ def _get_last_docker_release() -> str:
tags_to_return.append(tag_name)
if len(tags_to_return) <= 0:
- return "unkown"
+ return None
return _sorted_tags(tags_to_return)[0]
diff --git a/testgen/settings.py b/testgen/settings.py
index 528db06a..205caf96 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -452,12 +452,17 @@
defaults to: None
"""
-VERSION: str = os.getenv("TESTGEN_VERSION", "unknown")
+VERSION: str = os.getenv("TESTGEN_VERSION", None)
"""
Current deployed version. The value is displayed in the UI menu.
from env variable: `TESTGEN_VERSION`
-defaults to: `unknown`
+defaults to: None
+"""
+
+SUPPORT_EMAIL: str = os.getenv("TESTGEN_SUPPORT_EMAIL", "open-source-support@datakitchen.io")
+"""
+Email for contacting DataKitchen support.
"""
SSL_CERT_FILE: str = os.getenv("SSL_CERT_FILE", "")
diff --git a/testgen/ui/app.py b/testgen/ui/app.py
index 36bff7b5..de4f8d0e 100644
--- a/testgen/ui/app.py
+++ b/testgen/ui/app.py
@@ -3,6 +3,7 @@
import streamlit as st
from testgen import settings
+from testgen.common import version_service
from testgen.common.docker_service import check_basic_configuration
from testgen.common.models import with_database_session
from testgen.ui import bootstrap
@@ -51,9 +52,12 @@ def render(log_level: int = logging.INFO):
testgen.sidebar(
projects=project_service.get_projects(),
current_project=session.sidebar_project,
- menu=application.menu.update_version(application.get_version()),
- username=session.username,
+ menu=application.menu,
current_page=session.current_page,
+ username=session.username,
+ role=session.auth_role,
+ version=version_service.get_version(),
+ support_email=settings.SUPPORT_EMAIL,
)
application.router.run()
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 791b4079..b60981cf 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -125,6 +125,8 @@ div.st-key-data_catalog-spinner {
}
/* Theming for buttons, tabs and form inputs */
+button[data-testid="stPopoverButton"]:hover,
+button[data-testid="stPopoverButton"]:focus:not(:active),
button[data-testid="stBaseButton-secondary"]:hover,
button[data-testid="stBaseButton-secondary"]:focus:not(:active),
button[data-testid="stBaseButton-secondaryFormSubmit"]:hover,
@@ -133,6 +135,7 @@ button[data-testid="stBaseButton-secondaryFormSubmit"]:focus:not(:active) {
color: var(--primary-color);
}
+button[data-testid="stPopoverButton"]:active,
button[data-testid="stBaseButton-secondary"]:active,
button[data-testid="stBaseButton-secondaryFormSubmit"]:active,
label[data-baseweb="checkbox"]:has(input[aria-checked="true"]) > span {
@@ -305,17 +308,53 @@ Use as testgen.text("text", "extra_styles") */
background-color: var(--disabled-text-color);
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a {
+/* Help menu */
+.st-key-tg-header--help [data-testid="stPageLink"] {
+ position: absolute;
+ top: -7px;
+ right: 0;
+ z-index: 5;
+}
+
+.st-key-tg-header--help [data-testid="stPageLink"] [data-testid="stPageLink-NavLink"] {
+ line-height: 1;
+}
+
+.st-key-tg-header--help [data-testid="stPopover"] {
+ width: auto;
+}
+
+.st-key-tg-header--help button[data-testid="stPopoverButton"] {
border: none;
background: none;
- padding: 6px;
- min-height: 24px;
- color: var(--primary-text-color);
+ padding: 0;
+ margin-top: 8px;
+ min-height: fit-content;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a p {
- font-size: 20px;
- line-height: 1;
+.st-key-tg-header--help button[data-testid="stPopoverButton"]:focus:not(:hover) {
+ color: inherit;
+}
+
+.st-key-tg-header--help-dummy [data-testid="stMarkdownContainer"] p {
+ display: flex;
+ align-items: center;
+ margin-top: 8px;
+ min-height: fit-content;
+}
+
+.st-key-tg-header--help-dummy p span {
+ width: 1.25rem;
+ height: 1.25rem;
+ font-size: 1.25rem;
+ line-height: 1.25rem;
+ margin-top: 0.125rem;
+ margin-left: 0.125rem;
+ margin-right: -0.3125rem;
+}
+
+div[data-testid="stPopoverBody"]:has(i.tg-header--help-wrapper) {
+ padding: 0;
}
/* */
diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py
index 6b0fed7a..3b048414 100644
--- a/testgen/ui/bootstrap.py
+++ b/testgen/ui/bootstrap.py
@@ -2,12 +2,10 @@
import logging
from testgen import settings
-from testgen.commands.run_upgrade_db_config import get_schema_revision
-from testgen.common import configure_logging, version_service
-from testgen.ui.navigation.menu import Menu, Version
+from testgen.common import configure_logging
+from testgen.ui.navigation.menu import Menu
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
-from testgen.ui.session import session
from testgen.ui.views.connections import ConnectionsPage
from testgen.ui.views.data_catalog import DataCatalogPage
from testgen.ui.views.hygiene_issues import HygieneIssuesPage
@@ -55,17 +53,6 @@ def __init__(self, logo: plugins.Logo, router: Router, menu: Menu, logger: loggi
self.menu = menu
self.logger = logger
- def get_version(self) -> Version:
- latest_version = self.menu.version.latest
- if not session.latest_version:
- latest_version = version_service.get_latest_version()
-
- return Version(
- current=settings.VERSION,
- latest=latest_version,
- schema=get_schema_revision(),
- )
-
def run(log_level: int = logging.INFO) -> Application:
pages = [*BUILTIN_PAGES]
@@ -106,11 +93,6 @@ def run(log_level: int = logging.INFO) -> Application:
for page in pages if page.menu_item
}.values()
),
- version=Version(
- current=settings.VERSION,
- latest="...",
- schema=get_schema_revision(),
- ),
),
logger=LOG,
)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index 643b4ffb..c2574212 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -118,7 +118,7 @@ body {
--portal-background: #14181f;
--portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px;
- --select-hover-background: rgba(255, 255, 255, .32);
+ --select-hover-background: rgb(38, 39, 48);
}
}
diff --git a/testgen/ui/components/frontend/js/components/help_menu.js b/testgen/ui/components/frontend/js/components/help_menu.js
new file mode 100644
index 00000000..1a364a23
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/help_menu.js
@@ -0,0 +1,161 @@
+/**
+ * @typedef Version
+ * @type {object}
+ * @property {string} edition
+ * @property {string} current
+ * @property {string} latest
+ *
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_edit
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {string} page_help
+ * @property {string} support_email
+ * @property {Version} version
+ * @property {Permissions} permissions
+*/
+import van from '../van.min.js';
+import { emitEvent, getRandomId, getValue, loadStylesheet, resizeFrameHeightOnDOMChange, resizeFrameHeightToElement } from '../utils.js';
+import { Streamlit } from '../streamlit.js';
+import { Icon } from './icon.js';
+
+const { a, div, span } = van.tags;
+
+const baseHelpUrl = 'https://docs.datakitchen.io/articles/#!dataops-testgen-help/';
+const releaseNotesTopic = 'testgen-release-notes';
+const upgradeTopic = 'upgrade-testgen';
+
+const slackUrl = 'https://data-observability-slack.datakitchen.io/join';
+const trainingUrl = 'https://info.datakitchen.io/data-quality-training-and-certifications';
+
+const HelpMenu = (/** @type Properties */ props) => {
+ loadStylesheet('help-menu', stylesheet);
+ Streamlit.setFrameHeight(1);
+ window.testgen.isPage = true;
+
+ const domId = `help-menu-${getRandomId()}`;
+ const version = getValue(props.version) ?? {};
+
+ resizeFrameHeightToElement(domId);
+ resizeFrameHeightOnDOMChange(domId);
+
+ return div(
+ { id: domId },
+ div(
+ { class: 'flex-column pt-3' },
+ getValue(props.help_topic)
+ ? HelpLink(`${baseHelpUrl}${getValue(props.help_topic)}`, 'Help for this Page', 'description')
+ : null,
+ HelpLink(baseHelpUrl, 'TestGen Help', 'help'),
+ HelpLink(trainingUrl, 'Training Portal', 'school'),
+ getValue(props.permissions)?.can_edit
+ ? div(
+ { class: 'help-item', onclick: () => emitEvent('AppLogsClicked') },
+ Icon({ classes: 'help-item-icon' }, 'browse_activity'),
+ 'Application Logs',
+ )
+ : null,
+ span({ class: 'help-divider' }),
+ HelpLink(slackUrl, 'Slack Community', 'group'),
+ getValue(props.support_email)
+ ? HelpLink(
+ `mailto:${getValue(props.support_email)}
+ ?subject=${version.edition}: Contact Support
+ &body=%0D%0D%0DVersion: ${version.edition} ${version.current}`,
+ 'Contact Support',
+ 'email',
+ )
+ : null,
+ span({ class: 'help-divider' }),
+ version.current || version.latest
+ ? div(
+ { class: 'help-version' },
+ version.current
+ ? HelpLink(`${baseHelpUrl}${releaseNotesTopic}`, `${version.edition} ${version.current}`, null, null)
+ : null,
+ version.latest !== version.current
+ ? HelpLink(
+ `${baseHelpUrl}${upgradeTopic}`,
+ `New version available! ${version.latest}`,
+ null,
+ 'latest',
+ )
+ : null,
+ )
+ : null,
+ ),
+ );
+}
+
+const HelpLink = (
+ /** @type string */ url,
+ /** @type string */ label,
+ /** @type string? */ icon,
+ /** @type string */ classes = 'help-item',
+) => {
+ return a(
+ {
+ class: classes,
+ href: url,
+ target: '_blank',
+ onclick: () => emitEvent('ExternalLinkClicked'),
+ },
+ icon ? Icon({ classes: 'help-item-icon' }, icon) : null,
+ label,
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.help-item {
+ padding: 12px 24px;
+ color: var(--primary-text-color);
+ text-decoration: none;
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ cursor: pointer;
+ transition: 0.3s;
+}
+
+.help-item:hover {
+ background-color: var(--select-hover-background);
+ color: var(--primary-color);
+}
+
+.help-item-icon {
+ color: var(--primary-text-color);
+ transition: 0.3s;
+}
+
+.help-item:hover .help-item-icon {
+ color: var(--primary-color);
+}
+
+.help-divider {
+ height: 1px;
+ background-color: var(--border-color);
+ margin: 0 16px;
+}
+
+.help-version {
+ padding: 16px 16px 8px;
+ display: flex;
+ flex-direction: column;
+ align-items: flex-end;
+ gap: 8px;
+}
+
+.help-version > a {
+ color: var(--secondary-text-color);
+ text-decoration: none;
+}
+
+.help-version > a.latest {
+ color: var(--red);
+}
+`);
+
+export { HelpMenu };
diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js
index b2da6405..70b93883 100644
--- a/testgen/ui/components/frontend/js/components/sidebar.js
+++ b/testgen/ui/components/frontend/js/components/sidebar.js
@@ -9,38 +9,35 @@
*
* @typedef Version
* @type {object}
+ * @property {string} edition
* @property {string} current
* @property {string} latest
- * @property {string} schema
*
* @typedef Menu
* @type {object}
* @property {Array.
")
@@ -987,8 +1004,17 @@ def show_test_defs_grid(
return dct_selected_row
-def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, data: pd.DataFrame, test_suite: str) -> FILE_DATA_TYPE:
- data = data.copy()
+def get_excel_report_data(
+ update_progress: PROGRESS_UPDATE_TYPE,
+ project_code: str,
+ test_suite: str,
+ data: pd.DataFrame | None = None,
+) -> FILE_DATA_TYPE:
+ if data is not None:
+ data = data.copy()
+ else:
+ data = test_definition_service.get_test_definitions(project_code, test_suite)
+ date_service.accommodate_dataframe_to_timezone(data, st.session_state)
for key in ["test_active_display", "lock_refresh_display"]:
data[key] = data[key].apply(lambda val: val if val == "Yes" else None)
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 4a0312a0..d1897353 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -24,6 +24,7 @@
get_excel_file_data,
zip_multi_file_data,
)
+from testgen.ui.components.widgets.page import css_class, flex_row_end
from testgen.ui.navigation.page import Page
from testgen.ui.pdf.test_result_report import create_report
from testgen.ui.services import project_service, test_definition_service, test_results_service, user_session_service
@@ -76,7 +77,7 @@ def render(
summary_column, score_column, actions_column = st.columns([.4, .2, .4], vertical_alignment="bottom")
status_filter_column, test_type_filter_column, table_filter_column, column_filter_column, sort_column, export_button_column = st.columns(
- [.2, .2, .2, .2, .1, .1], vertical_alignment="bottom"
+ [.175, .175, .2, .2, .1, .15], vertical_alignment="bottom"
)
testgen.flex_row_end(actions_column)
@@ -518,13 +519,28 @@ def show_result_detail(
bind_to_query_prop="test_result_id",
)
- with export_container:
- if st.button(label=":material/download: Export", help="Download filtered test results to Excel"):
- download_dialog(
- dialog_title="Download Excel Report",
- file_content_func=get_excel_report_data,
- args=(df, test_suite, run_date),
- )
+ popover_container = export_container.empty()
+
+ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ flex_row_end()
+ st.button(label="Export", icon=":material/download:", disabled=True)
+
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(test_suite, run_date, run_id, data),
+ )
+
+ with popover_container.container(key="tg--export-popover"):
+ flex_row_end()
+ with st.popover(label="Export", icon=":material/download:", help="Download test results to Excel"):
+ css_class("tg--export-wrapper")
+ st.button(label="All tests", type="tertiary", on_click=open_download_dialog)
+ st.button(label="Filtered tests", type="tertiary", on_click=partial(open_download_dialog, df))
+ if selected_rows:
+ st.button(label="Selected tests", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected_rows)))
# Display history and detail for selected row
if not selected_rows:
@@ -623,10 +639,14 @@ def show_result_detail(
def get_excel_report_data(
update_progress: PROGRESS_UPDATE_TYPE,
- data: pd.DataFrame,
test_suite: str,
run_date: str,
+ run_id: str,
+ data: pd.DataFrame | None = None,
) -> FILE_DATA_TYPE:
+ if data is None:
+ data = get_test_results(run_id)
+
columns = {
"schema_name": {"header": "Schema"},
"table_name": {"header": "Table"},
From 4beff73e7fa8a80bfec18c3091fa15b47e579070 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 00:57:14 -0400
Subject: [PATCH 38/56] fix(sort): close popover on apply
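Streamlit has no API to close an open popover, so the change below leans on a known workaround: render the popover inside an st.empty() placeholder and, when the apply callback fires, re-render that placeholder with a plain disabled button, which dismisses the popover. A condensed sketch of the pattern (labels are placeholders):

    import streamlit as st

    placeholder = st.empty()

    def handle_apply():
        # Re-rendering the placeholder without the popover closes it.
        with placeholder.container():
            st.button("Sort", disabled=True)

    with placeholder.container():
        with st.popover("Sort"):
            st.button("Apply", on_click=handle_apply)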
---
.../ui/components/widgets/sorting_selector.py | 27 +++++++++++++------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/testgen/ui/components/widgets/sorting_selector.py b/testgen/ui/components/widgets/sorting_selector.py
index 8b168f1c..5dd1cc95 100644
--- a/testgen/ui/components/widgets/sorting_selector.py
+++ b/testgen/ui/components/widgets/sorting_selector.py
@@ -73,14 +73,25 @@ def sorting_selector(
if state is None:
state = default
- with st.popover(popover_label):
- new_state = component(
- id_="sorting_selector",
- key=key,
- default=state,
- on_change=on_change,
- props={"columns": columns, "state": state},
- )
+ popover_container = st.empty()
+
+ def handle_change() -> None:
+ if on_change:
+ on_change()
+
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ st.button(label=f"{popover_label} :material/keyboard_arrow_up:", disabled=True)
+
+ with popover_container.container():
+ with st.popover(popover_label):
+ new_state = component(
+ id_="sorting_selector",
+ key=key,
+ default=state,
+ on_change=handle_change,
+ props={"columns": columns, "state": state},
+ )
# For some unknown reason, sometimes, streamlit returns None as the component state
new_state = [] if new_state is None else new_state
From 5fcf9d342577f5fd62e81b45a575603bf18a254e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 00:57:30 -0400
Subject: [PATCH 39/56] fix: misc styling improvements
---
testgen/ui/components/frontend/js/pages/profiling_runs.js | 2 +-
testgen/ui/components/frontend/js/pages/test_runs.js | 4 ++--
testgen/ui/views/dialogs/data_preview_dialog.py | 5 +++--
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index dc955cb9..feee47de 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -146,7 +146,7 @@ const ProfilingRunItem = (
{ label: 'Possible', value: item.anomalies_possible_ct, color: 'yellow' },
{ label: 'Dismissed', value: item.anomalies_dismissed_ct, color: 'grey' },
],
- height: 10,
+ height: 3,
width: 350,
}) : '--',
item.anomaly_ct ? Link({
diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js
index 725d12c3..0159b0cd 100644
--- a/testgen/ui/components/frontend/js/pages/test_runs.js
+++ b/testgen/ui/components/frontend/js/pages/test_runs.js
@@ -206,8 +206,8 @@ const TestRunItem = (
{ label: 'Error', value: item.error_ct, color: 'brown' },
{ label: 'Dismissed', value: item.dismissed_ct, color: 'grey' },
],
- height: 10,
- width: 400,
+ height: 8,
+ width: 350,
}) : '--',
),
div(
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
index 6911c3d6..9d5beaea 100644
--- a/testgen/ui/views/dialogs/data_preview_dialog.py
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -20,7 +20,8 @@ def data_preview_dialog(
f"Table: {table_name}"
)
- data = get_preview_data(table_group_id, schema_name, table_name, column_name)
+ with st.spinner("Loading data ..."):
+ data = get_preview_data(table_group_id, schema_name, table_name, column_name)
if data.empty:
st.warning("The preview data could not be loaded.")
@@ -32,7 +33,7 @@ def data_preview_dialog(
)
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_preview_data(
table_group_id: str,
schema_name: str,
From bc9a54ebb662e1ef6fcb4bd658fd67c162bc032e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 15:58:33 -0400
Subject: [PATCH 40/56] feat(schedules): add schedule dialogs to table groups
and test suites pages
---
.../frontend/js/pages/schedule_list.js | 10 +++---
.../frontend/js/pages/table_group_list.js | 34 +++++++++++++------
.../frontend/js/pages/test_suites.js | 31 ++++++++++++-----
testgen/ui/views/profiling_runs.py | 2 +-
testgen/ui/views/table_groups.py | 2 ++
testgen/ui/views/test_runs.py | 2 +-
testgen/ui/views/test_suites.py | 2 ++
7 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js
index a4621c5d..2d9e7bf9 100644
--- a/testgen/ui/components/frontend/js/pages/schedule_list.js
+++ b/testgen/ui/components/frontend/js/pages/schedule_list.js
@@ -35,7 +35,7 @@ const ScheduleList = (/** @type Properties */ props) => {
} catch (e) {
console.log(e)
}
- Streamlit.setFrameHeight(100 * items.length);
+ Streamlit.setFrameHeight(100 * items.length || 150);
return items;
});
const columns = ['40%', '50%', '10%'];
@@ -60,9 +60,11 @@ const ScheduleList = (/** @type Properties */ props) => {
'Actions',
),
),
- () => div(
- scheduleItems.val.map(item => ScheduleListItem(item, columns, getValue(props.permissions))),
- ),
+ () => scheduleItems.val?.length
+ ? div(
+ scheduleItems.val.map(item => ScheduleListItem(item, columns, getValue(props.permissions))),
+ )
+ : div({ class: 'mt-5 mb-3 ml-3 text-secondary' }, 'No schedules defined yet.'),
);
}
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index af9a25c7..b201bd8f 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -208,7 +208,7 @@ const TableGroupList = (props) => {
*/
const Toolbar = (permissions, connections, selectedConnection) => {
return div(
- { class: 'flex-row fx-align-flex-end mb-4' },
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' },
(getValue(connections) ?? [])?.length > 1
? Select({
testId: 'connection-select',
@@ -223,17 +223,29 @@ const Toolbar = (permissions, connections, selectedConnection) => {
onChange: (value) => emitEvent('ConnectionSelected', { payload: value }),
})
: undefined,
- span({ style: 'margin: 0 auto;' }),
- permissions.can_edit
- ? Button({
+ div(
+ { class: 'flex-row fx-gap-4' },
+ Button({
+ icon: 'today',
type: 'stroked',
- icon: 'add',
- label: 'Add Table Group',
- color: 'basic',
- style: 'background: var(--button-generic-background-color); width: unset;',
- onclick: () => emitEvent('AddTableGroupClicked', {}),
- })
- : '',
+ label: 'Profiling Schedules',
+ tooltip: 'Manage when profiling should run for table groups',
+ tooltipPosition: 'bottom',
+ width: 'fit-content',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('RunSchedulesClicked', {}),
+ }),
+ permissions.can_edit
+ ? Button({
+ type: 'stroked',
+ icon: 'add',
+ label: 'Add Table Group',
+ color: 'basic',
+ style: 'background: var(--button-generic-background-color); width: unset;',
+ onclick: () => emitEvent('AddTableGroupClicked', {}),
+ })
+ : '',
+ )
);
}
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index c9487fd8..4aba36ce 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -77,7 +77,7 @@ const TestSuites = (/** @type Properties */ props) => {
? div(
{ class: 'tg-test-suites'},
() => div(
- { class: 'tg-test-suites--toolbar flex-row fx-align-flex-end mb-4' },
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' },
Select({
label: 'Table Group',
value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null,
@@ -88,16 +88,29 @@ const TestSuites = (/** @type Properties */ props) => {
testId: 'table-group-filter',
onChange: (value) => emitEvent('FilterApplied', {payload: value}),
}),
- userCanEdit
- ? Button({
- icon: 'add',
+ div(
+ { class: 'flex-row fx-gap-4' },
+ Button({
+ icon: 'today',
type: 'stroked',
- label: 'Add Test Suite',
+ label: 'Test Run Schedules',
+ tooltip: 'Manage when test suites should run',
+ tooltipPosition: 'bottom',
width: 'fit-content',
- style: 'margin-left: auto; background: var(--dk-card-background);',
- onclick: () => emitEvent('AddTestSuiteClicked', {}),
- })
- : '',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('RunSchedulesClicked', {}),
+ }),
+ userCanEdit
+ ? Button({
+ icon: 'add',
+ type: 'stroked',
+ label: 'Add Test Suite',
+ width: 'fit-content',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('AddTestSuiteClicked', {}),
+ })
+ : '',
+ ),
),
() => div(
{ class: 'flex-column' },
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index 951ff129..a5c7bfa5 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -68,7 +68,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
st.button(
":material/today: Profiling Schedules",
- help="Manages when profiling should run for a given table group",
+ help="Manage when profiling should run for table groups",
on_click=partial(ProfilingScheduleDialog().open, project_code)
)
diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py
index 53cb9a07..aff424c7 100644
--- a/testgen/ui/views/table_groups.py
+++ b/testgen/ui/views/table_groups.py
@@ -15,6 +15,7 @@
from testgen.ui.services import user_session_service
from testgen.ui.session import session, temp_value
from testgen.ui.views.connections import FLAVOR_OPTIONS, format_connection
+from testgen.ui.views.profiling_runs import ProfilingScheduleDialog
PAGE_TITLE = "Table Groups"
@@ -56,6 +57,7 @@ def render(self, project_code: str, connection_id: str | None = None, **_kwargs)
]),
},
on_change_handlers={
+ "RunSchedulesClicked": lambda *_: ProfilingScheduleDialog().open(project_code),
"AddTableGroupClicked": partial(self.add_table_group_dialog, project_code),
"EditTableGroupClicked": partial(self.edit_table_group_dialog, project_code),
"DeleteTableGroupClicked": partial(self.delete_table_group_dialog, project_code),
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index 4a5484be..c3fe9913 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -81,7 +81,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
st.button(
":material/today: Test Run Schedules",
- help="Manages when a test suite should run.",
+ help="Manage when test suites should run",
on_click=partial(TestRunScheduleDialog().open, project_code)
)
diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py
index 524c74f5..13250493 100644
--- a/testgen/ui/views/test_suites.py
+++ b/testgen/ui/views/test_suites.py
@@ -18,6 +18,7 @@
from testgen.ui.session import session
from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog
from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog
+from testgen.ui.views.test_runs import TestRunScheduleDialog
from testgen.utils import format_field
PAGE_ICON = "rule"
@@ -96,6 +97,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
},
on_change_handlers={
"FilterApplied": on_test_suites_filtered,
+ "RunSchedulesClicked": lambda *_: TestRunScheduleDialog().open(project_code),
"AddTestSuiteClicked": lambda *_: add_test_suite_dialog(project_code, table_groups),
"ExportActionClicked": observability_export_dialog,
"EditActionClicked": partial(edit_test_suite_dialog, project_code, table_groups),
From 56d2d0d85ac8df4d80cbadb1661d9b60564d1fe5 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 10 Jul 2025 14:41:36 -0400
Subject: [PATCH 41/56] fix(tests): flag invalid tests and display Error
details in test results
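In run_test_parameter_validation.py, validation rows with an empty column name mark tests whose schema, table, or column could not be resolved; their test ids are split out and flagged with a dedicated message. A small worked example of the two comprehensions (data is illustrative):

    # (column_name, [test_ids]) rows as returned by the validation query
    test_columns = [("customers.id", [1, 2]), (None, [3]), ("", [4, 5])]

    invalid_tests = [test_ids for col, test_ids in test_columns if not col]
    invalid_tests = {item for sublist in invalid_tests for item in sublist}  # {3, 4, 5}
    test_columns = [item for item in test_columns if item[0]]  # [("customers.id", [1, 2])]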
---
testgen/commands/run_test_parameter_validation.py | 14 ++++++++++++--
testgen/ui/views/test_results.py | 5 +++++
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/testgen/commands/run_test_parameter_validation.py b/testgen/commands/run_test_parameter_validation.py
index b2b98936..71668bcd 100644
--- a/testgen/commands/run_test_parameter_validation.py
+++ b/testgen/commands/run_test_parameter_validation.py
@@ -29,6 +29,10 @@ def run_parameter_validation_queries(
strColumnList = clsExecute.GetTestValidationColumns(booClean)
test_columns, _ = RetrieveDBResultsToList("DKTG", strColumnList)
+ invalid_tests = [ test_ids for col, test_ids in test_columns if not col ]
+ invalid_tests = { item for sublist in invalid_tests for item in sublist }
+ test_columns = [ item for item in test_columns if item[0] ]
+
if not test_columns:
LOG.warning(f"No test columns are present to validate in Test Suite {strTestSuite}")
missing_columns = []
@@ -71,7 +75,7 @@ def run_parameter_validation_queries(
if missing_tables:
LOG.info("Missing tables: %s", ", ".join(missing_tables))
- if missing_columns or missing_tables:
+ if missing_columns or missing_tables or invalid_tests:
# Flag test_definitions tests with missing tables or columns
LOG.info("CurrentStep: Flagging Tests That Failed Validation")
@@ -86,7 +90,7 @@ def run_parameter_validation_queries(
tests_missing_columns[column_name].extend(test_ids)
clsExecute.flag_val = "D"
- clsExecute.test_ids = list(set(chain(*tests_missing_tables.values(), *tests_missing_columns.values())))
+ clsExecute.test_ids = list(set(chain(*tests_missing_tables.values(), *tests_missing_columns.values(), invalid_tests)))
strPrepFlagTests = clsExecute.PrepFlagTestsWithFailedValidation()
RunActionQueryList("DKTG", [strPrepFlagTests])
@@ -101,6 +105,12 @@ def run_parameter_validation_queries(
clsExecute.test_ids = test_ids
strFlagTests = clsExecute.FlagTestsWithFailedValidation()
RunActionQueryList("DKTG", [strFlagTests])
+
+ if invalid_tests:
+ clsExecute.message = "Invalid test: schema, table, or column not defined"
+ clsExecute.test_ids = invalid_tests
+ strFlagTests = clsExecute.FlagTestsWithFailedValidation()
+ RunActionQueryList("DKTG", [strFlagTests])
# Copy test results to DK DB, using temporary flagged D value to identify
LOG.info("CurrentStep: Saving error results for invalid tests")
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index d1897353..920e007f 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -96,6 +96,7 @@ def render(
"Failed",
"Warning",
"Passed",
+ "Error",
]
status = testgen.select(
options=status_options,
@@ -163,6 +164,8 @@ def render(
status = "'Warning'"
case "Passed":
status = "'Passed'"
+ case "Error":
+ status = "'Error'"
# Display main grid and retrieve selection
selected = show_result_detail(
@@ -498,6 +501,7 @@ def show_result_detail(
"measure_uom",
"result_status",
"action",
+ "result_message",
]
lst_show_headers = [
@@ -508,6 +512,7 @@ def show_result_detail(
"UOM",
"Status",
"Action",
+ "Details",
]
selected_rows = fm.render_grid_select(
From 773e771abd27b923294c04c6d28a8b860e08fdc8 Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Thu, 10 Jul 2025 15:41:09 -0400
Subject: [PATCH 42/56] fix: Do not generate Required Entity tests for empty
tables
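The auto-generation criterion previously read record_ct = value_ct, which also holds for an empty table (0 = 0), so a Required Entry test could be generated against a table with no rows. Expressed as a predicate (simplified restatement, not the actual SQL):

    def qualifies_for_required_test(record_ct: int, value_ct: int) -> bool:
        # Old criterion: record_ct == value_ct, trivially true when both are zero.
        # The added record_ct > 10 guard skips empty and very small tables.
        return record_ct == value_ct and record_ct > 10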
---
testgen/template/dbsetup/050_populate_new_schema_metadata.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
index fa50cab9..5d57acf9 100644
--- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
+++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
@@ -123,7 +123,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con
('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'),
('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, '(functional_data_type IN (''Attribute'', ''DateTime Stamp'', ''Phone'') OR functional_data_type ILIKE ''ID%'' OR functional_data_type ILIKE ''Period%'') AND fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''[\1]'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. The expected threshold is the number of records that fail to match the defined pattern.', 'Y'),
('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'),
- ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
+ ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct AND record_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'),
('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. If''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'),
('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10 AND functional_data_type NOT ILIKE ''Measurement%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'),
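The Unique_Pct row above measures drift as a Cohen's H difference between the baseline and current unique-value ratios, and its prevalence formula converts that difference into a likelihood with 2.0 * (1.0 - fn_normal_cdf(ABS(h) / 2.0)). A minimal Python sketch of the same calculation, assuming the standard arcsine form of Cohen's H and approximating the normal CDF with math.erf (the example ratios are made up):

import math

def normal_cdf(x: float) -> float:
    # Standard normal CDF via the error function
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

def cohens_h(p1: float, p2: float) -> float:
    # Effect size between two proportions: 0.2 small, 0.5 moderate, 0.8 large
    return abs(2.0 * math.asin(math.sqrt(p1)) - 2.0 * math.asin(math.sqrt(p2)))

# Hypothetical example: 62% unique at baseline vs. 48% unique now
h = cohens_h(0.62, 0.48)
likelihood = 2.0 * (1.0 - normal_cdf(h / 2.0))  # mirrors the prevalence formula in the row above
print(round(h, 3), round(likelihood, 3))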
From 2aba7f02d8c7ea7bbc6cea8da509fdd81ebf92b1 Mon Sep 17 00:00:00 2001
From: Luis
Date: Fri, 11 Jul 2025 15:45:55 -0400
Subject: [PATCH 43/56] feat: allow deleting profiling runs
---
.../frontend/js/pages/profiling_runs.js | 103 +++++++++++++++---
testgen/ui/queries/profiling_run_queries.py | 20 ++++
testgen/ui/views/profiling_runs.py | 78 ++++++++++++-
3 files changed, 185 insertions(+), 16 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index feee47de..1290e270 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -35,8 +35,9 @@ import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
import { formatTimestamp, formatDuration } from '../display_utils.js';
+import { Checkbox } from '../components/checkbox.js';
-const { div, span, i } = van.tags;
+const { div, i, span, strong } = van.tags;
const ProfilingRuns = (/** @type Properties */ props) => {
window.testgen.isPage = true;
@@ -49,40 +50,104 @@ const ProfilingRuns = (/** @type Properties */ props) => {
Streamlit.setFrameHeight(100 * items.length);
return items;
});
- const columns = ['20%', '20%', '20%', '30%', '10%'];
+ const columns = ['5%', '15%', '20%', '20%', '30%', '10%'];
const userCanRun = getValue(props.permissions)?.can_run ?? false;
+ const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+ const selectedRuns = {};
const tableId = 'profiling-runs-table';
resizeFrameHeightToElement(tableId);
+ const initializeSelectedStates = (items) => {
+ for (const profilingRun of items) {
+ if (selectedRuns[profilingRun.profiling_run_id] == undefined) {
+ selectedRuns[profilingRun.profiling_run_id] = van.state(false);
+ }
+ }
+ };
+
+ initializeSelectedStates(profilingRunItems.val);
+
+ van.derive(() => {
+ initializeSelectedStates(profilingRunItems.val);
+ });
+
return div(
{ class: 'table', id: tableId },
+ () => {
+ const items = profilingRunItems.val;
+ const selectedItems = items.filter(i => selectedRuns[i.profiling_run_id]?.val ?? false);
+ const someRunSelected = selectedItems.length > 0;
+ const tooltipText = !someRunSelected ? 'No runs selected' : undefined;
+
+ if (!userCanEdit) {
+ return '';
+ }
+
+ return div(
+ { class: 'flex-row fx-justify-content-flex-end pb-2' },
+ someRunSelected ? strong({class: 'mr-1'}, selectedItems.length) : '',
+ someRunSelected ? span({class: 'mr-4'}, 'runs selected') : '',
+ Button({
+ type: 'stroked',
+ icon: 'delete',
+ label: 'Delete Runs',
+ tooltip: tooltipText,
+ tooltipPosition: 'bottom-left',
+ disabled: !someRunSelected,
+ width: 'auto',
+ onclick: () => emitEvent('RunsDeleted', { payload: selectedItems.map(i => i.profiling_run_id) }),
+ }),
+ );
+ },
div(
{ class: 'table-header flex-row' },
+ () => {
+ const items = profilingRunItems.val;
+ const selectedItems = items.filter(i => selectedRuns[i.profiling_run_id]?.val ?? false);
+ const allSelected = selectedItems.length === items.length;
+ const partiallySelected = selectedItems.length > 0 && selectedItems.length < items.length;
+
+ if (!userCanEdit) {
+ return '';
+ }
+
+ return span(
+ { style: `flex: ${columns[0]}` },
+ userCanEdit
+ ? Checkbox({
+ checked: allSelected,
+ indeterminate: partiallySelected,
+ onChange: (checked) => items.forEach(item => selectedRuns[item.profiling_run_id].val = checked),
+ testId: 'select-all-profiling-run',
+ })
+ : '',
+ );
+ },
span(
- { style: `flex: ${columns[0]}` },
+ { style: `flex: ${columns[1]}` },
'Start Time | Table Group',
),
span(
- { style: `flex: ${columns[1]}` },
+ { style: `flex: ${columns[2]}` },
'Status | Duration',
),
span(
- { style: `flex: ${columns[2]}` },
+ { style: `flex: ${columns[3]}` },
'Schema',
),
span(
- { style: `flex: ${columns[3]}` },
+ { style: `flex: ${columns[4]}` },
'Hygiene Issues',
),
span(
- { style: `flex: ${columns[4]}` },
+ { style: `flex: ${columns[5]}` },
'Profiling Score',
),
),
() => div(
- profilingRunItems.val.map(item => ProfilingRunItem(item, columns, userCanRun)),
+ profilingRunItems.val.map(item => ProfilingRunItem(item, columns, selectedRuns[item.profiling_run_id], userCanRun, userCanEdit)),
),
);
}
@@ -90,12 +155,24 @@ const ProfilingRuns = (/** @type Properties */ props) => {
const ProfilingRunItem = (
/** @type ProfilingRun */ item,
/** @type string[] */ columns,
+    /** @type object */ selected,
/** @type boolean */ userCanRun,
+ /** @type boolean */ userCanEdit,
) => {
return div(
{ class: 'table-row flex-row', 'data-testid': 'profiling-run-item' },
+ userCanEdit
+ ? div(
+ { style: `flex: ${columns[0]}; font-size: 16px;` },
+ Checkbox({
+ checked: selected,
+ onChange: (checked) => selected.val = checked,
+ testId: 'select-profiling-run',
+ }),
+ )
+ : '',
div(
- { style: `flex: ${columns[0]}` },
+ { style: `flex: ${columns[1]}` },
div({'data-testid': 'profiling-run-item-starttime'}, formatTimestamp(item.start_time)),
div(
{ class: 'text-caption mt-1', 'data-testid': 'profiling-run-item-tablegroup' },
@@ -103,7 +180,7 @@ const ProfilingRunItem = (
),
),
div(
- { class: 'flex-row', style: `flex: ${columns[1]}` },
+ { class: 'flex-row', style: `flex: ${columns[2]}` },
div(
ProfilingRunStatus(item),
div(
@@ -119,7 +196,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { style: `flex: ${columns[2]}` },
+ { style: `flex: ${columns[3]}` },
div({'data-testid': 'profiling-run-item-schema'}, item.schema_name),
div(
{
@@ -138,7 +215,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { class: 'pr-3', style: `flex: ${columns[3]}` },
+ { class: 'pr-3', style: `flex: ${columns[4]}` },
item.anomaly_ct ? SummaryBar({
items: [
{ label: 'Definite', value: item.anomalies_definite_ct, color: 'red' },
@@ -160,7 +237,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { style: `flex: ${columns[4]}; font-size: 16px;` },
+ { style: `flex: ${columns[5]}; font-size: 16px;` },
item.dq_score_profiling ?? '--',
),
);
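The header checkbox in this component derives a checked/indeterminate state from the per-run selection map. A rough Python rendering of that derivation, only to make the intended tri-state behavior explicit (the run ids are hypothetical):

selected_runs = {"run-1": True, "run-2": False, "run-3": True}  # hypothetical selection states

selected_count = sum(1 for is_selected in selected_runs.values() if is_selected)
all_selected = selected_count > 0 and selected_count == len(selected_runs)
partially_selected = 0 < selected_count < len(selected_runs)

print(selected_count, all_selected, partially_selected)  # 2 False True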
diff --git a/testgen/ui/queries/profiling_run_queries.py b/testgen/ui/queries/profiling_run_queries.py
index a2bfa805..ea40f93d 100644
--- a/testgen/ui/queries/profiling_run_queries.py
+++ b/testgen/ui/queries/profiling_run_queries.py
@@ -2,6 +2,7 @@
import testgen.ui.services.database_service as db
from testgen.common import date_service
+from testgen.common.models import get_current_session
def update_status(profile_run_id: str, status: str) -> None:
@@ -25,3 +26,22 @@ def cancel_all_running() -> None:
SET status = 'Cancelled'
WHERE status = 'Running';
""")
+
+
+def cascade_delete_multiple_profiling_runs(profiling_run_ids: list[str]) -> None:
+ session = get_current_session()
+
+ if not profiling_run_ids:
+        raise ValueError("No profiling runs were specified.")
+
+ params = {f"id_{idx}": value for idx, value in enumerate(profiling_run_ids)}
+ param_keys = [f":{slot}" for slot in params.keys()]
+
+ with session.begin():
+ session.execute(f"DELETE FROM profile_pair_rules WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profile_anomaly_results WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profile_results WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profiling_runs WHERE id IN ({', '.join(param_keys)})", params=params)
+ session.commit()
+
+ st.cache_data.clear()
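cascade_delete_multiple_profiling_runs expands the id list into named placeholders instead of interpolating values into the SQL text. A minimal sketch of that expansion, independent of the session API (the table name and ids here are illustrative only):

profiling_run_ids = ["a1", "b2", "c3"]  # hypothetical ids

params = {f"id_{idx}": value for idx, value in enumerate(profiling_run_ids)}
placeholders = ", ".join(f":{key}" for key in params)
sql = f"DELETE FROM profiling_runs WHERE id IN ({placeholders})"

print(sql)     # DELETE FROM profiling_runs WHERE id IN (:id_0, :id_1, :id_2)
print(params)  # {'id_0': 'a1', 'id_1': 'b2', 'id_2': 'c3'}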
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index a5c7bfa5..a90de40c 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -1,3 +1,4 @@
+import logging
import typing
from functools import partial
@@ -8,17 +9,19 @@
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
import testgen.ui.services.query_service as dq
+from testgen.common.models import with_database_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets import testgen_component
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.queries import profiling_run_queries, project_queries
from testgen.ui.services import user_session_service
-from testgen.ui.session import session
+from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
from testgen.utils import friendly_score, to_int
+LOG = logging.getLogger("testgen")
FORM_DATA_WIDTH = 400
PAGE_SIZE = 50
PAGE_ICON = "data_thresholding"
@@ -97,9 +100,13 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
"items": paginated_df.to_json(orient="records"),
"permissions": {
"can_run": user_can_run,
+ "can_edit": user_can_run,
},
},
- event_handlers={ "RunCanceled": on_cancel_run }
+ event_handlers={
+ "RunCanceled": on_cancel_run,
+ "RunsDeleted": partial(on_delete_runs, project_code, table_group_id),
+ }
)
@@ -178,6 +185,60 @@ def on_cancel_run(profiling_run: pd.Series) -> None:
fm.reset_post_updates(str_message=f":{'green' if process_status else 'red'}[{process_message}]", as_toast=True)
+@st.dialog(title="Delete Profiling Runs")
+@with_database_session
+def on_delete_runs(project_code: str, table_group_id: str, profiling_run_ids: list[str]) -> None:
+ def on_delete_confirmed(*_args) -> None:
+ set_delete_confirmed(True)
+
+ message = f"Are you sure you want to delete the {len(profiling_run_ids)} selected profiling runs?"
+ constraint = {
+ "warning": "Any running processes will be canceled.",
+ "confirmation": "Yes, cancel and delete the profiling runs.",
+ }
+ if len(profiling_run_ids) == 1:
+ message = "Are you sure you want to delete the selected profiling run?"
+ constraint["confirmation"] = "Yes, cancel and delete the profiling run."
+
+ result, set_result = temp_value("profiling-runs:result-value", default=None)
+ delete_confirmed, set_delete_confirmed = temp_value("profiling-runs:confirm-delete", default=False)
+
+ testgen.testgen_component(
+ "confirm_dialog",
+ props={
+ "project_code": project_code,
+ "message": message,
+ "constraint": constraint,
+ "button_label": "Delete",
+ "button_color": "warn",
+ "result": result(),
+ },
+ on_change_handlers={
+ "ActionConfirmed": on_delete_confirmed,
+ },
+ )
+
+ if delete_confirmed():
+ try:
+ with st.spinner("Deleting runs ..."):
+ profiling_runs = get_db_profiling_runs(project_code, table_group_id, profiling_run_ids=profiling_run_ids)
+ for _, profiling_run in profiling_runs.iterrows():
+ profiling_run_id = profiling_run["profiling_run_id"]
+ if profiling_run["status"] == "Running":
+ process_status, process_message = process_service.kill_profile_run(to_int(profiling_run["process_id"]))
+ if process_status:
+ profiling_run_queries.update_status(profiling_run_id, "Cancelled")
+ profiling_run_queries.cascade_delete_multiple_profiling_runs(profiling_run_ids)
+ st.rerun()
+ except Exception:
+ LOG.exception("Failed to delete profiling runs")
+ set_result({
+ "success": False,
+                "message": "Unable to delete the selected profiling runs. Please try again.",
+ })
+ st.rerun(scope="fragment")
+
+
@st.cache_data(show_spinner=False)
def get_db_table_group_choices(project_code: str) -> pd.DataFrame:
schema = st.session_state["dbschema"]
@@ -185,9 +246,19 @@ def get_db_table_group_choices(project_code: str) -> pd.DataFrame:
@st.cache_data(show_spinner="Loading data ...")
-def get_db_profiling_runs(project_code: str, table_group_id: str | None = None) -> pd.DataFrame:
+def get_db_profiling_runs(
+ project_code: str,
+ table_group_id: str | None = None,
+ profiling_run_ids: list[str] | None = None,
+) -> pd.DataFrame:
schema = st.session_state["dbschema"]
table_group_condition = f" AND v_profiling_runs.table_groups_id = '{table_group_id}' " if table_group_id else ""
+
+    profiling_runs_condition = ""
+ if profiling_run_ids and len(profiling_run_ids) > 0:
+ profiling_run_ids_ = [f"'{run_id}'" for run_id in profiling_run_ids]
+        profiling_runs_condition = f" AND v_profiling_runs.profiling_run_id::VARCHAR IN ({', '.join(profiling_run_ids_)})"
+
sql = f"""
WITH profile_anomalies AS (
SELECT profile_anomaly_results.profile_run_id,
@@ -245,6 +316,7 @@ def get_db_profiling_runs(project_code: str, table_group_id: str | None = None)
LEFT JOIN profile_anomalies ON (v_profiling_runs.profiling_run_id = profile_anomalies.profile_run_id)
WHERE project_code = '{project_code}'
{table_group_condition}
+            {profiling_runs_condition}
ORDER BY start_time DESC;
"""
From c481d0da410985e1b151e71169122a0bd9bfbf9b Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 9 Jul 2025 11:34:28 -0400
Subject: [PATCH 44/56] misc: upgrade Streamlit from 1.44.1 to 1.46.1
---
pyproject.toml | 2 +-
testgen/ui/assets/scripts.js | 4 -
testgen/ui/assets/style.css | 92 +++++++++++--------
.../frontend/js/components/button.js | 4 -
4 files changed, 56 insertions(+), 46 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 179a1545..5ed0ceab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
"requests_extensions==1.1.3",
"numpy==1.26.4",
"pandas==2.1.4",
- "streamlit==1.44.1",
+ "streamlit==1.46.1",
"streamlit-extras==0.3.0",
"streamlit-aggrid==0.3.4.post3",
"plotly_express==0.4.1",
diff --git a/testgen/ui/assets/scripts.js b/testgen/ui/assets/scripts.js
index 45da923a..46e0aafb 100644
--- a/testgen/ui/assets/scripts.js
+++ b/testgen/ui/assets/scripts.js
@@ -2,10 +2,6 @@ import van from './static/js/van.min.js';
window.van = van;
-window.addEventListener('load', function() {
- removeElements([ 'header[data-testid="stHeader"]' ]);
-});
-
window.addEventListener('message', async function(event) {
if (event.data.type === 'TestgenCopyToClipboard') {
await copyToClipboard(event.data.text || '');
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index a3e703f4..3929be26 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -33,6 +33,8 @@ body {
--portal-background: white;
--portal-box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px;
--select-hover-background: rgb(240, 242, 246);
+
+ --app-background-color: #f8f9fa;
}
img.dk-logo-img {
@@ -40,22 +42,47 @@ img.dk-logo-img {
width: 100%;
}
-/* Streamlit header */
-header {
- display: none !important;
+/* Header */
+.stAppHeader {
+ width: 85px !important; /* allows clicking on the breadcrumbs */
+ left: calc(24px - 1rem) !important;
+ background: transparent !important;
+ min-height: unset !important;
+ overflow: hidden !important; /* hides the running man animation */
+}
+
+/* - with breadcrumbs */
+.stAppHeader:has(~ .stMain .st-key-testgen-breadcrumbs) {
+ height: 65px !important;
+ top: 5px !important;
+}
+
+/* - without breadcrumbs */
+.stAppHeader:not(:has(~ .stMain .st-key-testgen-breadcrumbs)) {
+ top: 7px !important;
+ height: 39.59px !important;
+}
+
+/* hide while sidebar expanded */
+.stApp:has(.stSidebar[aria-expanded="true"]) .stAppHeader {
+ display: none;
+}
+/* End Header */
+
+#stDecoration {
+ visibility: hidden;
}
-/* ... */
/* Sidebar */
[data-testid="stSidebarContent"] [data-testid="stSidebarHeader"] {
padding: 16px 20px;
}
-[data-testid="stSidebarHeader"] [data-testid="stLogo"] {
+[data-testid="stSidebarHeader"] .stLogo {
max-width: fit-content;
}
-section[data-testid="stSidebar"] {
+section.stSidebar {
width: 250px;
z-index: 999;
background-color: var(--sidebar-background-color);
@@ -68,30 +95,18 @@ section[data-testid="stSidebar"] {
/* */
/* Main content */
-div[data-testid="stAppViewContainer"] > :nth-child(2 of section) {
- background-color: #f8f9fa;
+.stMain {
+ background-color: var(--app-background-color);
}
-div[data-testid="stMainBlockContainer"] {
+.stMain > .stMainBlockContainer {
padding: 12px 24px 24px;
}
-div[data-testid="stVerticalBlock"] {
+.stVerticalBlock[data-testid="stVerticalBlock"] {
gap: 0.5rem;
}
-div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"]) div[data-testid="stSidebarCollapsedControl"] {
- top: 0.5rem;
- border-radius: 4px;
- background-color: var(--border-color);
- padding: 3px 0 0 8px;
-}
-
-div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"][aria-expanded="true"]) div[data-testid="stSidebarCollapsedControl"] {
- display: none;
-}
-/* */
-
/* Dialog - sets the width of all st.dialog */
/* There is no way to target "large" and "small" dialogs reliably */
div[data-testid="stDialog"] div[role="dialog"] {
@@ -203,11 +218,12 @@ button[title="Show password text"] {
}
/* ... */
-[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.bg-white) {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.bg-white),
+[data-testid="stVerticalBlockBorderWrapper"]:has(> .stVerticalBlock > .stElementContainer > div.stHtml > i.bg-white) {
background-color: var(--dk-card-background);
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) > div > [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-row) {
width: 100%;
flex-direction: row;
}
@@ -218,19 +234,19 @@ div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="st
max-height: 40px;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-start) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-start) {
justify-content: flex-start;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-end) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-end) {
justify-content: flex-end;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-center) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-center) {
justify-content: center;
}
-[data-testid="stVerticalBlock"]:has(> div.element-container > div.stHtml > i.no-flex-gap) {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.no-flex-gap) {
gap: unset;
}
@@ -296,8 +312,13 @@ Use as testgen.text("text", "extra_styles") */
transition: padding 0.3s;
}
-[data-testid="stSidebar"][aria-expanded="false"] ~ [data-testid="stMain"] .tg-header {
- padding-left: 80px;
+.st-key-testgen-breadcrumbs {
+ transition: padding 0.3s;
+}
+
+[data-testid="stSidebar"][aria-expanded="false"] ~ div > [data-testid="stMain"] .tg-header,
+[data-testid="stSidebar"][aria-expanded="false"] ~ div > [data-testid="stMain"] .st-key-testgen-breadcrumbs {
+ padding-left: 85px;
}
.tg-header--line {
@@ -321,7 +342,8 @@ Use as testgen.text("text", "extra_styles") */
}
.st-key-tg-header--help [data-testid="stPopover"] {
- width: auto;
+ display: flex;
+ justify-content: flex-end;
}
.st-key-tg-header--help button[data-testid="stPopoverButton"] {
@@ -472,20 +494,16 @@ div[data-testid="stPopoverBody"] [data-testid="stVerticalBlock"]:has(i.tg--expor
--portal-background: #14181f;
--portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px;
--select-hover-background: rgba(255, 255, 255, .32);
- }
- /* Main content */
- div[data-testid="stAppViewContainer"] > :nth-child(2 of section) {
- background-color: rgb(14, 17, 23);
+ --app-background-color: rgb(14, 17, 23);
}
- /* */
div[data-modal-container='true']::before {
background-color: rgba(100, 100, 100, 0.5) !important;
}
div[data-modal-container='true'] > div:first-child > div:first-child {
- background-color: rgb(14, 17, 23) !important;
+ background-color: var(--app-background-color) !important;
}
}
/* ... */
diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js
index 08b32393..d90b0034 100644
--- a/testgen/ui/components/frontend/js/components/button.js
+++ b/testgen/ui/components/frontend/js/components/button.js
@@ -214,10 +214,6 @@ button.tg-button.tg-warn-button.tg-stroked-button {
color: var(--button-warn-stroked-text-color);
background: var(--button-warn-stroked-background);
}
-
-button.tg-button.tg-warn-button[disabled] {
- color: rgba(255, 255, 255, .5) !important;
-}
/* ... */
`);
From c770e152f2decc8dd253b0ffd805bde2da8fd6f4 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 9 Jul 2025 17:17:50 -0400
Subject: [PATCH 45/56] feat: allow filtering by partial column name
---
testgen/ui/assets/style.css | 4 +
testgen/ui/components/widgets/select.py | 56 ++--
testgen/ui/queries/profiling_queries.py | 26 +-
testgen/ui/queries/test_definition_queries.py | 130 +++++-----
testgen/ui/services/test_results_service.py | 202 ++++++++-------
testgen/ui/views/hygiene_issues.py | 245 ++++++++++--------
testgen/ui/views/profiling_results.py | 14 +-
testgen/ui/views/test_definitions.py | 3 +-
testgen/ui/views/test_results.py | 17 +-
9 files changed, 393 insertions(+), 304 deletions(-)
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 3929be26..420f9605 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -67,6 +67,10 @@ img.dk-logo-img {
.stApp:has(.stSidebar[aria-expanded="true"]) .stAppHeader {
display: none;
}
+
+.stStatusWidget {
+ display: none !important;
+}
/* End Header */
#stDecoration {
diff --git a/testgen/ui/components/widgets/select.py b/testgen/ui/components/widgets/select.py
index 31fa748c..23d65d96 100644
--- a/testgen/ui/components/widgets/select.py
+++ b/testgen/ui/components/widgets/select.py
@@ -1,3 +1,5 @@
+import re
+
import pandas as pd
import streamlit as st
from streamlit_extras.no_default_selectbox import selectbox
@@ -5,6 +7,9 @@
from testgen.ui.navigation.router import Router
EMPTY_VALUE = "---"
+CUSTOM_VALUE_TEMPLATE = "Custom: {value}"
+CUSTOM_VALUE_PATTERN = r"Custom: (.+)"
+
def select(
label: str,
@@ -15,42 +20,61 @@ def select(
required: bool = False,
bind_to_query: str | None = None,
bind_empty_value: bool = False,
+ accept_new_options: bool = False,
+ custom_values_wrap: str | None = "%{}%",
**kwargs,
):
- kwargs = {**kwargs}
+ kwargs = {**kwargs, "accept_new_options": accept_new_options}
kwargs["label"] = label
+ kwargs["index"] = None
+
+ option_values = options
+ option_display_labels = options
if isinstance(options, pd.DataFrame):
value_column = value_column or options.columns[0]
display_column = display_column or value_column
- kwargs["options"] = options[display_column]
- if default_value in options[value_column].values:
- kwargs["index"] = int(options[options[value_column] == default_value].index[0]) + (0 if required else 1)
- else:
- kwargs["options"] = options
- if default_value in options:
- kwargs["index"] = options.index(default_value) + (0 if required else 1)
- elif default_value == EMPTY_VALUE and not required:
- kwargs["index"] = 0
+
+ option_values = options[value_column].values.tolist()
+ option_display_labels = options[display_column].values.tolist()
+
+ kwargs["options"] = [*option_display_labels]
+ if default_value in option_values:
+ kwargs["index"] = option_values.index(default_value) + (0 if required else 1)
+ elif default_value == EMPTY_VALUE and not required:
+ kwargs["index"] = 0
+ elif default_value and default_value != EMPTY_VALUE and accept_new_options:
+ kwargs["options"].append(CUSTOM_VALUE_TEMPLATE.format(value=default_value))
+ kwargs["index"] = len(kwargs["options"])
if bind_to_query:
kwargs["key"] = kwargs.get("key", f"testgen_select_{bind_to_query}")
- if default_value is not None and kwargs.get("index") is None:
- Router().set_query_params({ bind_to_query: None }) # Unset the query params if the current value is not valid
+
+ # Unset the query params if the current value is not valid and new options are not allowed
+ if default_value is not None and kwargs.get("index") is None and not accept_new_options:
+ Router().set_query_params({ bind_to_query: None })
def update_query_params():
query_value = st.session_state[kwargs["key"]]
if not required and query_value == EMPTY_VALUE and not bind_empty_value:
query_value = None
- elif isinstance(options, pd.DataFrame):
- query_value = options.loc[options[display_column] == query_value, value_column].iloc[0]
+ elif query_value in option_display_labels:
+ query_value = option_values[option_display_labels.index(query_value)]
+ # elif isinstance(options, pd.DataFrame) and default_value in options[value_column].values:
+ # query_value = options.loc[options[display_column] == query_value, value_column].iloc[0]
Router().set_query_params({ bind_to_query: query_value })
kwargs["on_change"] = update_query_params
selected = st.selectbox(**kwargs) if required else selectbox(**kwargs)
- if selected and isinstance(options, pd.DataFrame):
- return options.loc[options[display_column] == selected, value_column].iloc[0]
+ if selected:
+ if selected in option_display_labels:
+ selected = option_values[option_display_labels.index(selected)]
+
+ if accept_new_options and (match := re.match(CUSTOM_VALUE_PATTERN, selected)):
+ selected = match.group(1)
+ if custom_values_wrap:
+ selected = custom_values_wrap.format(selected)
return selected
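With accept_new_options enabled, a value typed into the select is shown as "Custom: <value>" and, on the way out, unwrapped and turned into an ILIKE pattern through custom_values_wrap. A small sketch of that round trip with the widget machinery left out (the typed value is made up):

import re

CUSTOM_VALUE_TEMPLATE = "Custom: {value}"
CUSTOM_VALUE_PATTERN = r"Custom: (.+)"
custom_values_wrap = "%{}%"

typed_value = "cust"  # hypothetical user input
displayed = CUSTOM_VALUE_TEMPLATE.format(value=typed_value)

selected = displayed
if match := re.match(CUSTOM_VALUE_PATTERN, selected):
    selected = custom_values_wrap.format(match.group(1))

print(displayed)  # Custom: cust
print(selected)   # %cust%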
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 71139317..db755ab9 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -94,14 +94,20 @@ def get_run_by_id(profile_run_id: str) -> pd.Series:
@st.cache_data(show_spinner=False)
-def get_profiling_results(profiling_run_id: str, table_name: str = "%%", column_name: str = "%%", sorting_columns = None):
+def get_profiling_results(profiling_run_id: str, table_name: str | None = None, column_name: str | None = None, sorting_columns = None):
+ db_session = get_current_session()
+ params = {
+ "profiling_run_id": profiling_run_id,
+ "table_name": table_name if table_name else "%%",
+ "column_name": column_name if column_name else "%%",
+ }
+
order_by = ""
if sorting_columns is None:
order_by = "ORDER BY schema_name, table_name, position"
elif len(sorting_columns):
order_by = "ORDER BY " + ", ".join(" ".join(col) for col in sorting_columns)
- schema: str = st.session_state["dbschema"]
query = f"""
SELECT
id::VARCHAR,
@@ -125,18 +131,22 @@ def get_profiling_results(profiling_run_id: str, table_name: str = "%%", column_
functional_table_type AS semantic_table_type,
CASE WHEN EXISTS(
SELECT 1
- FROM {schema}.profile_anomaly_results
+ FROM profile_anomaly_results
WHERE profile_run_id = profile_results.profile_run_id
AND table_name = profile_results.table_name
AND column_name = profile_results.column_name
) THEN 'Yes' END AS hygiene_issues
- FROM {schema}.profile_results
- WHERE profile_run_id = '{profiling_run_id}'
- AND table_name ILIKE '{table_name}'
- AND column_name ILIKE '{column_name}'
+ FROM profile_results
+ WHERE profile_run_id = :profiling_run_id
+ AND table_name ILIKE :table_name
+ AND column_name ILIKE :column_name
{order_by};
"""
- return db.retrieve_data(query)
+
+ results = db_session.execute(query, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ return pd.DataFrame(list(results), columns=columns)
@st.cache_data(show_spinner=False)
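The rewritten queries filter column_name with ILIKE and bind the pattern as a named parameter, falling back to '%%' so an empty filter matches every row. A rough Python equivalent of the matching itself, only to illustrate which columns a partial filter like '%cust%' picks up (the column names are made up):

columns = ["Customer_ID", "cust_email", "order_total"]  # hypothetical column names

def ilike_contains(value: str, needle: str) -> bool:
    # Rough equivalent of SQL: value ILIKE '%' || needle || '%'
    return needle.lower() in value.lower()

print([name for name in columns if ilike_contains(name, "cust")])
# ['Customer_ID', 'cust_email']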
diff --git a/testgen/ui/queries/test_definition_queries.py b/testgen/ui/queries/test_definition_queries.py
index a47d7dcf..161da4de 100644
--- a/testgen/ui/queries/test_definition_queries.py
+++ b/testgen/ui/queries/test_definition_queries.py
@@ -1,6 +1,8 @@
+import pandas as pd
import streamlit as st
import testgen.ui.services.database_service as db
+from testgen.common.models import get_current_session, with_database_session
def update_attribute(schema, test_definition_ids, attribute, value):
@@ -19,73 +21,83 @@ def update_attribute(schema, test_definition_ids, attribute, value):
@st.cache_data(show_spinner=False)
-def get_test_definitions(schema, project_code, test_suite, table_name, column_name, test_definition_ids):
- if table_name:
- table_condition = f" AND d.table_name = '{table_name}'"
- else:
- table_condition = ""
- if column_name:
- column_condition = f" AND d.column_name = '{column_name}'"
- else:
- column_condition = ""
- sql = f"""
- SELECT
- d.schema_name, d.table_name, d.column_name, t.test_name_short, t.test_name_long,
- d.id::VARCHAR(50),
- s.project_code, d.table_groups_id::VARCHAR(50), s.test_suite, d.test_suite_id::VARCHAR,
- d.test_type, d.cat_test_id::VARCHAR(50),
- d.test_active,
- CASE WHEN d.test_active = 'Y' THEN 'Yes' ELSE 'No' END as test_active_display,
- d.lock_refresh,
- CASE WHEN d.lock_refresh = 'Y' THEN 'Yes' ELSE 'No' END as lock_refresh_display,
- t.test_scope,
- d.test_description,
- d.profiling_as_of_date,
- d.last_manual_update,
- d.severity, COALESCE(d.severity, s.severity, t.default_severity) as urgency,
- d.export_to_observability as export_to_observability_raw,
- CASE
- WHEN d.export_to_observability = 'Y' THEN 'Yes'
- WHEN d.export_to_observability = 'N' THEN 'No'
- WHEN d.export_to_observability IS NULL AND s.export_to_observability = 'Y' THEN 'Inherited (Yes)'
- ELSE 'Inherited (No)'
- END as export_to_observability,
- -- test_action,
- d.threshold_value, COALESCE(t.measure_uom_description, t.measure_uom) as export_uom,
- d.baseline_ct, d.baseline_unique_ct, d.baseline_value,
- d.baseline_value_ct, d.baseline_sum, d.baseline_avg, d.baseline_sd,
- d.lower_tolerance, d.upper_tolerance,
- d.subset_condition,
- d.groupby_names, d.having_condition, d.window_date_column, d.window_days,
- d.match_schema_name, d.match_table_name, d.match_column_names,
- d.match_subset_condition, d.match_groupby_names, d.match_having_condition,
- d.skip_errors, d.custom_query,
- COALESCE(d.test_description, t.test_description) as final_test_description,
- t.default_parm_columns, t.selection_criteria,
- d.profile_run_id::VARCHAR(50), d.test_action, d.test_definition_status,
- d.watch_level, d.check_result, d.last_auto_gen_date,
- d.test_mode
- FROM {schema}.test_definitions d
- INNER JOIN {schema}.test_types t ON (d.test_type = t.test_type)
- INNER JOIN {schema}.test_suites s ON (d.test_suite_id = s.id)
- WHERE True
- """
+@with_database_session
+def get_test_definitions(_, project_code, test_suite, table_name, column_name, test_definition_ids: list[str] | None):
+ db_session = get_current_session()
+ params = {}
+ order_by = "ORDER BY d.schema_name, d.table_name, d.column_name, d.test_type"
+ filters = ""
if project_code:
- sql += f""" AND s.project_code = '{project_code}'
- """
+ filters += " AND s.project_code = :project_code"
+ params["project_code"] = project_code
if test_suite:
- sql += f""" AND s.test_suite = '{test_suite}' {table_condition} {column_condition}
- """
+ filters += " AND s.test_suite = :test_suite"
+ params["test_suite"] = test_suite
+
if test_definition_ids:
- sql += f""" AND d.id in ({"'" + "','".join(test_definition_ids) + "'"})
- """
+        test_definition_params = {f"test_definition_id_{idx}": value for idx, value in enumerate(test_definition_ids)}
+ filters += f" AND d.id IN ({', '.join([f':{p}' for p in test_definition_params.keys()])})"
+ params.update(test_definition_params)
+
+ if table_name:
+ filters += " AND d.table_name = :table_name"
+ params["table_name"] = table_name
+
+ if column_name:
+ filters += " AND d.column_name ILIKE :column_name"
+ params["column_name"] = column_name
- sql += """ORDER BY d.schema_name, d.table_name, d.column_name, d.test_type;
+ sql = f"""
+ SELECT
+ d.schema_name, d.table_name, d.column_name, t.test_name_short, t.test_name_long,
+ d.id::VARCHAR(50),
+ s.project_code, d.table_groups_id::VARCHAR(50), s.test_suite, d.test_suite_id::VARCHAR,
+ d.test_type, d.cat_test_id::VARCHAR(50),
+ d.test_active,
+ CASE WHEN d.test_active = 'Y' THEN 'Yes' ELSE 'No' END as test_active_display,
+ d.lock_refresh,
+ CASE WHEN d.lock_refresh = 'Y' THEN 'Yes' ELSE 'No' END as lock_refresh_display,
+ t.test_scope,
+ d.test_description,
+ d.profiling_as_of_date,
+ d.last_manual_update,
+ d.severity, COALESCE(d.severity, s.severity, t.default_severity) as urgency,
+ d.export_to_observability as export_to_observability_raw,
+ CASE
+ WHEN d.export_to_observability = 'Y' THEN 'Yes'
+ WHEN d.export_to_observability = 'N' THEN 'No'
+ WHEN d.export_to_observability IS NULL AND s.export_to_observability = 'Y' THEN 'Inherited (Yes)'
+ ELSE 'Inherited (No)'
+ END as export_to_observability,
+ -- test_action,
+ d.threshold_value, COALESCE(t.measure_uom_description, t.measure_uom) as export_uom,
+ d.baseline_ct, d.baseline_unique_ct, d.baseline_value,
+ d.baseline_value_ct, d.baseline_sum, d.baseline_avg, d.baseline_sd,
+ d.lower_tolerance, d.upper_tolerance,
+ d.subset_condition,
+ d.groupby_names, d.having_condition, d.window_date_column, d.window_days,
+ d.match_schema_name, d.match_table_name, d.match_column_names,
+ d.match_subset_condition, d.match_groupby_names, d.match_having_condition,
+ d.skip_errors, d.custom_query,
+ COALESCE(d.test_description, t.test_description) as final_test_description,
+ t.default_parm_columns, t.selection_criteria,
+ d.profile_run_id::VARCHAR(50), d.test_action, d.test_definition_status,
+ d.watch_level, d.check_result, d.last_auto_gen_date,
+ d.test_mode
+ FROM test_definitions d
+ INNER JOIN test_types t ON (d.test_type = t.test_type)
+ INNER JOIN test_suites s ON (d.test_suite_id = s.id)
+ WHERE True
+ {filters}
+ {order_by}
"""
- return db.retrieve_data(sql)
+ results = db_session.execute(sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ return pd.DataFrame(list(results), columns=columns)
def update(schema, test_definition):
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index d2e7440a..2cdf327d 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -1,124 +1,138 @@
import pandas as pd
from testgen.common import ConcatColumnList
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.read_file import replace_templated_functions
from testgen.ui.services import database_service as db
from testgen.ui.services.string_service import empty_if_null
from testgen.ui.services.test_definition_service import get_test_definition
+@with_database_session
def get_test_results(
- schema: str,
+ _: str,
run_id: str,
- test_status: str | None = None,
+ test_status: str | list[str] | None = None,
test_type_id: str | None = None,
table_name: str | None = None,
column_name: str | None = None,
sorting_columns: list[str] | None = None,
) -> pd.DataFrame:
# First visible row first, so multi-select checkbox will render
+ db_session = get_current_session()
+ params = {"run_id": run_id}
+
order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else ""
filters = ""
if test_status:
- filters += f" AND r.result_status IN ({test_status})"
+ if isinstance(test_status, str):
+ test_status = [status.strip() for status in test_status.split(",")]
+ test_status_params = {f"test_status_{idx}": status for idx, status in enumerate(test_status)}
+
+ filters += f" AND r.result_status IN ({', '.join([f':{p}' for p in test_status_params.keys()])})"
+ params.update(test_status_params)
if test_type_id:
- filters += f" AND r.test_type = '{test_type_id}'"
+ filters += " AND r.test_type = :test_type_id"
+ params["test_type_id"] = test_type_id
if table_name:
- filters += f" AND r.table_name = '{table_name}'"
+ filters += " AND r.table_name = :table_name"
+ params["table_name"] = table_name
if column_name:
- filters += f" AND r.column_names = '{column_name}'"
+ filters += " AND r.column_names ILIKE :column_name"
+ params["column_name"] = column_name
sql = f"""
- WITH run_results
- AS (SELECT *
- FROM {schema}.test_results r
- WHERE
- r.test_run_id = '{run_id}'
- {filters}
- )
- SELECT r.table_name,
- p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor,
- tt.dq_dimension, tt.test_scope,
- r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id,
- tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description,
- c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status,
- CASE
- WHEN r.result_code <> 1 THEN r.disposition
- ELSE 'Passed'
- END as disposition,
- NULL::VARCHAR(1) as action,
- r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity,
- r.result_code as passed_ct,
- (1 - r.result_code)::INTEGER as exception_ct,
- CASE
- WHEN result_status = 'Warning'
- AND result_message NOT ILIKE 'Inactivated%%' THEN 1
- END::INTEGER as warning_ct,
- CASE
- WHEN result_status = 'Failed'
- AND result_message NOT ILIKE 'Inactivated%%' THEN 1
- END::INTEGER as failed_ct,
- CASE
- WHEN result_message ILIKE 'Inactivated%%' THEN 1
- END as execution_error_ct,
- p.project_code, r.table_groups_id::VARCHAR,
- r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR,
- c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR,
- r.test_definition_id::VARCHAR as test_definition_id_runtime,
- CASE
- WHEN r.auto_gen = TRUE THEN d.id
- ELSE r.test_definition_id
- END::VARCHAR as test_definition_id_current,
- r.auto_gen,
-
- -- These are used in the PDF report
- tt.threshold_description, tt.usage_notes, r.test_time,
- dcc.description as column_description,
- COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
- COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
- COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
- COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
- COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
- COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
- COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
- COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
- COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
-
- FROM run_results r
- INNER JOIN {schema}.test_types tt
- ON (r.test_type = tt.test_type)
- LEFT JOIN {schema}.test_definitions rd
- ON (r.test_definition_id = rd.id)
- LEFT JOIN {schema}.test_definitions d
- ON (r.test_suite_id = d.test_suite_id
- AND r.table_name = d.table_name
- AND COALESCE(r.column_names, 'N/A') = COALESCE(d.column_name, 'N/A')
- AND r.test_type = d.test_type
- AND r.auto_gen = TRUE
- AND d.last_auto_gen_date IS NOT NULL)
- INNER JOIN {schema}.test_suites ts
- ON r.test_suite_id = ts.id
- INNER JOIN {schema}.projects p
- ON (ts.project_code = p.project_code)
- INNER JOIN {schema}.table_groups tg
- ON (ts.table_groups_id = tg.id)
- INNER JOIN {schema}.connections cn
- ON (tg.connection_id = cn.connection_id)
- LEFT JOIN {schema}.cat_test_conditions c
- ON (cn.sql_flavor = c.sql_flavor
- AND r.test_type = c.test_type)
- LEFT JOIN {schema}.data_column_chars dcc
- ON (tg.id = dcc.table_groups_id
- AND r.schema_name = dcc.schema_name
- AND r.table_name = dcc.table_name
- AND r.column_names = dcc.column_name)
- LEFT JOIN {schema}.data_table_chars dtc
- ON dcc.table_id = dtc.table_id
- {order_by} ;
+ WITH run_results AS (
+ SELECT *
+ FROM test_results r
+ WHERE r.test_run_id = :run_id
+ {filters}
+ )
+ SELECT r.table_name,
+ p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor,
+ tt.dq_dimension, tt.test_scope,
+ r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id,
+ tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description,
+ c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status,
+ CASE
+ WHEN r.result_code <> 1 THEN r.disposition
+ ELSE 'Passed'
+ END as disposition,
+ NULL::VARCHAR(1) as action,
+ r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity,
+ r.result_code as passed_ct,
+ (1 - r.result_code)::INTEGER as exception_ct,
+ CASE
+ WHEN result_status = 'Warning'
+ AND result_message NOT ILIKE 'Inactivated%%' THEN 1
+ END::INTEGER as warning_ct,
+ CASE
+ WHEN result_status = 'Failed'
+ AND result_message NOT ILIKE 'Inactivated%%' THEN 1
+ END::INTEGER as failed_ct,
+ CASE
+ WHEN result_message ILIKE 'Inactivated%%' THEN 1
+ END as execution_error_ct,
+ p.project_code, r.table_groups_id::VARCHAR,
+ r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR,
+ c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR,
+ r.test_definition_id::VARCHAR as test_definition_id_runtime,
+ CASE
+ WHEN r.auto_gen = TRUE THEN d.id
+ ELSE r.test_definition_id
+ END::VARCHAR as test_definition_id_current,
+ r.auto_gen,
+
+ -- These are used in the PDF report
+ tt.threshold_description, tt.usage_notes, r.test_time,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
+
+ FROM run_results r
+ INNER JOIN test_types tt
+ ON (r.test_type = tt.test_type)
+ LEFT JOIN test_definitions rd
+ ON (r.test_definition_id = rd.id)
+ LEFT JOIN test_definitions d
+ ON (r.test_suite_id = d.test_suite_id
+ AND r.table_name = d.table_name
+ AND COALESCE(r.column_names, 'N/A') = COALESCE(d.column_name, 'N/A')
+ AND r.test_type = d.test_type
+ AND r.auto_gen = TRUE
+ AND d.last_auto_gen_date IS NOT NULL)
+ INNER JOIN test_suites ts
+ ON r.test_suite_id = ts.id
+ INNER JOIN projects p
+ ON (ts.project_code = p.project_code)
+ INNER JOIN table_groups tg
+ ON (ts.table_groups_id = tg.id)
+ INNER JOIN connections cn
+ ON (tg.connection_id = cn.connection_id)
+ LEFT JOIN cat_test_conditions c
+ ON (cn.sql_flavor = c.sql_flavor
+ AND r.test_type = c.test_type)
+ LEFT JOIN data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_names = dcc.column_name)
+ LEFT JOIN data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
+ {order_by}
"""
- df = db.retrieve_data(sql)
- # Clean Up
+ results = db_session.execute(sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ df = pd.DataFrame(list(results), columns=columns)
df["test_date"] = pd.to_datetime(df["test_date"])
return df
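get_test_results now accepts test_status as either a list or the old comma-separated string and expands it into one named parameter per status. A minimal sketch of that normalization step (the helper name is not part of the codebase):

def build_status_filter(test_status):
    # Accept "Failed, Warning" as well as ["Failed", "Warning"]
    if isinstance(test_status, str):
        test_status = [status.strip() for status in test_status.split(",")]
    params = {f"test_status_{idx}": status for idx, status in enumerate(test_status)}
    clause = f" AND r.result_status IN ({', '.join(f':{key}' for key in params)})"
    return clause, params

clause, params = build_status_filter("Failed, Warning")
print(clause)  #  AND r.result_status IN (:test_status_0, :test_status_1)
print(params)  # {'test_status_0': 'Failed', 'test_status_1': 'Warning'}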
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 7f987bd9..19f61378 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -12,6 +12,7 @@
from testgen.commands.run_rollup_scores import run_profile_rollup_scoring_queries
from testgen.common import date_service
from testgen.common.mixpanel_service import MixpanelService
+from testgen.common.models import get_current_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
@@ -114,6 +115,7 @@ def render(
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with sort_column:
@@ -143,72 +145,72 @@ def render(
action_map = df_action.set_index("id")["action"].to_dict()
df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"])
- if not df_pa.empty:
- summaries = get_profiling_anomaly_summary(run_id)
- others_summary = [summary for summary in summaries if summary.get("type") != "PII"]
- with others_summary_column:
+ summaries = get_profiling_anomaly_summary(run_id)
+ others_summary = [summary for summary in summaries if summary.get("type") != "PII"]
+ with others_summary_column:
+ testgen.summary_bar(
+ items=others_summary,
+ label="Hygiene Issues",
+ height=20,
+ width=400,
+ )
+
+ anomalies_pii_summary = [summary for summary in summaries if summary.get("type") == "PII"]
+ if anomalies_pii_summary:
+ with pii_summary_column:
testgen.summary_bar(
- items=others_summary,
- label="Hygiene Issues",
+ items=anomalies_pii_summary,
+ label="Potential PII",
height=20,
width=400,
)
- anomalies_pii_summary = [summary for summary in summaries if summary.get("type") == "PII"]
- if anomalies_pii_summary:
- with pii_summary_column:
- testgen.summary_bar(
- items=anomalies_pii_summary,
- label="Potential PII",
- height=20,
- width=400,
- )
+ with score_column:
+ render_score(run_df["project_code"], run_id)
+
+ lst_show_columns = [
+ "table_name",
+ "column_name",
+ "issue_likelihood",
+ "action",
+ "anomaly_name",
+ "detail",
+ ]
+
+ # Show main grid and retrieve selections
+ selected = fm.render_grid_select(
+ df_pa,
+ lst_show_columns,
+ int_height=400,
+ do_multi_select=do_multi_select,
+ bind_to_query_name="selected",
+ bind_to_query_prop="id",
+ )
- with score_column:
- render_score(run_df["project_code"], run_id)
+ popover_container = export_button_column.empty()
- lst_show_columns = [
- "table_name",
- "column_name",
- "issue_likelihood",
- "action",
- "anomaly_name",
- "detail",
- ]
+ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ flex_row_end()
+ st.button(label="Export", icon=":material/download:", disabled=True)
- # Show main grid and retrieve selections
- selected = fm.render_grid_select(
- df_pa,
- lst_show_columns,
- int_height=400,
- do_multi_select=do_multi_select,
- bind_to_query_name="selected",
- bind_to_query_prop="id",
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(run_df["table_groups_name"], run_date, run_id, data),
)
- popover_container = export_button_column.empty()
-
- def open_download_dialog(data: pd.DataFrame | None = None) -> None:
- # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
- with popover_container.container():
- flex_row_end()
- st.button(label="Export", icon=":material/download:", disabled=True)
-
- download_dialog(
- dialog_title="Download Excel Report",
- file_content_func=get_excel_report_data,
- args=(run_df["table_groups_name"], run_date, run_id, data),
- )
-
- with popover_container.container(key="tg--export-popover"):
- flex_row_end()
- with st.popover(label="Export", icon=":material/download:", help="Download hygiene issues to Excel"):
- css_class("tg--export-wrapper")
- st.button(label="All issues", type="tertiary", on_click=open_download_dialog)
- st.button(label="Filtered issues", type="tertiary", on_click=partial(open_download_dialog, df_pa))
- if selected:
- st.button(label="Selected issues", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected)))
+ with popover_container.container(key="tg--export-popover"):
+ flex_row_end()
+ with st.popover(label="Export", icon=":material/download:", help="Download hygiene issues to Excel"):
+ css_class("tg--export-wrapper")
+ st.button(label="All issues", type="tertiary", on_click=open_download_dialog)
+ st.button(label="Filtered issues", type="tertiary", on_click=partial(open_download_dialog, df_pa))
+ if selected:
+ st.button(label="Selected issues", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected)))
+ if not df_pa.empty:
if selected:
# Always show details for last selected row
selected_row = selected[len(selected) - 1]
@@ -363,78 +365,97 @@ def get_profiling_anomalies(
column_name: str | None = None,
sorting_columns: list[str] | None = None,
):
- schema: str = st.session_state["dbschema"]
+ db_session = get_current_session()
criteria = ""
order_by = ""
+ params = {"profile_run_id": profile_run_id}
if likelihood:
- criteria += f" AND t.issue_likelihood = '{likelihood}'"
+ criteria += " AND t.issue_likelihood = :likelihood"
+ params["likelihood"] = likelihood
if issue_type_id:
- criteria += f" AND t.id = '{issue_type_id}'"
+ criteria += " AND t.id = :issue_type_id"
+ params["issue_type_id"] = issue_type_id
if table_name:
- criteria += f" AND r.table_name = '{table_name}'"
+ criteria += " AND r.table_name = :table_name"
+ params["table_name"] = table_name
if column_name:
- criteria += f" AND r.column_name = '{column_name}'"
+ criteria += " AND r.column_name ILIKE :column_name"
+ params["column_name"] = column_name
if sorting_columns:
order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns))
    # Define the query -- first visible column must be first, because it will hold the multi-select box
str_sql = f"""
- SELECT r.table_name, r.column_name, r.schema_name,
- r.column_type,t.anomaly_name, t.issue_likelihood,
- r.disposition, null as action,
- CASE
- WHEN t.issue_likelihood = 'Possible' THEN 'Possible: speculative test that often identifies problems'
- WHEN t.issue_likelihood = 'Likely' THEN 'Likely: typically indicates a data problem'
- WHEN t.issue_likelihood = 'Definite' THEN 'Definite: indicates a highly-likely data problem'
- WHEN t.issue_likelihood = 'Potential PII'
- THEN 'Potential PII: may require privacy policies, standards and procedures for access, storage and transmission.'
- END AS likelihood_explanation,
- CASE
- WHEN t.issue_likelihood = 'Potential PII' THEN 1
- WHEN t.issue_likelihood = 'Possible' THEN 2
- WHEN t.issue_likelihood = 'Likely' THEN 3
- WHEN t.issue_likelihood = 'Definite' THEN 4
- END AS likelihood_order,
- t.anomaly_description, r.detail, t.suggested_action,
- r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
- tg.table_groups_name,
-
- -- These are used in the PDF report
- dcc.functional_data_type,
- dcc.description as column_description,
- COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
- COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
- COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
- COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
- COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
- COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
- COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
- COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
- COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
-
- FROM {schema}.profile_anomaly_results r
- INNER JOIN {schema}.profile_anomaly_types t
- ON r.anomaly_id = t.id
- INNER JOIN {schema}.profiling_runs p
- ON r.profile_run_id = p.id
- INNER JOIN {schema}.table_groups tg
- ON r.table_groups_id = tg.id
- LEFT JOIN {schema}.data_column_chars dcc
- ON (tg.id = dcc.table_groups_id
- AND r.schema_name = dcc.schema_name
- AND r.table_name = dcc.table_name
- AND r.column_name = dcc.column_name)
- LEFT JOIN {schema}.data_table_chars dtc
- ON dcc.table_id = dtc.table_id
- WHERE r.profile_run_id = '{profile_run_id}'
- {criteria}
- {order_by}
+ SELECT
+ r.table_name,
+ r.column_name,
+ r.schema_name,
+ r.column_type,
+ t.anomaly_name,
+ t.issue_likelihood,
+ r.disposition,
+ null as action,
+ CASE
+ WHEN t.issue_likelihood = 'Possible' THEN 'Possible: speculative test that often identifies problems'
+ WHEN t.issue_likelihood = 'Likely' THEN 'Likely: typically indicates a data problem'
+ WHEN t.issue_likelihood = 'Definite' THEN 'Definite: indicates a highly-likely data problem'
+ WHEN t.issue_likelihood = 'Potential PII'
+ THEN 'Potential PII: may require privacy policies, standards and procedures for access, storage and transmission.'
+ END AS likelihood_explanation,
+ CASE
+ WHEN t.issue_likelihood = 'Potential PII' THEN 1
+ WHEN t.issue_likelihood = 'Possible' THEN 2
+ WHEN t.issue_likelihood = 'Likely' THEN 3
+ WHEN t.issue_likelihood = 'Definite' THEN 4
+ END AS likelihood_order,
+ t.anomaly_description,
+ r.detail,
+ t.suggested_action,
+ r.anomaly_id,
+ r.table_groups_id::VARCHAR,
+ r.id::VARCHAR,
+ p.profiling_starttime,
+ r.profile_run_id::VARCHAR,
+ tg.table_groups_name,
+
+ -- These are used in the PDF report
+ dcc.functional_data_type,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
+
+ FROM profile_anomaly_results r
+ INNER JOIN profile_anomaly_types t
+ ON r.anomaly_id = t.id
+ INNER JOIN profiling_runs p
+ ON r.profile_run_id = p.id
+ INNER JOIN table_groups tg
+ ON r.table_groups_id = tg.id
+ LEFT JOIN data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_name = dcc.column_name)
+ LEFT JOIN data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
+ WHERE r.profile_run_id = :profile_run_id
+ {criteria}
+ {order_by}
"""
- # Retrieve data as df
- df = db.retrieve_data(str_sql)
+ results = db_session.execute(str_sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ df = pd.DataFrame(list(results), columns=columns)
dct_replace = {"Confirmed": "✓", "Dismissed": "✘", "Inactive": "🔇"}
df["action"] = df["disposition"].replace(dct_replace)
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index c9af8f53..f32c9bdf 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -81,6 +81,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=bool(table_name),
)
with sort_column:
@@ -95,16 +96,15 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
default_sorting = [(sortable_columns[i][1], "ASC") for i in (0, 1, 2)]
sorting_columns = testgen.sorting_selector(sortable_columns, default_sorting)
- # Use SQL wildcard to match all values
- if not table_name:
- table_name = "%%"
- if not column_name:
- column_name = "%%"
-
# Display main results grid
with st.container():
with st.spinner("Loading data ..."):
- df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns)
+ df = profiling_queries.get_profiling_results(
+ run_id,
+ table_name=table_name,
+ column_name=column_name,
+ sorting_columns=sorting_columns,
+ )
show_columns = [
"schema_name",
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index fd5753f9..81bdc2c6 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -82,13 +82,14 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name:
label="Table Name",
)
with column_filter_column:
- column_options = list(columns_df.loc[columns_df["table_name"] == table_name]["column_name"].unique())
+ column_options = columns_df.loc[columns_df["table_name"] == table_name]["column_name"].dropna().unique().tolist()
column_name = testgen.select(
options=column_options,
default_value=column_name,
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with disposition_column:
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 920e007f..0a1e8fbf 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -128,7 +128,9 @@ def render(
)
with column_filter_column:
- column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"].unique())
+ column_options = run_columns_df.loc[
+ run_columns_df["table_name"] == table_name
+ ]["column_name"].dropna().unique().tolist()
column_name = testgen.select(
options=column_options,
value_column="column_name",
@@ -136,6 +138,7 @@ def render(
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with sort_column:
@@ -157,15 +160,15 @@ def render(
match status:
case "Failed + Warning":
- status = "'Failed','Warning'"
+ status = ["Failed", "Warning"]
case "Failed":
- status = "'Failed'"
+ status = "Failed"
case "Warning":
- status = "'Warning'"
+ status = "Warning"
case "Passed":
- status = "'Passed'"
+ status = "Passed"
case "Error":
- status = "'Error'"
+ status = "Error"
# Display main grid and retrieve selection
selected = show_result_detail(
@@ -294,7 +297,7 @@ def get_test_run_columns(test_run_id: str) -> pd.DataFrame:
@st.cache_data(show_spinner=False)
def get_test_results(
run_id: str,
- test_status: str | None = None,
+ test_status: str | list[str] | None = None,
test_type_id: str | None = None,
table_name: str | None = None,
column_name: str | None = None,
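
Changing test_status from a pre-quoted SQL fragment to a plain string or list keeps quoting out of the view layer; the query side can expand it into bound parameters. A hedged sketch of that expansion (status_filter is a hypothetical helper, not the project's actual query builder):

    def status_filter(test_status: str | list[str] | None) -> tuple[str, dict]:
        """Build a WHERE fragment plus bound params for an optional status filter."""
        if not test_status:
            return "", {}
        statuses = [test_status] if isinstance(test_status, str) else list(test_status)
        # One bound parameter per status value: :status_0, :status_1, ...
        names = [f"status_{i}" for i in range(len(statuses))]
        fragment = " AND r.result_status IN (" + ", ".join(f":{n}" for n in names) + ")"
        return fragment, dict(zip(names, statuses))

    fragment, params = status_filter(["Failed", "Warning"])
    # fragment == " AND r.result_status IN (:status_0, :status_1)"
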
From 633c64fecf65c1d3d049edbd32d4b109d37a63a4 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 11 Jul 2025 17:39:29 -0400
Subject: [PATCH 46/56] fix(grid): change to current selection not reflected
---
testgen/ui/services/form_service.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 771c9fc1..0194d291 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -312,7 +312,7 @@ def render_grid_select(
enable_enterprise_modules=False,
allow_unsafe_jscode=True,
update_mode=GridUpdateMode.NO_UPDATE,
- update_on=["selectionChanged"],
+ update_on=["selectionChanged", "modelUpdated"],
data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS,
height=int_height,
From add36107060215d18c2185803365ccae957993fd Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 11 Jul 2025 17:40:06 -0400
Subject: [PATCH 47/56] fix(issue-report): error status in test history breaks
export
---
testgen/ui/pdf/hygiene_issue_report.py | 8 ++++----
testgen/ui/pdf/test_result_report.py | 7 ++++---
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index aa5747fd..31844a78 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -27,10 +27,10 @@
SECTION_MIN_AVAILABLE_HEIGHT = 120
CLASS_COLORS = {
- "Definite": HexColor(0xE94D4A),
- "Likely": HexColor(0xFC8F2A),
- "Possible": HexColor(0xFCD349),
- "Potential PII": HexColor(0xFC8F2A),
+ "Definite": HexColor(0xEF5350),
+ "Likely": HexColor(0xFF9800),
+ "Possible": HexColor(0xFBC02D),
+ "Potential PII": HexColor(0x8D6E63),
}
def build_summary_table(document, hi_data):
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index dd5e9ed9..f583c71e 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -38,9 +38,10 @@
SECTION_MIN_AVAILABLE_HEIGHT = 120
RESULT_STATUS_COLORS = {
- "Passed": HexColor(0x94C465),
- "Warning": HexColor(0xFCD349),
- "Failed": HexColor(0xE94D4A),
+ "Passed": HexColor(0x8BC34A),
+ "Warning": HexColor(0xFBC02D),
+ "Failed": HexColor(0xEF5350),
+ "Error": HexColor(0x8D6E63),
}
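
The export failure apparently came from test-history rows whose "Error" status had no entry in RESULT_STATUS_COLORS. Besides adding the missing entry, this kind of lookup is more defensive with a fallback; a small sketch (the gray default is an assumption, not what the report actually renders):

    from reportlab.lib.colors import HexColor

    RESULT_STATUS_COLORS = {
        "Passed": HexColor(0x8BC34A),
        "Warning": HexColor(0xFBC02D),
        "Failed": HexColor(0xEF5350),
        "Error": HexColor(0x8D6E63),
    }
    DEFAULT_STATUS_COLOR = HexColor(0x9E9E9E)  # assumed neutral fallback

    def status_color(status: str):
        # .get() avoids a KeyError if an unexpected status ever shows up in history data.
        return RESULT_STATUS_COLORS.get(status, DEFAULT_STATUS_COLOR)
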
From 3d1d469eec63f062d3eae33819451a910c5e0393 Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Fri, 11 Jul 2025 15:27:20 -0400
Subject: [PATCH 48/56] fix: Allowing literal underscores in the table group
pattern fields
---
.../commands/queries/refresh_data_chars_query.py | 13 ++++++++++++-
tests/unit/test_profiling_query.py | 16 ++++++++--------
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/testgen/commands/queries/refresh_data_chars_query.py b/testgen/commands/queries/refresh_data_chars_query.py
index 325d6751..694eeefb 100644
--- a/testgen/commands/queries/refresh_data_chars_query.py
+++ b/testgen/commands/queries/refresh_data_chars_query.py
@@ -43,10 +43,21 @@ def _get_mask_query(self, mask: str, is_include: bool) -> str:
if mask:
sub_query += " AND (" if is_include else " AND NOT ("
is_first = True
+ escape = ""
+ if self.sql_flavor.startswith("mssql"):
+ escaped_underscore = "[_]"
+ elif self.sql_flavor == "snowflake":
+ escaped_underscore = "\\\\_"
+ escape = "ESCAPE '\\\\'"
+ elif self.sql_flavor == "redshift":
+ escaped_underscore = "\\\\_"
+ else:
+ escaped_underscore = "\\_"
for item in mask.split(","):
if not is_first:
sub_query += " OR "
- sub_query += "(c.table_name LIKE '" + item.strip().replace("_", r"\_") + r"' ESCAPE '\')"
+ item = item.strip().replace("_", escaped_underscore)
+ sub_query += f"(c.table_name LIKE '{item}' {escape})"
is_first = False
sub_query += ")"
return sub_query
diff --git a/tests/unit/test_profiling_query.py b/tests/unit/test_profiling_query.py
index 113508d6..826faad1 100644
--- a/tests/unit/test_profiling_query.py
+++ b/tests/unit/test_profiling_query.py
@@ -7,7 +7,7 @@
def test_include_exclude_mask_basic():
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "postgresql"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
profiling_query.parm_table_include_mask = "important%, %useful%"
@@ -18,9 +18,9 @@ def test_include_exclude_mask_basic():
# test assertions
assert "SELECT 'dummy_project_code'" in query
- assert r"AND ((c.table_name LIKE 'important%' ESCAPE '\') OR (c.table_name LIKE '%useful%' ESCAPE '\'))" in query
+ assert r"AND ((c.table_name LIKE 'important%' ) OR (c.table_name LIKE '%useful%' ))" in query
assert (
- r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\') OR (c.table_name LIKE 'tmp%' ESCAPE '\') OR (c.table_name LIKE 'raw\_slot\_utilization%' ESCAPE '\') OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ESCAPE '\'))"
+ r"AND NOT ((c.table_name LIKE 'temp%' ) OR (c.table_name LIKE 'tmp%' ) OR (c.table_name LIKE 'raw\_slot\_utilization%' ) OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ))"
in query
)
@@ -30,7 +30,7 @@ def test_include_exclude_mask_basic():
def test_include_empty_exclude_mask(mask):
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "snowflake"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
profiling_query.parm_table_include_mask = mask
@@ -41,7 +41,7 @@ def test_include_empty_exclude_mask(mask):
# test assertions
assert (
- r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\') OR (c.table_name LIKE 'tmp%' ESCAPE '\') OR (c.table_name LIKE 'raw\_slot\_utilization%' ESCAPE '\') OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ESCAPE '\'))"
+ r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\\') OR (c.table_name LIKE 'tmp%' ESCAPE '\\') OR (c.table_name LIKE 'raw\\_slot\\_utilization%' ESCAPE '\\') OR (c.table_name LIKE 'gps\\_product\\_step\\_change\\_log' ESCAPE '\\')"
in query
)
@@ -51,14 +51,14 @@ def test_include_empty_exclude_mask(mask):
def test_include_empty_include_mask(mask):
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "mssql"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
- profiling_query.parm_table_include_mask = "important%, %useful%"
+ profiling_query.parm_table_include_mask = "important%, %useful_%"
profiling_query.parm_table_exclude_mask = mask
# test run
query = profiling_query.GetDDFQuery()
# test assertions
- assert r"AND ((c.table_name LIKE 'important%' ESCAPE '\') OR (c.table_name LIKE '%useful%' ESCAPE '\'))" in query
+ assert r"AND ((c.table_name LIKE 'important%' ) OR (c.table_name LIKE '%useful[_]%' ))" in query
From a089c967f6df7bc2cac93d88079fb973f2333fdd Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:54:06 -0400
Subject: [PATCH 49/56] fix(profiling): don't apply sample ratio to
date_days_present
---
.../profiling/project_update_profile_results_to_estimates.sql | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/testgen/template/profiling/project_update_profile_results_to_estimates.sql b/testgen/template/profiling/project_update_profile_results_to_estimates.sql
index 48d2d61c..640829cf 100644
--- a/testgen/template/profiling/project_update_profile_results_to_estimates.sql
+++ b/testgen/template/profiling/project_update_profile_results_to_estimates.sql
@@ -22,8 +22,7 @@ set sample_ratio = {PROFILE_SAMPLE_RATIO},
within_1yr_date_ct = ROUND(within_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
within_1mo_date_ct = ROUND(within_1mo_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
future_date_ct = ROUND(future_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
- boolean_true_ct = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0),
- date_days_present = ROUND(date_days_present * {PROFILE_SAMPLE_RATIO}, 0)
+ boolean_true_ct = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0)
where profile_run_id = '{PROFILE_RUN_ID}'
and schema_name = split_part('{SAMPLING_TABLE}', '.', 1)
and table_name = split_part('{SAMPLING_TABLE}', '.', 2)
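
PROFILE_SAMPLE_RATIO scales counts measured on a sample back up to whole-table estimates, which is only valid for quantities that shrink proportionally with the sample. date_days_present is a count of distinct days, so scaling it overshoots; the patch stops extrapolating it. A tiny illustrative sketch (numbers are made up):

    sample_ratio = 10                 # e.g. a 10% sample: multiply sampled counts by 10
    future_date_ct_sampled = 42
    future_date_ct_estimate = round(future_date_ct_sampled * sample_ratio)   # 420, a fair estimate

    date_days_present_sampled = 180   # distinct days seen in the sample
    # 180 * 10 = 1800 "distinct days" would be nonsense for a one-year table:
    # distinct counts do not scale linearly, hence the removed line.
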
From cfe80a1de30f3bcf9ce2a907cf5b63d011e6713d Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:55:01 -0400
Subject: [PATCH 50/56] fix(table-groups): sampling incorrectly displayed
---
testgen/ui/components/frontend/js/pages/table_group_list.js | 2 +-
testgen/ui/queries/table_group_queries.py | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index b201bd8f..18d3525b 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -123,7 +123,7 @@ const TableGroupList = (props) => {
div(
{ class: 'flex-column fx-flex' },
Caption({content: 'Uses Record Sampling', style: 'margin-bottom: 4px;'}),
- span(tableGroup.profile_use_sampling || '--'),
+ span(tableGroup.profile_use_sampling ? 'Yes' : 'No'),
),
),
div(
diff --git a/testgen/ui/queries/table_group_queries.py b/testgen/ui/queries/table_group_queries.py
index 1d6e9b1e..d69e54cb 100644
--- a/testgen/ui/queries/table_group_queries.py
+++ b/testgen/ui/queries/table_group_queries.py
@@ -20,7 +20,8 @@ def _get_select_statement(schema):
profile_id_column_mask, profile_sk_column_mask,
description, data_source, source_system, source_process, data_location,
business_domain, stakeholder_group, transform_level, data_product,
- profile_use_sampling, profile_sample_percent, profile_sample_min_count,
+ CASE WHEN profile_use_sampling = 'Y' THEN true ELSE false END AS profile_use_sampling,
+ profile_sample_percent, profile_sample_min_count,
profiling_delay_days, profile_flag_cdes
FROM table_groups
"""
@@ -176,7 +177,7 @@ def add(schema, table_group) -> str:
'{table_group["profiling_exclude_mask"]}',
'{table_group["profile_id_column_mask"]}'::character varying(2000),
'{table_group["profile_sk_column_mask"]}'::character varying,
- '{'Y' if table_group["profile_use_sampling"]=='True' else 'N' }'::character varying,
+ '{'Y' if table_group["profile_use_sampling"] else 'N' }'::character varying,
'{table_group["profile_sample_percent"]}'::character varying,
{table_group["profile_sample_min_count"]},
'{table_group["profiling_delay_days"]}'::character varying,
From 8a41d6cbacee5d01680b6a20d62df3b2d90a108b Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:56:47 -0400
Subject: [PATCH 51/56] fix(select): handle none values in run dialogs
---
.../ui/views/dialogs/run_profiling_dialog.py | 15 +++++++-----
testgen/ui/views/dialogs/run_tests_dialog.py | 23 +++++++++++--------
testgen/ui/views/hygiene_issues.py | 2 --
testgen/ui/views/profiling_runs.py | 2 ++
testgen/ui/views/test_definitions.py | 7 ++++--
testgen/ui/views/test_results.py | 2 --
testgen/ui/views/test_runs.py | 3 +++
7 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py
index 4250f1e7..1b6cf22f 100644
--- a/testgen/ui/views/dialogs/run_profiling_dialog.py
+++ b/testgen/ui/views/dialogs/run_profiling_dialog.py
@@ -26,16 +26,19 @@ def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None
display_column="table_groups_name",
default_value=default_table_group_id,
required=True,
+ placeholder="Select table group to profile",
)
- table_group_name: str = table_groups_df.loc[table_groups_df["id"] == table_group_id, "table_groups_name"].iloc[0]
+ if table_group_id:
+ table_group_name: str = table_groups_df.loc[table_groups_df["id"] == table_group_id, "table_groups_name"].iloc[0]
testgen.whitespace(1)
- with st.container():
- st.markdown(f"Execute profiling for the table group **{table_group_name}**?")
- st.markdown(":material/info: _Profiling will be performed in a background process._")
+ if table_group_id:
+ with st.container():
+ st.markdown(f"Execute profiling for the table group **{table_group_name}**?")
+ st.markdown(":material/info: _Profiling will be performed in a background process._")
- if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"):
- st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")
+ if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"):
+ st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")
button_container = st.empty()
status_container = st.empty()
diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py
index 212c1361..93c89dbb 100644
--- a/testgen/ui/views/dialogs/run_tests_dialog.py
+++ b/testgen/ui/views/dialogs/run_tests_dialog.py
@@ -26,19 +26,22 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def
display_column="test_suite",
default_value=default_test_suite_id,
required=True,
+ placeholder="Select test suite to run",
)
- test_suite_name: str = test_suites_df.loc[test_suites_df["id"] == test_suite_id, "test_suite"].iloc[0]
+ if test_suite_id:
+ test_suite_name: str = test_suites_df.loc[test_suites_df["id"] == test_suite_id, "test_suite"].iloc[0]
testgen.whitespace(1)
- with st.container():
- st.markdown(f"Run tests for the test suite **{test_suite_name}**?")
- st.markdown(":material/info: _Test execution will be performed in a background process._")
+ if test_suite_id:
+ with st.container():
+ st.markdown(f"Run tests for the test suite **{test_suite_name}**?")
+ st.markdown(":material/info: _Test execution will be performed in a background process._")
- if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"):
- st.code(
- f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}",
- language="shellSession"
- )
+ if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"):
+ st.code(
+ f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}",
+ language="shellSession"
+ )
button_container = st.empty()
status_container = st.empty()
@@ -47,7 +50,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def
with button_container:
_, button_column = st.columns([.8, .2])
with button_column:
- run_test_button = st.button("Run Tests", use_container_width=True)
+ run_test_button = st.button("Run Tests", use_container_width=True, disabled=not test_suite_id)
if run_test_button:
button_container.empty()
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 19f61378..ed1f7991 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -79,7 +79,6 @@ def render(
issue_class = testgen.select(
options=["Definite", "Likely", "Possible", "Potential PII"],
default_value=issue_class,
- required=False,
bind_to_query="issue_class",
label="Issue Class",
)
@@ -91,7 +90,6 @@ def render(
default_value=None if issue_class == "Potential PII" else issue_type,
value_column="id",
display_column="anomaly_name",
- required=False,
bind_to_query="issue_type",
label="Issue Type",
disabled=issue_class == "Potential PII",
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index a90de40c..263b4b4d 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -64,6 +64,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
default_value=table_group_id,
bind_to_query="table_group_id",
label="Table Group",
+ placeholder="---",
)
with actions_column:
@@ -132,6 +133,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]
value_column="id",
display_column="table_groups_name",
required=True,
+ placeholder="Select table group",
)
return bool(tg_id), [], {"table_group_id": tg_id}
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 81bdc2c6..15157e92 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -73,10 +73,11 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name:
with table_filter_column:
columns_df = get_test_suite_columns(test_suite_id)
+ table_options = list(columns_df["table_name"].unique())
table_name = testgen.select(
- options=list(columns_df["table_name"].unique()),
+ options=table_options,
value_column="table_name",
- default_value=table_name,
+ default_value=table_name or (table_options[0] if table_options else None),
bind_to_query="table_name",
required=True,
label="Table Name",
@@ -691,6 +692,7 @@ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, s
value_column="id",
display_column="table_groups_name",
default_value=origin_table_group["id"],
+ required=True,
label="Target Table Group",
)
@@ -701,6 +703,7 @@ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, s
value_column="id",
display_column="test_suite",
default_value=None,
+ required=True,
label="Target Test Suite",
)
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 0a1e8fbf..8053babf 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -101,7 +101,6 @@ def render(
status = testgen.select(
options=status_options,
default_value=status or "Failed + Warning",
- required=False,
bind_to_query="status",
bind_empty_value=True,
label="Result Status",
@@ -113,7 +112,6 @@ def render(
value_column="test_type",
display_column="test_name_short",
default_value=test_type,
- required=False,
bind_to_query="test_type",
label="Test Type",
)
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index c3fe9913..0b50d649 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -63,6 +63,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
default_value=table_group_id,
bind_to_query="table_group_id",
label="Table Group",
+ placeholder="---",
)
with suite_filter_column:
@@ -74,6 +75,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
default_value=test_suite_id,
bind_to_query="test_suite_id",
label="Test Suite",
+ placeholder="---",
)
with actions_column:
@@ -139,6 +141,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]
value_column="test_suite",
display_column="test_suite",
required=True,
+ placeholder="Select test suite",
)
return bool(ts_name), [], {"project_key": self.project_code, "test_suite_key": ts_name}
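
The common thread in this patch: selects without a default can return None, so code that looked up a display name with .iloc[0] or rendered a CLI snippet is now guarded, and the action button stays disabled until something is chosen. A minimal Streamlit-style sketch of the guard (plain st.selectbox stands in for testgen.select here):

    import streamlit as st

    table_group_id = st.selectbox(
        "Table Group",
        options=[None, "tg-001", "tg-002"],
        format_func=lambda v: v or "Select table group to profile",
    )

    if table_group_id:
        st.markdown(f"Execute profiling for the table group **{table_group_id}**?")
        st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")

    # Keep the action disabled instead of indexing into an empty DataFrame lookup.
    st.button("Run Profiling", disabled=not table_group_id)
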
From c914376c7b51324569461e31fa5a97415b50d61b Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 15 Jul 2025 09:49:23 -0400
Subject: [PATCH 52/56] fix(connections): stop connection form from
re-rendering needlessly
---
.../components/frontend/js/components/connection_form.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 5486fd91..7ed5918f 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -119,14 +119,14 @@ const ConnectionForm = (props, saveButton) => {
privateKeyPhrase.val = '';
}
- const flavor = getValue(props.flavors).find(f => f.value === connectionFlavor.val);
+ const flavor = getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal);
const originalURLTemplate = van.state(flavor.connection_string);
- const [prefixPart, sufixPart] = originalURLTemplate.val.split('@');
+ const [prefixPart, sufixPart] = originalURLTemplate.rawVal.split('@');
const connectionStringPrefix = van.state(prefixPart);
const connectionStringSuffix = van.state(connection?.url ?? '');
- if (!connectionStringSuffix.val) {
- connectionStringSuffix.val = formatURL(sufixPart ?? '', connectionHost.val, connectionPort.val, connectionDatabase.val);
+ if (!connectionStringSuffix.rawVal) {
+ connectionStringSuffix.val = formatURL(sufixPart ?? '', connectionHost.rawVal, connectionPort.rawVal, connectionDatabase.rawVal);
}
const updatedConnection = van.derive(() => {
From 3d3191b163d38271ad8d26e662950908067dcd0a Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Mon, 14 Jul 2025 21:34:12 -0400
Subject: [PATCH 53/56] fix: Fixing hygiene issues lookup queries
---
.../050_populate_new_schema_metadata.sql | 86 +++++++++----------
1 file changed, 43 insertions(+), 43 deletions(-)
diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
index 5d57acf9..f0a8b8ab 100644
--- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
+++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
@@ -1635,86 +1635,86 @@ ORDER BY {GROUPBY_NAMES}'),
GROUP BY {GROUPBY_NAMES}
HAVING COUNT(*) > 1
ORDER BY {GROUPBY_NAMES}'),
- ('1258', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'redshift', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+ ('1258', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'redshift', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
- ('1259', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'postgresql', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
+ ('1259', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'postgresql', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
('1260', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'mssql', NULL, 'SELECT TOP 20 ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}"
-UNION ALL ;
+UNION
SELECT TOP 20 ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
GROUP BY "{COLUMN_NAME}"'),
- ('1261', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'snowflake', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+ ('1261', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'snowflake', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
- ('1262', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'databricks', NULL, 'SELECT ''Upper Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
+ ('1262', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'databricks', NULL, '(SELECT ''Upper Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER(`{COLUMN_NAME}`) = `{COLUMN_NAME}`
-GROUP BY `{COLUMN_NAME}` LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY `{COLUMN_NAME}` LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE `{COLUMN_NAME}` <> UPPER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` <> LOWER(`{COLUMN_NAME}`)
-GROUP BY `{COLUMN_NAME}` LIMIT 20'),
- ('1263', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+GROUP BY `{COLUMN_NAME}` LIMIT 20)'),
+ ('1263', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1264', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1264', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1265', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1265', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}"'),
- ('1266', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1266', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1267', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'databricks', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
+ ('1267', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+ WHERE `{COLUMN_NAME}` = UPPER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` = LOWER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1268', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1268', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1269', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1269', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1270', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'mssql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
-GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1271', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1270', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
+GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}"'),
+ ('1271', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1272', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) as record_ct
- WHERE `{COLUMN_NAME}` < ''A'' AND LEFT(`{COLUMN_NAME}`, 1) NOT IN (''"'', '' '') AND RIGHT(`{COLUMN_NAME}`, 1) <> ''''''''
+ ('1272', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+WHERE `{COLUMN_NAME}` < ''A'' AND LEFT(`{COLUMN_NAME}`, 1) NOT IN (''"'', '' '') AND RIGHT(`{COLUMN_NAME}`, 1) <> ''''''''
GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500'),
- ('1273', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1273', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1274', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1274', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1275', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE TRANSLATE("{COLUMN_NAME}", NCHAR(160) + NCHAR(8201) + NCHAR(8203) + NCHAR(8204) + NCHAR(8205) + NCHAR(8206) + NCHAR(8207) + NCHAR(8239) + NCHAR(12288) + NCHAR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
+ ('1275', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE TRANSLATE("{COLUMN_NAME}", NCHAR(160), ''X'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}"'),
- ('1276', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1276', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1277', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'databricks', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500')
+ ('1277', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+ WHERE TRANSLATE(`{COLUMN_NAME}`, ''\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff'', ''XXXXXXXXXX'') <> `{COLUMN_NAME}`
+GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500')
;
From 4d0b330002fa9f6364a4a246f2d415ba230f0289 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 15 Jul 2025 14:42:25 -0400
Subject: [PATCH 54/56] fix(grid): dialogs on grids close abruptly
---
testgen/ui/services/form_service.py | 12 +++++++++++-
testgen/ui/views/profiling_results.py | 4 +++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 0194d291..b7112f4c 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -1,3 +1,4 @@
+import json
import typing
from builtins import float
from pathlib import Path
@@ -312,7 +313,7 @@ def render_grid_select(
enable_enterprise_modules=False,
allow_unsafe_jscode=True,
update_mode=GridUpdateMode.NO_UPDATE,
- update_on=["selectionChanged", "modelUpdated"],
+ update_on=["selectionChanged"],
data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS,
height=int_height,
@@ -332,4 +333,13 @@ def render_grid_select(
if len(selected_rows) > 0:
if bind_to_query_name and bind_to_query_prop:
Router().set_query_params({bind_to_query_name: selected_rows[0][bind_to_query_prop]})
+
+ # We need to get the data from the original dataframe
+ # Otherwise changes to the dataframe (e.g., editing the current selection) do not get reflected in the returned rows
+ # Adding "modelUpdated" to AgGrid(update_on=...) does not work
+ # because it causes unnecessary reruns that cause dialogs to close abruptly
+ selected_props = [row[bind_to_query_prop] for row in selected_rows]
+ selected_df = df[df[bind_to_query_prop].isin(selected_props)]
+ selected_rows = json.loads(selected_df.to_json(orient="records"))
+
return selected_rows
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index f32c9bdf..4b6b892c 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -6,6 +6,7 @@
import pandas as pd
import streamlit as st
+from testgen.common.models import with_database_session
import testgen.ui.queries.profiling_queries as profiling_queries
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
@@ -171,6 +172,7 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
)
+@with_database_session
def get_excel_report_data(
update_progress: PROGRESS_UPDATE_TYPE,
table_group: str,
@@ -192,7 +194,7 @@ def get_excel_report_data(
for key in ["min_date", "max_date"]:
data[key] = data[key].apply(
- lambda val: datetime.fromtimestamp(val / 1000).strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) else None
+ lambda val: datetime.strptime(val, "%Y-%m-%d %H:%M:%S").strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) and val != "NaT" else None
)
data["hygiene_issues"] = data["hygiene_issues"].apply(lambda val: "Yes" if val else None)
From aa6007e9e75632490a52239a396726fd48b02311 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 15 Jul 2025 16:25:38 -0400
Subject: [PATCH 55/56] fix: misc styling improvements
---
testgen/ui/components/frontend/css/shared.css | 1 +
testgen/ui/components/frontend/js/pages/project_dashboard.js | 2 +-
testgen/ui/components/frontend/js/pages/schedule_list.js | 3 ++-
testgen/ui/components/frontend/js/pages/table_group_list.js | 2 +-
testgen/ui/views/profiling_results.py | 2 +-
5 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index d096a445..d9ff025d 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -149,6 +149,7 @@ body {
border: var(--button-stroked-border);
border-radius: 8px;
padding: 16px;
+ box-sizing: border-box;
}
.table-row {
diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js
index 92c0a156..92c22c0f 100644
--- a/testgen/ui/components/frontend/js/pages/project_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js
@@ -157,7 +157,7 @@ const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => {
),
span(
{ class: 'text-caption mt-1 mb-3 tg-overview--subtitle' },
- `${tableGroup.latest_profile_table_ct} tables | ${tableGroup.latest_profile_column_ct} columns`,
+ `${tableGroup.latest_profile_table_ct ?? 0} tables | ${tableGroup.latest_profile_column_ct ?? 0} columns`,
),
TableGroupTestSuiteSummary(tableGroup.test_suites),
),
diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js
index 2d9e7bf9..f8c54f96 100644
--- a/testgen/ui/components/frontend/js/pages/schedule_list.js
+++ b/testgen/ui/components/frontend/js/pages/schedule_list.js
@@ -19,7 +19,7 @@
import van from '../van.min.js';
import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
-import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
+import { emitEvent, getValue, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
import { withTooltip } from '../components/tooltip.js';
@@ -42,6 +42,7 @@ const ScheduleList = (/** @type Properties */ props) => {
const tableId = 'profiling-schedules-table';
resizeFrameHeightToElement(tableId);
+ resizeFrameHeightOnDOMChange(tableId);
return div(
{ class: 'table', id: tableId },
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index 18d3525b..333b133a 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -222,7 +222,7 @@ const Toolbar = (permissions, connections, selectedConnection) => {
})) ?? [],
onChange: (value) => emitEvent('ConnectionSelected', { payload: value }),
})
- : undefined,
+ : span(''),
div(
{ class: 'flex-row fx-gap-4' },
Button({
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index 4b6b892c..dec8b4ab 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -6,11 +6,11 @@
import pandas as pd
import streamlit as st
-from testgen.common.models import with_database_session
import testgen.ui.queries.profiling_queries as profiling_queries
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
from testgen.common import date_service
+from testgen.common.models import with_database_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
From 44a23a0f6be74b940bae474af7992fb8163f899e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 16 Jul 2025 13:59:18 -0400
Subject: [PATCH 56/56] release: 4.1.3 -> 4.12.6
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index 5ed0ceab..0320b0c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "dataops-testgen"
-version = "4.1.3"
+version = "4.12.6"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },