From 5aff0e045a04f6c9c447335cb75b071885a61657 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 24 Jun 2025 17:45:52 -0400
Subject: [PATCH 01/56] fix(test runs): sample the source data lookup DataFrame
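For context on the diff below: the lookup limit is read from an environment variable and applied by sampling the DataFrame before it is rendered into the report. A minimal sketch of the pattern (names mirror the patch; the int() cast is an assumption, since os.getenv returns a string whenever the variable is set):

    import os
    import pandas as pd

    # Assumed coercion; os.getenv returns a string when the env var is present.
    LOOKUP_LIMIT = int(os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20))

    def limit_lookup_rows(df: pd.DataFrame, limit: int | None = None) -> pd.DataFrame:
        # Down-sample the source-data lookup rows before rendering them into the PDF.
        return df.sample(n=limit) if limit else df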
---
testgen/settings.py | 5 +++++
testgen/ui/pdf/test_result_report.py | 13 +++++++++++--
testgen/ui/services/test_results_service.py | 8 ++++++--
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index 2d2c91c7..193aa4df 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -494,3 +494,8 @@
"""
Random key used to sign/verify the authentication token
"""
+
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20)
+"""
+Limit the number of records used to generate the PDF with test results issue report.
+"""
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index 883b0346..54564eaa 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -10,6 +10,7 @@
TableStyle,
)
+from testgen.settings import ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT
from testgen.ui.pdf.dataframe_table import TABLE_STYLE_DATA, DataFrameTableBuilder
from testgen.ui.pdf.style import (
COLOR_GRAY_BG,
@@ -241,9 +242,17 @@ def get_report_content(document, tr_data):
yield build_history_table(document, tr_data)
if tr_data["test_type"] == "CUSTOM":
- sample_data_tuple = do_source_data_lookup_custom(get_schema(), tr_data)
+ sample_data_tuple = do_source_data_lookup_custom(
+ get_schema(),
+ tr_data,
+ limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT,
+ )
else:
- sample_data_tuple = do_source_data_lookup(get_schema(), tr_data)
+ sample_data_tuple = do_source_data_lookup(
+ get_schema(),
+ tr_data,
+ limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT,
+ )
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index 7f2d886b..0623ec34 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -153,7 +153,7 @@ def get_test_result_history(db_schema, tr_data):
return df
-def do_source_data_lookup_custom(db_schema, tr_data):
+def do_source_data_lookup_custom(db_schema, tr_data, limit: int | None = None):
# Define the query
str_sql = f"""
SELECT d.custom_query as lookup_query, tg.table_group_schema,
@@ -193,6 +193,8 @@ def do_source_data_lookup_custom(db_schema, tr_data):
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this test.", None, None
@@ -201,7 +203,7 @@ def do_source_data_lookup_custom(db_schema, tr_data):
return "ERR", f"Source data lookup query caused an error:\n\n{e.args[0]}", str_sql, None
-def do_source_data_lookup(db_schema, tr_data, sql_only=False):
+def do_source_data_lookup(db_schema, tr_data, sql_only=False, limit: int | None = None):
# Define the query
str_sql = f"""
SELECT t.lookup_query, tg.table_group_schema,
@@ -298,6 +300,8 @@ def replace_parms(df_test, str_query):
if df.empty:
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None
From d406e7bdf2403b625a448bbfc9f72fe2132eb4b6 Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 24 Jun 2025 17:52:35 -0400
Subject: [PATCH 02/56] misc(test runs): limit result history in issue PDF
report
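The history query gains an optional LIMIT appended through string interpolation; roughly, the tail of the generated SQL looks like this (skeleton only, not the full project query):

    limit = 15  # history rows kept for the issue PDF
    sql_tail = f"""
        ORDER BY test_date DESC
        {'LIMIT ' + str(limit) if limit else ''};
    """
    # limit=15   -> ORDER BY test_date DESC ... LIMIT 15;
    # limit=None -> the LIMIT clause is omitted entirely.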
---
testgen/ui/pdf/test_result_report.py | 2 +-
testgen/ui/services/test_results_service.py | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index 54564eaa..dd5e9ed9 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -165,7 +165,7 @@ def build_summary_table(document, tr_data):
def build_history_table(document, tr_data):
- history_data = get_test_result_history(get_schema(), tr_data)
+ history_data = get_test_result_history(get_schema(), tr_data, limit=15)
history_table_style = TableStyle(
(
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index 0623ec34..e8a7452d 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -124,7 +124,7 @@ def get_test_results(
return df
-def get_test_result_history(db_schema, tr_data):
+def get_test_result_history(db_schema, tr_data, limit: int | None = None):
if tr_data["auto_gen"]:
str_where = f"""
WHERE test_suite_id = '{tr_data["test_suite_id"]}'
@@ -143,7 +143,8 @@ def get_test_result_history(db_schema, tr_data):
test_name_short, test_name_long, measure_uom, test_operator,
threshold_value::NUMERIC, result_measure, result_status
FROM {db_schema}.v_test_results {str_where}
- ORDER BY test_date DESC;
+ ORDER BY test_date DESC
+ {'LIMIT ' + str(limit) if limit else ''};
"""
df = db.retrieve_data(str_sql)
From 2d2dc39d0b1bd0d20aee517ca332bd097d3ca6cb Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 25 Jun 2025 08:26:20 -0400
Subject: [PATCH 03/56] misc: apply limit to hygiene issue report
---
testgen/settings.py | 4 ++--
testgen/ui/pdf/hygiene_issue_report.py | 3 ++-
testgen/ui/services/hygiene_issues_service.py | 4 +++-
3 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index 193aa4df..b9f991ca 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -495,7 +495,7 @@
Random key used to sign/verify the authentication token
"""
-ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 20)
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 500)
"""
-Limit the number of records used to generate the PDF with test results issue report.
+Limit the number of records used to generate the PDF with test results and hygiene issue reports.
"""
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index 1e3ddda3..aa5747fd 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -4,6 +4,7 @@
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import CondPageBreak, KeepTogether, Paragraph, Table, TableStyle
+from testgen.settings import ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT
from testgen.ui.pdf.dataframe_table import DataFrameTableBuilder
from testgen.ui.pdf.style import (
COLOR_GRAY_BG,
@@ -185,7 +186,7 @@ def get_report_content(document, hi_data):
yield Paragraph("Suggested Action", style=PARA_STYLE_H1)
yield Paragraph(hi_data["suggested_action"], style=PARA_STYLE_TEXT)
- sample_data_tuple = get_source_data(hi_data)
+ sample_data_tuple = get_source_data(hi_data, limit=ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT)
yield CondPageBreak(SECTION_MIN_AVAILABLE_HEIGHT)
yield Paragraph("Sample Data", PARA_STYLE_H1)
diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py
index 71a24fe7..f4490a23 100644
--- a/testgen/ui/services/hygiene_issues_service.py
+++ b/testgen/ui/services/hygiene_issues_service.py
@@ -4,7 +4,7 @@
from testgen.ui.services import database_service as db
-def get_source_data(hi_data):
+def get_source_data(hi_data, limit: int | None = None):
str_schema = st.session_state["dbschema"]
# Define the query
str_sql = f"""
@@ -83,6 +83,8 @@ def replace_parms(str_query):
if df.empty:
return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None
else:
+ if limit:
+ df = df.sample(n=limit)
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this Issue.", None, None
From c48bd3a86e0d932d1862d82aa3ff60ccd3e56117 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 25 Jun 2025 10:58:32 -0400
Subject: [PATCH 04/56] misc: set limit on source data lookups in UI dialogs
---
testgen/settings.py | 2 +-
testgen/ui/views/hygiene_issues.py | 2 +-
testgen/ui/views/test_results.py | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/testgen/settings.py b/testgen/settings.py
index b9f991ca..528db06a 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -495,7 +495,7 @@
Random key used to sign/verify the authentication token
"""
-ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 500)
+ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT: int = os.getenv("TG_ISSUE_REPORT_SOURCE_DATA_LOOKUP_LIMIT", 50)
"""
Limit the number of records used to generate the PDF with test results and hygiene issue reports.
"""
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 3cf0fe3d..45fc1d61 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -529,7 +529,7 @@ def source_data_dialog(selected_row):
fm.render_html_list(selected_row, ["detail"], None, 700, ["Hygiene Issue Detail"])
with st.spinner("Retrieving source data..."):
- bad_data_status, bad_data_msg, _, df_bad = get_source_data(selected_row)
+ bad_data_status, bad_data_msg, _, df_bad = get_source_data(selected_row, limit=500)
if bad_data_status in {"ND", "NA"}:
st.info(bad_data_msg)
elif bad_data_status == "ERR":
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 39373a40..d472e731 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -380,13 +380,13 @@ def get_test_definition(str_test_def_id):
@st.cache_data(show_spinner=False)
def do_source_data_lookup(selected_row):
schema = st.session_state["dbschema"]
- return test_results_service.do_source_data_lookup(schema, selected_row)
+ return test_results_service.do_source_data_lookup(schema, selected_row, limit=500)
@st.cache_data(show_spinner=False)
def do_source_data_lookup_custom(selected_row):
schema = st.session_state["dbschema"]
- return test_results_service.do_source_data_lookup_custom(schema, selected_row)
+ return test_results_service.do_source_data_lookup_custom(schema, selected_row, limit=500)
@st.cache_data(show_spinner=False)
From aff987a79fdf0f27e48d646a2f1f9e0f56521469 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 25 Jun 2025 17:32:14 -0400
Subject: [PATCH 05/56] fix(source-data): bug when sampling dataframe
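Background on the fix: with the default replace=False, pandas refuses to sample more rows than the frame contains, so the requested limit has to be clamped to the frame length. A minimal illustration (not project code):

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2, 3]})
    # df.sample(n=5) raises ValueError: sample larger than the population
    safe = df.sample(n=min(len(df), 5))  # returns all 3 rows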
---
testgen/ui/services/hygiene_issues_service.py | 2 +-
testgen/ui/services/test_results_service.py | 4 ++--
testgen/ui/views/hygiene_issues.py | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/testgen/ui/services/hygiene_issues_service.py b/testgen/ui/services/hygiene_issues_service.py
index f4490a23..53ca43e7 100644
--- a/testgen/ui/services/hygiene_issues_service.py
+++ b/testgen/ui/services/hygiene_issues_service.py
@@ -84,7 +84,7 @@ def replace_parms(str_query):
return "ND", "Data that violates Hygiene Issue criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this Issue.", None, None
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index e8a7452d..57de1dc0 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -195,7 +195,7 @@ def do_source_data_lookup_custom(db_schema, tr_data, limit: int | None = None):
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "Source data lookup is not available for this test.", None, None
@@ -302,7 +302,7 @@ def replace_parms(df_test, str_query):
return "ND", "Data that violates Test criteria is not present in the current dataset.", str_sql, None
else:
if limit:
- df = df.sample(n=limit)
+ df = df.sample(n=min(len(df), limit))
return "OK", None, str_sql, df
else:
return "NA", "A source data lookup for this Test is not available.", None, None
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 45fc1d61..6b766a26 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -515,8 +515,8 @@ def get_excel_report_data(
@st.cache_data(show_spinner=False)
-def get_source_data(hi_data):
- return get_source_data_uncached(hi_data)
+def get_source_data(hi_data, limit):
+ return get_source_data_uncached(hi_data, limit)
@st.dialog(title="Source Data")
From 3f4861590be4fe31640a399270f66af2cab5d3b7 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 24 Jun 2025 01:36:39 -0400
Subject: [PATCH 06/56] fix(logo): prevent logo resize
---
testgen/ui/assets/style.css | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index a57b453c..791b4079 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -47,10 +47,14 @@ header {
/* ... */
/* Sidebar */
-[data-testid="stSidebarHeader"] {
+[data-testid="stSidebarContent"] [data-testid="stSidebarHeader"] {
padding: 16px 20px;
}
+[data-testid="stSidebarHeader"] [data-testid="stLogo"] {
+ max-width: fit-content;
+}
+
section[data-testid="stSidebar"] {
width: 250px;
z-index: 999;
From 325569b7d867efb14c24746b5da6ecb454dd4faf Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 24 Jun 2025 16:53:43 -0400
Subject: [PATCH 07/56] feat: add help menu, support mailto link, and upgrade
notification
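Among other things, version_service now derives the UI edition label from the Docker Hub repository name. Roughly how that string transformation behaves (repository values below are illustrative):

    def edition_from_repo(repo: str) -> str:
        suffix = (
            repo.replace("datakitchen/dataops-testgen", "")
            .replace("-", " ")
            .strip()
            .title()
            .replace("Qa", "QA")
        )
        return f"TestGen{' ' + suffix if suffix else ''}"

    # edition_from_repo("datakitchen/dataops-testgen")    -> "TestGen"
    # edition_from_repo("datakitchen/dataops-testgen-qa") -> "TestGen QA"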
---
deploy/docker-bake.hcl | 5 +
deploy/testgen.dockerfile | 1 +
testgen/__main__.py | 9 +-
testgen/common/version_service.py | 49 +++++-
testgen/settings.py | 9 +-
testgen/ui/app.py | 8 +-
testgen/ui/assets/style.css | 53 +++++-
testgen/ui/bootstrap.py | 22 +--
testgen/ui/components/frontend/css/shared.css | 2 +-
.../frontend/js/components/help_menu.js | 161 ++++++++++++++++++
.../frontend/js/components/sidebar.js | 140 ++++-----------
testgen/ui/components/frontend/js/main.js | 2 +
testgen/ui/components/widgets/__init__.py | 2 +-
testgen/ui/components/widgets/page.py | 62 +++++--
testgen/ui/components/widgets/sidebar.py | 19 ++-
.../components/widgets/testgen_component.py | 1 +
testgen/ui/navigation/menu.py | 11 --
testgen/ui/session.py | 10 +-
testgen/ui/views/login.py | 2 +-
testgen/ui/views/project_dashboard.py | 1 -
tests/unit/test_version_service.py | 32 ++--
21 files changed, 401 insertions(+), 200 deletions(-)
create mode 100644 testgen/ui/components/frontend/js/components/help_menu.js
diff --git a/deploy/docker-bake.hcl b/deploy/docker-bake.hcl
index 2518cfc4..35efb2b4 100644
--- a/deploy/docker-bake.hcl
+++ b/deploy/docker-bake.hcl
@@ -4,12 +4,16 @@ variable "TESTGEN_VERSION" {}
variable "TESTGEN_DOCKER_HUB_REPO" {
default = "datakitchen/dataops-testgen"
}
+variable "TESTGEN_SUPPORT_EMAIL" {
+ default = "open-source-support@datakitchen.io"
+}
target "testgen-release" {
args = {
TESTGEN_VERSION = "${TESTGEN_VERSION}"
TESTGEN_BASE_LABEL = "${TESTGEN_BASE_LABEL}"
TESTGEN_DOCKER_HUB_REPO = "${TESTGEN_DOCKER_HUB_REPO}"
+ TESTGEN_SUPPORT_EMAIL = "${TESTGEN_SUPPORT_EMAIL}"
}
context = "."
dockerfile = "deploy/testgen.dockerfile"
@@ -31,6 +35,7 @@ target "testgen-qa" {
TESTGEN_VERSION = "${TESTGEN_VERSION}"
TESTGEN_BASE_LABEL = "${TESTGEN_BASE_LABEL}"
TESTGEN_DOCKER_HUB_REPO = "${TESTGEN_DOCKER_HUB_REPO}"
+ TESTGEN_SUPPORT_EMAIL = "${TESTGEN_SUPPORT_EMAIL}"
}
context = "."
dockerfile = "deploy/testgen.dockerfile"
diff --git a/deploy/testgen.dockerfile b/deploy/testgen.dockerfile
index 318a3add..0e85c3f6 100644
--- a/deploy/testgen.dockerfile
+++ b/deploy/testgen.dockerfile
@@ -24,6 +24,7 @@ RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/s
ENV TESTGEN_VERSION=${TESTGEN_VERSION}
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}
+ENV TESTGEN_SUPPORT_EMAIL=${TESTGEN_SUPPORT_EMAIL}
ENV TG_RELEASE_CHECK=docker
USER testgen
diff --git a/testgen/__main__.py b/testgen/__main__.py
index 2f09b169..74541d76 100644
--- a/testgen/__main__.py
+++ b/testgen/__main__.py
@@ -48,6 +48,7 @@
LOG = logging.getLogger("testgen")
APP_MODULES = ["ui", "scheduler"]
+VERSION_DATA = version_service.get_version()
@dataclass
@@ -69,7 +70,13 @@ def invoke(self, ctx: Context):
@click.group(
cls=CliGroup,
- help=f"This version: {settings.VERSION} \n\nLatest version: {version_service.get_latest_version()} \n\nSchema revision: {get_schema_revision()}"
+ help=f"""
+ {VERSION_DATA.edition} {VERSION_DATA.current or ""}
+
+ {f"New version available! {VERSION_DATA.latest}" if VERSION_DATA.latest != VERSION_DATA.current else ""}
+
+ Schema revision: {get_schema_revision()}
+ """
)
@click.option(
"-v",
diff --git a/testgen/common/version_service.py b/testgen/common/version_service.py
index 8e03cb17..5621d4c6 100644
--- a/testgen/common/version_service.py
+++ b/testgen/common/version_service.py
@@ -1,28 +1,59 @@
import logging
+from dataclasses import dataclass
import requests
from testgen import settings
+from testgen.ui.session import session
LOG = logging.getLogger("testgen")
-def get_latest_version() -> str:
+@dataclass
+class Version:
+ edition: str
+ current: str
+ latest: str
+
+
+def get_version() -> Version:
+ if not session.version:
+ session.version = Version(
+ edition=_get_app_edition(),
+ current=settings.VERSION,
+ latest=_get_latest_version(),
+ )
+ return session.version
+
+
+def _get_app_edition() -> str:
+ edition = (
+ settings.DOCKER_HUB_REPOSITORY
+ .replace("datakitchen/dataops-testgen", "")
+ .replace("-", " ")
+ .strip()
+ .title()
+ .replace("Qa", "QA")
+ )
+ return f"TestGen{' ' + edition if edition else ''}"
+
+
+def _get_latest_version() -> str | None:
try:
return {
"pypi": _get_last_pypi_release,
"docker": _get_last_docker_release,
"yes": _get_last_docker_release, # NOTE: kept for retrocompatibility
- }.get(settings.CHECK_FOR_LATEST_VERSION, lambda: "unknown")()
+ }.get(settings.CHECK_FOR_LATEST_VERSION, lambda: None)()
except:
- return "unknown"
+ return None
-def _get_last_pypi_release() -> str:
+def _get_last_pypi_release() -> str | None:
response = requests.get("https://pypi.org/pypi/dataops-testgen/json", timeout=3)
if response.status_code != 200:
LOG.warning(f"version_service: Failed to fetch PyPi releases. Status code: {response.status_code}")
- return "unknown"
+ return None
package_data = response.json()
package_releases = list((package_data.get("releases") or {}).keys())
@@ -30,7 +61,7 @@ def _get_last_pypi_release() -> str:
return _sorted_tags(package_releases)[0]
-def _get_last_docker_release() -> str:
+def _get_last_docker_release() -> str | None:
headers = {}
if settings.DOCKER_HUB_USERNAME and settings.DOCKER_HUB_PASSWORD:
auth_response = requests.post(
@@ -43,7 +74,7 @@ def _get_last_docker_release() -> str:
"version_service: unable to login against https://hub.docker.com."
f" Status code: {auth_response.status_code}"
)
- return "unknown"
+ return None
headers["Authorization"] = f"Bearer {auth_response.json()['token']}"
response = requests.get(
@@ -55,7 +86,7 @@ def _get_last_docker_release() -> str:
if response.status_code != 200:
LOG.debug(f"version_service: Failed to fetch docker tags. Status code: {response.status_code}")
- return "unknown"
+ return None
tags_to_return = []
tags_data = response.json()
@@ -66,7 +97,7 @@ def _get_last_docker_release() -> str:
tags_to_return.append(tag_name)
if len(tags_to_return) <= 0:
- return "unkown"
+ return None
return _sorted_tags(tags_to_return)[0]
diff --git a/testgen/settings.py b/testgen/settings.py
index 528db06a..205caf96 100644
--- a/testgen/settings.py
+++ b/testgen/settings.py
@@ -452,12 +452,17 @@
defaults to: None
"""
-VERSION: str = os.getenv("TESTGEN_VERSION", "unknown")
+VERSION: str = os.getenv("TESTGEN_VERSION", None)
"""
Current deployed version. The value is displayed in the UI menu.
from env variable: `TESTGEN_VERSION`
-defaults to: `unknown`
+defaults to: None
+"""
+
+SUPPORT_EMAIL: str = os.getenv("TESTGEN_SUPPORT_EMAIL", "open-source-support@datakitchen.io")
+"""
+Email for contacting DataKitchen support.
"""
SSL_CERT_FILE: str = os.getenv("SSL_CERT_FILE", "")
diff --git a/testgen/ui/app.py b/testgen/ui/app.py
index 36bff7b5..de4f8d0e 100644
--- a/testgen/ui/app.py
+++ b/testgen/ui/app.py
@@ -3,6 +3,7 @@
import streamlit as st
from testgen import settings
+from testgen.common import version_service
from testgen.common.docker_service import check_basic_configuration
from testgen.common.models import with_database_session
from testgen.ui import bootstrap
@@ -51,9 +52,12 @@ def render(log_level: int = logging.INFO):
testgen.sidebar(
projects=project_service.get_projects(),
current_project=session.sidebar_project,
- menu=application.menu.update_version(application.get_version()),
- username=session.username,
+ menu=application.menu,
current_page=session.current_page,
+ username=session.username,
+ role=session.auth_role,
+ version=version_service.get_version(),
+ support_email=settings.SUPPORT_EMAIL,
)
application.router.run()
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 791b4079..b60981cf 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -125,6 +125,8 @@ div.st-key-data_catalog-spinner {
}
/* Theming for buttons, tabs and form inputs */
+button[data-testid="stPopoverButton"]:hover,
+button[data-testid="stPopoverButton"]:focus:not(:active),
button[data-testid="stBaseButton-secondary"]:hover,
button[data-testid="stBaseButton-secondary"]:focus:not(:active),
button[data-testid="stBaseButton-secondaryFormSubmit"]:hover,
@@ -133,6 +135,7 @@ button[data-testid="stBaseButton-secondaryFormSubmit"]:focus:not(:active) {
color: var(--primary-color);
}
+button[data-testid="stPopoverButton"]:active,
button[data-testid="stBaseButton-secondary"]:active,
button[data-testid="stBaseButton-secondaryFormSubmit"]:active,
label[data-baseweb="checkbox"]:has(input[aria-checked="true"]) > span {
@@ -305,17 +308,53 @@ Use as testgen.text("text", "extra_styles") */
background-color: var(--disabled-text-color);
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a {
+/* Help menu */
+.st-key-tg-header--help [data-testid="stPageLink"] {
+ position: absolute;
+ top: -7px;
+ right: 0;
+ z-index: 5;
+}
+
+.st-key-tg-header--help [data-testid="stPageLink"] [data-testid="stPageLink-NavLink"] {
+ line-height: 1;
+}
+
+.st-key-tg-header--help [data-testid="stPopover"] {
+ width: auto;
+}
+
+.st-key-tg-header--help button[data-testid="stPopoverButton"] {
border: none;
background: none;
- padding: 6px;
- min-height: 24px;
- color: var(--primary-text-color);
+ padding: 0;
+ margin-top: 8px;
+ min-height: fit-content;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.tg-header--links) [data-testid="stLinkButton"] a p {
- font-size: 20px;
- line-height: 1;
+.st-key-tg-header--help button[data-testid="stPopoverButton"]:focus:not(:hover) {
+ color: inherit;
+}
+
+.st-key-tg-header--help-dummy [data-testid="stMarkdownContainer"] p {
+ display: flex;
+ align-items: center;
+ margin-top: 8px;
+ min-height: fit-content;
+}
+
+.st-key-tg-header--help-dummy p span {
+ width: 1.25rem;
+ height: 1.25rem;
+ font-size: 1.25rem;
+ line-height: 1.25rem;
+ margin-top: 0.125rem;
+ margin-left: 0.125rem;
+ margin-right: -0.3125rem;
+}
+
+div[data-testid="stPopoverBody"]:has(i.tg-header--help-wrapper) {
+ padding: 0;
}
/* */
diff --git a/testgen/ui/bootstrap.py b/testgen/ui/bootstrap.py
index 6b0fed7a..3b048414 100644
--- a/testgen/ui/bootstrap.py
+++ b/testgen/ui/bootstrap.py
@@ -2,12 +2,10 @@
import logging
from testgen import settings
-from testgen.commands.run_upgrade_db_config import get_schema_revision
-from testgen.common import configure_logging, version_service
-from testgen.ui.navigation.menu import Menu, Version
+from testgen.common import configure_logging
+from testgen.ui.navigation.menu import Menu
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
-from testgen.ui.session import session
from testgen.ui.views.connections import ConnectionsPage
from testgen.ui.views.data_catalog import DataCatalogPage
from testgen.ui.views.hygiene_issues import HygieneIssuesPage
@@ -55,17 +53,6 @@ def __init__(self, logo: plugins.Logo, router: Router, menu: Menu, logger: loggi
self.menu = menu
self.logger = logger
- def get_version(self) -> Version:
- latest_version = self.menu.version.latest
- if not session.latest_version:
- latest_version = version_service.get_latest_version()
-
- return Version(
- current=settings.VERSION,
- latest=latest_version,
- schema=get_schema_revision(),
- )
-
def run(log_level: int = logging.INFO) -> Application:
pages = [*BUILTIN_PAGES]
@@ -106,11 +93,6 @@ def run(log_level: int = logging.INFO) -> Application:
for page in pages if page.menu_item
}.values()
),
- version=Version(
- current=settings.VERSION,
- latest="...",
- schema=get_schema_revision(),
- ),
),
logger=LOG,
)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index 643b4ffb..c2574212 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -118,7 +118,7 @@ body {
--portal-background: #14181f;
--portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px;
- --select-hover-background: rgba(255, 255, 255, .32);
+ --select-hover-background: rgb(38, 39, 48);
}
}
diff --git a/testgen/ui/components/frontend/js/components/help_menu.js b/testgen/ui/components/frontend/js/components/help_menu.js
new file mode 100644
index 00000000..1a364a23
--- /dev/null
+++ b/testgen/ui/components/frontend/js/components/help_menu.js
@@ -0,0 +1,161 @@
+/**
+ * @typedef Version
+ * @type {object}
+ * @property {string} edition
+ * @property {string} current
+ * @property {string} latest
+ *
+ * @typedef Permissions
+ * @type {object}
+ * @property {boolean} can_edit
+ *
+ * @typedef Properties
+ * @type {object}
+ * @property {string} page_help
+ * @property {string} support_email
+ * @property {Version} version
+ * @property {Permissions} permissions
+*/
+import van from '../van.min.js';
+import { emitEvent, getRandomId, getValue, loadStylesheet, resizeFrameHeightOnDOMChange, resizeFrameHeightToElement } from '../utils.js';
+import { Streamlit } from '../streamlit.js';
+import { Icon } from './icon.js';
+
+const { a, div, span } = van.tags;
+
+const baseHelpUrl = 'https://docs.datakitchen.io/articles/#!dataops-testgen-help/';
+const releaseNotesTopic = 'testgen-release-notes';
+const upgradeTopic = 'upgrade-testgen';
+
+const slackUrl = 'https://data-observability-slack.datakitchen.io/join';
+const trainingUrl = 'https://info.datakitchen.io/data-quality-training-and-certifications';
+
+const HelpMenu = (/** @type Properties */ props) => {
+ loadStylesheet('help-menu', stylesheet);
+ Streamlit.setFrameHeight(1);
+ window.testgen.isPage = true;
+
+ const domId = `help-menu-${getRandomId()}`;
+ const version = getValue(props.version) ?? {};
+
+ resizeFrameHeightToElement(domId);
+ resizeFrameHeightOnDOMChange(domId);
+
+ return div(
+ { id: domId },
+ div(
+ { class: 'flex-column pt-3' },
+ getValue(props.help_topic)
+ ? HelpLink(`${baseHelpUrl}${getValue(props.help_topic)}`, 'Help for this Page', 'description')
+ : null,
+ HelpLink(baseHelpUrl, 'TestGen Help', 'help'),
+ HelpLink(trainingUrl, 'Training Portal', 'school'),
+ getValue(props.permissions)?.can_edit
+ ? div(
+ { class: 'help-item', onclick: () => emitEvent('AppLogsClicked') },
+ Icon({ classes: 'help-item-icon' }, 'browse_activity'),
+ 'Application Logs',
+ )
+ : null,
+ span({ class: 'help-divider' }),
+ HelpLink(slackUrl, 'Slack Community', 'group'),
+ getValue(props.support_email)
+ ? HelpLink(
+ `mailto:${getValue(props.support_email)}
+ ?subject=${version.edition}: Contact Support
+ &body=%0D%0D%0DVersion: ${version.edition} ${version.current}`,
+ 'Contact Support',
+ 'email',
+ )
+ : null,
+ span({ class: 'help-divider' }),
+ version.current || version.latest
+ ? div(
+ { class: 'help-version' },
+ version.current
+ ? HelpLink(`${baseHelpUrl}${releaseNotesTopic}`, `${version.edition} ${version.current}`, null, null)
+ : null,
+ version.latest !== version.current
+ ? HelpLink(
+ `${baseHelpUrl}${upgradeTopic}`,
+ `New version available! ${version.latest}`,
+ null,
+ 'latest',
+ )
+ : null,
+ )
+ : null,
+ ),
+ );
+}
+
+const HelpLink = (
+ /** @type string */ url,
+ /** @type string */ label,
+ /** @type string? */ icon,
+ /** @type string */ classes = 'help-item',
+) => {
+ return a(
+ {
+ class: classes,
+ href: url,
+ target: '_blank',
+ onclick: () => emitEvent('ExternalLinkClicked'),
+ },
+ icon ? Icon({ classes: 'help-item-icon' }, icon) : null,
+ label,
+ );
+};
+
+const stylesheet = new CSSStyleSheet();
+stylesheet.replace(`
+.help-item {
+ padding: 12px 24px;
+ color: var(--primary-text-color);
+ text-decoration: none;
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ cursor: pointer;
+ transition: 0.3s;
+}
+
+.help-item:hover {
+ background-color: var(--select-hover-background);
+ color: var(--primary-color);
+}
+
+.help-item-icon {
+ color: var(--primary-text-color);
+ transition: 0.3s;
+}
+
+.help-item:hover .help-item-icon {
+ color: var(--primary-color);
+}
+
+.help-divider {
+ height: 1px;
+ background-color: var(--border-color);
+ margin: 0 16px;
+}
+
+.help-version {
+ padding: 16px 16px 8px;
+ display: flex;
+ flex-direction: column;
+ align-items: flex-end;
+ gap: 8px;
+}
+
+.help-version > a {
+ color: var(--secondary-text-color);
+ text-decoration: none;
+}
+
+.help-version > a.latest {
+ color: var(--red);
+}
+`);
+
+export { HelpMenu };
diff --git a/testgen/ui/components/frontend/js/components/sidebar.js b/testgen/ui/components/frontend/js/components/sidebar.js
index b2da6405..70b93883 100644
--- a/testgen/ui/components/frontend/js/components/sidebar.js
+++ b/testgen/ui/components/frontend/js/components/sidebar.js
@@ -9,38 +9,35 @@
*
* @typedef Version
* @type {object}
+ * @property {string} edition
* @property {string} current
* @property {string} latest
- * @property {string} schema
*
* @typedef Menu
* @type {object}
* @property {Array.
")
@@ -987,8 +1004,17 @@ def show_test_defs_grid(
return dct_selected_row
-def get_excel_report_data(update_progress: PROGRESS_UPDATE_TYPE, data: pd.DataFrame, test_suite: str) -> FILE_DATA_TYPE:
- data = data.copy()
+def get_excel_report_data(
+ update_progress: PROGRESS_UPDATE_TYPE,
+ project_code: str,
+ test_suite: str,
+ data: pd.DataFrame | None = None,
+) -> FILE_DATA_TYPE:
+ if data is not None:
+ data = data.copy()
+ else:
+ data = test_definition_service.get_test_definitions(project_code, test_suite)
+ date_service.accommodate_dataframe_to_timezone(data, st.session_state)
for key in ["test_active_display", "lock_refresh_display"]:
data[key] = data[key].apply(lambda val: val if val == "Yes" else None)
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 4a0312a0..d1897353 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -24,6 +24,7 @@
get_excel_file_data,
zip_multi_file_data,
)
+from testgen.ui.components.widgets.page import css_class, flex_row_end
from testgen.ui.navigation.page import Page
from testgen.ui.pdf.test_result_report import create_report
from testgen.ui.services import project_service, test_definition_service, test_results_service, user_session_service
@@ -76,7 +77,7 @@ def render(
summary_column, score_column, actions_column = st.columns([.4, .2, .4], vertical_alignment="bottom")
status_filter_column, test_type_filter_column, table_filter_column, column_filter_column, sort_column, export_button_column = st.columns(
- [.2, .2, .2, .2, .1, .1], vertical_alignment="bottom"
+ [.175, .175, .2, .2, .1, .15], vertical_alignment="bottom"
)
testgen.flex_row_end(actions_column)
@@ -518,13 +519,28 @@ def show_result_detail(
bind_to_query_prop="test_result_id",
)
- with export_container:
- if st.button(label=":material/download: Export", help="Download filtered test results to Excel"):
- download_dialog(
- dialog_title="Download Excel Report",
- file_content_func=get_excel_report_data,
- args=(df, test_suite, run_date),
- )
+ popover_container = export_container.empty()
+
+ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ flex_row_end()
+ st.button(label="Export", icon=":material/download:", disabled=True)
+
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(test_suite, run_date, run_id, data),
+ )
+
+ with popover_container.container(key="tg--export-popover"):
+ flex_row_end()
+ with st.popover(label="Export", icon=":material/download:", help="Download test results to Excel"):
+ css_class("tg--export-wrapper")
+ st.button(label="All tests", type="tertiary", on_click=open_download_dialog)
+ st.button(label="Filtered tests", type="tertiary", on_click=partial(open_download_dialog, df))
+ if selected_rows:
+ st.button(label="Selected tests", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected_rows)))
# Display history and detail for selected row
if not selected_rows:
@@ -623,10 +639,14 @@ def show_result_detail(
def get_excel_report_data(
update_progress: PROGRESS_UPDATE_TYPE,
- data: pd.DataFrame,
test_suite: str,
run_date: str,
+ run_id: str,
+ data: pd.DataFrame | None = None,
) -> FILE_DATA_TYPE:
+ if data is None:
+ data = get_test_results(run_id)
+
columns = {
"schema_name": {"header": "Schema"},
"table_name": {"header": "Table"},
From 4beff73e7fa8a80bfec18c3091fa15b47e579070 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 00:57:14 -0400
Subject: [PATCH 38/56] fix(sort): close popover on apply
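Streamlit has no API to close an open popover, so the change below leans on a known workaround: render the popover inside an st.empty() placeholder and, when the apply callback fires, re-render that placeholder with a plain disabled button, which dismisses the popover. A condensed sketch of the pattern (labels are placeholders):

    import streamlit as st

    placeholder = st.empty()

    def handle_apply():
        # Re-rendering the placeholder without the popover closes it.
        with placeholder.container():
            st.button("Sort", disabled=True)

    with placeholder.container():
        with st.popover("Sort"):
            st.button("Apply", on_click=handle_apply)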
---
.../ui/components/widgets/sorting_selector.py | 27 +++++++++++++------
1 file changed, 19 insertions(+), 8 deletions(-)
diff --git a/testgen/ui/components/widgets/sorting_selector.py b/testgen/ui/components/widgets/sorting_selector.py
index 8b168f1c..5dd1cc95 100644
--- a/testgen/ui/components/widgets/sorting_selector.py
+++ b/testgen/ui/components/widgets/sorting_selector.py
@@ -73,14 +73,25 @@ def sorting_selector(
if state is None:
state = default
- with st.popover(popover_label):
- new_state = component(
- id_="sorting_selector",
- key=key,
- default=state,
- on_change=on_change,
- props={"columns": columns, "state": state},
- )
+ popover_container = st.empty()
+
+ def handle_change() -> None:
+ if on_change:
+ on_change()
+
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ st.button(label=f"{popover_label} :material/keyboard_arrow_up:", disabled=True)
+
+ with popover_container.container():
+ with st.popover(popover_label):
+ new_state = component(
+ id_="sorting_selector",
+ key=key,
+ default=state,
+ on_change=handle_change,
+ props={"columns": columns, "state": state},
+ )
# For some unknown reason, sometimes, streamlit returns None as the component state
new_state = [] if new_state is None else new_state
From 5fcf9d342577f5fd62e81b45a575603bf18a254e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 00:57:30 -0400
Subject: [PATCH 39/56] fix: misc styling improvements
---
testgen/ui/components/frontend/js/pages/profiling_runs.js | 2 +-
testgen/ui/components/frontend/js/pages/test_runs.js | 4 ++--
testgen/ui/views/dialogs/data_preview_dialog.py | 5 +++--
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index dc955cb9..feee47de 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -146,7 +146,7 @@ const ProfilingRunItem = (
{ label: 'Possible', value: item.anomalies_possible_ct, color: 'yellow' },
{ label: 'Dismissed', value: item.anomalies_dismissed_ct, color: 'grey' },
],
- height: 10,
+ height: 3,
width: 350,
}) : '--',
item.anomaly_ct ? Link({
diff --git a/testgen/ui/components/frontend/js/pages/test_runs.js b/testgen/ui/components/frontend/js/pages/test_runs.js
index 725d12c3..0159b0cd 100644
--- a/testgen/ui/components/frontend/js/pages/test_runs.js
+++ b/testgen/ui/components/frontend/js/pages/test_runs.js
@@ -206,8 +206,8 @@ const TestRunItem = (
{ label: 'Error', value: item.error_ct, color: 'brown' },
{ label: 'Dismissed', value: item.dismissed_ct, color: 'grey' },
],
- height: 10,
- width: 400,
+ height: 8,
+ width: 350,
}) : '--',
),
div(
diff --git a/testgen/ui/views/dialogs/data_preview_dialog.py b/testgen/ui/views/dialogs/data_preview_dialog.py
index 6911c3d6..9d5beaea 100644
--- a/testgen/ui/views/dialogs/data_preview_dialog.py
+++ b/testgen/ui/views/dialogs/data_preview_dialog.py
@@ -20,7 +20,8 @@ def data_preview_dialog(
f"Table: {table_name}"
)
- data = get_preview_data(table_group_id, schema_name, table_name, column_name)
+ with st.spinner("Loading data ..."):
+ data = get_preview_data(table_group_id, schema_name, table_name, column_name)
if data.empty:
st.warning("The preview data could not be loaded.")
@@ -32,7 +33,7 @@ def data_preview_dialog(
)
-@st.cache_data(show_spinner="Loading data ...")
+@st.cache_data(show_spinner=False)
def get_preview_data(
table_group_id: str,
schema_name: str,
From bc9a54ebb662e1ef6fcb4bd658fd67c162bc032e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 9 Jul 2025 15:58:33 -0400
Subject: [PATCH 40/56] feat(schedules): add schedule dialogs to table groups
and test suites pages
---
.../frontend/js/pages/schedule_list.js | 10 +++---
.../frontend/js/pages/table_group_list.js | 34 +++++++++++++------
.../frontend/js/pages/test_suites.js | 31 ++++++++++++-----
testgen/ui/views/profiling_runs.py | 2 +-
testgen/ui/views/table_groups.py | 2 ++
testgen/ui/views/test_runs.py | 2 +-
testgen/ui/views/test_suites.py | 2 ++
7 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js
index a4621c5d..2d9e7bf9 100644
--- a/testgen/ui/components/frontend/js/pages/schedule_list.js
+++ b/testgen/ui/components/frontend/js/pages/schedule_list.js
@@ -35,7 +35,7 @@ const ScheduleList = (/** @type Properties */ props) => {
} catch (e) {
console.log(e)
}
- Streamlit.setFrameHeight(100 * items.length);
+ Streamlit.setFrameHeight(100 * items.length || 150);
return items;
});
const columns = ['40%', '50%', '10%'];
@@ -60,9 +60,11 @@ const ScheduleList = (/** @type Properties */ props) => {
'Actions',
),
),
- () => div(
- scheduleItems.val.map(item => ScheduleListItem(item, columns, getValue(props.permissions))),
- ),
+ () => scheduleItems.val?.length
+ ? div(
+ scheduleItems.val.map(item => ScheduleListItem(item, columns, getValue(props.permissions))),
+ )
+ : div({ class: 'mt-5 mb-3 ml-3 text-secondary' }, 'No schedules defined yet.'),
);
}
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index af9a25c7..b201bd8f 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -208,7 +208,7 @@ const TableGroupList = (props) => {
*/
const Toolbar = (permissions, connections, selectedConnection) => {
return div(
- { class: 'flex-row fx-align-flex-end mb-4' },
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' },
(getValue(connections) ?? [])?.length > 1
? Select({
testId: 'connection-select',
@@ -223,17 +223,29 @@ const Toolbar = (permissions, connections, selectedConnection) => {
onChange: (value) => emitEvent('ConnectionSelected', { payload: value }),
})
: undefined,
- span({ style: 'margin: 0 auto;' }),
- permissions.can_edit
- ? Button({
+ div(
+ { class: 'flex-row fx-gap-4' },
+ Button({
+ icon: 'today',
type: 'stroked',
- icon: 'add',
- label: 'Add Table Group',
- color: 'basic',
- style: 'background: var(--button-generic-background-color); width: unset;',
- onclick: () => emitEvent('AddTableGroupClicked', {}),
- })
- : '',
+ label: 'Profiling Schedules',
+ tooltip: 'Manage when profiling should run for table groups',
+ tooltipPosition: 'bottom',
+ width: 'fit-content',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('RunSchedulesClicked', {}),
+ }),
+ permissions.can_edit
+ ? Button({
+ type: 'stroked',
+ icon: 'add',
+ label: 'Add Table Group',
+ color: 'basic',
+ style: 'background: var(--button-generic-background-color); width: unset;',
+ onclick: () => emitEvent('AddTableGroupClicked', {}),
+ })
+ : '',
+ )
);
}
diff --git a/testgen/ui/components/frontend/js/pages/test_suites.js b/testgen/ui/components/frontend/js/pages/test_suites.js
index c9487fd8..4aba36ce 100644
--- a/testgen/ui/components/frontend/js/pages/test_suites.js
+++ b/testgen/ui/components/frontend/js/pages/test_suites.js
@@ -77,7 +77,7 @@ const TestSuites = (/** @type Properties */ props) => {
? div(
{ class: 'tg-test-suites'},
() => div(
- { class: 'tg-test-suites--toolbar flex-row fx-align-flex-end mb-4' },
+ { class: 'flex-row fx-align-flex-end fx-justify-space-between mb-4' },
Select({
label: 'Table Group',
value: getValue(props.table_group_filter_options)?.find((op) => op.selected)?.value ?? null,
@@ -88,16 +88,29 @@ const TestSuites = (/** @type Properties */ props) => {
testId: 'table-group-filter',
onChange: (value) => emitEvent('FilterApplied', {payload: value}),
}),
- userCanEdit
- ? Button({
- icon: 'add',
+ div(
+ { class: 'flex-row fx-gap-4' },
+ Button({
+ icon: 'today',
type: 'stroked',
- label: 'Add Test Suite',
+ label: 'Test Run Schedules',
+ tooltip: 'Manage when test suites should run',
+ tooltipPosition: 'bottom',
width: 'fit-content',
- style: 'margin-left: auto; background: var(--dk-card-background);',
- onclick: () => emitEvent('AddTestSuiteClicked', {}),
- })
- : '',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('RunSchedulesClicked', {}),
+ }),
+ userCanEdit
+ ? Button({
+ icon: 'add',
+ type: 'stroked',
+ label: 'Add Test Suite',
+ width: 'fit-content',
+ style: 'background: var(--dk-card-background);',
+ onclick: () => emitEvent('AddTestSuiteClicked', {}),
+ })
+ : '',
+ ),
),
() => div(
{ class: 'flex-column' },
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index 951ff129..a5c7bfa5 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -68,7 +68,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
st.button(
":material/today: Profiling Schedules",
- help="Manages when profiling should run for a given table group",
+ help="Manage when profiling should run for table groups",
on_click=partial(ProfilingScheduleDialog().open, project_code)
)
diff --git a/testgen/ui/views/table_groups.py b/testgen/ui/views/table_groups.py
index 53cb9a07..aff424c7 100644
--- a/testgen/ui/views/table_groups.py
+++ b/testgen/ui/views/table_groups.py
@@ -15,6 +15,7 @@
from testgen.ui.services import user_session_service
from testgen.ui.session import session, temp_value
from testgen.ui.views.connections import FLAVOR_OPTIONS, format_connection
+from testgen.ui.views.profiling_runs import ProfilingScheduleDialog
PAGE_TITLE = "Table Groups"
@@ -56,6 +57,7 @@ def render(self, project_code: str, connection_id: str | None = None, **_kwargs)
]),
},
on_change_handlers={
+ "RunSchedulesClicked": lambda *_: ProfilingScheduleDialog().open(project_code),
"AddTableGroupClicked": partial(self.add_table_group_dialog, project_code),
"EditTableGroupClicked": partial(self.edit_table_group_dialog, project_code),
"DeleteTableGroupClicked": partial(self.delete_table_group_dialog, project_code),
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index 4a5484be..c3fe9913 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -81,7 +81,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
st.button(
":material/today: Test Run Schedules",
- help="Manages when a test suite should run.",
+ help="Manage when test suites should run",
on_click=partial(TestRunScheduleDialog().open, project_code)
)
diff --git a/testgen/ui/views/test_suites.py b/testgen/ui/views/test_suites.py
index 524c74f5..13250493 100644
--- a/testgen/ui/views/test_suites.py
+++ b/testgen/ui/views/test_suites.py
@@ -18,6 +18,7 @@
from testgen.ui.session import session
from testgen.ui.views.dialogs.generate_tests_dialog import generate_tests_dialog
from testgen.ui.views.dialogs.run_tests_dialog import run_tests_dialog
+from testgen.ui.views.test_runs import TestRunScheduleDialog
from testgen.utils import format_field
PAGE_ICON = "rule"
@@ -96,6 +97,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
},
on_change_handlers={
"FilterApplied": on_test_suites_filtered,
+ "RunSchedulesClicked": lambda *_: TestRunScheduleDialog().open(project_code),
"AddTestSuiteClicked": lambda *_: add_test_suite_dialog(project_code, table_groups),
"ExportActionClicked": observability_export_dialog,
"EditActionClicked": partial(edit_test_suite_dialog, project_code, table_groups),
From 56d2d0d85ac8df4d80cbadb1661d9b60564d1fe5 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Thu, 10 Jul 2025 14:41:36 -0400
Subject: [PATCH 41/56] fix(tests): flag invalid tests and display Error
details in test results
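In run_test_parameter_validation.py, validation rows with an empty column name mark tests whose schema, table, or column could not be resolved; their test ids are split out and flagged with a dedicated message. A small worked example of the two comprehensions (data is illustrative):

    # (column_name, [test_ids]) rows as returned by the validation query
    test_columns = [("customers.id", [1, 2]), (None, [3]), ("", [4, 5])]

    invalid_tests = [test_ids for col, test_ids in test_columns if not col]
    invalid_tests = {item for sublist in invalid_tests for item in sublist}  # {3, 4, 5}
    test_columns = [item for item in test_columns if item[0]]  # [("customers.id", [1, 2])]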
---
testgen/commands/run_test_parameter_validation.py | 14 ++++++++++++--
testgen/ui/views/test_results.py | 5 +++++
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/testgen/commands/run_test_parameter_validation.py b/testgen/commands/run_test_parameter_validation.py
index b2b98936..71668bcd 100644
--- a/testgen/commands/run_test_parameter_validation.py
+++ b/testgen/commands/run_test_parameter_validation.py
@@ -29,6 +29,10 @@ def run_parameter_validation_queries(
strColumnList = clsExecute.GetTestValidationColumns(booClean)
test_columns, _ = RetrieveDBResultsToList("DKTG", strColumnList)
+ invalid_tests = [ test_ids for col, test_ids in test_columns if not col ]
+ invalid_tests = { item for sublist in invalid_tests for item in sublist }
+ test_columns = [ item for item in test_columns if item[0] ]
+
if not test_columns:
LOG.warning(f"No test columns are present to validate in Test Suite {strTestSuite}")
missing_columns = []
@@ -71,7 +75,7 @@ def run_parameter_validation_queries(
if missing_tables:
LOG.info("Missing tables: %s", ", ".join(missing_tables))
- if missing_columns or missing_tables:
+ if missing_columns or missing_tables or invalid_tests:
# Flag test_definitions tests with missing tables or columns
LOG.info("CurrentStep: Flagging Tests That Failed Validation")
@@ -86,7 +90,7 @@ def run_parameter_validation_queries(
tests_missing_columns[column_name].extend(test_ids)
clsExecute.flag_val = "D"
- clsExecute.test_ids = list(set(chain(*tests_missing_tables.values(), *tests_missing_columns.values())))
+ clsExecute.test_ids = list(set(chain(*tests_missing_tables.values(), *tests_missing_columns.values(), invalid_tests)))
strPrepFlagTests = clsExecute.PrepFlagTestsWithFailedValidation()
RunActionQueryList("DKTG", [strPrepFlagTests])
@@ -101,6 +105,12 @@ def run_parameter_validation_queries(
clsExecute.test_ids = test_ids
strFlagTests = clsExecute.FlagTestsWithFailedValidation()
RunActionQueryList("DKTG", [strFlagTests])
+
+ if invalid_tests:
+ clsExecute.message = "Invalid test: schema, table, or column not defined"
+ clsExecute.test_ids = invalid_tests
+ strFlagTests = clsExecute.FlagTestsWithFailedValidation()
+ RunActionQueryList("DKTG", [strFlagTests])
# Copy test results to DK DB, using temporary flagged D value to identify
LOG.info("CurrentStep: Saving error results for invalid tests")
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index d1897353..920e007f 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -96,6 +96,7 @@ def render(
"Failed",
"Warning",
"Passed",
+ "Error",
]
status = testgen.select(
options=status_options,
@@ -163,6 +164,8 @@ def render(
status = "'Warning'"
case "Passed":
status = "'Passed'"
+ case "Error":
+ status = "'Error'"
# Display main grid and retrieve selection
selected = show_result_detail(
@@ -498,6 +501,7 @@ def show_result_detail(
"measure_uom",
"result_status",
"action",
+ "result_message",
]
lst_show_headers = [
@@ -508,6 +512,7 @@ def show_result_detail(
"UOM",
"Status",
"Action",
+ "Details",
]
selected_rows = fm.render_grid_select(
From 773e771abd27b923294c04c6d28a8b860e08fdc8 Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Thu, 10 Jul 2025 15:41:09 -0400
Subject: [PATCH 42/56] fix: Do not generate Required Entity tests for empty
tables
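The auto-generation criterion previously read record_ct = value_ct, which also holds for an empty table (0 = 0), so a Required Entry test could be generated against a table with no rows. Expressed as a predicate (simplified restatement, not the actual SQL):

    def qualifies_for_required_test(record_ct: int, value_ct: int) -> bool:
        # Old criterion: record_ct == value_ct, trivially true when both are zero.
        # The added record_ct > 10 guard skips empty and very small tables.
        return record_ct == value_ct and record_ct > 10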
---
testgen/template/dbsetup/050_populate_new_schema_metadata.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
index fa50cab9..5d57acf9 100644
--- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
+++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
@@ -123,7 +123,7 @@ VALUES ('1004', 'Alpha_Trunc', 'Alpha Truncation', 'Maximum character count con
('1025', 'Outlier_Pct_Below', 'Outliers Below', 'Consistent outlier counts under 2 SD below mean', 'Tests that percent of outliers over 2 SD below Mean doesn''t exceed threshold', 'Percent of outliers exceeding 2 SD below the mean is greater than expected threshold.', 'Pct records under limit', NULL, 'functional_data_type = ''Measurement'' AND distinct_value_ct > 30 AND NOT distinct_value_ct = max_value - min_value + 1 AND distinct_value_ct::FLOAT/value_ct::FLOAT > 0.1 AND stdev_value::FLOAT/avg_value::FLOAT > 0.01 AND column_name NOT ILIKE ''%latitude%'' AND column_name NOT ilike ''%longitude%''', 'GREATEST(0, {RESULT_MEASURE}::FLOAT-{THRESHOLD_VALUE}::FLOAT)', '0.75', NULL, NULL, 'baseline_avg,baseline_sd,threshold_value', 'avg_value,stdev_value,0.05', 'Baseline Mean, Baseline Std Deviation, Pct Records over 2 SD', NULL, 'Warning', 'CAT', 'column', 'Accuracy', 'Data Drift', 'Expected maximum pct records over lower 2 SD limit', 'This test counts the number of data points that may be considered as outliers, determined by whether their value exceeds 2 standard deviations below the mean at baseline. Assuming a normal distribution, a small percentage (defaulted to 5%) of outliers is expected. The actual number may vary for different distributions. The expected threshold reflects the maximum percentage of outliers you expect to see. This test uses the baseline mean rather than the mean for the latest dataset to capture systemic shift as well as individual outliers. ', 'Y'),
('1026', 'Pattern_Match', 'Pattern Match', 'Column values match alpha-numeric pattern', 'Tests that all values in the column match the same alpha-numeric pattern identified in baseline data', 'Alpha values do not match consistent pattern in baseline.', 'Pattern Mismatches', NULL, '(functional_data_type IN (''Attribute'', ''DateTime Stamp'', ''Phone'') OR functional_data_type ILIKE ''ID%'' OR functional_data_type ILIKE ''Period%'') AND fn_charcount(top_patterns, E'' \| '' ) = 1 AND REPLACE(SPLIT_PART(top_patterns, ''|'' , 2), ''N'' , '''' ) > '''' AND distinct_value_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'baseline_value,threshold_value', 'TRIM(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(SPLIT_PART(top_patterns, '' | '', 2), ''([*+\-%_])'', ''[\1]'', ''g''), ''A'', ''[A-Z]''), ''N'', ''[0-9]''), ''a'', ''[a-z]'')),0', 'Pattern at Baseline,Threshold Error Count', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected count of pattern mismatches', 'This test is appropriate for character fields that are expected to appear in a consistent format. It uses pattern matching syntax as appropriate for your database: REGEX matching if available, otherwise LIKE expressions. The expected threshold is the number of records that fail to match the defined pattern.', 'Y'),
('1028', 'Recency', 'Recency', 'Latest date within expected range of test date', 'Tests that the latest date in column is within a set number of days of the test date', 'Most recent date value not within expected days of test date.', 'Days before test', 'Number of days that most recent date precedes the date of test', 'general_type= ''D'' AND max_date <= run_date AND NOT column_name IN ( ''filedate'' , ''file_date'' ) AND NOT functional_data_type IN (''Future Date'', ''Schedule Date'') AND DATEDIFF( ''DAY'' , max_date, run_date) <= 62', '(ABS({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT*{PRO_RECORD_CT}::FLOAT/(1.0+DATEDIFF(''DAY'', ''{MIN_DATE}'', ''{MAX_DATE}''))::FLOAT)/NULLIF({RECORD_CT}::FLOAT, 0)', '0.75', NULL, NULL, 'threshold_value', 'CASE WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 3 THEN DATEDIFF(''DAY'', max_date, run_date) + 3 WHEN DATEDIFF(''DAY'', max_date, run_date) <= 7 then DATEDIFF(''DAY'', max_date, run_date) + 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) <= 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 7.0) * 7 WHEN DATEDIFF( ''DAY'' , max_date, run_date) > 31 THEN CEILING( DATEDIFF( ''DAY'' , max_date, run_date)::FLOAT / 30.0) * 30 END', 'Threshold Maximum Days before Test', NULL, 'Warning', 'CAT', 'column', 'Timeliness', 'Recency', 'Expected maximum count of days preceding test date', 'This test evaluates recency based on the latest referenced dates in the column. The test is appropriate for transactional dates and timestamps. The test can be especially valuable because timely data deliveries themselves may not assure that the most recent data is present. You can adjust the expected threshold to the maximum number of days that you expect the data to age before the dataset is refreshed. ', 'Y'),
- ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
+ ('1030', 'Required', 'Required Entry', 'Required non-null value present', 'Tests that a non-null value is present in each record for the column, consistent with baseline data', 'Every record for this column is expected to be filled, but some are missing.', 'Missing values', NULL, 'record_ct = value_ct AND record_ct > 10', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Missing Value Count', NULL, 'Fail', 'CAT', 'column', 'Completeness', 'Schema Drift', 'Expected count of missing values', NULL, 'Y'),
('1033', 'Street_Addr_Pattern', 'Street Address', 'Enough street address entries match defined pattern', 'Tests for percent of records matching standard street address pattern.', 'Percent of values matching standard street address format is under expected threshold.', 'Percent matches', 'Percent of records that match street address pattern', '(std_pattern_match=''STREET_ADDR'') AND (avg_length <> round(avg_length)) AND (avg_embedded_spaces BETWEEN 2 AND 6) AND (avg_length < 35)', '({VALUE_CT}::FLOAT * ({RESULT_MEASURE}::FLOAT - {THRESHOLD_VALUE}::FLOAT)/100.0)/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '75', 'Threshold Pct that Match Address Pattern', NULL, 'Fail', 'CAT', 'column', 'Validity', 'Schema Drift', 'Expected percent of records that match standard street address pattern', 'The street address pattern used in this test should match the vast majority of USA addresses. You can adjust the threshold percent of matches based on the results you are getting -- you may well want to tighten it to make the test more sensitive to invalid entries.', 'Y'),
('1034', 'Unique', 'Unique Values', 'Each column value is unique', 'Tests that no values for the column are repeated in multiple records.', 'Column values should be unique per row.', 'Duplicate values', 'Count of non-unique values', 'record_ct > 500 and record_ct = distinct_value_ct and value_ct > 0', '({RESULT_MEASURE}-{THRESHOLD_VALUE})::FLOAT/NULLIF({RECORD_CT}::FLOAT, 0)', '1.0', NULL, NULL, 'threshold_value', '0', 'Threshold Duplicate Value Count', NULL, 'Fail', 'CAT', 'column', 'Uniqueness', 'Schema Drift', 'Expected count of duplicate values', 'This test is ideal when the database itself does not enforce a primary key constraint on the table. It serves as an independent check on uniqueness. If''s also useful when there are a small number of exceptions to uniqueness, which can be reflected in the expected threshold count of duplicates.', 'Y'),
('1035', 'Unique_Pct', 'Percent Unique', 'Consistent ratio of unique values', 'Tests for statistically-significant shift in percentage of unique values vs. baseline data.', 'Significant shift in percent of unique values vs. baseline.', 'Difference measure', 'Cohen''s H Difference (0.20 small, 0.5 mod, 0.8 large, 1.2 very large, 2.0 huge)', 'distinct_value_ct > 10 AND functional_data_type NOT ILIKE ''Measurement%''', '2.0 * (1.0 - fn_normal_cdf(ABS({RESULT_MEASURE}::FLOAT) / 2.0))', '0.75', NULL, NULL, 'baseline_value_ct,baseline_unique_ct,threshold_value', 'value_ct,distinct_value_ct,0.5', 'Value Count at Baseline,Distinct Value Count at Baseline,Standardized Difference Measure (0 to 1)', NULL, 'Warning', 'CAT', 'column', 'Uniqueness', 'Data Drift', 'Expected maximum Cohen''s H Difference', 'You can think of this as a test of similarity that measures whether the percentage of unique values is consistent with the percentage at baseline. A significant change might indicate duplication or a telling shift in cardinality between entities. The test uses Cohen''s H, a statistical test to identify a significant difference between two ratios. Results are reported on a standardized scale, which can be interpreted via a rule-of-thumb from small to huge. You can refine the expected threshold value as you view legitimate results of the measure over time.', 'Y'),
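The Unique_Pct row above measures drift as a Cohen's H difference between the baseline and current unique-value ratios, and its prevalence formula converts that difference into a likelihood with 2.0 * (1.0 - fn_normal_cdf(ABS(h) / 2.0)). A minimal Python sketch of the same calculation, assuming the standard arcsine form of Cohen's H and approximating the normal CDF with math.erf (the example ratios are made up):

import math

def normal_cdf(x: float) -> float:
    # Standard normal CDF via the error function
    return 0.5 * (1.0 + math.erf(x / math.sqrt(2.0)))

def cohens_h(p1: float, p2: float) -> float:
    # Effect size between two proportions: 0.2 small, 0.5 moderate, 0.8 large
    return abs(2.0 * math.asin(math.sqrt(p1)) - 2.0 * math.asin(math.sqrt(p2)))

# Hypothetical example: 62% unique at baseline vs. 48% unique now
h = cohens_h(0.62, 0.48)
likelihood = 2.0 * (1.0 - normal_cdf(h / 2.0))  # mirrors the prevalence formula in the row above
print(round(h, 3), round(likelihood, 3))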
From 2aba7f02d8c7ea7bbc6cea8da509fdd81ebf92b1 Mon Sep 17 00:00:00 2001
From: Luis
Date: Fri, 11 Jul 2025 15:45:55 -0400
Subject: [PATCH 43/56] feat: allow deleting profiling runs
---
.../frontend/js/pages/profiling_runs.js | 103 +++++++++++++++---
testgen/ui/queries/profiling_run_queries.py | 20 ++++
testgen/ui/views/profiling_runs.py | 78 ++++++++++++-
3 files changed, 185 insertions(+), 16 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/profiling_runs.js b/testgen/ui/components/frontend/js/pages/profiling_runs.js
index feee47de..1290e270 100644
--- a/testgen/ui/components/frontend/js/pages/profiling_runs.js
+++ b/testgen/ui/components/frontend/js/pages/profiling_runs.js
@@ -35,8 +35,9 @@ import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
import { formatTimestamp, formatDuration } from '../display_utils.js';
+import { Checkbox } from '../components/checkbox.js';
-const { div, span, i } = van.tags;
+const { div, i, span, strong } = van.tags;
const ProfilingRuns = (/** @type Properties */ props) => {
window.testgen.isPage = true;
@@ -49,40 +50,104 @@ const ProfilingRuns = (/** @type Properties */ props) => {
Streamlit.setFrameHeight(100 * items.length);
return items;
});
- const columns = ['20%', '20%', '20%', '30%', '10%'];
+ const columns = ['5%', '15%', '20%', '20%', '30%', '10%'];
const userCanRun = getValue(props.permissions)?.can_run ?? false;
+ const userCanEdit = getValue(props.permissions)?.can_edit ?? false;
+ const selectedRuns = {};
const tableId = 'profiling-runs-table';
resizeFrameHeightToElement(tableId);
+ const initializeSelectedStates = (items) => {
+ for (const profilingRun of items) {
+ if (selectedRuns[profilingRun.profiling_run_id] == undefined) {
+ selectedRuns[profilingRun.profiling_run_id] = van.state(false);
+ }
+ }
+ };
+
+ initializeSelectedStates(profilingRunItems.val);
+
+ van.derive(() => {
+ initializeSelectedStates(profilingRunItems.val);
+ });
+
return div(
{ class: 'table', id: tableId },
+ () => {
+ const items = profilingRunItems.val;
+ const selectedItems = items.filter(i => selectedRuns[i.profiling_run_id]?.val ?? false);
+ const someRunSelected = selectedItems.length > 0;
+ const tooltipText = !someRunSelected ? 'No runs selected' : undefined;
+
+ if (!userCanEdit) {
+ return '';
+ }
+
+ return div(
+ { class: 'flex-row fx-justify-content-flex-end pb-2' },
+ someRunSelected ? strong({class: 'mr-1'}, selectedItems.length) : '',
+ someRunSelected ? span({class: 'mr-4'}, 'runs selected') : '',
+ Button({
+ type: 'stroked',
+ icon: 'delete',
+ label: 'Delete Runs',
+ tooltip: tooltipText,
+ tooltipPosition: 'bottom-left',
+ disabled: !someRunSelected,
+ width: 'auto',
+ onclick: () => emitEvent('RunsDeleted', { payload: selectedItems.map(i => i.profiling_run_id) }),
+ }),
+ );
+ },
div(
{ class: 'table-header flex-row' },
+ () => {
+ const items = profilingRunItems.val;
+ const selectedItems = items.filter(i => selectedRuns[i.profiling_run_id]?.val ?? false);
+ const allSelected = selectedItems.length === items.length;
+ const partiallySelected = selectedItems.length > 0 && selectedItems.length < items.length;
+
+ if (!userCanEdit) {
+ return '';
+ }
+
+ return span(
+ { style: `flex: ${columns[0]}` },
+ userCanEdit
+ ? Checkbox({
+ checked: allSelected,
+ indeterminate: partiallySelected,
+ onChange: (checked) => items.forEach(item => selectedRuns[item.profiling_run_id].val = checked),
+ testId: 'select-all-profiling-run',
+ })
+ : '',
+ );
+ },
span(
- { style: `flex: ${columns[0]}` },
+ { style: `flex: ${columns[1]}` },
'Start Time | Table Group',
),
span(
- { style: `flex: ${columns[1]}` },
+ { style: `flex: ${columns[2]}` },
'Status | Duration',
),
span(
- { style: `flex: ${columns[2]}` },
+ { style: `flex: ${columns[3]}` },
'Schema',
),
span(
- { style: `flex: ${columns[3]}` },
+ { style: `flex: ${columns[4]}` },
'Hygiene Issues',
),
span(
- { style: `flex: ${columns[4]}` },
+ { style: `flex: ${columns[5]}` },
'Profiling Score',
),
),
() => div(
- profilingRunItems.val.map(item => ProfilingRunItem(item, columns, userCanRun)),
+ profilingRunItems.val.map(item => ProfilingRunItem(item, columns, selectedRuns[item.profiling_run_id], userCanRun, userCanEdit)),
),
);
}
@@ -90,12 +155,24 @@ const ProfilingRuns = (/** @type Properties */ props) => {
const ProfilingRunItem = (
/** @type ProfilingRun */ item,
/** @type string[] */ columns,
+    /** @type object */ selected,
/** @type boolean */ userCanRun,
+ /** @type boolean */ userCanEdit,
) => {
return div(
{ class: 'table-row flex-row', 'data-testid': 'profiling-run-item' },
+ userCanEdit
+ ? div(
+ { style: `flex: ${columns[0]}; font-size: 16px;` },
+ Checkbox({
+ checked: selected,
+ onChange: (checked) => selected.val = checked,
+ testId: 'select-profiling-run',
+ }),
+ )
+ : '',
div(
- { style: `flex: ${columns[0]}` },
+ { style: `flex: ${columns[1]}` },
div({'data-testid': 'profiling-run-item-starttime'}, formatTimestamp(item.start_time)),
div(
{ class: 'text-caption mt-1', 'data-testid': 'profiling-run-item-tablegroup' },
@@ -103,7 +180,7 @@ const ProfilingRunItem = (
),
),
div(
- { class: 'flex-row', style: `flex: ${columns[1]}` },
+ { class: 'flex-row', style: `flex: ${columns[2]}` },
div(
ProfilingRunStatus(item),
div(
@@ -119,7 +196,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { style: `flex: ${columns[2]}` },
+ { style: `flex: ${columns[3]}` },
div({'data-testid': 'profiling-run-item-schema'}, item.schema_name),
div(
{
@@ -138,7 +215,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { class: 'pr-3', style: `flex: ${columns[3]}` },
+ { class: 'pr-3', style: `flex: ${columns[4]}` },
item.anomaly_ct ? SummaryBar({
items: [
{ label: 'Definite', value: item.anomalies_definite_ct, color: 'red' },
@@ -160,7 +237,7 @@ const ProfilingRunItem = (
}) : null,
),
div(
- { style: `flex: ${columns[4]}; font-size: 16px;` },
+ { style: `flex: ${columns[5]}; font-size: 16px;` },
item.dq_score_profiling ?? '--',
),
);
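The header checkbox in this component derives a checked/indeterminate state from the per-run selection map. A rough Python rendering of that derivation, only to make the intended tri-state behavior explicit (the run ids are hypothetical):

selected_runs = {"run-1": True, "run-2": False, "run-3": True}  # hypothetical selection states

selected_count = sum(1 for is_selected in selected_runs.values() if is_selected)
all_selected = selected_count > 0 and selected_count == len(selected_runs)
partially_selected = 0 < selected_count < len(selected_runs)

print(selected_count, all_selected, partially_selected)  # 2 False True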
diff --git a/testgen/ui/queries/profiling_run_queries.py b/testgen/ui/queries/profiling_run_queries.py
index a2bfa805..ea40f93d 100644
--- a/testgen/ui/queries/profiling_run_queries.py
+++ b/testgen/ui/queries/profiling_run_queries.py
@@ -2,6 +2,7 @@
import testgen.ui.services.database_service as db
from testgen.common import date_service
+from testgen.common.models import get_current_session
def update_status(profile_run_id: str, status: str) -> None:
@@ -25,3 +26,22 @@ def cancel_all_running() -> None:
SET status = 'Cancelled'
WHERE status = 'Running';
""")
+
+
+def cascade_delete_multiple_profiling_runs(profiling_run_ids: list[str]) -> None:
+ session = get_current_session()
+
+ if not profiling_run_ids:
+        raise ValueError("No profiling runs were specified.")
+
+ params = {f"id_{idx}": value for idx, value in enumerate(profiling_run_ids)}
+ param_keys = [f":{slot}" for slot in params.keys()]
+
+ with session.begin():
+ session.execute(f"DELETE FROM profile_pair_rules WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profile_anomaly_results WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profile_results WHERE profile_run_id IN ({', '.join(param_keys)})", params=params)
+ session.execute(f"DELETE FROM profiling_runs WHERE id IN ({', '.join(param_keys)})", params=params)
+ session.commit()
+
+ st.cache_data.clear()
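cascade_delete_multiple_profiling_runs expands the id list into named placeholders instead of interpolating values into the SQL text. A minimal sketch of that expansion, independent of the session API (the table name and ids here are illustrative only):

profiling_run_ids = ["a1", "b2", "c3"]  # hypothetical ids

params = {f"id_{idx}": value for idx, value in enumerate(profiling_run_ids)}
placeholders = ", ".join(f":{key}" for key in params)
sql = f"DELETE FROM profiling_runs WHERE id IN ({placeholders})"

print(sql)     # DELETE FROM profiling_runs WHERE id IN (:id_0, :id_1, :id_2)
print(params)  # {'id_0': 'a1', 'id_1': 'b2', 'id_2': 'c3'}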
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index a5c7bfa5..a90de40c 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -1,3 +1,4 @@
+import logging
import typing
from functools import partial
@@ -8,17 +9,19 @@
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
import testgen.ui.services.query_service as dq
+from testgen.common.models import with_database_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets import testgen_component
from testgen.ui.navigation.menu import MenuItem
from testgen.ui.navigation.page import Page
from testgen.ui.queries import profiling_run_queries, project_queries
from testgen.ui.services import user_session_service
-from testgen.ui.session import session
+from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.manage_schedules import ScheduleDialog
from testgen.ui.views.dialogs.run_profiling_dialog import run_profiling_dialog
from testgen.utils import friendly_score, to_int
+LOG = logging.getLogger("testgen")
FORM_DATA_WIDTH = 400
PAGE_SIZE = 50
PAGE_ICON = "data_thresholding"
@@ -97,9 +100,13 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
"items": paginated_df.to_json(orient="records"),
"permissions": {
"can_run": user_can_run,
+ "can_edit": user_can_run,
},
},
- event_handlers={ "RunCanceled": on_cancel_run }
+ event_handlers={
+ "RunCanceled": on_cancel_run,
+ "RunsDeleted": partial(on_delete_runs, project_code, table_group_id),
+ }
)
@@ -178,6 +185,60 @@ def on_cancel_run(profiling_run: pd.Series) -> None:
fm.reset_post_updates(str_message=f":{'green' if process_status else 'red'}[{process_message}]", as_toast=True)
+@st.dialog(title="Delete Profiling Runs")
+@with_database_session
+def on_delete_runs(project_code: str, table_group_id: str, profiling_run_ids: list[str]) -> None:
+ def on_delete_confirmed(*_args) -> None:
+ set_delete_confirmed(True)
+
+ message = f"Are you sure you want to delete the {len(profiling_run_ids)} selected profiling runs?"
+ constraint = {
+ "warning": "Any running processes will be canceled.",
+ "confirmation": "Yes, cancel and delete the profiling runs.",
+ }
+ if len(profiling_run_ids) == 1:
+ message = "Are you sure you want to delete the selected profiling run?"
+ constraint["confirmation"] = "Yes, cancel and delete the profiling run."
+
+ result, set_result = temp_value("profiling-runs:result-value", default=None)
+ delete_confirmed, set_delete_confirmed = temp_value("profiling-runs:confirm-delete", default=False)
+
+ testgen.testgen_component(
+ "confirm_dialog",
+ props={
+ "project_code": project_code,
+ "message": message,
+ "constraint": constraint,
+ "button_label": "Delete",
+ "button_color": "warn",
+ "result": result(),
+ },
+ on_change_handlers={
+ "ActionConfirmed": on_delete_confirmed,
+ },
+ )
+
+ if delete_confirmed():
+ try:
+ with st.spinner("Deleting runs ..."):
+ profiling_runs = get_db_profiling_runs(project_code, table_group_id, profiling_run_ids=profiling_run_ids)
+ for _, profiling_run in profiling_runs.iterrows():
+ profiling_run_id = profiling_run["profiling_run_id"]
+ if profiling_run["status"] == "Running":
+ process_status, process_message = process_service.kill_profile_run(to_int(profiling_run["process_id"]))
+ if process_status:
+ profiling_run_queries.update_status(profiling_run_id, "Cancelled")
+ profiling_run_queries.cascade_delete_multiple_profiling_runs(profiling_run_ids)
+ st.rerun()
+ except Exception:
+ LOG.exception("Failed to delete profiling runs")
+ set_result({
+ "success": False,
+                "message": "Unable to delete the selected profiling runs. Please try again.",
+ })
+ st.rerun(scope="fragment")
+
+
@st.cache_data(show_spinner=False)
def get_db_table_group_choices(project_code: str) -> pd.DataFrame:
schema = st.session_state["dbschema"]
@@ -185,9 +246,19 @@ def get_db_table_group_choices(project_code: str) -> pd.DataFrame:
@st.cache_data(show_spinner="Loading data ...")
-def get_db_profiling_runs(project_code: str, table_group_id: str | None = None) -> pd.DataFrame:
+def get_db_profiling_runs(
+ project_code: str,
+ table_group_id: str | None = None,
+ profiling_run_ids: list[str] | None = None,
+) -> pd.DataFrame:
schema = st.session_state["dbschema"]
table_group_condition = f" AND v_profiling_runs.table_groups_id = '{table_group_id}' " if table_group_id else ""
+
+    profiling_runs_condition = ""
+ if profiling_run_ids and len(profiling_run_ids) > 0:
+ profiling_run_ids_ = [f"'{run_id}'" for run_id in profiling_run_ids]
+        profiling_runs_condition = f" AND v_profiling_runs.profiling_run_id::VARCHAR IN ({', '.join(profiling_run_ids_)})"
+
sql = f"""
WITH profile_anomalies AS (
SELECT profile_anomaly_results.profile_run_id,
@@ -245,6 +316,7 @@ def get_db_profiling_runs(project_code: str, table_group_id: str | None = None)
LEFT JOIN profile_anomalies ON (v_profiling_runs.profiling_run_id = profile_anomalies.profile_run_id)
WHERE project_code = '{project_code}'
{table_group_condition}
+            {profiling_runs_condition}
ORDER BY start_time DESC;
"""
From c481d0da410985e1b151e71169122a0bd9bfbf9b Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 9 Jul 2025 11:34:28 -0400
Subject: [PATCH 44/56] misc: upgrade Streamlit from 1.44.1 to 1.46.1
---
pyproject.toml | 2 +-
testgen/ui/assets/scripts.js | 4 -
testgen/ui/assets/style.css | 92 +++++++++++--------
.../frontend/js/components/button.js | 4 -
4 files changed, 56 insertions(+), 46 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 179a1545..5ed0ceab 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,7 +40,7 @@ dependencies = [
"requests_extensions==1.1.3",
"numpy==1.26.4",
"pandas==2.1.4",
- "streamlit==1.44.1",
+ "streamlit==1.46.1",
"streamlit-extras==0.3.0",
"streamlit-aggrid==0.3.4.post3",
"plotly_express==0.4.1",
diff --git a/testgen/ui/assets/scripts.js b/testgen/ui/assets/scripts.js
index 45da923a..46e0aafb 100644
--- a/testgen/ui/assets/scripts.js
+++ b/testgen/ui/assets/scripts.js
@@ -2,10 +2,6 @@ import van from './static/js/van.min.js';
window.van = van;
-window.addEventListener('load', function() {
- removeElements([ 'header[data-testid="stHeader"]' ]);
-});
-
window.addEventListener('message', async function(event) {
if (event.data.type === 'TestgenCopyToClipboard') {
await copyToClipboard(event.data.text || '');
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index a3e703f4..3929be26 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -33,6 +33,8 @@ body {
--portal-background: white;
--portal-box-shadow: rgba(0, 0, 0, 0.16) 0px 4px 16px;
--select-hover-background: rgb(240, 242, 246);
+
+ --app-background-color: #f8f9fa;
}
img.dk-logo-img {
@@ -40,22 +42,47 @@ img.dk-logo-img {
width: 100%;
}
-/* Streamlit header */
-header {
- display: none !important;
+/* Header */
+.stAppHeader {
+ width: 85px !important; /* allows clicking on the breadcrumbs */
+ left: calc(24px - 1rem) !important;
+ background: transparent !important;
+ min-height: unset !important;
+ overflow: hidden !important; /* hides the running man animation */
+}
+
+/* - with breadcrumbs */
+.stAppHeader:has(~ .stMain .st-key-testgen-breadcrumbs) {
+ height: 65px !important;
+ top: 5px !important;
+}
+
+/* - without breadcrumbs */
+.stAppHeader:not(:has(~ .stMain .st-key-testgen-breadcrumbs)) {
+ top: 7px !important;
+ height: 39.59px !important;
+}
+
+/* hide while sidebar expanded */
+.stApp:has(.stSidebar[aria-expanded="true"]) .stAppHeader {
+ display: none;
+}
+/* End Header */
+
+#stDecoration {
+ visibility: hidden;
}
-/* ... */
/* Sidebar */
[data-testid="stSidebarContent"] [data-testid="stSidebarHeader"] {
padding: 16px 20px;
}
-[data-testid="stSidebarHeader"] [data-testid="stLogo"] {
+[data-testid="stSidebarHeader"] .stLogo {
max-width: fit-content;
}
-section[data-testid="stSidebar"] {
+section.stSidebar {
width: 250px;
z-index: 999;
background-color: var(--sidebar-background-color);
@@ -68,30 +95,18 @@ section[data-testid="stSidebar"] {
/* */
/* Main content */
-div[data-testid="stAppViewContainer"] > :nth-child(2 of section) {
- background-color: #f8f9fa;
+.stMain {
+ background-color: var(--app-background-color);
}
-div[data-testid="stMainBlockContainer"] {
+.stMain > .stMainBlockContainer {
padding: 12px 24px 24px;
}
-div[data-testid="stVerticalBlock"] {
+.stVerticalBlock[data-testid="stVerticalBlock"] {
gap: 0.5rem;
}
-div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"]) div[data-testid="stSidebarCollapsedControl"] {
- top: 0.5rem;
- border-radius: 4px;
- background-color: var(--border-color);
- padding: 3px 0 0 8px;
-}
-
-div[data-testid="stAppViewContainer"]:has(section[data-testid="stSidebar"][aria-expanded="true"]) div[data-testid="stSidebarCollapsedControl"] {
- display: none;
-}
-/* */
-
/* Dialog - sets the width of all st.dialog */
/* There is no way to target "large" and "small" dialogs reliably */
div[data-testid="stDialog"] div[role="dialog"] {
@@ -203,11 +218,12 @@ button[title="Show password text"] {
}
/* ... */
-[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.bg-white) {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.bg-white),
+[data-testid="stVerticalBlockBorderWrapper"]:has(> .stVerticalBlock > .stElementContainer > div.stHtml > i.bg-white) {
background-color: var(--dk-card-background);
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has(> div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-row) > div > [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-row) {
width: 100%;
flex-direction: row;
}
@@ -218,19 +234,19 @@ div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="st
max-height: 40px;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-start) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-start) {
justify-content: flex-start;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-end) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-end) {
justify-content: flex-end;
}
-div[data-testid="stVerticalBlockBorderWrapper"]:has( > div > div[data-testid="stVerticalBlock"] > div.element-container > div.stHtml > i.flex-center) [data-testid="stVerticalBlock"] {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.flex-center) {
justify-content: center;
}
-[data-testid="stVerticalBlock"]:has(> div.element-container > div.stHtml > i.no-flex-gap) {
+.stVerticalBlock:has(> div.stElementContainer > div.stHtml > i.no-flex-gap) {
gap: unset;
}
@@ -296,8 +312,13 @@ Use as testgen.text("text", "extra_styles") */
transition: padding 0.3s;
}
-[data-testid="stSidebar"][aria-expanded="false"] ~ [data-testid="stMain"] .tg-header {
- padding-left: 80px;
+.st-key-testgen-breadcrumbs {
+ transition: padding 0.3s;
+}
+
+[data-testid="stSidebar"][aria-expanded="false"] ~ div > [data-testid="stMain"] .tg-header,
+[data-testid="stSidebar"][aria-expanded="false"] ~ div > [data-testid="stMain"] .st-key-testgen-breadcrumbs {
+ padding-left: 85px;
}
.tg-header--line {
@@ -321,7 +342,8 @@ Use as testgen.text("text", "extra_styles") */
}
.st-key-tg-header--help [data-testid="stPopover"] {
- width: auto;
+ display: flex;
+ justify-content: flex-end;
}
.st-key-tg-header--help button[data-testid="stPopoverButton"] {
@@ -472,20 +494,16 @@ div[data-testid="stPopoverBody"] [data-testid="stVerticalBlock"]:has(i.tg--expor
--portal-background: #14181f;
--portal-box-shadow: rgba(0, 0, 0, 0.95) 0px 4px 16px;
--select-hover-background: rgba(255, 255, 255, .32);
- }
- /* Main content */
- div[data-testid="stAppViewContainer"] > :nth-child(2 of section) {
- background-color: rgb(14, 17, 23);
+ --app-background-color: rgb(14, 17, 23);
}
- /* */
div[data-modal-container='true']::before {
background-color: rgba(100, 100, 100, 0.5) !important;
}
div[data-modal-container='true'] > div:first-child > div:first-child {
- background-color: rgb(14, 17, 23) !important;
+ background-color: var(--app-background-color) !important;
}
}
/* ... */
diff --git a/testgen/ui/components/frontend/js/components/button.js b/testgen/ui/components/frontend/js/components/button.js
index 08b32393..d90b0034 100644
--- a/testgen/ui/components/frontend/js/components/button.js
+++ b/testgen/ui/components/frontend/js/components/button.js
@@ -214,10 +214,6 @@ button.tg-button.tg-warn-button.tg-stroked-button {
color: var(--button-warn-stroked-text-color);
background: var(--button-warn-stroked-background);
}
-
-button.tg-button.tg-warn-button[disabled] {
- color: rgba(255, 255, 255, .5) !important;
-}
/* ... */
`);
From c770e152f2decc8dd253b0ffd805bde2da8fd6f4 Mon Sep 17 00:00:00 2001
From: Luis
Date: Wed, 9 Jul 2025 17:17:50 -0400
Subject: [PATCH 45/56] feat: allow filtering by partial column name
---
testgen/ui/assets/style.css | 4 +
testgen/ui/components/widgets/select.py | 56 ++--
testgen/ui/queries/profiling_queries.py | 26 +-
testgen/ui/queries/test_definition_queries.py | 130 +++++-----
testgen/ui/services/test_results_service.py | 202 ++++++++-------
testgen/ui/views/hygiene_issues.py | 245 ++++++++++--------
testgen/ui/views/profiling_results.py | 14 +-
testgen/ui/views/test_definitions.py | 3 +-
testgen/ui/views/test_results.py | 17 +-
9 files changed, 393 insertions(+), 304 deletions(-)
diff --git a/testgen/ui/assets/style.css b/testgen/ui/assets/style.css
index 3929be26..420f9605 100644
--- a/testgen/ui/assets/style.css
+++ b/testgen/ui/assets/style.css
@@ -67,6 +67,10 @@ img.dk-logo-img {
.stApp:has(.stSidebar[aria-expanded="true"]) .stAppHeader {
display: none;
}
+
+.stStatusWidget {
+ display: none !important;
+}
/* End Header */
#stDecoration {
diff --git a/testgen/ui/components/widgets/select.py b/testgen/ui/components/widgets/select.py
index 31fa748c..23d65d96 100644
--- a/testgen/ui/components/widgets/select.py
+++ b/testgen/ui/components/widgets/select.py
@@ -1,3 +1,5 @@
+import re
+
import pandas as pd
import streamlit as st
from streamlit_extras.no_default_selectbox import selectbox
@@ -5,6 +7,9 @@
from testgen.ui.navigation.router import Router
EMPTY_VALUE = "---"
+CUSTOM_VALUE_TEMPLATE = "Custom: {value}"
+CUSTOM_VALUE_PATTERN = r"Custom: (.+)"
+
def select(
label: str,
@@ -15,42 +20,61 @@ def select(
required: bool = False,
bind_to_query: str | None = None,
bind_empty_value: bool = False,
+ accept_new_options: bool = False,
+ custom_values_wrap: str | None = "%{}%",
**kwargs,
):
- kwargs = {**kwargs}
+ kwargs = {**kwargs, "accept_new_options": accept_new_options}
kwargs["label"] = label
+ kwargs["index"] = None
+
+ option_values = options
+ option_display_labels = options
if isinstance(options, pd.DataFrame):
value_column = value_column or options.columns[0]
display_column = display_column or value_column
- kwargs["options"] = options[display_column]
- if default_value in options[value_column].values:
- kwargs["index"] = int(options[options[value_column] == default_value].index[0]) + (0 if required else 1)
- else:
- kwargs["options"] = options
- if default_value in options:
- kwargs["index"] = options.index(default_value) + (0 if required else 1)
- elif default_value == EMPTY_VALUE and not required:
- kwargs["index"] = 0
+
+ option_values = options[value_column].values.tolist()
+ option_display_labels = options[display_column].values.tolist()
+
+ kwargs["options"] = [*option_display_labels]
+ if default_value in option_values:
+ kwargs["index"] = option_values.index(default_value) + (0 if required else 1)
+ elif default_value == EMPTY_VALUE and not required:
+ kwargs["index"] = 0
+ elif default_value and default_value != EMPTY_VALUE and accept_new_options:
+ kwargs["options"].append(CUSTOM_VALUE_TEMPLATE.format(value=default_value))
+ kwargs["index"] = len(kwargs["options"])
if bind_to_query:
kwargs["key"] = kwargs.get("key", f"testgen_select_{bind_to_query}")
- if default_value is not None and kwargs.get("index") is None:
- Router().set_query_params({ bind_to_query: None }) # Unset the query params if the current value is not valid
+
+ # Unset the query params if the current value is not valid and new options are not allowed
+ if default_value is not None and kwargs.get("index") is None and not accept_new_options:
+ Router().set_query_params({ bind_to_query: None })
def update_query_params():
query_value = st.session_state[kwargs["key"]]
if not required and query_value == EMPTY_VALUE and not bind_empty_value:
query_value = None
- elif isinstance(options, pd.DataFrame):
- query_value = options.loc[options[display_column] == query_value, value_column].iloc[0]
+ elif query_value in option_display_labels:
+ query_value = option_values[option_display_labels.index(query_value)]
+ # elif isinstance(options, pd.DataFrame) and default_value in options[value_column].values:
+ # query_value = options.loc[options[display_column] == query_value, value_column].iloc[0]
Router().set_query_params({ bind_to_query: query_value })
kwargs["on_change"] = update_query_params
selected = st.selectbox(**kwargs) if required else selectbox(**kwargs)
- if selected and isinstance(options, pd.DataFrame):
- return options.loc[options[display_column] == selected, value_column].iloc[0]
+ if selected:
+ if selected in option_display_labels:
+ selected = option_values[option_display_labels.index(selected)]
+
+ if accept_new_options and (match := re.match(CUSTOM_VALUE_PATTERN, selected)):
+ selected = match.group(1)
+ if custom_values_wrap:
+ selected = custom_values_wrap.format(selected)
return selected
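With accept_new_options enabled, a value typed into the select is shown as "Custom: <value>" and, on the way out, unwrapped and turned into an ILIKE pattern through custom_values_wrap. A small sketch of that round trip with the widget machinery left out (the typed value is made up):

import re

CUSTOM_VALUE_TEMPLATE = "Custom: {value}"
CUSTOM_VALUE_PATTERN = r"Custom: (.+)"
custom_values_wrap = "%{}%"

typed_value = "cust"  # hypothetical user input
displayed = CUSTOM_VALUE_TEMPLATE.format(value=typed_value)

selected = displayed
if match := re.match(CUSTOM_VALUE_PATTERN, selected):
    selected = custom_values_wrap.format(match.group(1))

print(displayed)  # Custom: cust
print(selected)   # %cust%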
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index 71139317..db755ab9 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -94,14 +94,20 @@ def get_run_by_id(profile_run_id: str) -> pd.Series:
@st.cache_data(show_spinner=False)
-def get_profiling_results(profiling_run_id: str, table_name: str = "%%", column_name: str = "%%", sorting_columns = None):
+def get_profiling_results(profiling_run_id: str, table_name: str | None = None, column_name: str | None = None, sorting_columns = None):
+ db_session = get_current_session()
+ params = {
+ "profiling_run_id": profiling_run_id,
+ "table_name": table_name if table_name else "%%",
+ "column_name": column_name if column_name else "%%",
+ }
+
order_by = ""
if sorting_columns is None:
order_by = "ORDER BY schema_name, table_name, position"
elif len(sorting_columns):
order_by = "ORDER BY " + ", ".join(" ".join(col) for col in sorting_columns)
- schema: str = st.session_state["dbschema"]
query = f"""
SELECT
id::VARCHAR,
@@ -125,18 +131,22 @@ def get_profiling_results(profiling_run_id: str, table_name: str = "%%", column_
functional_table_type AS semantic_table_type,
CASE WHEN EXISTS(
SELECT 1
- FROM {schema}.profile_anomaly_results
+ FROM profile_anomaly_results
WHERE profile_run_id = profile_results.profile_run_id
AND table_name = profile_results.table_name
AND column_name = profile_results.column_name
) THEN 'Yes' END AS hygiene_issues
- FROM {schema}.profile_results
- WHERE profile_run_id = '{profiling_run_id}'
- AND table_name ILIKE '{table_name}'
- AND column_name ILIKE '{column_name}'
+ FROM profile_results
+ WHERE profile_run_id = :profiling_run_id
+ AND table_name ILIKE :table_name
+ AND column_name ILIKE :column_name
{order_by};
"""
- return db.retrieve_data(query)
+
+ results = db_session.execute(query, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ return pd.DataFrame(list(results), columns=columns)
@st.cache_data(show_spinner=False)
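The rewritten queries filter column_name with ILIKE and bind the pattern as a named parameter, falling back to '%%' so an empty filter matches every row. A rough Python equivalent of the matching itself, only to illustrate which columns a partial filter like '%cust%' picks up (the column names are made up):

columns = ["Customer_ID", "cust_email", "order_total"]  # hypothetical column names

def ilike_contains(value: str, needle: str) -> bool:
    # Rough equivalent of SQL: value ILIKE '%' || needle || '%'
    return needle.lower() in value.lower()

print([name for name in columns if ilike_contains(name, "cust")])
# ['Customer_ID', 'cust_email']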
diff --git a/testgen/ui/queries/test_definition_queries.py b/testgen/ui/queries/test_definition_queries.py
index a47d7dcf..161da4de 100644
--- a/testgen/ui/queries/test_definition_queries.py
+++ b/testgen/ui/queries/test_definition_queries.py
@@ -1,6 +1,8 @@
+import pandas as pd
import streamlit as st
import testgen.ui.services.database_service as db
+from testgen.common.models import get_current_session, with_database_session
def update_attribute(schema, test_definition_ids, attribute, value):
@@ -19,73 +21,83 @@ def update_attribute(schema, test_definition_ids, attribute, value):
@st.cache_data(show_spinner=False)
-def get_test_definitions(schema, project_code, test_suite, table_name, column_name, test_definition_ids):
- if table_name:
- table_condition = f" AND d.table_name = '{table_name}'"
- else:
- table_condition = ""
- if column_name:
- column_condition = f" AND d.column_name = '{column_name}'"
- else:
- column_condition = ""
- sql = f"""
- SELECT
- d.schema_name, d.table_name, d.column_name, t.test_name_short, t.test_name_long,
- d.id::VARCHAR(50),
- s.project_code, d.table_groups_id::VARCHAR(50), s.test_suite, d.test_suite_id::VARCHAR,
- d.test_type, d.cat_test_id::VARCHAR(50),
- d.test_active,
- CASE WHEN d.test_active = 'Y' THEN 'Yes' ELSE 'No' END as test_active_display,
- d.lock_refresh,
- CASE WHEN d.lock_refresh = 'Y' THEN 'Yes' ELSE 'No' END as lock_refresh_display,
- t.test_scope,
- d.test_description,
- d.profiling_as_of_date,
- d.last_manual_update,
- d.severity, COALESCE(d.severity, s.severity, t.default_severity) as urgency,
- d.export_to_observability as export_to_observability_raw,
- CASE
- WHEN d.export_to_observability = 'Y' THEN 'Yes'
- WHEN d.export_to_observability = 'N' THEN 'No'
- WHEN d.export_to_observability IS NULL AND s.export_to_observability = 'Y' THEN 'Inherited (Yes)'
- ELSE 'Inherited (No)'
- END as export_to_observability,
- -- test_action,
- d.threshold_value, COALESCE(t.measure_uom_description, t.measure_uom) as export_uom,
- d.baseline_ct, d.baseline_unique_ct, d.baseline_value,
- d.baseline_value_ct, d.baseline_sum, d.baseline_avg, d.baseline_sd,
- d.lower_tolerance, d.upper_tolerance,
- d.subset_condition,
- d.groupby_names, d.having_condition, d.window_date_column, d.window_days,
- d.match_schema_name, d.match_table_name, d.match_column_names,
- d.match_subset_condition, d.match_groupby_names, d.match_having_condition,
- d.skip_errors, d.custom_query,
- COALESCE(d.test_description, t.test_description) as final_test_description,
- t.default_parm_columns, t.selection_criteria,
- d.profile_run_id::VARCHAR(50), d.test_action, d.test_definition_status,
- d.watch_level, d.check_result, d.last_auto_gen_date,
- d.test_mode
- FROM {schema}.test_definitions d
- INNER JOIN {schema}.test_types t ON (d.test_type = t.test_type)
- INNER JOIN {schema}.test_suites s ON (d.test_suite_id = s.id)
- WHERE True
- """
+@with_database_session
+def get_test_definitions(_, project_code, test_suite, table_name, column_name, test_definition_ids: list[str] | None):
+ db_session = get_current_session()
+ params = {}
+ order_by = "ORDER BY d.schema_name, d.table_name, d.column_name, d.test_type"
+ filters = ""
if project_code:
- sql += f""" AND s.project_code = '{project_code}'
- """
+ filters += " AND s.project_code = :project_code"
+ params["project_code"] = project_code
if test_suite:
- sql += f""" AND s.test_suite = '{test_suite}' {table_condition} {column_condition}
- """
+ filters += " AND s.test_suite = :test_suite"
+ params["test_suite"] = test_suite
+
if test_definition_ids:
- sql += f""" AND d.id in ({"'" + "','".join(test_definition_ids) + "'"})
- """
+        test_definition_params = {f"test_definition_id_{idx}": value for idx, value in enumerate(test_definition_ids)}
+ filters += f" AND d.id IN ({', '.join([f':{p}' for p in test_definition_params.keys()])})"
+ params.update(test_definition_params)
+
+ if table_name:
+ filters += " AND d.table_name = :table_name"
+ params["table_name"] = table_name
+
+ if column_name:
+ filters += " AND d.column_name ILIKE :column_name"
+ params["column_name"] = column_name
- sql += """ORDER BY d.schema_name, d.table_name, d.column_name, d.test_type;
+ sql = f"""
+ SELECT
+ d.schema_name, d.table_name, d.column_name, t.test_name_short, t.test_name_long,
+ d.id::VARCHAR(50),
+ s.project_code, d.table_groups_id::VARCHAR(50), s.test_suite, d.test_suite_id::VARCHAR,
+ d.test_type, d.cat_test_id::VARCHAR(50),
+ d.test_active,
+ CASE WHEN d.test_active = 'Y' THEN 'Yes' ELSE 'No' END as test_active_display,
+ d.lock_refresh,
+ CASE WHEN d.lock_refresh = 'Y' THEN 'Yes' ELSE 'No' END as lock_refresh_display,
+ t.test_scope,
+ d.test_description,
+ d.profiling_as_of_date,
+ d.last_manual_update,
+ d.severity, COALESCE(d.severity, s.severity, t.default_severity) as urgency,
+ d.export_to_observability as export_to_observability_raw,
+ CASE
+ WHEN d.export_to_observability = 'Y' THEN 'Yes'
+ WHEN d.export_to_observability = 'N' THEN 'No'
+ WHEN d.export_to_observability IS NULL AND s.export_to_observability = 'Y' THEN 'Inherited (Yes)'
+ ELSE 'Inherited (No)'
+ END as export_to_observability,
+ -- test_action,
+ d.threshold_value, COALESCE(t.measure_uom_description, t.measure_uom) as export_uom,
+ d.baseline_ct, d.baseline_unique_ct, d.baseline_value,
+ d.baseline_value_ct, d.baseline_sum, d.baseline_avg, d.baseline_sd,
+ d.lower_tolerance, d.upper_tolerance,
+ d.subset_condition,
+ d.groupby_names, d.having_condition, d.window_date_column, d.window_days,
+ d.match_schema_name, d.match_table_name, d.match_column_names,
+ d.match_subset_condition, d.match_groupby_names, d.match_having_condition,
+ d.skip_errors, d.custom_query,
+ COALESCE(d.test_description, t.test_description) as final_test_description,
+ t.default_parm_columns, t.selection_criteria,
+ d.profile_run_id::VARCHAR(50), d.test_action, d.test_definition_status,
+ d.watch_level, d.check_result, d.last_auto_gen_date,
+ d.test_mode
+ FROM test_definitions d
+ INNER JOIN test_types t ON (d.test_type = t.test_type)
+ INNER JOIN test_suites s ON (d.test_suite_id = s.id)
+ WHERE True
+ {filters}
+ {order_by}
"""
- return db.retrieve_data(sql)
+ results = db_session.execute(sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ return pd.DataFrame(list(results), columns=columns)
def update(schema, test_definition):
diff --git a/testgen/ui/services/test_results_service.py b/testgen/ui/services/test_results_service.py
index d2e7440a..2cdf327d 100644
--- a/testgen/ui/services/test_results_service.py
+++ b/testgen/ui/services/test_results_service.py
@@ -1,124 +1,138 @@
import pandas as pd
from testgen.common import ConcatColumnList
+from testgen.common.models import get_current_session, with_database_session
from testgen.common.read_file import replace_templated_functions
from testgen.ui.services import database_service as db
from testgen.ui.services.string_service import empty_if_null
from testgen.ui.services.test_definition_service import get_test_definition
+@with_database_session
def get_test_results(
- schema: str,
+ _: str,
run_id: str,
- test_status: str | None = None,
+ test_status: str | list[str] | None = None,
test_type_id: str | None = None,
table_name: str | None = None,
column_name: str | None = None,
sorting_columns: list[str] | None = None,
) -> pd.DataFrame:
# First visible row first, so multi-select checkbox will render
+ db_session = get_current_session()
+ params = {"run_id": run_id}
+
order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns)) if sorting_columns else ""
filters = ""
if test_status:
- filters += f" AND r.result_status IN ({test_status})"
+ if isinstance(test_status, str):
+ test_status = [status.strip() for status in test_status.split(",")]
+ test_status_params = {f"test_status_{idx}": status for idx, status in enumerate(test_status)}
+
+ filters += f" AND r.result_status IN ({', '.join([f':{p}' for p in test_status_params.keys()])})"
+ params.update(test_status_params)
if test_type_id:
- filters += f" AND r.test_type = '{test_type_id}'"
+ filters += " AND r.test_type = :test_type_id"
+ params["test_type_id"] = test_type_id
if table_name:
- filters += f" AND r.table_name = '{table_name}'"
+ filters += " AND r.table_name = :table_name"
+ params["table_name"] = table_name
if column_name:
- filters += f" AND r.column_names = '{column_name}'"
+ filters += " AND r.column_names ILIKE :column_name"
+ params["column_name"] = column_name
sql = f"""
- WITH run_results
- AS (SELECT *
- FROM {schema}.test_results r
- WHERE
- r.test_run_id = '{run_id}'
- {filters}
- )
- SELECT r.table_name,
- p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor,
- tt.dq_dimension, tt.test_scope,
- r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id,
- tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description,
- c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status,
- CASE
- WHEN r.result_code <> 1 THEN r.disposition
- ELSE 'Passed'
- END as disposition,
- NULL::VARCHAR(1) as action,
- r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity,
- r.result_code as passed_ct,
- (1 - r.result_code)::INTEGER as exception_ct,
- CASE
- WHEN result_status = 'Warning'
- AND result_message NOT ILIKE 'Inactivated%%' THEN 1
- END::INTEGER as warning_ct,
- CASE
- WHEN result_status = 'Failed'
- AND result_message NOT ILIKE 'Inactivated%%' THEN 1
- END::INTEGER as failed_ct,
- CASE
- WHEN result_message ILIKE 'Inactivated%%' THEN 1
- END as execution_error_ct,
- p.project_code, r.table_groups_id::VARCHAR,
- r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR,
- c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR,
- r.test_definition_id::VARCHAR as test_definition_id_runtime,
- CASE
- WHEN r.auto_gen = TRUE THEN d.id
- ELSE r.test_definition_id
- END::VARCHAR as test_definition_id_current,
- r.auto_gen,
-
- -- These are used in the PDF report
- tt.threshold_description, tt.usage_notes, r.test_time,
- dcc.description as column_description,
- COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
- COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
- COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
- COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
- COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
- COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
- COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
- COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
- COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
-
- FROM run_results r
- INNER JOIN {schema}.test_types tt
- ON (r.test_type = tt.test_type)
- LEFT JOIN {schema}.test_definitions rd
- ON (r.test_definition_id = rd.id)
- LEFT JOIN {schema}.test_definitions d
- ON (r.test_suite_id = d.test_suite_id
- AND r.table_name = d.table_name
- AND COALESCE(r.column_names, 'N/A') = COALESCE(d.column_name, 'N/A')
- AND r.test_type = d.test_type
- AND r.auto_gen = TRUE
- AND d.last_auto_gen_date IS NOT NULL)
- INNER JOIN {schema}.test_suites ts
- ON r.test_suite_id = ts.id
- INNER JOIN {schema}.projects p
- ON (ts.project_code = p.project_code)
- INNER JOIN {schema}.table_groups tg
- ON (ts.table_groups_id = tg.id)
- INNER JOIN {schema}.connections cn
- ON (tg.connection_id = cn.connection_id)
- LEFT JOIN {schema}.cat_test_conditions c
- ON (cn.sql_flavor = c.sql_flavor
- AND r.test_type = c.test_type)
- LEFT JOIN {schema}.data_column_chars dcc
- ON (tg.id = dcc.table_groups_id
- AND r.schema_name = dcc.schema_name
- AND r.table_name = dcc.table_name
- AND r.column_names = dcc.column_name)
- LEFT JOIN {schema}.data_table_chars dtc
- ON dcc.table_id = dtc.table_id
- {order_by} ;
+ WITH run_results AS (
+ SELECT *
+ FROM test_results r
+ WHERE r.test_run_id = :run_id
+ {filters}
+ )
+ SELECT r.table_name,
+ p.project_name, ts.test_suite, tg.table_groups_name, cn.connection_name, cn.project_host, cn.sql_flavor,
+ tt.dq_dimension, tt.test_scope,
+ r.schema_name, r.column_names, r.test_time::DATE as test_date, r.test_type, tt.id as test_type_id,
+ tt.test_name_short, tt.test_name_long, r.test_description, tt.measure_uom, tt.measure_uom_description,
+ c.test_operator, r.threshold_value::NUMERIC(16, 5), r.result_measure::NUMERIC(16, 5), r.result_status,
+ CASE
+ WHEN r.result_code <> 1 THEN r.disposition
+ ELSE 'Passed'
+ END as disposition,
+ NULL::VARCHAR(1) as action,
+ r.input_parameters, r.result_message, CASE WHEN result_code <> 1 THEN r.severity END as severity,
+ r.result_code as passed_ct,
+ (1 - r.result_code)::INTEGER as exception_ct,
+ CASE
+ WHEN result_status = 'Warning'
+ AND result_message NOT ILIKE 'Inactivated%%' THEN 1
+ END::INTEGER as warning_ct,
+ CASE
+ WHEN result_status = 'Failed'
+ AND result_message NOT ILIKE 'Inactivated%%' THEN 1
+ END::INTEGER as failed_ct,
+ CASE
+ WHEN result_message ILIKE 'Inactivated%%' THEN 1
+ END as execution_error_ct,
+ p.project_code, r.table_groups_id::VARCHAR,
+ r.id::VARCHAR as test_result_id, r.test_run_id::VARCHAR,
+ c.id::VARCHAR as connection_id, r.test_suite_id::VARCHAR,
+ r.test_definition_id::VARCHAR as test_definition_id_runtime,
+ CASE
+ WHEN r.auto_gen = TRUE THEN d.id
+ ELSE r.test_definition_id
+ END::VARCHAR as test_definition_id_current,
+ r.auto_gen,
+
+ -- These are used in the PDF report
+ tt.threshold_description, tt.usage_notes, r.test_time,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
+
+ FROM run_results r
+ INNER JOIN test_types tt
+ ON (r.test_type = tt.test_type)
+ LEFT JOIN test_definitions rd
+ ON (r.test_definition_id = rd.id)
+ LEFT JOIN test_definitions d
+ ON (r.test_suite_id = d.test_suite_id
+ AND r.table_name = d.table_name
+ AND COALESCE(r.column_names, 'N/A') = COALESCE(d.column_name, 'N/A')
+ AND r.test_type = d.test_type
+ AND r.auto_gen = TRUE
+ AND d.last_auto_gen_date IS NOT NULL)
+ INNER JOIN test_suites ts
+ ON r.test_suite_id = ts.id
+ INNER JOIN projects p
+ ON (ts.project_code = p.project_code)
+ INNER JOIN table_groups tg
+ ON (ts.table_groups_id = tg.id)
+ INNER JOIN connections cn
+ ON (tg.connection_id = cn.connection_id)
+ LEFT JOIN cat_test_conditions c
+ ON (cn.sql_flavor = c.sql_flavor
+ AND r.test_type = c.test_type)
+ LEFT JOIN data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_names = dcc.column_name)
+ LEFT JOIN data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
+ {order_by}
"""
- df = db.retrieve_data(sql)
- # Clean Up
+ results = db_session.execute(sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ df = pd.DataFrame(list(results), columns=columns)
df["test_date"] = pd.to_datetime(df["test_date"])
return df
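get_test_results now accepts test_status as either a list or the old comma-separated string and expands it into one named parameter per status. A minimal sketch of that normalization step (the helper name is not part of the codebase):

def build_status_filter(test_status):
    # Accept "Failed, Warning" as well as ["Failed", "Warning"]
    if isinstance(test_status, str):
        test_status = [status.strip() for status in test_status.split(",")]
    params = {f"test_status_{idx}": status for idx, status in enumerate(test_status)}
    clause = f" AND r.result_status IN ({', '.join(f':{key}' for key in params)})"
    return clause, params

clause, params = build_status_filter("Failed, Warning")
print(clause)  #  AND r.result_status IN (:test_status_0, :test_status_1)
print(params)  # {'test_status_0': 'Failed', 'test_status_1': 'Warning'}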
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 7f987bd9..19f61378 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -12,6 +12,7 @@
from testgen.commands.run_rollup_scores import run_profile_rollup_scoring_queries
from testgen.common import date_service
from testgen.common.mixpanel_service import MixpanelService
+from testgen.common.models import get_current_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
@@ -114,6 +115,7 @@ def render(
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with sort_column:
@@ -143,72 +145,72 @@ def render(
action_map = df_action.set_index("id")["action"].to_dict()
df_pa["action"] = df_pa["id"].map(action_map).fillna(df_pa["action"])
- if not df_pa.empty:
- summaries = get_profiling_anomaly_summary(run_id)
- others_summary = [summary for summary in summaries if summary.get("type") != "PII"]
- with others_summary_column:
+ summaries = get_profiling_anomaly_summary(run_id)
+ others_summary = [summary for summary in summaries if summary.get("type") != "PII"]
+ with others_summary_column:
+ testgen.summary_bar(
+ items=others_summary,
+ label="Hygiene Issues",
+ height=20,
+ width=400,
+ )
+
+ anomalies_pii_summary = [summary for summary in summaries if summary.get("type") == "PII"]
+ if anomalies_pii_summary:
+ with pii_summary_column:
testgen.summary_bar(
- items=others_summary,
- label="Hygiene Issues",
+ items=anomalies_pii_summary,
+ label="Potential PII",
height=20,
width=400,
)
- anomalies_pii_summary = [summary for summary in summaries if summary.get("type") == "PII"]
- if anomalies_pii_summary:
- with pii_summary_column:
- testgen.summary_bar(
- items=anomalies_pii_summary,
- label="Potential PII",
- height=20,
- width=400,
- )
+ with score_column:
+ render_score(run_df["project_code"], run_id)
+
+ lst_show_columns = [
+ "table_name",
+ "column_name",
+ "issue_likelihood",
+ "action",
+ "anomaly_name",
+ "detail",
+ ]
+
+ # Show main grid and retrieve selections
+ selected = fm.render_grid_select(
+ df_pa,
+ lst_show_columns,
+ int_height=400,
+ do_multi_select=do_multi_select,
+ bind_to_query_name="selected",
+ bind_to_query_prop="id",
+ )
- with score_column:
- render_score(run_df["project_code"], run_id)
+ popover_container = export_button_column.empty()
- lst_show_columns = [
- "table_name",
- "column_name",
- "issue_likelihood",
- "action",
- "anomaly_name",
- "detail",
- ]
+ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
+ # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
+ with popover_container.container():
+ flex_row_end()
+ st.button(label="Export", icon=":material/download:", disabled=True)
- # Show main grid and retrieve selections
- selected = fm.render_grid_select(
- df_pa,
- lst_show_columns,
- int_height=400,
- do_multi_select=do_multi_select,
- bind_to_query_name="selected",
- bind_to_query_prop="id",
+ download_dialog(
+ dialog_title="Download Excel Report",
+ file_content_func=get_excel_report_data,
+ args=(run_df["table_groups_name"], run_date, run_id, data),
)
- popover_container = export_button_column.empty()
-
- def open_download_dialog(data: pd.DataFrame | None = None) -> None:
- # Hack to programmatically close popover: https://github.com/streamlit/streamlit/issues/8265#issuecomment-3001655849
- with popover_container.container():
- flex_row_end()
- st.button(label="Export", icon=":material/download:", disabled=True)
-
- download_dialog(
- dialog_title="Download Excel Report",
- file_content_func=get_excel_report_data,
- args=(run_df["table_groups_name"], run_date, run_id, data),
- )
-
- with popover_container.container(key="tg--export-popover"):
- flex_row_end()
- with st.popover(label="Export", icon=":material/download:", help="Download hygiene issues to Excel"):
- css_class("tg--export-wrapper")
- st.button(label="All issues", type="tertiary", on_click=open_download_dialog)
- st.button(label="Filtered issues", type="tertiary", on_click=partial(open_download_dialog, df_pa))
- if selected:
- st.button(label="Selected issues", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected)))
+ with popover_container.container(key="tg--export-popover"):
+ flex_row_end()
+ with st.popover(label="Export", icon=":material/download:", help="Download hygiene issues to Excel"):
+ css_class("tg--export-wrapper")
+ st.button(label="All issues", type="tertiary", on_click=open_download_dialog)
+ st.button(label="Filtered issues", type="tertiary", on_click=partial(open_download_dialog, df_pa))
+ if selected:
+ st.button(label="Selected issues", type="tertiary", on_click=partial(open_download_dialog, pd.DataFrame(selected)))
+ if not df_pa.empty:
if selected:
# Always show details for last selected row
selected_row = selected[len(selected) - 1]
@@ -363,78 +365,97 @@ def get_profiling_anomalies(
column_name: str | None = None,
sorting_columns: list[str] | None = None,
):
- schema: str = st.session_state["dbschema"]
+ db_session = get_current_session()
criteria = ""
order_by = ""
+ params = {"profile_run_id": profile_run_id}
if likelihood:
- criteria += f" AND t.issue_likelihood = '{likelihood}'"
+ criteria += " AND t.issue_likelihood = :likelihood"
+ params["likelihood"] = likelihood
if issue_type_id:
- criteria += f" AND t.id = '{issue_type_id}'"
+ criteria += " AND t.id = :issue_type_id"
+ params["issue_type_id"] = issue_type_id
if table_name:
- criteria += f" AND r.table_name = '{table_name}'"
+ criteria += " AND r.table_name = :table_name"
+ params["table_name"] = table_name
if column_name:
- criteria += f" AND r.column_name = '{column_name}'"
+ criteria += " AND r.column_name ILIKE :column_name"
+ params["column_name"] = column_name
if sorting_columns:
order_by = "ORDER BY " + (", ".join(" ".join(col) for col in sorting_columns))
    # Define the query -- first visible column must be first, because it will hold the multi-select box
str_sql = f"""
- SELECT r.table_name, r.column_name, r.schema_name,
- r.column_type,t.anomaly_name, t.issue_likelihood,
- r.disposition, null as action,
- CASE
- WHEN t.issue_likelihood = 'Possible' THEN 'Possible: speculative test that often identifies problems'
- WHEN t.issue_likelihood = 'Likely' THEN 'Likely: typically indicates a data problem'
- WHEN t.issue_likelihood = 'Definite' THEN 'Definite: indicates a highly-likely data problem'
- WHEN t.issue_likelihood = 'Potential PII'
- THEN 'Potential PII: may require privacy policies, standards and procedures for access, storage and transmission.'
- END AS likelihood_explanation,
- CASE
- WHEN t.issue_likelihood = 'Potential PII' THEN 1
- WHEN t.issue_likelihood = 'Possible' THEN 2
- WHEN t.issue_likelihood = 'Likely' THEN 3
- WHEN t.issue_likelihood = 'Definite' THEN 4
- END AS likelihood_order,
- t.anomaly_description, r.detail, t.suggested_action,
- r.anomaly_id, r.table_groups_id::VARCHAR, r.id::VARCHAR, p.profiling_starttime, r.profile_run_id::VARCHAR,
- tg.table_groups_name,
-
- -- These are used in the PDF report
- dcc.functional_data_type,
- dcc.description as column_description,
- COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
- COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
- COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
- COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
- COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
- COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
- COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
- COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
- COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
-
- FROM {schema}.profile_anomaly_results r
- INNER JOIN {schema}.profile_anomaly_types t
- ON r.anomaly_id = t.id
- INNER JOIN {schema}.profiling_runs p
- ON r.profile_run_id = p.id
- INNER JOIN {schema}.table_groups tg
- ON r.table_groups_id = tg.id
- LEFT JOIN {schema}.data_column_chars dcc
- ON (tg.id = dcc.table_groups_id
- AND r.schema_name = dcc.schema_name
- AND r.table_name = dcc.table_name
- AND r.column_name = dcc.column_name)
- LEFT JOIN {schema}.data_table_chars dtc
- ON dcc.table_id = dtc.table_id
- WHERE r.profile_run_id = '{profile_run_id}'
- {criteria}
- {order_by}
+ SELECT
+ r.table_name,
+ r.column_name,
+ r.schema_name,
+ r.column_type,
+ t.anomaly_name,
+ t.issue_likelihood,
+ r.disposition,
+ null as action,
+ CASE
+ WHEN t.issue_likelihood = 'Possible' THEN 'Possible: speculative test that often identifies problems'
+ WHEN t.issue_likelihood = 'Likely' THEN 'Likely: typically indicates a data problem'
+ WHEN t.issue_likelihood = 'Definite' THEN 'Definite: indicates a highly-likely data problem'
+ WHEN t.issue_likelihood = 'Potential PII'
+ THEN 'Potential PII: may require privacy policies, standards and procedures for access, storage and transmission.'
+ END AS likelihood_explanation,
+ CASE
+ WHEN t.issue_likelihood = 'Potential PII' THEN 1
+ WHEN t.issue_likelihood = 'Possible' THEN 2
+ WHEN t.issue_likelihood = 'Likely' THEN 3
+ WHEN t.issue_likelihood = 'Definite' THEN 4
+ END AS likelihood_order,
+ t.anomaly_description,
+ r.detail,
+ t.suggested_action,
+ r.anomaly_id,
+ r.table_groups_id::VARCHAR,
+ r.id::VARCHAR,
+ p.profiling_starttime,
+ r.profile_run_id::VARCHAR,
+ tg.table_groups_name,
+
+ -- These are used in the PDF report
+ dcc.functional_data_type,
+ dcc.description as column_description,
+ COALESCE(dcc.critical_data_element, dtc.critical_data_element) as critical_data_element,
+ COALESCE(dcc.data_source, dtc.data_source, tg.data_source) as data_source,
+ COALESCE(dcc.source_system, dtc.source_system, tg.source_system) as source_system,
+ COALESCE(dcc.source_process, dtc.source_process, tg.source_process) as source_process,
+ COALESCE(dcc.business_domain, dtc.business_domain, tg.business_domain) as business_domain,
+ COALESCE(dcc.stakeholder_group, dtc.stakeholder_group, tg.stakeholder_group) as stakeholder_group,
+ COALESCE(dcc.transform_level, dtc.transform_level, tg.transform_level) as transform_level,
+ COALESCE(dcc.aggregation_level, dtc.aggregation_level) as aggregation_level,
+ COALESCE(dcc.data_product, dtc.data_product, tg.data_product) as data_product
+
+ FROM profile_anomaly_results r
+ INNER JOIN profile_anomaly_types t
+ ON r.anomaly_id = t.id
+ INNER JOIN profiling_runs p
+ ON r.profile_run_id = p.id
+ INNER JOIN table_groups tg
+ ON r.table_groups_id = tg.id
+ LEFT JOIN data_column_chars dcc
+ ON (tg.id = dcc.table_groups_id
+ AND r.schema_name = dcc.schema_name
+ AND r.table_name = dcc.table_name
+ AND r.column_name = dcc.column_name)
+ LEFT JOIN data_table_chars dtc
+ ON dcc.table_id = dtc.table_id
+ WHERE r.profile_run_id = :profile_run_id
+ {criteria}
+ {order_by}
"""
- # Retrieve data as df
- df = db.retrieve_data(str_sql)
+ results = db_session.execute(str_sql, params=params)
+ columns = [column.name for column in results.cursor.description]
+
+ df = pd.DataFrame(list(results), columns=columns)
dct_replace = {"Confirmed": "✓", "Dismissed": "✘", "Inactive": "🔇"}
df["action"] = df["disposition"].replace(dct_replace)
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index c9af8f53..f32c9bdf 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -81,6 +81,7 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=bool(table_name),
)
with sort_column:
@@ -95,16 +96,15 @@ def render(self, run_id: str, table_name: str | None = None, column_name: str |
default_sorting = [(sortable_columns[i][1], "ASC") for i in (0, 1, 2)]
sorting_columns = testgen.sorting_selector(sortable_columns, default_sorting)
- # Use SQL wildcard to match all values
- if not table_name:
- table_name = "%%"
- if not column_name:
- column_name = "%%"
-
# Display main results grid
with st.container():
with st.spinner("Loading data ..."):
- df = profiling_queries.get_profiling_results(run_id, table_name, column_name, sorting_columns)
+ df = profiling_queries.get_profiling_results(
+ run_id,
+ table_name=table_name,
+ column_name=column_name,
+ sorting_columns=sorting_columns,
+ )
show_columns = [
"schema_name",
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index fd5753f9..81bdc2c6 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -82,13 +82,14 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name:
label="Table Name",
)
with column_filter_column:
- column_options = list(columns_df.loc[columns_df["table_name"] == table_name]["column_name"].unique())
+ column_options = columns_df.loc[columns_df["table_name"] == table_name]["column_name"].dropna().unique().tolist()
column_name = testgen.select(
options=column_options,
default_value=column_name,
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with disposition_column:
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 920e007f..0a1e8fbf 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -128,7 +128,9 @@ def render(
)
with column_filter_column:
- column_options = list(run_columns_df.loc[run_columns_df["table_name"] == table_name]["column_name"].unique())
+ column_options = run_columns_df.loc[
+ run_columns_df["table_name"] == table_name
+ ]["column_name"].dropna().unique().tolist()
column_name = testgen.select(
options=column_options,
value_column="column_name",
@@ -136,6 +138,7 @@ def render(
bind_to_query="column_name",
label="Column Name",
disabled=not table_name,
+ accept_new_options=True,
)
with sort_column:
@@ -157,15 +160,15 @@ def render(
match status:
case "Failed + Warning":
- status = "'Failed','Warning'"
+ status = ["Failed", "Warning"]
case "Failed":
- status = "'Failed'"
+ status = "Failed"
case "Warning":
- status = "'Warning'"
+ status = "Warning"
case "Passed":
- status = "'Passed'"
+ status = "Passed"
case "Error":
- status = "'Error'"
+ status = "Error"
# Display main grid and retrieve selection
selected = show_result_detail(
@@ -294,7 +297,7 @@ def get_test_run_columns(test_run_id: str) -> pd.DataFrame:
@st.cache_data(show_spinner=False)
def get_test_results(
run_id: str,
- test_status: str | None = None,
+ test_status: str | list[str] | None = None,
test_type_id: str | None = None,
table_name: str | None = None,
column_name: str | None = None,
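
Changing test_status from a pre-quoted SQL fragment to a plain string or list keeps quoting out of the view layer; the query side can expand it into bound parameters. A hedged sketch of that expansion (status_filter is a hypothetical helper, not the project's actual query builder):

    def status_filter(test_status: str | list[str] | None) -> tuple[str, dict]:
        """Build a WHERE fragment plus bound params for an optional status filter."""
        if not test_status:
            return "", {}
        statuses = [test_status] if isinstance(test_status, str) else list(test_status)
        # One bound parameter per status value: :status_0, :status_1, ...
        names = [f"status_{i}" for i in range(len(statuses))]
        fragment = " AND r.result_status IN (" + ", ".join(f":{n}" for n in names) + ")"
        return fragment, dict(zip(names, statuses))

    fragment, params = status_filter(["Failed", "Warning"])
    # fragment == " AND r.result_status IN (:status_0, :status_1)"
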
From 633c64fecf65c1d3d049edbd32d4b109d37a63a4 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 11 Jul 2025 17:39:29 -0400
Subject: [PATCH 46/56] fix(grid): change to current selection not reflected
---
testgen/ui/services/form_service.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 771c9fc1..0194d291 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -312,7 +312,7 @@ def render_grid_select(
enable_enterprise_modules=False,
allow_unsafe_jscode=True,
update_mode=GridUpdateMode.NO_UPDATE,
- update_on=["selectionChanged"],
+ update_on=["selectionChanged", "modelUpdated"],
data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS,
height=int_height,
From add36107060215d18c2185803365ccae957993fd Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Fri, 11 Jul 2025 17:40:06 -0400
Subject: [PATCH 47/56] fix(issue-report): error status in test history breaks
export
---
testgen/ui/pdf/hygiene_issue_report.py | 8 ++++----
testgen/ui/pdf/test_result_report.py | 7 ++++---
2 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/testgen/ui/pdf/hygiene_issue_report.py b/testgen/ui/pdf/hygiene_issue_report.py
index aa5747fd..31844a78 100644
--- a/testgen/ui/pdf/hygiene_issue_report.py
+++ b/testgen/ui/pdf/hygiene_issue_report.py
@@ -27,10 +27,10 @@
SECTION_MIN_AVAILABLE_HEIGHT = 120
CLASS_COLORS = {
- "Definite": HexColor(0xE94D4A),
- "Likely": HexColor(0xFC8F2A),
- "Possible": HexColor(0xFCD349),
- "Potential PII": HexColor(0xFC8F2A),
+ "Definite": HexColor(0xEF5350),
+ "Likely": HexColor(0xFF9800),
+ "Possible": HexColor(0xFBC02D),
+ "Potential PII": HexColor(0x8D6E63),
}
def build_summary_table(document, hi_data):
diff --git a/testgen/ui/pdf/test_result_report.py b/testgen/ui/pdf/test_result_report.py
index dd5e9ed9..f583c71e 100644
--- a/testgen/ui/pdf/test_result_report.py
+++ b/testgen/ui/pdf/test_result_report.py
@@ -38,9 +38,10 @@
SECTION_MIN_AVAILABLE_HEIGHT = 120
RESULT_STATUS_COLORS = {
- "Passed": HexColor(0x94C465),
- "Warning": HexColor(0xFCD349),
- "Failed": HexColor(0xE94D4A),
+ "Passed": HexColor(0x8BC34A),
+ "Warning": HexColor(0xFBC02D),
+ "Failed": HexColor(0xEF5350),
+ "Error": HexColor(0x8D6E63),
}
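
The export failure apparently came from test-history rows whose "Error" status had no entry in RESULT_STATUS_COLORS. Besides adding the missing entry, this kind of lookup is more defensive with a fallback; a small sketch (the gray default is an assumption, not what the report actually renders):

    from reportlab.lib.colors import HexColor

    RESULT_STATUS_COLORS = {
        "Passed": HexColor(0x8BC34A),
        "Warning": HexColor(0xFBC02D),
        "Failed": HexColor(0xEF5350),
        "Error": HexColor(0x8D6E63),
    }
    DEFAULT_STATUS_COLOR = HexColor(0x9E9E9E)  # assumed neutral fallback

    def status_color(status: str):
        # .get() avoids a KeyError if an unexpected status ever shows up in history data.
        return RESULT_STATUS_COLORS.get(status, DEFAULT_STATUS_COLOR)
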
From 3d1d469eec63f062d3eae33819451a910c5e0393 Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Fri, 11 Jul 2025 15:27:20 -0400
Subject: [PATCH 48/56] fix: Allowing literal underscores in the table group
pattern fields
---
.../commands/queries/refresh_data_chars_query.py | 13 ++++++++++++-
tests/unit/test_profiling_query.py | 16 ++++++++--------
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/testgen/commands/queries/refresh_data_chars_query.py b/testgen/commands/queries/refresh_data_chars_query.py
index 325d6751..694eeefb 100644
--- a/testgen/commands/queries/refresh_data_chars_query.py
+++ b/testgen/commands/queries/refresh_data_chars_query.py
@@ -43,10 +43,21 @@ def _get_mask_query(self, mask: str, is_include: bool) -> str:
if mask:
sub_query += " AND (" if is_include else " AND NOT ("
is_first = True
+ escape = ""
+ if self.sql_flavor.startswith("mssql"):
+ escaped_underscore = "[_]"
+ elif self.sql_flavor == "snowflake":
+ escaped_underscore = "\\\\_"
+ escape = "ESCAPE '\\\\'"
+ elif self.sql_flavor == "redshift":
+ escaped_underscore = "\\\\_"
+ else:
+ escaped_underscore = "\\_"
for item in mask.split(","):
if not is_first:
sub_query += " OR "
- sub_query += "(c.table_name LIKE '" + item.strip().replace("_", r"\_") + r"' ESCAPE '\')"
+ item = item.strip().replace("_", escaped_underscore)
+ sub_query += f"(c.table_name LIKE '{item}' {escape})"
is_first = False
sub_query += ")"
return sub_query
diff --git a/tests/unit/test_profiling_query.py b/tests/unit/test_profiling_query.py
index 113508d6..826faad1 100644
--- a/tests/unit/test_profiling_query.py
+++ b/tests/unit/test_profiling_query.py
@@ -7,7 +7,7 @@
def test_include_exclude_mask_basic():
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "postgresql"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
profiling_query.parm_table_include_mask = "important%, %useful%"
@@ -18,9 +18,9 @@ def test_include_exclude_mask_basic():
# test assertions
assert "SELECT 'dummy_project_code'" in query
- assert r"AND ((c.table_name LIKE 'important%' ESCAPE '\') OR (c.table_name LIKE '%useful%' ESCAPE '\'))" in query
+ assert r"AND ((c.table_name LIKE 'important%' ) OR (c.table_name LIKE '%useful%' ))" in query
assert (
- r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\') OR (c.table_name LIKE 'tmp%' ESCAPE '\') OR (c.table_name LIKE 'raw\_slot\_utilization%' ESCAPE '\') OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ESCAPE '\'))"
+ r"AND NOT ((c.table_name LIKE 'temp%' ) OR (c.table_name LIKE 'tmp%' ) OR (c.table_name LIKE 'raw\_slot\_utilization%' ) OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ))"
in query
)
@@ -30,7 +30,7 @@ def test_include_exclude_mask_basic():
def test_include_empty_exclude_mask(mask):
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "snowflake"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
profiling_query.parm_table_include_mask = mask
@@ -41,7 +41,7 @@ def test_include_empty_exclude_mask(mask):
# test assertions
assert (
- r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\') OR (c.table_name LIKE 'tmp%' ESCAPE '\') OR (c.table_name LIKE 'raw\_slot\_utilization%' ESCAPE '\') OR (c.table_name LIKE 'gps\_product\_step\_change\_log' ESCAPE '\'))"
+ r"AND NOT ((c.table_name LIKE 'temp%' ESCAPE '\\') OR (c.table_name LIKE 'tmp%' ESCAPE '\\') OR (c.table_name LIKE 'raw\\_slot\\_utilization%' ESCAPE '\\') OR (c.table_name LIKE 'gps\\_product\\_step\\_change\\_log' ESCAPE '\\')"
in query
)
@@ -51,14 +51,14 @@ def test_include_empty_exclude_mask(mask):
def test_include_empty_include_mask(mask):
# test configuration
project_code = "dummy_project_code"
- flavor = "redshift"
+ flavor = "mssql"
profiling_query = CProfilingSQL(project_code, flavor)
profiling_query.parm_table_set = ""
- profiling_query.parm_table_include_mask = "important%, %useful%"
+ profiling_query.parm_table_include_mask = "important%, %useful_%"
profiling_query.parm_table_exclude_mask = mask
# test run
query = profiling_query.GetDDFQuery()
# test assertions
- assert r"AND ((c.table_name LIKE 'important%' ESCAPE '\') OR (c.table_name LIKE '%useful%' ESCAPE '\'))" in query
+ assert r"AND ((c.table_name LIKE 'important%' ) OR (c.table_name LIKE '%useful[_]%' ))" in query
From a089c967f6df7bc2cac93d88079fb973f2333fdd Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:54:06 -0400
Subject: [PATCH 49/56] fix(profiling): don't apply sample ratio to
date_days_present
---
.../profiling/project_update_profile_results_to_estimates.sql | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/testgen/template/profiling/project_update_profile_results_to_estimates.sql b/testgen/template/profiling/project_update_profile_results_to_estimates.sql
index 48d2d61c..640829cf 100644
--- a/testgen/template/profiling/project_update_profile_results_to_estimates.sql
+++ b/testgen/template/profiling/project_update_profile_results_to_estimates.sql
@@ -22,8 +22,7 @@ set sample_ratio = {PROFILE_SAMPLE_RATIO},
within_1yr_date_ct = ROUND(within_1yr_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
within_1mo_date_ct = ROUND(within_1mo_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
future_date_ct = ROUND(future_date_ct * {PROFILE_SAMPLE_RATIO}, 0),
- boolean_true_ct = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0),
- date_days_present = ROUND(date_days_present * {PROFILE_SAMPLE_RATIO}, 0)
+ boolean_true_ct = ROUND(boolean_true_ct * {PROFILE_SAMPLE_RATIO}, 0)
where profile_run_id = '{PROFILE_RUN_ID}'
and schema_name = split_part('{SAMPLING_TABLE}', '.', 1)
and table_name = split_part('{SAMPLING_TABLE}', '.', 2)
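
PROFILE_SAMPLE_RATIO scales counts measured on a sample back up to whole-table estimates, which is only valid for quantities that shrink proportionally with the sample. date_days_present is a count of distinct days, so scaling it overshoots; the patch stops extrapolating it. A tiny illustrative sketch (numbers are made up):

    sample_ratio = 10                 # e.g. a 10% sample: multiply sampled counts by 10
    future_date_ct_sampled = 42
    future_date_ct_estimate = round(future_date_ct_sampled * sample_ratio)   # 420, a fair estimate

    date_days_present_sampled = 180   # distinct days seen in the sample
    # 180 * 10 = 1800 "distinct days" would be nonsense for a one-year table:
    # distinct counts do not scale linearly, hence the removed line.
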
From cfe80a1de30f3bcf9ce2a907cf5b63d011e6713d Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:55:01 -0400
Subject: [PATCH 50/56] fix(table-groups): sampling incorrectly displayed
---
testgen/ui/components/frontend/js/pages/table_group_list.js | 2 +-
testgen/ui/queries/table_group_queries.py | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index b201bd8f..18d3525b 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -123,7 +123,7 @@ const TableGroupList = (props) => {
div(
{ class: 'flex-column fx-flex' },
Caption({content: 'Uses Record Sampling', style: 'margin-bottom: 4px;'}),
- span(tableGroup.profile_use_sampling || '--'),
+ span(tableGroup.profile_use_sampling ? 'Yes' : 'No'),
),
),
div(
diff --git a/testgen/ui/queries/table_group_queries.py b/testgen/ui/queries/table_group_queries.py
index 1d6e9b1e..d69e54cb 100644
--- a/testgen/ui/queries/table_group_queries.py
+++ b/testgen/ui/queries/table_group_queries.py
@@ -20,7 +20,8 @@ def _get_select_statement(schema):
profile_id_column_mask, profile_sk_column_mask,
description, data_source, source_system, source_process, data_location,
business_domain, stakeholder_group, transform_level, data_product,
- profile_use_sampling, profile_sample_percent, profile_sample_min_count,
+ CASE WHEN profile_use_sampling = 'Y' THEN true ELSE false END AS profile_use_sampling,
+ profile_sample_percent, profile_sample_min_count,
profiling_delay_days, profile_flag_cdes
FROM table_groups
"""
@@ -176,7 +177,7 @@ def add(schema, table_group) -> str:
'{table_group["profiling_exclude_mask"]}',
'{table_group["profile_id_column_mask"]}'::character varying(2000),
'{table_group["profile_sk_column_mask"]}'::character varying,
- '{'Y' if table_group["profile_use_sampling"]=='True' else 'N' }'::character varying,
+ '{'Y' if table_group["profile_use_sampling"] else 'N' }'::character varying,
'{table_group["profile_sample_percent"]}'::character varying,
{table_group["profile_sample_min_count"]},
'{table_group["profiling_delay_days"]}'::character varying,
From 8a41d6cbacee5d01680b6a20d62df3b2d90a108b Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Mon, 14 Jul 2025 17:56:47 -0400
Subject: [PATCH 51/56] fix(select): handle none values in run dialogs
---
.../ui/views/dialogs/run_profiling_dialog.py | 15 +++++++-----
testgen/ui/views/dialogs/run_tests_dialog.py | 23 +++++++++++--------
testgen/ui/views/hygiene_issues.py | 2 --
testgen/ui/views/profiling_runs.py | 2 ++
testgen/ui/views/test_definitions.py | 7 ++++--
testgen/ui/views/test_results.py | 2 --
testgen/ui/views/test_runs.py | 3 +++
7 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/testgen/ui/views/dialogs/run_profiling_dialog.py b/testgen/ui/views/dialogs/run_profiling_dialog.py
index 4250f1e7..1b6cf22f 100644
--- a/testgen/ui/views/dialogs/run_profiling_dialog.py
+++ b/testgen/ui/views/dialogs/run_profiling_dialog.py
@@ -26,16 +26,19 @@ def run_profiling_dialog(project_code: str, table_group: pd.Series | None = None
display_column="table_groups_name",
default_value=default_table_group_id,
required=True,
+ placeholder="Select table group to profile",
)
- table_group_name: str = table_groups_df.loc[table_groups_df["id"] == table_group_id, "table_groups_name"].iloc[0]
+ if table_group_id:
+ table_group_name: str = table_groups_df.loc[table_groups_df["id"] == table_group_id, "table_groups_name"].iloc[0]
testgen.whitespace(1)
- with st.container():
- st.markdown(f"Execute profiling for the table group **{table_group_name}**?")
- st.markdown(":material/info: _Profiling will be performed in a background process._")
+ if table_group_id:
+ with st.container():
+ st.markdown(f"Execute profiling for the table group **{table_group_name}**?")
+ st.markdown(":material/info: _Profiling will be performed in a background process._")
- if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"):
- st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")
+ if testgen.expander_toggle(expand_label="Show CLI command", key="test_suite:keys:run-tests-show-cli"):
+ st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")
button_container = st.empty()
status_container = st.empty()
diff --git a/testgen/ui/views/dialogs/run_tests_dialog.py b/testgen/ui/views/dialogs/run_tests_dialog.py
index 212c1361..93c89dbb 100644
--- a/testgen/ui/views/dialogs/run_tests_dialog.py
+++ b/testgen/ui/views/dialogs/run_tests_dialog.py
@@ -26,19 +26,22 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def
display_column="test_suite",
default_value=default_test_suite_id,
required=True,
+ placeholder="Select test suite to run",
)
- test_suite_name: str = test_suites_df.loc[test_suites_df["id"] == test_suite_id, "test_suite"].iloc[0]
+ if test_suite_id:
+ test_suite_name: str = test_suites_df.loc[test_suites_df["id"] == test_suite_id, "test_suite"].iloc[0]
testgen.whitespace(1)
- with st.container():
- st.markdown(f"Run tests for the test suite **{test_suite_name}**?")
- st.markdown(":material/info: _Test execution will be performed in a background process._")
+ if test_suite_id:
+ with st.container():
+ st.markdown(f"Run tests for the test suite **{test_suite_name}**?")
+ st.markdown(":material/info: _Test execution will be performed in a background process._")
- if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"):
- st.code(
- f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}",
- language="shellSession"
- )
+ if testgen.expander_toggle(expand_label="Show CLI command", key="run_tests_dialog:keys:show-cli"):
+ st.code(
+ f"testgen run-tests --project-key {project_code} --test-suite-key {test_suite_name}",
+ language="shellSession"
+ )
button_container = st.empty()
status_container = st.empty()
@@ -47,7 +50,7 @@ def run_tests_dialog(project_code: str, test_suite: pd.Series | None = None, def
with button_container:
_, button_column = st.columns([.8, .2])
with button_column:
- run_test_button = st.button("Run Tests", use_container_width=True)
+ run_test_button = st.button("Run Tests", use_container_width=True, disabled=not test_suite_id)
if run_test_button:
button_container.empty()
diff --git a/testgen/ui/views/hygiene_issues.py b/testgen/ui/views/hygiene_issues.py
index 19f61378..ed1f7991 100644
--- a/testgen/ui/views/hygiene_issues.py
+++ b/testgen/ui/views/hygiene_issues.py
@@ -79,7 +79,6 @@ def render(
issue_class = testgen.select(
options=["Definite", "Likely", "Possible", "Potential PII"],
default_value=issue_class,
- required=False,
bind_to_query="issue_class",
label="Issue Class",
)
@@ -91,7 +90,6 @@ def render(
default_value=None if issue_class == "Potential PII" else issue_type,
value_column="id",
display_column="anomaly_name",
- required=False,
bind_to_query="issue_type",
label="Issue Type",
disabled=issue_class == "Potential PII",
diff --git a/testgen/ui/views/profiling_runs.py b/testgen/ui/views/profiling_runs.py
index a90de40c..263b4b4d 100644
--- a/testgen/ui/views/profiling_runs.py
+++ b/testgen/ui/views/profiling_runs.py
@@ -64,6 +64,7 @@ def render(self, project_code: str, table_group_id: str | None = None, **_kwargs
default_value=table_group_id,
bind_to_query="table_group_id",
label="Table Group",
+ placeholder="---",
)
with actions_column:
@@ -132,6 +133,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]
value_column="id",
display_column="table_groups_name",
required=True,
+ placeholder="Select table group",
)
return bool(tg_id), [], {"table_group_id": tg_id}
diff --git a/testgen/ui/views/test_definitions.py b/testgen/ui/views/test_definitions.py
index 81bdc2c6..15157e92 100644
--- a/testgen/ui/views/test_definitions.py
+++ b/testgen/ui/views/test_definitions.py
@@ -73,10 +73,11 @@ def render(self, test_suite_id: str, table_name: str | None = None, column_name:
with table_filter_column:
columns_df = get_test_suite_columns(test_suite_id)
+ table_options = list(columns_df["table_name"].unique())
table_name = testgen.select(
- options=list(columns_df["table_name"].unique()),
+ options=table_options,
value_column="table_name",
- default_value=table_name,
+ default_value=table_name or (table_options[0] if table_options else None),
bind_to_query="table_name",
required=True,
label="Table Name",
@@ -691,6 +692,7 @@ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, s
value_column="id",
display_column="table_groups_name",
default_value=origin_table_group["id"],
+ required=True,
label="Target Table Group",
)
@@ -701,6 +703,7 @@ def copy_move_test_dialog(project_code, origin_table_group, origin_test_suite, s
value_column="id",
display_column="test_suite",
default_value=None,
+ required=True,
label="Target Test Suite",
)
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index 0a1e8fbf..8053babf 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -101,7 +101,6 @@ def render(
status = testgen.select(
options=status_options,
default_value=status or "Failed + Warning",
- required=False,
bind_to_query="status",
bind_empty_value=True,
label="Result Status",
@@ -113,7 +112,6 @@ def render(
value_column="test_type",
display_column="test_name_short",
default_value=test_type,
- required=False,
bind_to_query="test_type",
label="Test Type",
)
diff --git a/testgen/ui/views/test_runs.py b/testgen/ui/views/test_runs.py
index c3fe9913..0b50d649 100644
--- a/testgen/ui/views/test_runs.py
+++ b/testgen/ui/views/test_runs.py
@@ -63,6 +63,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
default_value=table_group_id,
bind_to_query="table_group_id",
label="Table Group",
+ placeholder="---",
)
with suite_filter_column:
@@ -74,6 +75,7 @@ def render(self, project_code: str, table_group_id: str | None = None, test_suit
default_value=test_suite_id,
bind_to_query="test_suite_id",
label="Test Suite",
+ placeholder="---",
)
with actions_column:
@@ -139,6 +141,7 @@ def arg_value_input(self) -> tuple[bool, list[typing.Any], dict[str, typing.Any]
value_column="test_suite",
display_column="test_suite",
required=True,
+ placeholder="Select test suite",
)
return bool(ts_name), [], {"project_key": self.project_code, "test_suite_key": ts_name}
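
The common thread in this patch: selects without a default can return None, so code that looked up a display name with .iloc[0] or rendered a CLI snippet is now guarded, and the action button stays disabled until something is chosen. A minimal Streamlit-style sketch of the guard (plain st.selectbox stands in for testgen.select here):

    import streamlit as st

    table_group_id = st.selectbox(
        "Table Group",
        options=[None, "tg-001", "tg-002"],
        format_func=lambda v: v or "Select table group to profile",
    )

    if table_group_id:
        st.markdown(f"Execute profiling for the table group **{table_group_id}**?")
        st.code(f"testgen run-profile --table-group-id {table_group_id}", language="shellSession")

    # Keep the action disabled instead of indexing into an empty DataFrame lookup.
    st.button("Run Profiling", disabled=not table_group_id)
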
From c914376c7b51324569461e31fa5a97415b50d61b Mon Sep 17 00:00:00 2001
From: Luis
Date: Tue, 15 Jul 2025 09:49:23 -0400
Subject: [PATCH 52/56] fix(connections): stop connection form from
re-rendering needlessly
---
.../components/frontend/js/components/connection_form.js | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/components/frontend/js/components/connection_form.js b/testgen/ui/components/frontend/js/components/connection_form.js
index 5486fd91..7ed5918f 100644
--- a/testgen/ui/components/frontend/js/components/connection_form.js
+++ b/testgen/ui/components/frontend/js/components/connection_form.js
@@ -119,14 +119,14 @@ const ConnectionForm = (props, saveButton) => {
privateKeyPhrase.val = '';
}
- const flavor = getValue(props.flavors).find(f => f.value === connectionFlavor.val);
+ const flavor = getValue(props.flavors).find(f => f.value === connectionFlavor.rawVal);
const originalURLTemplate = van.state(flavor.connection_string);
- const [prefixPart, sufixPart] = originalURLTemplate.val.split('@');
+ const [prefixPart, sufixPart] = originalURLTemplate.rawVal.split('@');
const connectionStringPrefix = van.state(prefixPart);
const connectionStringSuffix = van.state(connection?.url ?? '');
- if (!connectionStringSuffix.val) {
- connectionStringSuffix.val = formatURL(sufixPart ?? '', connectionHost.val, connectionPort.val, connectionDatabase.val);
+ if (!connectionStringSuffix.rawVal) {
+ connectionStringSuffix.val = formatURL(sufixPart ?? '', connectionHost.rawVal, connectionPort.rawVal, connectionDatabase.rawVal);
}
const updatedConnection = van.derive(() => {
From 3d3191b163d38271ad8d26e662950908067dcd0a Mon Sep 17 00:00:00 2001
From: Ricardo Boni
Date: Mon, 14 Jul 2025 21:34:12 -0400
Subject: [PATCH 53/56] fix: Fixing hygiene issues lookup queries
---
.../050_populate_new_schema_metadata.sql | 86 +++++++++----------
1 file changed, 43 insertions(+), 43 deletions(-)
diff --git a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
index 5d57acf9..f0a8b8ab 100644
--- a/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
+++ b/testgen/template/dbsetup/050_populate_new_schema_metadata.sql
@@ -1635,86 +1635,86 @@ ORDER BY {GROUPBY_NAMES}'),
GROUP BY {GROUPBY_NAMES}
HAVING COUNT(*) > 1
ORDER BY {GROUPBY_NAMES}'),
- ('1258', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'redshift', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+ ('1258', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'redshift', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
- ('1259', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'postgresql', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
+ ('1259', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'postgresql', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
('1260', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'mssql', NULL, 'SELECT TOP 20 ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}"
-UNION ALL ;
+UNION
SELECT TOP 20 ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
GROUP BY "{COLUMN_NAME}"'),
- ('1261', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'snowflake', NULL, 'SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+ ('1261', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'snowflake', NULL, '(SELECT ''Upper Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER("{COLUMN_NAME}") = "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, "{COLUMN_NAME}", COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE "{COLUMN_NAME}" <> UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" <> LOWER("{COLUMN_NAME}")
-GROUP BY "{COLUMN_NAME}" LIMIT 20'),
- ('1262', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'databricks', NULL, 'SELECT ''Upper Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY "{COLUMN_NAME}" LIMIT 20)'),
+ ('1262', '1028', 'Profile Anomaly', 'Inconsistent_Casing', 'databricks', NULL, '(SELECT ''Upper Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE UPPER(`{COLUMN_NAME}`) = `{COLUMN_NAME}`
-GROUP BY `{COLUMN_NAME}` LIMIT 20
-UNION ALL ;
-SELECT ''Mixed Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
+GROUP BY `{COLUMN_NAME}` LIMIT 20)
+UNION ALL
+(SELECT ''Mixed Case'' as casing, `{COLUMN_NAME}`, COUNT(*) AS count FROM {TARGET_SCHEMA}.{TABLE_NAME}
WHERE `{COLUMN_NAME}` <> UPPER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` <> LOWER(`{COLUMN_NAME}`)
-GROUP BY `{COLUMN_NAME}` LIMIT 20'),
- ('1263', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+GROUP BY `{COLUMN_NAME}` LIMIT 20)'),
+ ('1263', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1264', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1264', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1265', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1265', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}"'),
- ('1266', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1266', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1267', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'databricks', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE "{COLUMN_NAME}" = UPPER("{COLUMN_NAME}") AND "{COLUMN_NAME}" = LOWER("{COLUMN_NAME}") AND "{COLUMN_NAME}" > ''''
+ ('1267', '1029', 'Profile Anomaly', 'Non_Alpha_Name_Address', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+ WHERE `{COLUMN_NAME}` = UPPER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` = LOWER(`{COLUMN_NAME}`) AND `{COLUMN_NAME}` > ''''
GROUP BY "{COLUMN_NAME}" LIMIT 500'),
- ('1268', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1268', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1269', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1269', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1270', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'mssql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
-GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1271', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1270', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
+GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}"'),
+ ('1271', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+WHERE "{COLUMN_NAME}" < ''A'' AND LEFT("{COLUMN_NAME}", 1) NOT IN (''"'', '' '') AND RIGHT("{COLUMN_NAME}", 1) <> ''''''''
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1272', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'databricks', NULL, 'SELECT `{COLUMN_NAME}`, COUNT(*) as record_ct
- WHERE `{COLUMN_NAME}` < ''A'' AND LEFT(`{COLUMN_NAME}`, 1) NOT IN (''"'', '' '') AND RIGHT(`{COLUMN_NAME}`, 1) <> ''''''''
+ ('1272', '1030', 'Profile Anomaly', 'Non_Alpha_Prefixed_Name', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+WHERE `{COLUMN_NAME}` < ''A'' AND LEFT(`{COLUMN_NAME}`, 1) NOT IN (''"'', '' '') AND RIGHT(`{COLUMN_NAME}`, 1) <> ''''''''
GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500'),
- ('1273', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1273', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'redshift', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1274', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1274', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'postgresql', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1275', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE TRANSLATE("{COLUMN_NAME}", NCHAR(160) + NCHAR(8201) + NCHAR(8203) + NCHAR(8204) + NCHAR(8205) + NCHAR(8206) + NCHAR(8207) + NCHAR(8239) + NCHAR(12288) + NCHAR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
+ ('1275', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'mssql', NULL, 'SELECT TOP 500 "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
+ WHERE TRANSLATE("{COLUMN_NAME}", NCHAR(160), ''X'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}"'),
- ('1276', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
+ ('1276', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'snowflake', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct FROM "{TARGET_SCHEMA}"."{TABLE_NAME}"
WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500'),
- ('1277', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'databricks', NULL, 'SELECT "{COLUMN_NAME}", COUNT(*) as record_ct
- WHERE TRANSLATE("{COLUMN_NAME}", CHR(160) || CHR(8201) || CHR(8203) || CHR(8204) || CHR(8205) || CHR(8206) || CHR(8207) || CHR(8239) || CHR(12288) || CHR(65279), ''XXXXXXXXXX'') <> "{COLUMN_NAME}"
-GROUP BY "{COLUMN_NAME}" ORDER BY "{COLUMN_NAME}" LIMIT 500')
+ ('1277', '1031', 'Profile Anomaly', 'Non_Printing_Chars', 'databricks', NULL, 'SELECT any_value(`{COLUMN_NAME}`), COUNT(*) as record_ct FROM `{TARGET_SCHEMA}`.`{TABLE_NAME}`
+ WHERE TRANSLATE(`{COLUMN_NAME}`, ''\u00a0\u2009\u200b\u200c\u200d\u200e\u200f\u202f\u3000\ufeff'', ''XXXXXXXXXX'') <> `{COLUMN_NAME}`
+GROUP BY `{COLUMN_NAME}` ORDER BY `{COLUMN_NAME}` LIMIT 500')
;
From 4d0b330002fa9f6364a4a246f2d415ba230f0289 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 15 Jul 2025 14:42:25 -0400
Subject: [PATCH 54/56] fix(grid): dialogs on grids close abruptly
---
testgen/ui/services/form_service.py | 12 +++++++++++-
testgen/ui/views/profiling_results.py | 4 +++-
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/testgen/ui/services/form_service.py b/testgen/ui/services/form_service.py
index 0194d291..b7112f4c 100644
--- a/testgen/ui/services/form_service.py
+++ b/testgen/ui/services/form_service.py
@@ -1,3 +1,4 @@
+import json
import typing
from builtins import float
from pathlib import Path
@@ -312,7 +313,7 @@ def render_grid_select(
enable_enterprise_modules=False,
allow_unsafe_jscode=True,
update_mode=GridUpdateMode.NO_UPDATE,
- update_on=["selectionChanged", "modelUpdated"],
+ update_on=["selectionChanged"],
data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
columns_auto_size_mode=ColumnsAutoSizeMode.FIT_CONTENTS,
height=int_height,
@@ -332,4 +333,13 @@ def render_grid_select(
if len(selected_rows) > 0:
if bind_to_query_name and bind_to_query_prop:
Router().set_query_params({bind_to_query_name: selected_rows[0][bind_to_query_prop]})
+
+ # We need to get the data from the original dataframe
+ # Otherwise changes to the dataframe (e.g., editing the current selection) do not get reflected in the returned rows
+ # Adding "modelUpdated" to AgGrid(update_on=...) does not work
+ # because it causes unnecessary reruns that cause dialogs to close abruptly
+ selected_props = [row[bind_to_query_prop] for row in selected_rows]
+ selected_df = df[df[bind_to_query_prop].isin(selected_props)]
+ selected_rows = json.loads(selected_df.to_json(orient="records"))
+
return selected_rows
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index f32c9bdf..4b6b892c 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -6,6 +6,7 @@
import pandas as pd
import streamlit as st
+from testgen.common.models import with_database_session
import testgen.ui.queries.profiling_queries as profiling_queries
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
@@ -171,6 +172,7 @@ def open_download_dialog(data: pd.DataFrame | None = None) -> None:
)
+@with_database_session
def get_excel_report_data(
update_progress: PROGRESS_UPDATE_TYPE,
table_group: str,
@@ -192,7 +194,7 @@ def get_excel_report_data(
for key in ["min_date", "max_date"]:
data[key] = data[key].apply(
- lambda val: datetime.fromtimestamp(val / 1000).strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) else None
+ lambda val: datetime.strptime(val, "%Y-%m-%d %H:%M:%S").strftime("%b %-d %Y, %-I:%M %p") if not pd.isna(val) and val != "NaT" else None
)
data["hygiene_issues"] = data["hygiene_issues"].apply(lambda val: "Yes" if val else None)
From aa6007e9e75632490a52239a396726fd48b02311 Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Tue, 15 Jul 2025 16:25:38 -0400
Subject: [PATCH 55/56] fix: misc styling improvements
---
testgen/ui/components/frontend/css/shared.css | 1 +
testgen/ui/components/frontend/js/pages/project_dashboard.js | 2 +-
testgen/ui/components/frontend/js/pages/schedule_list.js | 3 ++-
testgen/ui/components/frontend/js/pages/table_group_list.js | 2 +-
testgen/ui/views/profiling_results.py | 2 +-
5 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/testgen/ui/components/frontend/css/shared.css b/testgen/ui/components/frontend/css/shared.css
index d096a445..d9ff025d 100644
--- a/testgen/ui/components/frontend/css/shared.css
+++ b/testgen/ui/components/frontend/css/shared.css
@@ -149,6 +149,7 @@ body {
border: var(--button-stroked-border);
border-radius: 8px;
padding: 16px;
+ box-sizing: border-box;
}
.table-row {
diff --git a/testgen/ui/components/frontend/js/pages/project_dashboard.js b/testgen/ui/components/frontend/js/pages/project_dashboard.js
index 92c0a156..92c22c0f 100644
--- a/testgen/ui/components/frontend/js/pages/project_dashboard.js
+++ b/testgen/ui/components/frontend/js/pages/project_dashboard.js
@@ -157,7 +157,7 @@ const TableGroupCard = (/** @type TableGroupSummary */ tableGroup) => {
),
span(
{ class: 'text-caption mt-1 mb-3 tg-overview--subtitle' },
- `${tableGroup.latest_profile_table_ct} tables | ${tableGroup.latest_profile_column_ct} columns`,
+ `${tableGroup.latest_profile_table_ct ?? 0} tables | ${tableGroup.latest_profile_column_ct ?? 0} columns`,
),
TableGroupTestSuiteSummary(tableGroup.test_suites),
),
diff --git a/testgen/ui/components/frontend/js/pages/schedule_list.js b/testgen/ui/components/frontend/js/pages/schedule_list.js
index 2d9e7bf9..f8c54f96 100644
--- a/testgen/ui/components/frontend/js/pages/schedule_list.js
+++ b/testgen/ui/components/frontend/js/pages/schedule_list.js
@@ -19,7 +19,7 @@
import van from '../van.min.js';
import { Button } from '../components/button.js';
import { Streamlit } from '../streamlit.js';
-import { emitEvent, getValue, resizeFrameHeightToElement } from '../utils.js';
+import { emitEvent, getValue, resizeFrameHeightToElement, resizeFrameHeightOnDOMChange } from '../utils.js';
import { withTooltip } from '../components/tooltip.js';
@@ -42,6 +42,7 @@ const ScheduleList = (/** @type Properties */ props) => {
const tableId = 'profiling-schedules-table';
resizeFrameHeightToElement(tableId);
+ resizeFrameHeightOnDOMChange(tableId);
return div(
{ class: 'table', id: tableId },
diff --git a/testgen/ui/components/frontend/js/pages/table_group_list.js b/testgen/ui/components/frontend/js/pages/table_group_list.js
index 18d3525b..333b133a 100644
--- a/testgen/ui/components/frontend/js/pages/table_group_list.js
+++ b/testgen/ui/components/frontend/js/pages/table_group_list.js
@@ -222,7 +222,7 @@ const Toolbar = (permissions, connections, selectedConnection) => {
})) ?? [],
onChange: (value) => emitEvent('ConnectionSelected', { payload: value }),
})
- : undefined,
+ : span(''),
div(
{ class: 'flex-row fx-gap-4' },
Button({
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index 4b6b892c..dec8b4ab 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -6,11 +6,11 @@
import pandas as pd
import streamlit as st
-from testgen.common.models import with_database_session
import testgen.ui.queries.profiling_queries as profiling_queries
import testgen.ui.services.database_service as db
import testgen.ui.services.form_service as fm
from testgen.common import date_service
+from testgen.common.models import with_database_session
from testgen.ui.components import widgets as testgen
from testgen.ui.components.widgets.download_dialog import (
FILE_DATA_TYPE,
From 44a23a0f6be74b940bae474af7992fb8163f899e Mon Sep 17 00:00:00 2001
From: Aarthy Adityan
Date: Wed, 16 Jul 2025 13:59:18 -0400
Subject: [PATCH 56/56] release: 4.1.3 -> 4.12.6
---
pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index 5ed0ceab..0320b0c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "dataops-testgen"
-version = "4.1.3"
+version = "4.12.6"
description = "DataKitchen's Data Quality DataOps TestGen"
authors = [
{ "name" = "DataKitchen, Inc.", "email" = "info@datakitchen.io" },