Skip to content

Commit 42ad273

Browse files
committed
Automatically use _text columns for JSON depending on the query pattern
1 parent d42ae5b commit 42ad273

File tree

6 files changed

+372
-32
lines changed

6 files changed

+372
-32
lines changed

apps/webapp/app/components/code/TSQLResultsTable.tsx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,9 @@ function EnvironmentCellValue({ value }: { value: string }) {
672672
}
673673

674674
function JSONCellValue({ value }: { value: unknown }) {
675-
const jsonString = JSON.stringify(value);
675+
// If the value is already a string (e.g., from a textColumn optimization),
676+
// use it directly without double-stringifying
677+
const jsonString = typeof value === "string" ? value : JSON.stringify(value);
676678
const isTruncated = jsonString.length > MAX_STRING_DISPLAY_LENGTH;
677679

678680
if (isTruncated) {
@@ -1137,6 +1139,7 @@ export const TSQLResultsTable = memo(function TSQLResultsTable({
11371139
height: `${rowVirtualizer.getTotalSize()}px`,
11381140
position: "relative",
11391141
}}
1142+
className="bg-background-dimmed divide-y divide-charcoal-700"
11401143
>
11411144
{rowVirtualizer.getVirtualItems().map((virtualRow) => {
11421145
const row = tableRows[virtualRow.index];

apps/webapp/app/v3/querySchemas.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ export const runsSchema: TableSchema = {
174174
},
175175
idempotency_key_scope: {
176176
name: "idempotency_key_scope",
177-
...column("String", { description: "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).", example: "run", allowedValues: ["", "global", "run", "attempt"], }),
177+
...column("String", { description: "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).", example: "run", allowedValues: ["global", "run", "attempt"], }),
178178
},
179179
region: {
180180
name: "region",
@@ -329,13 +329,15 @@ export const runsSchema: TableSchema = {
329329
// Output & error (JSON columns)
330330
// For JSON columns, NULL checks are transformed to check for empty object '{}'
331331
// So `error IS NULL` becomes `error = '{}'` and `error IS NOT NULL` becomes `error != '{}'`
332+
// textColumn uses the pre-materialized text columns for better performance
332333
output: {
333334
name: "output",
334335
...column("JSON", {
335336
description: "The data you returned from the task.",
336337
example: '{"result": "success"}',
337338
}),
338339
nullValue: "'{}'", // Transform NULL checks to compare against empty object
340+
textColumn: "output_text", // Use output_text for full JSON value queries
339341
},
340342
error: {
341343
name: "error",
@@ -345,6 +347,7 @@ export const runsSchema: TableSchema = {
345347
example: '{"message": "Task failed"}',
346348
}),
347349
nullValue: "'{}'", // Transform NULL checks to compare against empty object
350+
textColumn: "error_text", // Use error_text for full JSON value queries
348351
},
349352

350353
// Tags & versions
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
-- +goose Up
2+
-- Update the materialized columns to extract the 'data' field if it exists
3+
-- This avoids the {"data": ...} wrapper in the text representation
4+
-- Note: Direct JSON path access (output.data) returns null for nested objects,
5+
-- so we use JSONExtractRaw on the stringified JSON instead
6+
ALTER TABLE trigger_dev.task_runs_v2
7+
ADD COLUMN output_text String MATERIALIZED if (
8+
toJSONString (output) = '{}',
9+
'',
10+
if (
11+
length (JSONExtractRaw (toJSONString (output), 'data')) > 0,
12+
JSONExtractRaw (toJSONString (output), 'data'),
13+
toJSONString (output)
14+
)
15+
);
16+
17+
-- For error: extract error.data if it exists
18+
ALTER TABLE trigger_dev.task_runs_v2
19+
ADD COLUMN error_text String MATERIALIZED if (
20+
toJSONString (error) = '{}',
21+
'',
22+
if (
23+
length (JSONExtractRaw (toJSONString (error), 'data')) > 0,
24+
JSONExtractRaw (toJSONString (error), 'data'),
25+
toJSONString (error)
26+
)
27+
);
28+
29+
-- Add the indexes
30+
ALTER TABLE trigger_dev.task_runs_v2 ADD INDEX idx_output_text output_text TYPE ngrambf_v1 (3, 131072, 3, 0) GRANULARITY 4;
31+
32+
ALTER TABLE trigger_dev.task_runs_v2 ADD INDEX idx_error_text error_text TYPE ngrambf_v1 (3, 131072, 3, 0) GRANULARITY 4;
33+
34+
-- +goose Down
35+
ALTER TABLE trigger_dev.task_runs_v2
36+
DROP INDEX IF EXISTS idx_output_text;
37+
38+
ALTER TABLE trigger_dev.task_runs_v2
39+
DROP INDEX IF EXISTS idx_error_text;
40+
41+
ALTER TABLE trigger_dev.task_runs_v2
42+
DROP COLUMN IF EXISTS output_text;
43+
44+
ALTER TABLE trigger_dev.task_runs_v2
45+
DROP COLUMN IF EXISTS error_text;

internal-packages/tsql/src/query/printer.test.ts

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,192 @@ describe("ClickHousePrinter", () => {
599599
});
600600
});
601601

602+
describe("textColumn optimization for JSON columns", () => {
603+
// Create a schema with JSON columns that have textColumn set
604+
const textColumnSchema: TableSchema = {
605+
name: "runs",
606+
clickhouseName: "trigger_dev.task_runs_v2",
607+
columns: {
608+
id: { name: "id", ...column("String") },
609+
output: {
610+
name: "output",
611+
...column("JSON"),
612+
nullValue: "'{}'",
613+
textColumn: "output_text",
614+
},
615+
error: {
616+
name: "error",
617+
...column("JSON"),
618+
nullValue: "'{}'",
619+
textColumn: "error_text",
620+
},
621+
status: { name: "status", ...column("String") },
622+
organization_id: { name: "organization_id", ...column("String") },
623+
project_id: { name: "project_id", ...column("String") },
624+
environment_id: { name: "environment_id", ...column("String") },
625+
},
626+
tenantColumns: {
627+
organizationId: "organization_id",
628+
projectId: "project_id",
629+
environmentId: "environment_id",
630+
},
631+
};
632+
633+
function createTextColumnContext() {
634+
const schema = createSchemaRegistry([textColumnSchema]);
635+
return createPrinterContext({
636+
organizationId: "org_test",
637+
projectId: "proj_test",
638+
environmentId: "env_test",
639+
schema,
640+
});
641+
}
642+
643+
describe("SELECT clause", () => {
644+
it("should use text column when selecting bare JSON column", () => {
645+
const ctx = createTextColumnContext();
646+
const { sql } = printQuery("SELECT output FROM runs", ctx);
647+
648+
// Should use the text column with an alias to preserve the column name
649+
expect(sql).toContain("output_text AS output");
650+
});
651+
652+
it("should use text column for multiple JSON columns", () => {
653+
const ctx = createTextColumnContext();
654+
const { sql } = printQuery("SELECT output, error FROM runs", ctx);
655+
656+
expect(sql).toContain("output_text AS output");
657+
expect(sql).toContain("error_text AS error");
658+
});
659+
660+
it("should use JSON column for subfield access", () => {
661+
const ctx = createTextColumnContext();
662+
const { sql } = printQuery("SELECT output.data.name FROM runs", ctx);
663+
664+
// Should use the original JSON column with .:String type hint
665+
expect(sql).toContain("output.data.name.:String");
666+
expect(sql).not.toContain("output_text");
667+
});
668+
});
669+
670+
describe("SELECT * expansion", () => {
671+
it("should use text columns when expanding SELECT *", () => {
672+
const ctx = createTextColumnContext();
673+
const { sql } = printQuery("SELECT * FROM runs", ctx);
674+
675+
// Should use text columns for JSON columns
676+
expect(sql).toContain("output_text AS output");
677+
expect(sql).toContain("error_text AS error");
678+
});
679+
});
680+
681+
describe("WHERE clause", () => {
682+
it("should use text column for exact equality comparison", () => {
683+
const ctx = createTextColumnContext();
684+
const { sql } = printQuery("SELECT id FROM runs WHERE output = '{}'", ctx);
685+
686+
expect(sql).toContain("equals(output_text,");
687+
expect(sql).not.toMatch(/equals\(output,/);
688+
});
689+
690+
it("should use text column for inequality comparison", () => {
691+
const ctx = createTextColumnContext();
692+
const { sql } = printQuery("SELECT id FROM runs WHERE output != '{}'", ctx);
693+
694+
expect(sql).toContain("notEquals(output_text,");
695+
});
696+
697+
it("should use text column for LIKE comparison", () => {
698+
const ctx = createTextColumnContext();
699+
const { sql } = printQuery("SELECT id FROM runs WHERE output LIKE '%error%'", ctx);
700+
701+
expect(sql).toContain("like(output_text,");
702+
expect(sql).not.toMatch(/like\(output,/);
703+
});
704+
705+
it("should use text column for ILIKE comparison", () => {
706+
const ctx = createTextColumnContext();
707+
const { sql } = printQuery("SELECT id FROM runs WHERE error ILIKE '%failed%'", ctx);
708+
709+
expect(sql).toContain("ilike(error_text,");
710+
});
711+
712+
it("should use text column for NOT LIKE comparison", () => {
713+
const ctx = createTextColumnContext();
714+
const { sql } = printQuery("SELECT id FROM runs WHERE output NOT LIKE '%test%'", ctx);
715+
716+
expect(sql).toContain("notLike(output_text,");
717+
});
718+
719+
it("should use JSON column for subfield comparison", () => {
720+
const ctx = createTextColumnContext();
721+
const { sql } = printQuery(
722+
"SELECT id FROM runs WHERE output.data.name = 'test'",
723+
ctx
724+
);
725+
726+
// Should use the original JSON column, not the text column
727+
expect(sql).toContain("equals(output.data.name.:String,");
728+
expect(sql).not.toContain("output_text");
729+
});
730+
731+
it("should still use nullValue transformation for IS NULL", () => {
732+
const ctx = createTextColumnContext();
733+
const { sql } = printQuery("SELECT id FROM runs WHERE output IS NULL", ctx);
734+
735+
// NULL check should use the text column with nullValue
736+
expect(sql).toContain("equals(output_text, '{}')");
737+
});
738+
739+
it("should still use nullValue transformation for IS NOT NULL", () => {
740+
const ctx = createTextColumnContext();
741+
const { sql } = printQuery("SELECT id FROM runs WHERE error IS NOT NULL", ctx);
742+
743+
expect(sql).toContain("notEquals(error_text, '{}')");
744+
});
745+
});
746+
747+
describe("edge cases", () => {
748+
it("should work with columns without textColumn defined", () => {
749+
const ctx = createTextColumnContext();
750+
const { sql } = printQuery("SELECT status FROM runs WHERE status = 'completed'", ctx);
751+
752+
// Regular column should work as before
753+
expect(sql).toContain("status");
754+
expect(sql).not.toContain("status_text");
755+
});
756+
757+
it("should use text column for aliased JSON columns in SELECT", () => {
758+
const ctx = createTextColumnContext();
759+
const { sql } = printQuery("SELECT output AS result FROM runs", ctx);
760+
761+
// Should use text column with user's alias
762+
expect(sql).toContain("output_text AS result");
763+
});
764+
765+
it("should use text column for table-qualified JSON columns in SELECT", () => {
766+
const ctx = createTextColumnContext();
767+
const { sql } = printQuery("SELECT runs.output FROM runs", ctx);
768+
769+
// Should use text column
770+
expect(sql).toContain("output_text AS output");
771+
});
772+
773+
it("should use text column in both SELECT and WHERE for same query", () => {
774+
const ctx = createTextColumnContext();
775+
const { sql } = printQuery(
776+
"SELECT output FROM runs WHERE output LIKE '%test%'",
777+
ctx
778+
);
779+
780+
// SELECT should use text column
781+
expect(sql).toContain("output_text AS output");
782+
// WHERE should use text column
783+
expect(sql).toContain("like(output_text,");
784+
});
785+
});
786+
});
787+
602788
describe("ORDER BY clauses", () => {
603789
it("should print ORDER BY ASC", () => {
604790
const { sql } = printQuery("SELECT * FROM task_runs ORDER BY created_at ASC");

0 commit comments

Comments
 (0)