Automatically use _text columns for JSON depending on the query pattern

matt-aitken · matt-aitken · commit 42ad2730ef0c · 2026-01-28T14:12:23.000Z
diff --git a/apps/webapp/app/components/code/TSQLResultsTable.tsx b/apps/webapp/app/components/code/TSQLResultsTable.tsx
@@ -672,7 +672,9 @@ function EnvironmentCellValue({ value }: { value: string }) {
 }
 
 function JSONCellValue({ value }: { value: unknown }) {
-  const jsonString = JSON.stringify(value);
+  // If the value is already a string (e.g., from a textColumn optimization),
+  // use it directly without double-stringifying
+  const jsonString = typeof value === "string" ? value : JSON.stringify(value);
   const isTruncated = jsonString.length > MAX_STRING_DISPLAY_LENGTH;
 
   if (isTruncated) {
@@ -1137,6 +1139,7 @@ export const TSQLResultsTable = memo(function TSQLResultsTable({
             height: `${rowVirtualizer.getTotalSize()}px`,
             position: "relative",
           }}
+          className="bg-background-dimmed divide-y divide-charcoal-700"
         >
           {rowVirtualizer.getVirtualItems().map((virtualRow) => {
             const row = tableRows[virtualRow.index];
diff --git a/apps/webapp/app/v3/querySchemas.ts b/apps/webapp/app/v3/querySchemas.ts
@@ -174,7 +174,7 @@ export const runsSchema: TableSchema = {
     },
     idempotency_key_scope: {
       name: "idempotency_key_scope",
-      ...column("String", { description: "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).", example: "run", allowedValues: ["", "global", "run", "attempt"], }),
+      ...column("String", { description: "The idempotency key scope determines whether a task should be considered unique within a parent run, a specific attempt, or globally. An empty value means there's no idempotency key set (available from 4.3.3).", example: "run", allowedValues: ["global", "run", "attempt"], }),
     },
     region: {
       name: "region",
@@ -329,13 +329,15 @@ export const runsSchema: TableSchema = {
     // Output & error (JSON columns)
     // For JSON columns, NULL checks are transformed to check for empty object '{}'
     // So `error IS NULL` becomes `error = '{}'` and `error IS NOT NULL` becomes `error != '{}'`
+    // textColumn names the pre-materialized text column used for full JSON value queries (better performance than the JSON column)
     output: {
       name: "output",
       ...column("JSON", {
         description: "The data you returned from the task.",
         example: '{"result": "success"}',
       }),
       nullValue: "'{}'", // Transform NULL checks to compare against empty object
+      textColumn: "output_text", // Use output_text for full JSON value queries
     },
     error: {
       name: "error",
@@ -345,6 +347,7 @@ export const runsSchema: TableSchema = {
         example: '{"message": "Task failed"}',
       }),
       nullValue: "'{}'", // Transform NULL checks to compare against empty object
+      textColumn: "error_text", // Use error_text for full JSON value queries
     },
 
     // Tags & versions
diff --git a/internal-packages/clickhouse/schema/015_update_output_error_text_to_extract_data.sql b/internal-packages/clickhouse/schema/015_update_output_error_text_to_extract_data.sql
@@ -0,0 +1,45 @@
+-- +goose Up
+-- Define materialized text columns: '' for an empty object, the raw 'data' field
+-- if it exists (avoiding the {"data": ...} wrapper), otherwise the full JSON string.
+-- Note: Direct JSON path access (output.data) returns null for nested objects,
+-- so we use JSONExtractRaw on the stringified JSON instead.
+ALTER TABLE trigger_dev.task_runs_v2
+ADD COLUMN output_text String MATERIALIZED if (
+  toJSONString (output) = '{}',
+  '',
+  if (
+    length (JSONExtractRaw (toJSONString (output), 'data')) > 0,
+    JSONExtractRaw (toJSONString (output), 'data'),
+    toJSONString (output)
+  )
+);
+
+-- For error: extract error.data if it exists
+ALTER TABLE trigger_dev.task_runs_v2
+ADD COLUMN error_text String MATERIALIZED if (
+  toJSONString (error) = '{}',
+  '',
+  if (
+    length (JSONExtractRaw (toJSONString (error), 'data')) > 0,
+    JSONExtractRaw (toJSONString (error), 'data'),
+    toJSONString (error)
+  )
+);
+
+-- Add the indexes
+ALTER TABLE trigger_dev.task_runs_v2 ADD INDEX idx_output_text output_text TYPE ngrambf_v1 (3, 131072, 3, 0) GRANULARITY 4;
+
+ALTER TABLE trigger_dev.task_runs_v2 ADD INDEX idx_error_text error_text TYPE ngrambf_v1 (3, 131072, 3, 0) GRANULARITY 4;
+
+-- +goose Down
+ALTER TABLE trigger_dev.task_runs_v2
+DROP INDEX IF EXISTS idx_output_text;
+
+ALTER TABLE trigger_dev.task_runs_v2
+DROP INDEX IF EXISTS idx_error_text;
+
+ALTER TABLE trigger_dev.task_runs_v2
+DROP COLUMN IF EXISTS output_text;
+
+ALTER TABLE trigger_dev.task_runs_v2
+DROP COLUMN IF EXISTS error_text;
diff --git a/internal-packages/tsql/src/query/printer.test.ts b/internal-packages/tsql/src/query/printer.test.ts
@@ -599,6 +599,192 @@ describe("ClickHousePrinter", () => {
     });
   });
 
+  describe("textColumn optimization for JSON columns", () => {
+    // Create a schema with JSON columns that have textColumn set
+    const textColumnSchema: TableSchema = {
+      name: "runs",
+      clickhouseName: "trigger_dev.task_runs_v2",
+      columns: {
+        id: { name: "id", ...column("String") },
+        output: {
+          name: "output",
+          ...column("JSON"),
+          nullValue: "'{}'",
+          textColumn: "output_text",
+        },
+        error: {
+          name: "error",
+          ...column("JSON"),
+          nullValue: "'{}'",
+          textColumn: "error_text",
+        },
+        status: { name: "status", ...column("String") },
+        organization_id: { name: "organization_id", ...column("String") },
+        project_id: { name: "project_id", ...column("String") },
+        environment_id: { name: "environment_id", ...column("String") },
+      },
+      tenantColumns: {
+        organizationId: "organization_id",
+        projectId: "project_id",
+        environmentId: "environment_id",
+      },
+    };
+
+    function createTextColumnContext() {
+      const schema = createSchemaRegistry([textColumnSchema]);
+      return createPrinterContext({
+        organizationId: "org_test",
+        projectId: "proj_test",
+        environmentId: "env_test",
+        schema,
+      });
+    }
+
+    describe("SELECT clause", () => {
+      it("should use text column when selecting bare JSON column", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT output FROM runs", ctx);
+
+        // Should use the text column with an alias to preserve the column name
+        expect(sql).toContain("output_text AS output");
+      });
+
+      it("should use text column for multiple JSON columns", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT output, error FROM runs", ctx);
+
+        expect(sql).toContain("output_text AS output");
+        expect(sql).toContain("error_text AS error");
+      });
+
+      it("should use JSON column for subfield access", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT output.data.name FROM runs", ctx);
+
+        // Should use the original JSON column with .:String type hint
+        expect(sql).toContain("output.data.name.:String");
+        expect(sql).not.toContain("output_text");
+      });
+    });
+
+    describe("SELECT * expansion", () => {
+      it("should use text columns when expanding SELECT *", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT * FROM runs", ctx);
+
+        // Should use text columns for JSON columns
+        expect(sql).toContain("output_text AS output");
+        expect(sql).toContain("error_text AS error");
+      });
+    });
+
+    describe("WHERE clause", () => {
+      it("should use text column for exact equality comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE output = '{}'", ctx);
+
+        expect(sql).toContain("equals(output_text,");
+        expect(sql).not.toMatch(/equals\(output,/);
+      });
+
+      it("should use text column for inequality comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE output != '{}'", ctx);
+
+        expect(sql).toContain("notEquals(output_text,");
+      });
+
+      it("should use text column for LIKE comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE output LIKE '%error%'", ctx);
+
+        expect(sql).toContain("like(output_text,");
+        expect(sql).not.toMatch(/like\(output,/);
+      });
+
+      it("should use text column for ILIKE comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE error ILIKE '%failed%'", ctx);
+
+        expect(sql).toContain("ilike(error_text,");
+      });
+
+      it("should use text column for NOT LIKE comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE output NOT LIKE '%test%'", ctx);
+
+        expect(sql).toContain("notLike(output_text,");
+      });
+
+      it("should use JSON column for subfield comparison", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery(
+          "SELECT id FROM runs WHERE output.data.name = 'test'",
+          ctx
+        );
+
+        // Should use the original JSON column, not the text column
+        expect(sql).toContain("equals(output.data.name.:String,");
+        expect(sql).not.toContain("output_text");
+      });
+
+      it("should still use nullValue transformation for IS NULL", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE output IS NULL", ctx);
+
+        // NULL check should use the text column with nullValue
+        expect(sql).toContain("equals(output_text, '{}')");
+      });
+
+      it("should still use nullValue transformation for IS NOT NULL", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT id FROM runs WHERE error IS NOT NULL", ctx);
+
+        expect(sql).toContain("notEquals(error_text, '{}')");
+      });
+    });
+
+    describe("edge cases", () => {
+      it("should work with columns without textColumn defined", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT status FROM runs WHERE status = 'completed'", ctx);
+
+        // Regular column should work as before
+        expect(sql).toContain("status");
+        expect(sql).not.toContain("status_text");
+      });
+
+      it("should use text column for aliased JSON columns in SELECT", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT output AS result FROM runs", ctx);
+
+        // Should use text column with user's alias
+        expect(sql).toContain("output_text AS result");
+      });
+
+      it("should use text column for table-qualified JSON columns in SELECT", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery("SELECT runs.output FROM runs", ctx);
+
+        // Should use text column
+        expect(sql).toContain("output_text AS output");
+      });
+
+      it("should use text column in both SELECT and WHERE for same query", () => {
+        const ctx = createTextColumnContext();
+        const { sql } = printQuery(
+          "SELECT output FROM runs WHERE output LIKE '%test%'",
+          ctx
+        );
+
+        // SELECT should use text column
+        expect(sql).toContain("output_text AS output");
+        // WHERE should use text column
+        expect(sql).toContain("like(output_text,");
+      });
+    });
+  });
+
   describe("ORDER BY clauses", () => {
     it("should print ORDER BY ASC", () => {
       const { sql } = printQuery("SELECT * FROM task_runs ORDER BY created_at ASC");
diff --git a/internal-packages/tsql/src/query/printer.ts b/internal-packages/tsql/src/query/printer.ts
diff --git a/internal-packages/tsql/src/query/schema.ts b/internal-packages/tsql/src/query/schema.ts