Skip to content

Commit 3281331

Browse files
committed
Support JSON column data prefixes
When we store data in JSON columns, if there is data we wrap it in `{ "data": theData }`. We do this because the data could be an array, a primitive or an object, and arrays and primitives can't be stored in a JSON column. This change hides the wrapper from the user: it's an implementation detail, and exposing it means their expectations don't match how we store the data.
1 parent 208c6dd commit 3281331

File tree

4 files changed

+256
-5
lines changed

4 files changed

+256
-5
lines changed

apps/webapp/app/v3/querySchemas.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ export const runsSchema: TableSchema = {
330330
// For JSON columns, NULL checks are transformed to check for empty object '{}'
331331
// So `error IS NULL` becomes `error = '{}'` and `error IS NOT NULL` becomes `error != '{}'`
332332
// textColumn uses the pre-materialized text columns for better performance
333+
// dataPrefix handles the internal {"data": ...} wrapper transparently
333334
output: {
334335
name: "output",
335336
...column("JSON", {
@@ -338,6 +339,7 @@ export const runsSchema: TableSchema = {
338339
}),
339340
nullValue: "'{}'", // Transform NULL checks to compare against empty object
340341
textColumn: "output_text", // Use output_text for full JSON value queries
342+
dataPrefix: "data", // Internal data is wrapped in {"data": ...}
341343
},
342344
error: {
343345
name: "error",
@@ -348,6 +350,7 @@ export const runsSchema: TableSchema = {
348350
}),
349351
nullValue: "'{}'", // Transform NULL checks to compare against empty object
350352
textColumn: "error_text", // Use error_text for full JSON value queries
353+
dataPrefix: "data", // Internal data is wrapped in {"data": ...}
351354
},
352355

353356
// Tags & versions

internal-packages/tsql/src/query/printer.test.ts

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,156 @@ describe("ClickHousePrinter", () => {
842842
});
843843
});
844844

845+
describe("dataPrefix for JSON columns", () => {
  /** Plain String column definition for the fixture table. */
  const stringColumn = (name: string) => ({ name, ...column("String") });

  /** JSON column definition carrying the `'{}'` null value and the "data" prefix. */
  const wrappedJsonColumn = (name: string) => ({
    name,
    ...column("JSON"),
    nullValue: "'{}'",
    dataPrefix: "data",
  });

  // Fixture table: two JSON columns with dataPrefix set, plus ordinary and tenant columns
  const dataPrefixSchema: TableSchema = {
    name: "runs",
    clickhouseName: "trigger_dev.task_runs_v2",
    columns: {
      id: stringColumn("id"),
      output: wrappedJsonColumn("output"),
      error: wrappedJsonColumn("error"),
      status: stringColumn("status"),
      organization_id: stringColumn("organization_id"),
      project_id: stringColumn("project_id"),
      environment_id: stringColumn("environment_id"),
    },
    tenantColumns: {
      organizationId: "organization_id",
      projectId: "project_id",
      environmentId: "environment_id",
    },
  };

  /** Builds a fresh printer context over the fixture schema with fixed test tenant IDs. */
  function createDataPrefixContext() {
    return createPrinterContext({
      organizationId: "org_test",
      projectId: "proj_test",
      environmentId: "env_test",
      schema: createSchemaRegistry([dataPrefixSchema]),
    });
  }

  /** Prints `query` against a newly created fixture context. */
  const printWithPrefixSchema = (query: string) => printQuery(query, createDataPrefixContext());

  describe("SELECT clause", () => {
    it("should inject dataPrefix into JSON subfield path", () => {
      const { sql } = printWithPrefixSchema("SELECT output.message FROM runs");

      // output.message must be printed as output.data.message
      expect(sql).toContain("output.data.message");
    });

    it("should generate clean alias without dataPrefix", () => {
      const { sql, columns } = printWithPrefixSchema("SELECT output.message FROM runs");

      // The alias (and column metadata) is output_message; the prefix never leaks into it
      expect(sql).toContain("AS output_message");
      expect(sql).not.toContain("AS output_data_message");
      expect(columns).toContainEqual(expect.objectContaining({ name: "output_message" }));
    });

    it("should handle nested paths with dataPrefix", () => {
      const { sql } = printWithPrefixSchema("SELECT output.user.name FROM runs");

      // Path gains the prefix after the column; alias stays output_user_name
      expect(sql).toContain("output.data.user.name");
      expect(sql).toContain("AS output_user_name");
    });

    it("should work with multiple JSON columns with dataPrefix", () => {
      const { sql } = printWithPrefixSchema("SELECT output.msg, error.code FROM runs");

      const expectedFragments = [
        "output.data.msg",
        "error.data.code",
        "AS output_msg",
        "AS error_code",
      ];
      for (const fragment of expectedFragments) {
        expect(sql).toContain(fragment);
      }
    });

    it("should not affect bare JSON column selection", () => {
      const { sql } = printWithPrefixSchema("SELECT output FROM runs");

      // Selecting the column itself (no subfield) gets no prefix
      expect(sql).not.toContain("output.data");
      expect(sql).toMatch(/SELECT\s+output[\s,]/);
    });
  });

  describe("WHERE clause", () => {
    it("should inject dataPrefix into WHERE comparison", () => {
      const { sql } = printWithPrefixSchema(
        "SELECT id FROM runs WHERE output.status = 'success'"
      );

      // output.status must be printed as output.data.status
      expect(sql).toContain("output.data.status");
    });

    it("should inject dataPrefix into LIKE comparison", () => {
      const { sql } = printWithPrefixSchema(
        "SELECT id FROM runs WHERE error.message LIKE '%failed%'"
      );

      expect(sql).toContain("error.data.message");
    });
  });

  describe("GROUP BY clause", () => {
    it("should inject dataPrefix into GROUP BY", () => {
      const { sql } = printWithPrefixSchema(
        "SELECT output.type, count() AS cnt FROM runs GROUP BY output.type"
      );

      // The prefixed path must appear, including inside the GROUP BY clause
      expect(sql).toContain("output.data.type");
      expect(sql).toContain("GROUP BY output.data.type");
    });
  });

  describe("edge cases", () => {
    it("should not affect columns without dataPrefix", () => {
      const { sql } = printWithPrefixSchema("SELECT status FROM runs");

      // A column with no dataPrefix is printed untouched
      expect(sql).toContain("status");
      expect(sql).not.toContain("status.data");
    });

    it("should work with explicit alias on JSON subfield", () => {
      const { sql } = printWithPrefixSchema("SELECT output.message AS msg FROM runs");

      // Prefix is injected into the path while the user's alias is kept
      expect(sql).toContain("output.data.message");
      expect(sql).toContain("AS msg");
    });
  });
});
994+
845995
describe("ORDER BY clauses", () => {
846996
it("should print ORDER BY ASC", () => {
847997
const { sql } = printQuery("SELECT * FROM task_runs ORDER BY created_at ASC");

internal-packages/tsql/src/query/printer.ts

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -657,10 +657,10 @@ export class ClickHousePrinter {
657657
// If so, add an alias to preserve the nice column name (dots → underscores)
658658
const isJsonSubfield = this.isJsonSubfieldAccess(field.chain);
659659
if (isJsonSubfield) {
660-
// Build the alias using underscores (e.g., "error_data_name")
661-
const aliasName = field.chain
662-
.filter((p): p is string => typeof p === "string")
663-
.join("_");
660+
// Build the alias using underscores, excluding any dataPrefix
661+
// e.g., output.message -> "output_message" (not "output_data_message")
662+
const dataPrefix = this.getDataPrefixForField(field.chain);
663+
const aliasName = this.buildAliasWithoutDataPrefix(field.chain, dataPrefix);
664664
sqlResult = `${visited} AS ${this.printIdentifier(aliasName)}`;
665665
// Override output name for metadata
666666
effectiveOutputName = aliasName;
@@ -2125,8 +2125,11 @@ export class ClickHousePrinter {
21252125
return `(${virtualExpression})`;
21262126
}
21272127

2128+
// Inject dataPrefix for JSON columns if needed (e.g., output.message -> output.data.message)
2129+
const chainWithPrefix = this.injectDataPrefix(node.chain);
2130+
21282131
// Try to resolve column names through table context
2129-
const resolvedChain = this.resolveFieldChain(node.chain);
2132+
const resolvedChain = this.resolveFieldChain(chainWithPrefix);
21302133

21312134
// Print each chain element
21322135
let result = resolvedChain.map((part) => this.printIdentifierOrIndex(part)).join(".");
@@ -2273,6 +2276,81 @@ export class ClickHousePrinter {
22732276
return this.getTextColumnForField((expr as Field).chain);
22742277
}
22752278

2279+
/**
 * Look up the `dataPrefix` configured on the column at the root of a field chain.
 *
 * Handles both `column.subfield` and `table.column.subfield` chains. The first
 * segment is treated as a table reference when it is a key in `tableContexts`.
 *
 * @param chain - Field path segments (names and numeric indexes)
 * @returns The column's `dataPrefix`, or `null` when the chain has no subfield
 *   part, the table/column segment is not a string, or the column defines no prefix
 */
private getDataPrefixForField(chain: Array<string | number>): string | null {
  const [head, second] = chain;

  // A bare column (or an empty chain) has no subfield to prefix
  if (chain.length < 2 || typeof head !== "string") return null;

  const qualifiedTable = this.tableContexts.get(head);
  if (!qualifiedTable) {
    // Unqualified: column.subfield
    return this.resolveFieldToColumnSchema([head])?.dataPrefix ?? null;
  }

  // Qualified: table.column.subfield - a third segment is required
  if (chain.length < 3 || typeof second !== "string") return null;
  return qualifiedTable.columns[second]?.dataPrefix ?? null;
}
2304+
2305+
/**
 * Insert the root column's `dataPrefix` into a field chain, directly after the
 * column segment.
 *
 * e.g., [output, message] -> [output, data, message] and
 * [table, output, message] -> [table, output, data, message] when dataPrefix is "data"
 *
 * @param chain - Field path segments as written in the query
 * @returns A new chain containing the prefix, or the same `chain` instance when
 *   no dataPrefix applies
 */
private injectDataPrefix(chain: Array<string | number>): Array<string | number> {
  const prefix = this.getDataPrefixForField(chain);
  const root = chain[0];
  if (!prefix || typeof root !== "string") return chain;

  // The column sits at index 1 when the chain starts with a known table, else at index 0,
  // so the prefix is inserted at index 2 or index 1 respectively
  const insertAt = this.tableContexts.get(root) ? 2 : 1;
  return [...chain.slice(0, insertAt), prefix, ...chain.slice(insertAt)];
}
2329+
2330+
/**
 * Build an underscore-joined alias from the string segments of a field chain,
 * leaving out the dataPrefix segment when one is found.
 *
 * e.g., [output, data, message] with dataPrefix "data" -> "output_message"
 *
 * Numeric segments (array indexes) never appear in the alias. Only the first
 * string segment equal to `dataPrefix` is dropped, and never the leading segment.
 *
 * NOTE(review): the match is by value, not by position, so a chain that does not
 * contain the injected prefix but has a real subfield named like it
 * (e.g. [output, data]) also loses that segment - confirm callers pass chains
 * where this is intended.
 *
 * @param chain - Field path segments (names and numeric indexes)
 * @param dataPrefix - Prefix to leave out, or `null` to keep every string segment
 * @returns The alias, e.g. "output_message"
 */
private buildAliasWithoutDataPrefix(
  chain: Array<string | number>,
  dataPrefix: string | null
): string {
  // Collect the name segments; indexes are skipped
  const names: string[] = [];
  for (const segment of chain) {
    if (typeof segment === "string") names.push(segment);
  }

  // Position of the first segment equal to the prefix (-1 when there is no prefix to strip)
  const firstMatch = dataPrefix ? names.indexOf(dataPrefix) : -1;

  // Index 0 is the leading (column) segment and is always kept
  const kept = firstMatch > 0 ? names.filter((_, i) => i !== firstMatch) : names;
  return kept.join("_");
}
2353+
22762354
/**
22772355
* Resolve a field chain to its column schema (if it references a known column)
22782356
*/

internal-packages/tsql/src/query/schema.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,26 @@ export interface ColumnSchema {
230230
* ```
231231
*/
232232
textColumn?: string;
233+
/**
234+
* Prefix path for JSON column data access.
235+
*
236+
* When set, user paths like `output.message` are automatically transformed
237+
* to `output.data.message` in the actual query, and result aliases exclude
238+
* the prefix (e.g., `output_message` instead of `output_data_message`).
239+
*
240+
* This is useful when JSON data is stored wrapped in a container object
241+
* (e.g., `{"data": actualData}`) to handle arrays and primitives.
242+
*
243+
* @example
244+
* ```typescript
245+
* {
246+
* name: "output",
247+
* type: "JSON",
248+
* dataPrefix: "data", // output.message → output.data.message
249+
* }
250+
* ```
251+
*/
252+
dataPrefix?: string;
233253
}
234254

235255
/**

0 commit comments

Comments
 (0)