Skip to content

Commit 386a8cd

Browse files
committed
Try to avoid SELECT * through warnings and by training the AI
1 parent 361861c commit 386a8cd

File tree

5 files changed

+120
-18
lines changed

5 files changed

+120
-18
lines changed

apps/webapp/app/v3/querySchemas.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ export const runsSchema: TableSchema = {
4242
"A unique ID for a run. They always start with `run_`, e.g., run_cm1a2b3c4d5e6f7g8h9i",
4343
customRenderType: "runId",
4444
example: "run_cm1a2b3c4d5e6f7g8h9i",
45+
coreColumn: true,
4546
}),
4647
},
4748
environment: {
@@ -87,6 +88,7 @@ export const runsSchema: TableSchema = {
8788
valueMap: runStatusTitleFromStatus,
8889
customRenderType: "runStatus",
8990
example: "Completed",
91+
coreColumn: true,
9092
}),
9193
},
9294
is_finished: {
@@ -103,7 +105,11 @@ export const runsSchema: TableSchema = {
103105
// Task & queue
104106
task_identifier: {
105107
name: "task_identifier",
106-
...column("String", { description: "Task identifier/slug", example: "my-background-task" }),
108+
...column("String", {
109+
description: "Task identifier/slug",
110+
example: "my-background-task",
111+
coreColumn: true,
112+
}),
107113
},
108114
queue: {
109115
name: "queue",
@@ -182,6 +188,7 @@ export const runsSchema: TableSchema = {
182188
...column("DateTime64", {
183189
description: "When the run was triggered.",
184190
example: "2024-01-15 09:30:00.000",
191+
coreColumn: true,
185192
}),
186193
},
187194
queued_at: {
@@ -419,7 +426,7 @@ export const querySchemas: TableSchema[] = [runsSchema];
419426
/**
420427
* Default query for the query editor
421428
*/
422-
export const defaultQuery = `SELECT *
429+
export const defaultQuery = `SELECT run_id, task_identifier, triggered_at, status
423430
FROM runs
424431
ORDER BY triggered_at DESC
425432
LIMIT 100`;

apps/webapp/app/v3/services/aiQueryService.server.ts

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,23 @@ export class AIQueryService {
258258
parts.push(table.description);
259259
}
260260
parts.push("");
261+
262+
// Identify core columns
263+
const coreColumns = Object.values(table.columns)
264+
.filter((col) => col.coreColumn === true)
265+
.map((col) => col.name);
266+
if (coreColumns.length > 0) {
267+
parts.push(`Core columns (use these as defaults): ${coreColumns.join(", ")}`);
268+
parts.push("");
269+
}
270+
261271
parts.push("Columns:");
262272

263273
for (const col of Object.values(table.columns)) {
264274
let colDesc = `- ${col.name} (${col.type})`;
275+
if (col.coreColumn) {
276+
colDesc += " [CORE]";
277+
}
265278
if (col.description) {
266279
colDesc += `: ${col.description}`;
267280
}
@@ -350,13 +363,16 @@ HAVING cnt > 10
350363
351364
## Important Rules
352365
353-
1. ALWAYS use the validateTSQLQuery tool to check your query before returning it
354-
2. If validation fails, fix the issues and try again (up to 3 attempts)
355-
3. Use column names exactly as defined in the schema (case-sensitive)
356-
4. For enum columns like status, use the allowed values shown in the schema
357-
5. Always include a LIMIT clause (default to 100 if not specified)
358-
6. Use meaningful column aliases with AS for aggregations
359-
7. Format queries with proper indentation for readability
366+
1. NEVER use SELECT * - ClickHouse is a columnar database where SELECT * has very poor performance
367+
2. Always select only the specific columns needed for the request
368+
3. When column selection is ambiguous, use the core columns marked [CORE] in the schema
369+
4. ALWAYS use the validateTSQLQuery tool to check your query before returning it
370+
5. If validation fails, fix the issues and try again (up to 3 attempts)
371+
6. Use column names exactly as defined in the schema (case-sensitive)
372+
7. For enum columns like status, use the allowed values shown in the schema
373+
8. Always include a LIMIT clause (default to 100 if not specified)
374+
9. Use meaningful column aliases with AS for aggregations
375+
10. Format queries with proper indentation for readability
360376
361377
## Response Format
362378
@@ -431,13 +447,15 @@ HAVING cnt > 10
431447
432448
## Important Rules
433449
434-
1. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it
435-
2. If validation fails, fix the issues and try again (up to 3 attempts)
436-
3. Use column names exactly as defined in the schema (case-sensitive)
437-
4. For enum columns like status, use the allowed values shown in the schema
438-
5. Always include a LIMIT clause (default to 100 if not specified)
439-
6. Preserve the user's existing query structure and style where possible
440-
7. Only make the changes specifically requested by the user
450+
1. NEVER use SELECT * - ClickHouse is a columnar database where SELECT * has very poor performance
451+
2. If the existing query uses SELECT *, replace it with specific columns (use core columns marked [CORE] as defaults)
452+
3. ALWAYS use the validateTSQLQuery tool to check your modified query before returning it
453+
4. If validation fails, fix the issues and try again (up to 3 attempts)
454+
5. Use column names exactly as defined in the schema (case-sensitive)
455+
6. For enum columns like status, use the allowed values shown in the schema
456+
7. Always include a LIMIT clause (default to 100 if not specified)
457+
8. Preserve the user's existing query structure and style where possible
458+
9. Only make the changes specifically requested by the user
441459
442460
## Response Format
443461

internal-packages/tsql/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ export {
6464
findColumn,
6565
findTable,
6666
getAllowedUserValues,
67+
// Core column utilities
68+
getCoreColumns,
6769
getExternalValue,
6870
getInternalValue,
6971
getInternalValueFromMapping,

internal-packages/tsql/src/query/schema.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,23 @@ export interface ColumnSchema {
133133
* ```
134134
*/
135135
example?: string;
136+
/**
137+
* Whether this is a core column that should be included in default queries.
138+
*
139+
* Core columns represent the essential information for a table and are suggested
140+
* as alternatives when users attempt to use SELECT * (which has poor performance
141+
* in columnar databases like ClickHouse).
142+
*
143+
* @example
144+
* ```typescript
145+
* {
146+
* name: "run_id",
147+
* type: "String",
148+
* coreColumn: true,
149+
* }
150+
* ```
151+
*/
152+
coreColumn?: boolean;
136153
/**
137154
* Name of the runtime field mapping to use for value translation.
138155
* When set, values are translated using the mapping provided at query time.
@@ -683,6 +700,21 @@ export function getAllTableNames(schema: SchemaRegistry): string[] {
683700
return Object.keys(schema.tables);
684701
}
685702

703+
/**
704+
* Get the names of core columns for a table.
705+
*
706+
* Core columns are the essential columns that should be used when users
707+
* need a default set of columns (e.g., as an alternative to SELECT *).
708+
* Only columns whose `coreColumn` flag is exactly `true` are included; each is reported by its `name` property.
709+
* @param table - The table schema whose `columns` are scanned
710+
* @returns Array of core column names in `Object.values(table.columns)` order, empty if none are marked as core
711+
*/
712+
export function getCoreColumns(table: TableSchema): string[] {
713+
return Object.values(table.columns)
714+
.filter((col) => col.coreColumn === true)
715+
.map((col) => col.name);
716+
}
717+
686718
// ============================================================
687719
// Error Message Sanitization
688720
// ============================================================

internal-packages/tsql/src/query/validator.ts

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import type {
2020
ArithmeticOperation,
2121
} from "./ast.js";
2222
import type { TableSchema, ColumnSchema } from "./schema.js";
23-
import { getAllowedUserValues, isValidUserValue } from "./schema.js";
23+
import { getAllowedUserValues, getCoreColumns, isValidUserValue } from "./schema.js";
2424
import { CompareOperationOp, ArithmeticOperationOp } from "./ast.js";
2525

2626
/**
@@ -37,7 +37,7 @@ export interface ValidationIssue {
3737
/** Severity of the issue */
3838
severity: ValidationSeverity;
3939
/** The type of issue */
40-
type: "unknown_column" | "unknown_table" | "invalid_enum_value";
40+
type: "unknown_column" | "unknown_table" | "invalid_enum_value" | "select_star";
4141
/** Optional: the column name that caused the issue */
4242
columnName?: string;
4343
/** Optional: the table name that caused the issue */
@@ -46,6 +46,8 @@ export interface ValidationIssue {
4646
invalidValue?: string;
4747
/** Optional: list of allowed values */
4848
allowedValues?: string[];
49+
/** Optional: suggested columns to use instead (for select_star) */
50+
suggestedColumns?: string[];
4951
}
5052

5153
/**
@@ -170,6 +172,19 @@ function validateSelectSetQuery(node: SelectSetQuery, context: ValidationContext
170172
}
171173
}
172174

175+
/**
176+
* Check whether an expression is a wildcard field: a chain that is `*` alone, or a two-part chain ending in `*` (e.g. `table.*`). Any expression whose `expression_type` is not "field" yields false.
177+
*/
178+
function isSelectStar(expr: Expression): boolean {
179+
if ((expr as Field).expression_type !== "field") return false;
180+
const field = expr as Field;
181+
// Matches SELECT * (one-part chain) or SELECT table.* (two-part chain); longer chains ending in "*" are not matched
182+
return (
183+
(field.chain.length === 1 && field.chain[0] === "*") ||
184+
(field.chain.length === 2 && field.chain[1] === "*")
185+
);
186+
}
187+
173188
/**
174189
* Validate a SELECT query
175190
*/
@@ -183,6 +198,34 @@ function validateSelectQuery(node: SelectQuery, context: ValidationContext): voi
183198
extractTablesFromJoin(node.select_from, context);
184199
}
185200

201+
// Check for SELECT * and emit warning
202+
if (node.select) {
203+
const hasSelectStar = node.select.some(isSelectStar);
204+
if (hasSelectStar) {
205+
// Collect core columns from all tables in context
206+
const coreColumns: string[] = [];
207+
for (const tableSchema of context.tables.values()) {
208+
const tableCoreColumns = getCoreColumns(tableSchema);
209+
coreColumns.push(...tableCoreColumns);
210+
}
211+
212+
// Build suggestion message
213+
let suggestionMsg = "SELECT * will be far slower than selecting specific columns. ";
214+
if (coreColumns.length > 0) {
215+
suggestionMsg += `Consider selecting specific columns, e.g.: ${coreColumns.join(", ")}`;
216+
} else {
217+
suggestionMsg += "Consider selecting only the columns you need.";
218+
}
219+
220+
context.issues.push({
221+
message: suggestionMsg,
222+
severity: "warning",
223+
type: "select_star",
224+
suggestedColumns: coreColumns.length > 0 ? coreColumns : undefined,
225+
});
226+
}
227+
}
228+
186229
// Extract column aliases from SELECT clause before validation
187230
// This allows ORDER BY and HAVING to reference aliased columns
188231
if (node.select) {

0 commit comments

Comments
 (0)