diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..23c3d03 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,26 @@ +# AGENTS.md + +このリポジトリで作業するエージェント向けの最小ガイドです。 + +## 目的 +- 最終目標: SQL Formatter の実装 +- 現状: SQL パーサーの実装とテストの整備 + +## 主要ファイル +- `spec/apache-calcite-Parser.js`: lexer + parser 本体 +- `spec/apache-calcite-Parser-test.js`: 回帰/異常系テスト +- `spec/apache-calcite-Parser.md`: 仕様(EBNFベース) +- `index-parser.html`: SQL → AST 出力の簡易デモ + +## 作業の基本方針 +- `.md` と `.js` の production 名は一致させる +- 変更後は `node spec/apache-calcite-Parser-test.js` を実行 +- 破壊的変更は `TODO.md` に記録 + +## 進捗の記録 +- 直近のタスクは `TODO.md` を参照 + +## 実行コマンド +```bash +node spec/apache-calcite-Parser-test.js +``` diff --git a/AST.md b/AST.md new file mode 100644 index 0000000..ae5193a --- /dev/null +++ b/AST.md @@ -0,0 +1,142 @@ +# AST 仕様(暫定) + +このドキュメントは、`spec/apache-calcite-Parser.js` が出力する AST の最小仕様をまとめたものです。 +現状は実装の読みやすさ優先の軽量ノード構成です。 + +## 共通ルール +- すべてのノードは `type` フィールドを持つ +- 省略可能要素は `null` または未設定 +- リストは配列で表現 + +## ルート +- `SqlStmtList` + - `statements`: ステートメント配列 +- `SqlStmtEof` + - `stmt` +- `SqlExpressionEof` + - `expr` + +## クエリ +- `SqlSelect` + - `hints`: `TableHints | null` + - `stream`: `boolean` + - `setQuantifier`: `"ALL" | "DISTINCT" | null` + - `selectItems`: `AddSelectItem[]` + - `from`: `FromClause | null` + - `where`: `Where | null` + - `groupBy`: `GroupBy | null` + - `having`: `Having | null` + - `window`: `Window | null` + - `qualify`: `Qualify | null` +- `QueryOrExpr` + - `withList`, `leaf`, `setOps` +- `OrderedQueryOrExpr` + - `query`, `orderByLimitOpt` + +## 句 +- `FromClause` + - `first`: `TableRef` + - `joins`: (`JoinTable` | `CommaJoin` | `ApplyJoin`)[] +- `JoinTable` + - `natural`: `boolean` + - `joinType`: `string` (e.g. 
`"JOIN"`, `"LEFT JOIN"`) + - `table`: `TableRef` + - `condition`: `On | Using | null` +- `CommaJoin` + - `table` +- `Where` + - `expr` +- `GroupBy` + - `set`: `"DISTINCT" | "ALL" | null` + - `list`: `GroupingElementList` +- `Having` + - `expr` +- `Window` + - `items`: `AddWindowSpec[]` +- `Qualify` + - `expr` +- `OrderBy` + - `list`: `OrderItemList` +- `LimitClause` + - `value`, `offset` +- `OffsetClause` + - `value`, `rows` +- `FetchClause` + - `mode`, `value`, `rows` + +## テーブル参照 +- `TableRef` + - `base`, `pivot`, `unpivot`, `matchRecognize`, `alias`, `columns`, `tablesample` +- `TableName` + - `name`: `CompoundTableIdentifier` +- `Subquery` + - `query`: `OrderedQueryOrExpr` +- `Unnest` + - `items`: `Expression[]` + - `withOrdinality`: `boolean` +- `TableFunctionCall` + - `call`: `NamedRoutineCall` +- `Pivot` + - `aggs`: `AddPivotAgg[]` + - `axis`: `SimpleIdentifierOrList` + - `values`: `AddPivotValue[]` +- `Unpivot` + - `nulls`: `"INCLUDE" | "EXCLUDE" | null` + - `columns`: `SimpleIdentifierOrList` + - `axis`: `SimpleIdentifierOrList` + - `values`: `AddUnpivotValue[]` +- `MatchRecognize` + - `partitionBy`, `orderBy`, `measures`, `rowsPerMatch`, `afterMatchSkip` + - `pattern`, `anchorStart`, `anchorEnd`, `within`, `subsets`, `define` + +## 式 +- `Expression2b`(内部表現) + - `prefixes`, `base`, `extensions` +- `BinaryExpression` + - `operator`, `left`, `right` +- `Literal` + - `value` +- `StringLiteral` + - `value` +- `NumericLiteral` + - `value` +- `Identifier` + - `name` +- `CompoundIdentifier` + - `parts` + +## 関数 +- `NamedFunctionCall` + - `name`, `args`, `orderBy`, `withinGroup`, `nullTreatment`, `filter`, `over` +- `BuiltinFunctionCall` + - `kind` ごとにフィールドが異なる(各関数ノード参照) +- `MatchRecognizeFunctionCall` + +## DML/DDL +- `SqlInsert` / `SqlDelete` / `SqlUpdate` / `SqlMerge` +- `SqlSetOption` / `SqlAlter` / `SqlExplain` / `SqlDescribe` + +## 例 +```json +{ + "type": "SqlStmtList", + "statements": [ + { + "type": "SqlSelect", + "selectItems": [ + { "type": 
"AddSelectItem", "expr": { "type": "SelectExpression", "star": true } } + ], + "from": { + "type": "FromClause", + "first": { "type": "TableRef", "base": { "type": "TableName", "name": { "type": "CompoundTableIdentifier" } } }, + "joins": [] + } + } + ] +} +``` + +## TODO +- `Expression*` 系の正規化 +- `BuiltinFunctionCall` の各関数ノードの仕様を追加 +- テストで AST 形を固定 diff --git a/FORMATTER_RULES.md b/FORMATTER_RULES.md new file mode 100644 index 0000000..415b1d2 --- /dev/null +++ b/FORMATTER_RULES.md @@ -0,0 +1,57 @@ +# SQL Formatter ルール(暫定) + +本ドキュメントは SQL Formatter の出力スタイルを定義するルール表です。 + +## 基本スタイル +- インデント: **4 スペース** +- キーワード: **大文字 (UPPER)** +- 末尾セミコロン: **なし** +- カンマ位置: **先頭** + +## SELECT 句 +- カラムは **1 行 1 カラム** +- 例: + ```sql + SELECT + col_a + , col_b + , SUM(col_c) + FROM table_name + ``` + +## 主要句の改行方針 +- 以下の句は **必ず改行** + - `FROM` + - `JOIN` + - `WHERE` + - `GROUP BY` + - `HAVING` + - `ORDER BY` + +## JOIN の整形 +- JOIN 句は 1 行 1 句 +- `ON` / `USING` は次行にインデント + +例: +```sql +FROM table_a a +JOIN table_b b + ON a.id = b.id +``` + +## WHERE / GROUP BY / HAVING / ORDER BY +- 各句は 1 行開始 +- `AND` / `OR` は **次行** に出す + +例: +```sql +WHERE a = 1 + AND b = 2 +``` + +## TODO +- サブクエリ・括弧のインデント方針 +- 関数引数の改行ルール +- CASE 式の整形ルール +- SETOP(UNION/INTERSECT/EXCEPT)の改行規則 +- WINDOW 句/OVER 句の改行規則 diff --git a/README.md b/README.md new file mode 100644 index 0000000..dece042 --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +# SQL Formatter (WIP) + +このリポジトリは、最終的に **SQL Formatter** を作るためのプロジェクトです。 +現在は前段として **SQL パーサー** を実装しています。 + +![Playground screenshot](screenshot.png) + +## 目的 +- 最終目標: SQL を整形するフォーマッタの実装 +- 現状: SQL パーサーを自作し、AST を生成する段階 + +## 現在の構成 +- `spec/apache-calcite-Parser.js`: パーサー本体(lexer + parser) +- `spec/apache-calcite-Parser-test.js`: 回帰/異常系テスト +- `spec/apache-calcite-Parser.md`: 仕様(EBNFベース) +- `spec/apache-calcite-Parser.jj`: 参照文法 +- `AST.md`: AST 仕様(暫定) +- `FORMATTER_RULES.md`: SQL Formatter の出力ルール +- `index-parser.html`: SQL → AST 出力の簡易デモ +- `index.html`: SQL Formatter の UI(WIP) + +## 
使い方(開発時) +テストを実行してパーサーの動作確認ができます。 + +```bash +node spec/apache-calcite-Parser-test.js +``` + +## 直近の作業予定 +- **index-parser.html**: SQL → AST を出力する簡易デモ(実装済み) +- **index.html**: 将来的に SQL Formatter の UI を実装予定 +- AST 仕様の整理と固定化 +- 構文の厳密化(句の順序/排他の追加チェック) +- 最終的に formatter へ接続 + +## 仕様情報の流れ(開発過程) +- `spec/apache-calcite-Parser.jj` の文法を元に `spec/apache-calcite-Parser.md`(EBNF仕様)を作成 +- `spec/apache-calcite-Parser.md` を元に `spec/apache-calcite-Parser.js`(実装)を生成 + +## デモ機能 +- AST の JSON 出力 +- トークン一覧の表示 +- エラートークンのハイライト表示 + +## ライセンス +`LICENSE` を参照してください。 diff --git a/TODO.md b/TODO.md index 5de8024..860d39c 100644 --- a/TODO.md +++ b/TODO.md @@ -4,22 +4,27 @@ - Stage 1: lexer + 全 production skeleton 済み。 - 入口/Select/From/Join/式/関数/型/JSON/日付系の主要骨格は実装済み。 - md⇔js の production 名・順序は一致済み(ヘルパー除外で一致)。 +- 回帰テスト/異常系テストを追加済み(全 187 件通過)。 +- lexer 実用化(コメント/文字列エスケープ/指数/引用識別子/Unicode/BigQuery/ヒント/数値種別/Unicodeエスケープ検証)対応済み。 +- 構文厳密化(FROM 必須/HAVING 制約/JOIN 条件/フレーム条件/ORDER+FETCH など)対応済み。 ## 残り作業(優先順の目安・更新) -1. SetOp/Query 周り - - AddSetOpQuery / BinaryQueryOperator / AddSetOpQueryOrExpr / Query / SqlQueryEof - - PartitionedQueryOrQueryOrExpr / OrderByOfSetSemanticsTable -2. テーブル参照の拡張 - - Snapshot / Tablesample / Pivot / Unpivot / MatchRecognize / TableOverOpt / Over / ExtendedTableRef -3. LIMIT/OFFSET/FETCH と ORDER BY 制約 - - LimitClause / OffsetClause / FetchClause -4. Add* 系の実装(実用上の肝) - - AddSelectItem / AddOrderItem / AddGroupingElement / AddWindowSpec / AddWithItem など -5. DDL/DML の細部 - - SqlSetOption/SqlAlter/SqlExplain などの分岐網羅(現状は最小実装) -6. 低優先ユーティリティ - - ReservedFunctionName / NonReserved* / CollectionsTypeName / CollateClause など -7. 厳格な突合(最終) +1. AST 仕様の整理 + - ノード型の命名とフィールドを最終定義し、テストで固定化 +2. 構文厳密化の追加候補 + - 句の出現順序/必須/排他の追加チェック(ORDER/LIMIT/OFFSET/FETCH/QUALIFY など) + - 省略可能要素の優先順位を整理(誤って別名に吸われる余地の洗い出し) +3. テスト拡張(必要なら) + - SETOP/ORDER/LIMIT の境界・エラー例を追加 +4. 
仕様(.md)と実装(.js)の差分反映 + - TableRef 修飾子(TableHints/ExtendTable/TableOverOpt/Snapshot)の対応を整理 ✅ + - ASOF JOIN の MATCH_CONDITION を仕様に合わせて反映 ✅ + - lexer の識別子/文字列/数値トークン差分を解消 ✅ + - HYPHENATED/BRACKET/BIG_QUERY 系識別子 ✅ + - PREFIXED/UNICODE/BINARY/UESCAPE/連結文字列 ✅ + - DECIMAL/APPROX 数値リテラル ✅ + - TableHints がコメントとして破棄されないよう lexer と整合 ✅ +5. 厳格な突合(最終) - apache-calcite-Parser.md を正とし、apache-calcite-Parser.js の整合を最終チェック。 - 手順(最小): 1) .md から production 名一覧を抽出(順序付き) @@ -33,7 +38,7 @@ - 差分一覧(テキスト)と、反映後の .js ## 実装方針メモ -- lexer は最小: IDENT/NUMBER/STRING/SYMBOL/EOF のみ。予約語判定は isKeyword で大文字比較。 +- lexer は実用化済み(コメント/エスケープ/指数/引用識別子対応)。予約語判定は isKeyword で大文字比較。 - AST 形式は自由だが、既存の軽量ノード(type + 必要最小限のフィールド)で統一。 - 失敗時は notImplemented を残して段階的に埋める。 diff --git a/index-parser.html b/index-parser.html new file mode 100644 index 0000000..64fceb3 --- /dev/null +++ b/index-parser.html @@ -0,0 +1,190 @@ + + + + + + SQL Parser Playground + + + + + + + +
+
+
+

SQL Parser Playground

+

SQL を入力して AST をテキストとして出力します。

+
+ +
+
+
+

SQL Input

+ +
+ +

+          
+ + + +
+
+ +
+
+

AST Output

+ idle +
+

+        
+
+ +
+
+

Tokens

+ 0 tokens +
+

+      
+ + +
+
+ + + + + diff --git a/index.html b/index.html new file mode 100644 index 0000000..de51066 --- /dev/null +++ b/index.html @@ -0,0 +1,593 @@ + + + + + + SQL Formatter (WIP) + + + + + + + +
+
+
+

SQL Formatter (WIP)

+

+ ここは SQL Formatter の UI を実装予定のページです。現在はプレースホルダーです。 +

+
+ +
+
+
+

SQL Input

+
+ + +
+
+ +
+ +
+
+

Formatted Output

+
+ + idle +
+
+
// formatted output will appear here
+
+
+ + +
+
+ + + + diff --git a/screenshot.png b/screenshot.png new file mode 100644 index 0000000..361df77 Binary files /dev/null and b/screenshot.png differ diff --git a/spec/apache-calcite-Parser-test.js b/spec/apache-calcite-Parser-test.js index 8bf336f..590155e 100644 --- a/spec/apache-calcite-Parser-test.js +++ b/spec/apache-calcite-Parser-test.js @@ -2,24 +2,255 @@ const { CalciteLexer, CalciteParser } = require('./apache-calcite-Parser'); +const stmt = (name, sql) => ({ name, sql, fn: 'SqlStmtList' }); +const selectList = (items) => `SELECT ${items.join(', ')}`; +const fromTable = (table) => `FROM ${table}`; +const joinSubquery = (left, right, onExpr) => + `SELECT ${left}.id FROM ${left} JOIN (SELECT id FROM ${right}) AS ${right} ON ${onExpr}`; + const cases = [ + // basic { name: 'select-basic', sql: 'SELECT 1', fn: 'SqlStmtList' }, { name: 'select-with-from', sql: 'SELECT a FROM t', fn: 'SqlStmtList' }, { name: 'select-where', sql: 'SELECT a FROM t WHERE b = 1', fn: 'SqlStmtList' }, { name: 'select-group-by', sql: 'SELECT a, COUNT(*) FROM t GROUP BY a', fn: 'SqlStmtList' }, + // group by variants + { name: 'group-by-distinct', sql: 'SELECT a FROM t GROUP BY DISTINCT a', fn: 'SqlStmtList' }, + { name: 'group-by-all', sql: 'SELECT a FROM t GROUP BY ALL a', fn: 'SqlStmtList' }, + { name: 'group-by-grouping-sets', sql: 'SELECT a, b FROM t GROUP BY GROUPING SETS (a, b)', fn: 'SqlStmtList' }, + { name: 'group-by-rollup', sql: 'SELECT a, b FROM t GROUP BY ROLLUP (a, b)', fn: 'SqlStmtList' }, + { name: 'group-by-cube', sql: 'SELECT a, b FROM t GROUP BY CUBE (a, b)', fn: 'SqlStmtList' }, { name: 'select-window', sql: 'SELECT a FROM t WINDOW w AS (PARTITION BY a)', fn: 'SqlStmtList' }, - { name: 'cte', sql: 'WITH t AS (SELECT 1) SELECT * FROM t', fn: 'SqlStmtList' }, + // CTE / DML core + { name: 'with-cte', sql: 'WITH t AS (SELECT 1) SELECT * FROM t', fn: 'SqlStmtList' }, { name: 'insert-values', sql: 'INSERT INTO t(a) VALUES (1)', fn: 'SqlStmtList' }, - { name: 'update', 
sql: 'UPDATE t SET a = 1 WHERE b = 2', fn: 'SqlStmtList' }, - { name: 'delete', sql: 'DELETE FROM t WHERE a IN (1,2,3)', fn: 'SqlStmtList' }, - { name: 'merge', sql: 'MERGE INTO t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1', fn: 'SqlStmtList' }, - { name: 'json-value', sql: "SELECT JSON_VALUE(doc, '$.a' RETURNING VARCHAR) FROM t", fn: 'SqlStmtList' }, - { name: 'date-diff', sql: 'SELECT DATE_DIFF(d1, d2, DAY) FROM t', fn: 'SqlStmtList' }, + { name: 'update-basic', sql: 'UPDATE t SET a = 1 WHERE b = 2', fn: 'SqlStmtList' }, + { name: 'delete-basic', sql: 'DELETE FROM t WHERE a IN (1,2,3)', fn: 'SqlStmtList' }, + { name: 'merge-basic', sql: 'MERGE INTO t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1', fn: 'SqlStmtList' }, + // functions / expressions + { name: 'select-json-value', sql: "SELECT JSON_VALUE(doc, '$.a' RETURNING VARCHAR) FROM t", fn: 'SqlStmtList' }, + { name: 'select-date-diff', sql: 'SELECT DATE_DIFF(d1, d2, DAY) FROM t', fn: 'SqlStmtList' }, + // MATCH_RECOGNIZE / PIVOT / UNPIVOT { name: 'match-recognize', sql: 'SELECT * FROM t MATCH_RECOGNIZE (PATTERN (A) DEFINE A AS a > 0)', fn: 'SqlStmtList' }, + { name: 'match-recognize-partition-order', sql: 'SELECT * FROM t MATCH_RECOGNIZE (PARTITION BY a ORDER BY b MEASURES CLASSIFIER() AS c PATTERN (A B) DEFINE A AS a > 0, B AS b > 0)', fn: 'SqlStmtList' }, + { name: 'match-recognize-after-skip', sql: 'SELECT * FROM t MATCH_RECOGNIZE (AFTER MATCH SKIP TO NEXT ROW PATTERN (A) DEFINE A AS a > 0)', fn: 'SqlStmtList' }, { name: 'pivot', sql: 'SELECT * FROM t PIVOT (SUM(x) FOR y IN (1))', fn: 'SqlStmtList' }, + { name: 'pivot-multi-values', sql: 'SELECT * FROM t PIVOT (SUM(x) FOR y IN (1 AS one, 2 AS two))', fn: 'SqlStmtList' }, + { name: 'pivot-multi-aggs', sql: 'SELECT * FROM t PIVOT (SUM(x) AS sx, COUNT(*) FOR y IN (1))', fn: 'SqlStmtList' }, { name: 'unpivot', sql: 'SELECT * FROM t UNPIVOT (v FOR c IN (a))', fn: 'SqlStmtList' }, + // window clause + { name: 'window-order-frame', sql: 
'SELECT a FROM t WINDOW w AS (PARTITION BY a ORDER BY b ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)', fn: 'SqlStmtList' }, + { name: 'window-range', sql: 'SELECT a FROM t WINDOW w AS (ORDER BY a RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)', fn: 'SqlStmtList' }, + { name: 'window-allow-partial', sql: 'SELECT a FROM t WINDOW w AS (ORDER BY b ALLOW PARTIAL)', fn: 'SqlStmtList' }, + // lexer coverage + { name: 'lexer-line-comment', sql: 'SELECT 1 -- trailing comment', fn: 'SqlStmtList' }, + { name: 'lexer-block-comment', sql: 'SELECT /* block */ 1', fn: 'SqlStmtList' }, + { name: 'lexer-quoted-ident-double', sql: 'SELECT "Select" FROM "From"', fn: 'SqlStmtList' }, + { name: 'lexer-quoted-ident-backtick', sql: 'SELECT `a` FROM `b`', fn: 'SqlStmtList' }, + { name: 'lexer-quoted-ident-bracket', sql: 'SELECT [Select] FROM [From]', fn: 'SqlStmtList' }, + { name: 'lexer-quoted-ident-bracket-wide', sql: 'SELECT [a b,c] FROM t', fn: 'SqlStmtList' }, + { name: 'lexer-hyphenated-ident', sql: 'SELECT * FROM a-b', fn: 'SqlStmtList' }, + { name: 'lexer-unicode-ident', sql: 'SELECT ユーザーID, レシピID FROM recipes', fn: 'SqlStmtList' }, + { name: 'lexer-bigquery-double-quoted-string', sql: 'SELECT "a\\\"b" FROM t', fn: 'SqlStmtList' }, + { name: 'lexer-unicode-quoted-ident-uescape', sql: 'SELECT * FROM U&"A\\\\0042" UESCAPE \'\\\'', fn: 'SqlStmtList' }, + { name: 'lexer-string-escape', sql: "SELECT 'a''b' FROM t", fn: 'SqlStmtList' }, + { name: 'lexer-string-prefixed-n', sql: "SELECT N'abc' FROM t", fn: 'SqlStmtList' }, + { name: 'lexer-string-prefixed-e', sql: "SELECT E'\\n' FROM t", fn: 'SqlStmtList' }, + { name: 'lexer-string-prefixed-x', sql: "SELECT X'0A' FROM t", fn: 'SqlStmtList' }, + { name: 'lexer-string-unicode', sql: "SELECT U&'d\\\\0061' UESCAPE '\\' FROM t", fn: 'SqlStmtList' }, + { name: 'lexer-number-exponent', sql: 'SELECT 1.2e-3 FROM t', fn: 'SqlStmtList' }, + { name: 'lexer-number-leading-dot', sql: 'SELECT .5 FROM t', fn: 'SqlStmtList' }, + { name: 
'lexer-number-approx', sql: 'SELECT 1E+10 FROM t', fn: 'SqlStmtList' }, + { name: 'lexer-number-decimal-dot', sql: 'SELECT 1. FROM t', fn: 'SqlStmtList' }, + // DDL / DCL + { name: 'ddl-set', sql: 'SET foo = 1', fn: 'SqlStmtList' }, + { name: 'ddl-reset', sql: 'RESET foo', fn: 'SqlStmtList' }, + { name: 'ddl-reset-all', sql: 'RESET ALL', fn: 'SqlStmtList' }, + { name: 'ddl-alter-system-set', sql: 'ALTER SYSTEM SET foo = ON', fn: 'SqlStmtList' }, + { name: 'ddl-alter-session-reset', sql: 'ALTER SESSION RESET foo', fn: 'SqlStmtList' }, + { name: 'ddl-explain', sql: 'EXPLAIN PLAN INCLUDING ATTRIBUTES WITH TYPE AS JSON FOR SELECT 1', fn: 'SqlStmtList' }, + { name: 'ddl-describe-table', sql: 'DESCRIBE TABLE t', fn: 'SqlStmtList' }, + { name: 'ddl-describe-database', sql: 'DESCRIBE DATABASE db', fn: 'SqlStmtList' }, + { name: 'ddl-describe-statement', sql: 'DESCRIBE STATEMENT SELECT 1', fn: 'SqlStmtList' }, + { name: 'ddl-call', sql: 'CALL foo(1)', fn: 'SqlStmtList' }, + // joins / set ops / order & limit + { name: 'select-join-inner', sql: 'SELECT * FROM a JOIN b ON a.id = b.id', fn: 'SqlStmtList' }, + { name: 'select-join-left', sql: 'SELECT * FROM a LEFT JOIN b ON a.id = b.id', fn: 'SqlStmtList' }, + { name: 'select-join-using', sql: 'SELECT * FROM a JOIN b USING (id)', fn: 'SqlStmtList' }, + { name: 'select-join-natural', sql: 'SELECT * FROM a NATURAL JOIN b', fn: 'SqlStmtList' }, + { name: 'select-join-cross', sql: 'SELECT * FROM a CROSS JOIN b', fn: 'SqlStmtList' }, + { name: 'select-join-comma', sql: 'SELECT * FROM a, b', fn: 'SqlStmtList' }, + { name: 'select-join-asof-match-condition', sql: 'SELECT * FROM a ASOF JOIN b MATCH_CONDITION a.ts <= b.ts ON a.id = b.id', fn: 'SqlStmtList' }, + // table ref variants + { name: 'from-lateral-subquery', sql: 'SELECT * FROM LATERAL (SELECT 1) AS x', fn: 'SqlStmtList' }, + { name: 'from-unnest', sql: 'SELECT * FROM UNNEST(arr)', fn: 'SqlStmtList' }, + { name: 'from-unnest-ordinality', sql: 'SELECT * FROM UNNEST(arr) WITH 
ORDINALITY', fn: 'SqlStmtList' }, + { name: 'from-table-function', sql: 'SELECT * FROM TABLE(foo(1))', fn: 'SqlStmtList' }, + { name: 'from-tablesample', sql: 'SELECT * FROM t TABLESAMPLE SYSTEM (10) REPEATABLE (1)', fn: 'SqlStmtList' }, + { name: 'from-snapshot', sql: "SELECT * FROM t FOR SYSTEM_TIME AS OF TIMESTAMP '2020-01-01 00:00:00'", fn: 'SqlStmtList' }, + stmt('join-subquery', joinSubquery('a', 'b', 'a.id = b.id')), + { name: 'select-setop-union', sql: 'SELECT a FROM t UNION SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'select-setop-intersect', sql: 'SELECT a FROM t INTERSECT SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'select-setop-except', sql: 'SELECT a FROM t EXCEPT SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'select-order-limit', sql: 'SELECT a FROM t ORDER BY a LIMIT 10', fn: 'SqlStmtList' }, + { name: 'select-offset-limit', sql: 'SELECT a FROM t ORDER BY a OFFSET 5 LIMIT 10', fn: 'SqlStmtList' }, + { name: 'select-fetch', sql: 'SELECT a FROM t ORDER BY a FETCH FIRST 3 ROWS ONLY', fn: 'SqlStmtList' }, + { name: 'order-by-nulls-first', sql: 'SELECT a FROM t ORDER BY a NULLS FIRST', fn: 'SqlStmtList' }, + { name: 'order-by-nulls-last', sql: 'SELECT a FROM t ORDER BY a DESC NULLS LAST', fn: 'SqlStmtList' }, + { name: 'select-limit-all', sql: 'SELECT a FROM t LIMIT ALL', fn: 'SqlStmtList' }, + { name: 'select-limit-offset-comma', sql: 'SELECT a FROM t LIMIT 3, 10', fn: 'SqlStmtList' }, + { name: 'select-offset-rows', sql: 'SELECT a FROM t ORDER BY a OFFSET 5 ROWS', fn: 'SqlStmtList' }, + { name: 'select-fetch-next', sql: 'SELECT a FROM t ORDER BY a FETCH NEXT 3 ROWS ONLY', fn: 'SqlStmtList' }, + { name: 'select-values', sql: 'VALUES (1), (2)', fn: 'SqlStmtList' }, + { name: 'select-table', sql: 'TABLE t', fn: 'SqlStmtList' }, + // japanese identifiers + stmt( + 'select-japanese-idents', + `${selectList(['顧客.会員ID', 'COUNT(注文ID) AS 注文数'])} ${fromTable('顧客')} GROUP BY 会員ID` + ), + // expression / function variants + { name: 'select-case', sql: 
'SELECT CASE WHEN a > 0 THEN 1 ELSE 0 END FROM t', fn: 'SqlStmtList' }, + { name: 'select-cast', sql: 'SELECT CAST(a AS INTEGER) FROM t', fn: 'SqlStmtList' }, + { name: 'select-coalesce', sql: 'SELECT COALESCE(a, 0) FROM t', fn: 'SqlStmtList' }, + { name: 'select-json-exists', sql: "SELECT JSON_EXISTS(doc, '$.a') FROM t", fn: 'SqlStmtList' }, + { name: 'select-interval', sql: "SELECT INTERVAL '1' DAY FROM t", fn: 'SqlStmtList' }, + // window functions + { name: 'select-window-over-basic', sql: 'SELECT SUM(a) OVER (PARTITION BY b ORDER BY c) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-over-frame-rows', sql: 'SELECT SUM(a) OVER (ORDER BY c ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-over-frame-range', sql: 'SELECT SUM(a) OVER (ORDER BY c RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-qualify', sql: 'SELECT a FROM t QUALIFY ROW_NUMBER() OVER (ORDER BY a) = 1', fn: 'SqlStmtList' }, + { name: 'select-window-within-group', sql: 'SELECT LISTAGG(a) WITHIN GROUP (ORDER BY a) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-null-treatment', sql: 'SELECT LAST_VALUE(a) RESPECT NULLS OVER (ORDER BY a) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-ignore-nulls', sql: 'SELECT FIRST_VALUE(a) IGNORE NULLS OVER (ORDER BY a) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-filter', sql: 'SELECT COUNT(*) FILTER (WHERE a > 0) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-ordered-set', sql: 'SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY a) FROM t', fn: 'SqlStmtList' }, + { name: 'select-window-over-name', sql: 'SELECT SUM(a) OVER w FROM t WINDOW w AS (PARTITION BY b)', fn: 'SqlStmtList' }, + // JSON / text / datetime functions + { name: 'select-json-query', sql: "SELECT JSON_QUERY(doc, '$.a') FROM t", fn: 'SqlStmtList' }, + { name: 'select-json-object', sql: "SELECT JSON_OBJECT('a' VALUE 1) FROM t", fn: 'SqlStmtList' }, + { name: 
'select-json-array', sql: "SELECT JSON_ARRAY(1, 2, 3) FROM t", fn: 'SqlStmtList' }, + { name: 'select-json-objectagg', sql: "SELECT JSON_OBJECTAGG(k VALUE v) FROM t", fn: 'SqlStmtList' }, + { name: 'select-json-arrayagg', sql: "SELECT JSON_ARRAYAGG(a) FROM t", fn: 'SqlStmtList' }, + { name: 'select-trim', sql: "SELECT TRIM(BOTH 'x' FROM a) FROM t", fn: 'SqlStmtList' }, + { name: 'select-substring', sql: 'SELECT SUBSTRING(a FROM 2 FOR 3) FROM t', fn: 'SqlStmtList' }, + { name: 'select-position', sql: "SELECT POSITION('a' IN b) FROM t", fn: 'SqlStmtList' }, + { name: 'select-translate', sql: "SELECT TRANSLATE(a, 'abc', 'xyz') FROM t", fn: 'SqlStmtList' }, + { name: 'select-overlay', sql: "SELECT OVERLAY(a PLACING 'x' FROM 2) FROM t", fn: 'SqlStmtList' }, + { name: 'select-date-trunc', sql: "SELECT DATE_TRUNC('DAY', d) FROM t", fn: 'SqlStmtList' }, + { name: 'select-timestamp-diff', sql: 'SELECT TIMESTAMP_DIFF(t1, t2, DAY) FROM t', fn: 'SqlStmtList' }, + { name: 'select-time-trunc', sql: "SELECT TIME_TRUNC(t, HOUR) FROM t", fn: 'SqlStmtList' }, + { name: 'select-convert', sql: 'SELECT CONVERT(a, INTEGER) FROM t', fn: 'SqlStmtList' }, + { name: 'select-extract', sql: 'SELECT EXTRACT(YEAR FROM d) FROM t', fn: 'SqlStmtList' }, + { name: 'select-contains-substr', sql: "SELECT CONTAINS_SUBSTR(a, 'x') FROM t", fn: 'SqlStmtList' }, + { name: 'select-contains-substr-with-scope', sql: "SELECT CONTAINS_SUBSTR(a, 'x', JSON_SCOPE := 2) FROM t", fn: 'SqlStmtList' }, + // typed literals / collections / cast + { name: 'select-typed-date', sql: "SELECT DATE '2020-01-01' FROM t", fn: 'SqlStmtList' }, + { name: 'select-typed-time', sql: "SELECT TIME '12:34:56' FROM t", fn: 'SqlStmtList' }, + { name: 'select-typed-timestamp', sql: "SELECT TIMESTAMP '2020-01-01 12:34:56' FROM t", fn: 'SqlStmtList' }, + { name: 'select-uuid', sql: "SELECT UUID '123e4567-e89b-12d3-a456-426614174000' FROM t", fn: 'SqlStmtList' }, + { name: 'select-interval-year', sql: "SELECT INTERVAL '2' YEAR FROM t", fn: 
'SqlStmtList' }, + { name: 'select-interval-day-to-second', sql: "SELECT INTERVAL '1 02:03:04' DAY TO SECOND FROM t", fn: 'SqlStmtList' }, + { name: 'select-row-type', sql: 'SELECT ROW(1, 2) FROM t', fn: 'SqlStmtList' }, + { name: 'select-array-constructor', sql: 'SELECT ARRAY[1, 2, 3] FROM t', fn: 'SqlStmtList' }, + { name: 'select-map-constructor', sql: 'SELECT MAP[1, 2] FROM t', fn: 'SqlStmtList' }, + { name: 'select-multiset', sql: 'SELECT MULTISET[1, 2] FROM t', fn: 'SqlStmtList' }, + { name: 'select-cast-typed', sql: 'SELECT CAST(a AS DECIMAL(10,2)) FROM t', fn: 'SqlStmtList' }, + { name: 'select-cast-with-timezone', sql: 'SELECT CAST(a AS TIMESTAMP WITH TIME ZONE) FROM t', fn: 'SqlStmtList' }, + { name: 'select-period', sql: "SELECT PERIOD (DATE '2020-01-01', DATE '2020-12-31') FROM t", fn: 'SqlStmtList' }, + // DML variants + { name: 'insert-select', sql: 'INSERT INTO t(a) SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'insert-default-values', sql: 'INSERT INTO t VALUES (DEFAULT)', fn: 'SqlStmtList' }, + { name: 'update-multi-set', sql: 'UPDATE t SET a = 1, b = 2 WHERE c = 3', fn: 'SqlStmtList' }, + { name: 'delete-simple', sql: 'DELETE FROM t', fn: 'SqlStmtList' }, + { name: 'merge-update-insert', sql: 'MERGE INTO t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1 WHEN NOT MATCHED THEN INSERT (id) VALUES (u.id)', fn: 'SqlStmtList' }, + { name: 'explain-without-impl', sql: 'EXPLAIN PLAN WITHOUT IMPLEMENTATION FOR SELECT 1', fn: 'SqlStmtList' }, + { name: 'describe-catalog', sql: 'DESCRIBE CATALOG cat', fn: 'SqlStmtList' }, + { name: 'describe-schema', sql: 'DESCRIBE SCHEMA sch', fn: 'SqlStmtList' }, + { name: 'set-string', sql: "SET foo = 'bar'", fn: 'SqlStmtList' }, + { name: 'set-null', sql: 'SET foo = NULL', fn: 'SqlStmtList' }, + // setop variants + { name: 'setop-union-all', sql: 'SELECT a FROM t UNION ALL SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'setop-union-distinct', sql: 'SELECT a FROM t UNION DISTINCT SELECT a FROM u', fn: 
'SqlStmtList' }, + { name: 'setop-except-all', sql: 'SELECT a FROM t EXCEPT ALL SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'setop-intersect-distinct', sql: 'SELECT a FROM t INTERSECT DISTINCT SELECT a FROM u', fn: 'SqlStmtList' }, + { name: 'setop-chained', sql: 'SELECT a FROM t UNION SELECT a FROM u INTERSECT SELECT a FROM v', fn: 'SqlStmtList' }, + { name: 'setop-order-limit', sql: 'SELECT a FROM t UNION SELECT a FROM u ORDER BY a LIMIT 5', fn: 'SqlStmtList' }, + { name: 'setop-order-fetch', sql: 'SELECT a FROM t UNION SELECT a FROM u ORDER BY a FETCH FIRST 3 ROWS ONLY', fn: 'SqlStmtList' }, + { name: 'setop-order-offset-limit', sql: 'SELECT a FROM t UNION SELECT a FROM u ORDER BY a OFFSET 2 LIMIT 4', fn: 'SqlStmtList' }, + { name: 'setop-table-values', sql: 'VALUES (1) UNION SELECT 2', fn: 'SqlStmtList' }, + { name: 'setop-table-table', sql: 'TABLE t UNION TABLE u', fn: 'SqlStmtList' }, + // DML/DDL extras + { name: 'insert-upsert', sql: 'UPSERT INTO t(a) VALUES (1)', fn: 'SqlStmtList' }, + { name: 'insert-hints', sql: 'INSERT INTO t /*+ hint */ (a) VALUES (1)', fn: 'SqlStmtList' }, + { name: 'select-hints-multi', sql: 'SELECT /*+ hint1, hint2 */ * FROM t', fn: 'SqlStmtList' }, + { name: 'update-with-alias', sql: 'UPDATE t AS x SET a = 1', fn: 'SqlStmtList' }, + { name: 'update-with-extend', sql: 'UPDATE t EXTEND (a INTEGER) SET a = 1', fn: 'SqlStmtList' }, + { name: 'delete-with-alias', sql: 'DELETE FROM t AS x', fn: 'SqlStmtList' }, + { name: 'delete-with-extend', sql: 'DELETE FROM t EXTEND (a INTEGER)', fn: 'SqlStmtList' }, + { name: 'merge-with-extend', sql: 'MERGE INTO t EXTEND (a INTEGER) USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1', fn: 'SqlStmtList' }, + { name: 'explain-including-all', sql: 'EXPLAIN PLAN INCLUDING ALL ATTRIBUTES FOR SELECT 1', fn: 'SqlStmtList' }, + { name: 'explain-as-xml', sql: 'EXPLAIN PLAN AS XML FOR SELECT 1', fn: 'SqlStmtList' }, + { name: 'describe-table-extra', sql: 'DESCRIBE TABLE t EXTENDED', fn: 
'SqlStmtList' }, + { name: 'set-interval', sql: "SET foo = INTERVAL '1' DAY", fn: 'SqlStmtList' }, +]; + +const negativeCases = [ + // clause order / required parts + { name: 'neg-having-without-group', sql: 'SELECT a FROM t HAVING a > 0', fn: 'SqlStmtList' }, + { name: 'neg-natural-join-on', sql: 'SELECT * FROM a NATURAL JOIN b ON a.id = b.id', fn: 'SqlStmtList' }, + { name: 'neg-join-no-condition', sql: 'SELECT * FROM a JOIN b', fn: 'SqlStmtList' }, + { name: 'neg-join-match-condition-nonasof', sql: 'SELECT * FROM a JOIN b MATCH_CONDITION a.ts <= b.ts ON a.id = b.id', fn: 'SqlStmtList' }, + { name: 'neg-window-frame-without-order', sql: 'SELECT a FROM t WINDOW w AS (ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)', fn: 'SqlStmtList' }, + { name: 'neg-fetch-without-order', sql: 'SELECT * FROM t FETCH FIRST 1 ROW ONLY', fn: 'SqlStmtList' }, + { name: 'neg-limit-and-fetch', sql: 'SELECT * FROM t ORDER BY a LIMIT 1 FETCH FIRST 1 ROW ONLY', fn: 'SqlStmtList' }, + { name: 'neg-where-without-from', sql: 'SELECT a WHERE a > 0', fn: 'SqlStmtList' }, + { name: 'neg-offset-without-order', sql: 'SELECT * FROM t OFFSET 1', fn: 'SqlStmtList' }, + { name: 'neg-setop-nonquery-left', sql: '1 UNION SELECT 1', fn: 'SqlStmtList' }, + // syntax shape errors + { name: 'neg-offset-without-order-2', sql: 'SELECT * FROM t OFFSET 1 ROW', fn: 'SqlStmtList' }, + { name: 'neg-join-using-nonlist', sql: 'SELECT * FROM a JOIN b USING id', fn: 'SqlStmtList' }, + { name: 'neg-having-before-group', sql: 'SELECT a FROM t HAVING a > 0 GROUP BY a', fn: 'SqlStmtList' }, + { name: 'neg-window-before-group', sql: 'SELECT a FROM t WINDOW w AS (PARTITION BY a) GROUP BY a', fn: 'SqlStmtList' }, + { name: 'neg-fetch-without-only', sql: 'SELECT * FROM t ORDER BY a FETCH FIRST 1 ROW', fn: 'SqlStmtList' }, + { name: 'neg-limit-comma', sql: 'SELECT * FROM t LIMIT , 10', fn: 'SqlStmtList' }, + { name: 'neg-explain-missing-for', sql: 'EXPLAIN PLAN SELECT 1', fn: 'SqlStmtList' }, + { name: 'neg-describe-missing-target', 
sql: 'DESCRIBE', fn: 'SqlStmtList' }, + { name: 'neg-qualify-without-from', sql: 'SELECT a QUALIFY ROW_NUMBER() OVER (ORDER BY a) = 1', fn: 'SqlStmtList' }, + { name: 'neg-order-without-by', sql: 'SELECT * FROM t ORDER a', fn: 'SqlStmtList' }, + { name: 'neg-group-without-by', sql: 'SELECT a FROM t GROUP a', fn: 'SqlStmtList' }, + { name: 'neg-fetch-invalid-mode', sql: 'SELECT * FROM t ORDER BY a FETCH MIDDLE 1 ROW ONLY', fn: 'SqlStmtList' }, + { name: 'neg-update-missing-set', sql: 'UPDATE t WHERE a = 1', fn: 'SqlStmtList' }, + { name: 'neg-delete-missing-from', sql: 'DELETE t', fn: 'SqlStmtList' }, + { name: 'neg-merge-missing-into', sql: 'MERGE t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1', fn: 'SqlStmtList' }, + { name: 'neg-insert-missing-into', sql: 'INSERT t(a) VALUES (1)', fn: 'SqlStmtList' }, + { name: 'neg-unicode-escape-surrogate', sql: "SELECT U&'\\D800' FROM t", fn: 'SqlStmtList' }, + { name: 'neg-unicode-escape-out-of-range', sql: "SELECT U&'\\+110000' FROM t", fn: 'SqlStmtList' }, ]; +if (require.main === module) { + const samples = [ + "SELECT 1", + "SELECT /*+ index(t) */ a AS x FROM t WHERE a IS NOT DISTINCT FROM b", + "WITH t AS (SELECT 1) SELECT * FROM t", + "EXPLAIN PLAN FOR SELECT 1", + "INSERT INTO t(a) VALUES (1)", + "UPDATE t SET a = 1 WHERE b = 2", + "DELETE FROM t WHERE a IN (1,2,3)", + "MERGE INTO t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1", + "SELECT ARRAY_AGG(x) FROM t", + "SELECT JSON_VALUE(doc, '$.a' RETURNING VARCHAR) FROM t", + "SELECT DATE_DIFF(d1, d2, DAY) FROM t", + ]; + for (const src of samples) { + try { + runCase({ name: 'sample', sql: src, fn: 'SqlStmtList' }); + console.log(`OK: ${src}`); + } catch (e) { + console.error(`NG: ${src} -> ${e.message}`); + } + } +} + function runCase({ name, sql, fn }) { const lexer = new CalciteLexer(sql); const tokens = lexer.tokenize(); @@ -42,9 +273,24 @@ for (const c of cases) { } } +for (const c of negativeCases) { + let ok = false; + try { + runCase(c); + 
} catch (_err) { + ok = true; + } + if (ok) { + console.log(`OK ${c.name} (rejected)`); + } else { + failed++; + console.error(`NG ${c.name}: expected rejection`); + } +} + if (failed > 0) { console.error(`\nFAILED: ${failed}`); process.exit(1); } -console.log(`\nALL OK: ${cases.length}`); +console.log(`\nALL OK: ${cases.length + negativeCases.length}`); diff --git a/spec/apache-calcite-Parser.js b/spec/apache-calcite-Parser.js index 0ea31f7..3c5ff56 100644 --- a/spec/apache-calcite-Parser.js +++ b/spec/apache-calcite-Parser.js @@ -14,53 +14,358 @@ class CalciteLexer { tokenize() { const s = this.input; + const len = s.length; + const skipWhitespace = (p) => { + while (p < len && /\s/.test(s[p])) p++; + return p; + }; + const isIdentStart = (ch) => { + return ch === "_" || ch === "$" || /\p{L}/u.test(ch); + }; + const isIdentPart = (ch) => { + return ch === "_" || ch === "$" || /\p{L}|\p{Nd}/u.test(ch); + }; + const startsWithKeywordAt = (p, keyword) => { + const slice = s.slice(p, p + keyword.length); + if (slice.toUpperCase() !== keyword) return false; + const next = s[p + keyword.length]; + return !(next && /[A-Za-z0-9_]/.test(next)); + }; + const readQuotedString = (allowBackslashEscape = true) => { + const start = this.pos; + let value = ""; + this.pos++; + while (this.pos < len) { + if (s[this.pos] === "'") { + if (s[this.pos + 1] === "'") { + value += "'"; + this.pos += 2; + continue; + } + break; + } + if (allowBackslashEscape && s[this.pos] === "\\" && this.pos + 1 < len) { + value += s[this.pos + 1]; + this.pos += 2; + continue; + } + value += s[this.pos++]; + } + if (s[this.pos] === "'") this.pos++; + return { value, start, end: this.pos }; + }; + const readQuotedIdentifier = (quote) => { + const start = this.pos; + let value = ""; + this.pos++; + while (this.pos < len) { + if (s[this.pos] === quote) { + if (s[this.pos + 1] === quote) { + value += quote; + this.pos += 2; + continue; + } + break; + } + value += s[this.pos++]; + } + if (s[this.pos] === 
quote) this.pos++; + return { value, start, end: this.pos }; + }; + const readDoubleQuotedMaybeString = () => { + const start = this.pos; + let value = ""; + let isString = false; + this.pos++; + while (this.pos < len) { + if (s[this.pos] === "\\") { + if (this.pos + 1 < len) { + value += s[this.pos + 1]; + this.pos += 2; + isString = true; + continue; + } + } + if (s[this.pos] === "\"") { + if (s[this.pos + 1] === "\"") { + value += "\""; + this.pos += 2; + continue; + } + break; + } + value += s[this.pos++]; + } + if (s[this.pos] === "\"") this.pos++; + return { value, start, end: this.pos, isString }; + }; + const tryReadBracketIdentifier = () => { + let p = this.pos + 1; + let value = ""; + while (p < len) { + if (s[p] === "]") { + if (s[p + 1] === "]") { + value += "]"; + p += 2; + continue; + } + break; + } + if (s[p] === "\n" || s[p] === "\r") return null; + value += s[p++]; + } + if (s[p] !== "]") return null; + if (!value || !/^[A-Za-z_]/.test(value)) return null; + const start = this.pos; + this.pos = p + 1; + return { value, start, end: this.pos }; + }; + const decodeUnicodeEscapes = (raw, escapeChar) => { + let out = ""; + for (let i = 0; i < raw.length; i++) { + const ch = raw[i]; + if (ch !== escapeChar) { + out += ch; + continue; + } + const next = raw[i + 1]; + if (next === escapeChar) { + out += escapeChar; + i++; + continue; + } + if (next === "+") { + const hex = raw.slice(i + 2, i + 8); + if (!/^[0-9A-Fa-f]{6}$/.test(hex)) { + throw new Error("Invalid Unicode escape sequence"); + } + const code = parseInt(hex, 16); + if (code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)) { + throw new Error("Invalid Unicode code point"); + } + out += String.fromCodePoint(code); + i += 7; + continue; + } + const hex = raw.slice(i + 1, i + 5); + if (!/^[0-9A-Fa-f]{4}$/.test(hex)) { + throw new Error("Invalid Unicode escape sequence"); + } + const code = parseInt(hex, 16); + if (code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF)) { + throw new Error("Invalid 
Unicode code point"); + } + out += String.fromCodePoint(code); + i += 4; + } + return out; + }; while (this.pos < s.length) { const ch = s[this.pos]; if (/\s/.test(ch)) { this.pos++; continue; } - // strings (single-quoted, no escape handling) - if (ch === "'") { + // line comment + if (ch === "-" && s[this.pos + 1] === "-") { + const start = this.pos; + this.pos += 2; let value = ""; - this.pos++; - while (this.pos < s.length && s[this.pos] !== "'") { + while (this.pos < s.length && s[this.pos] !== "\n") { value += s[this.pos++]; } - this.pos++; - this.tokens.push({ type: "STRING", value }); + this.tokens.push({ type: "COMMENT_LINE", value: value.trim(), start, end: this.pos }); + continue; + } + // block comment + if (ch === "/" && s[this.pos + 1] === "*" && s[this.pos + 2] !== "+") { + this.pos += 2; + while (this.pos < s.length && !(s[this.pos] === "*" && s[this.pos + 1] === "/")) { + this.pos++; + } + if (this.pos < s.length) this.pos += 2; continue; } - // numbers - if (/[0-9]/.test(ch)) { + // table hints (/*+ ... 
*/) + if (ch === "/" && s[this.pos + 1] === "*" && s[this.pos + 2] === "+") { + const start = this.pos; + this.pos += 3; let value = ""; - while (this.pos < s.length && /[0-9\.]/.test(s[this.pos])) { + while (this.pos < s.length && !(s[this.pos] === "*" && s[this.pos + 1] === "/")) { value += s[this.pos++]; } - this.tokens.push({ type: "NUMBER", value }); + if (this.pos < s.length) this.pos += 2; + this.tokens.push({ type: "HINT", value: value.trim(), start, end: this.pos }); + continue; + } + // strings (single-quoted, with prefix/unicode/binary support) + if (ch === "'" || ((ch === "N" || ch === "n" || ch === "E" || ch === "e" || ch === "X" || ch === "x") && s[this.pos + 1] === "'") || + ((ch === "U" || ch === "u") && s[this.pos + 1] === "&" && s[this.pos + 2] === "'") || + (ch === "_" && /[A-Za-z0-9:._-]/.test(s[this.pos + 1]))) { + let start = this.pos; + let unicodeString = false; + let escapeChar = "\\"; + if (ch === "N" || ch === "n" || ch === "E" || ch === "e" || ch === "X" || ch === "x") { + this.pos++; + } else if ((ch === "U" || ch === "u") && s[this.pos + 1] === "&") { + this.pos += 2; + unicodeString = true; + } else if (ch === "_") { + this.pos++; + while (this.pos < len && /[A-Za-z0-9:._-]/.test(s[this.pos])) { + this.pos++; + } + if (s[this.pos] !== "'") { + this.pos = start; + } + } + if (s[this.pos] === "'") { + let literal = readQuotedString(!unicodeString); + let value = literal.value; + let p = skipWhitespace(this.pos); + while (s[p] === "'") { + this.pos = p; + const extra = readQuotedString(!unicodeString); + value += extra.value; + p = skipWhitespace(this.pos); + } + if (startsWithKeywordAt(p, "UESCAPE")) { + let q = skipWhitespace(p + "UESCAPE".length); + if (s[q] === "'") { + this.pos = q; + const esc = readQuotedString(false); + if (esc.value.length !== 1) { + throw new Error("UESCAPE must be a single character"); + } + escapeChar = esc.value; + p = skipWhitespace(this.pos); + } + } + this.pos = p; + if (unicodeString) { + value = 
decodeUnicodeEscapes(value, escapeChar); + } + this.tokens.push({ type: "STRING", value, start, end: this.pos }); + continue; + } else { + this.pos = start; + } + } + // quoted identifiers + if (ch === "[") { + const ident = tryReadBracketIdentifier(); + if (ident) { + this.tokens.push({ type: "IDENT", value: ident.value, start: ident.start, end: ident.end }); + continue; + } + } + if ((ch === "U" || ch === "u") && s[this.pos + 1] === "&" && s[this.pos + 2] === "\"") { + const start = this.pos; + this.pos += 2; + let { value, end } = readQuotedIdentifier("\""); + let p = skipWhitespace(this.pos); + let escapeChar = "\\"; + if (startsWithKeywordAt(p, "UESCAPE")) { + let q = skipWhitespace(p + "UESCAPE".length); + if (s[q] !== "'") { + throw new Error("UESCAPE requires a quoted escape character"); + } + this.pos = q; + const esc = readQuotedString(false); + if (esc.value.length !== 1) { + throw new Error("UESCAPE must be a single character"); + } + escapeChar = esc.value; + p = skipWhitespace(this.pos); + } + this.pos = p; + value = decodeUnicodeEscapes(value, escapeChar); + this.tokens.push({ type: "IDENT", value, start, end: this.pos }); + continue; + } + if (ch === '"' || ch === "`") { + if (ch === "\"") { + const { value, start, end, isString } = readDoubleQuotedMaybeString(); + this.tokens.push({ type: isString ? "STRING" : "IDENT", value, start, end }); + } else { + const { value, start, end } = readQuotedIdentifier(ch); + this.tokens.push({ type: "IDENT", value, start, end }); + } + continue; + } + // numbers (including leading dot and exponent) + if (/[0-9]/.test(ch) || (ch === "." 
&& /[0-9]/.test(s[this.pos + 1]))) { + const start = this.pos; + let value = ""; + if (ch === ".") { + value += "."; + this.pos++; + while (this.pos < s.length && /[0-9]/.test(s[this.pos])) { + value += s[this.pos++]; + } + } else { + while (this.pos < s.length && /[0-9]/.test(s[this.pos])) { + value += s[this.pos++]; + } + if (s[this.pos] === ".") { + value += "."; + this.pos++; + while (this.pos < s.length && /[0-9]/.test(s[this.pos])) { + value += s[this.pos++]; + } + } + } + if (/[eE]/.test(s[this.pos])) { + const e = s[this.pos]; + const sign = s[this.pos + 1]; + if (/[0-9\+\-]/.test(sign) && /[0-9]/.test(s[this.pos + 2] || "")) { + value += e; + this.pos++; + if (sign === "+" || sign === "-") { + value += sign; + this.pos++; + } + while (this.pos < s.length && /[0-9]/.test(s[this.pos])) { + value += s[this.pos++]; + } + } + } + this.tokens.push({ type: "NUMBER", value, start, end: this.pos }); continue; } // identifiers - if (/[A-Za-z_]/.test(ch)) { + if (isIdentStart(ch)) { + const start = this.pos; let value = ""; - while (this.pos < s.length && /[A-Za-z0-9_]/.test(s[this.pos])) { - value += s[this.pos++]; + while (this.pos < s.length) { + const c = s[this.pos]; + if (isIdentPart(c)) { + value += c; + this.pos++; + continue; + } + if (c === "-" && isIdentStart(s[this.pos + 1])) { + value += c; + this.pos++; + continue; + } + break; } - this.tokens.push({ type: "IDENT", value }); + this.tokens.push({ type: "IDENT", value, start, end: this.pos }); continue; } // symbols (two-char first) const two = s.slice(this.pos, this.pos + 2); const twoOps = ["<=", ">=", "<>", "!=", "||", "::", "->", ":="]; if (twoOps.includes(two)) { - this.tokens.push({ type: "SYMBOL", value: two }); + this.tokens.push({ type: "SYMBOL", value: two, start: this.pos, end: this.pos + 2 }); this.pos += 2; continue; } - this.tokens.push({ type: "SYMBOL", value: ch }); + this.tokens.push({ type: "SYMBOL", value: ch, start: this.pos, end: this.pos + 1 }); this.pos++; } - this.tokens.push({ 
type: "EOF", value: null }); + this.tokens.push({ type: "EOF", value: null, start: this.pos, end: this.pos }); return this.tokens; } } @@ -70,9 +375,32 @@ class CalciteParser { this.tokens = tokens || []; this.pos = 0; } - peek() { return this.tokens[this.pos] || { type: "EOF", value: null }; } - peekN(n) { return this.tokens[this.pos + n] || { type: "EOF", value: null }; } - next() { return this.tokens[this.pos++] || { type: "EOF", value: null }; } + isCommentToken(t) { return t && t.type === "COMMENT_LINE"; } + peekRaw() { return this.tokens[this.pos] || { type: "EOF", value: null }; } + peek() { + let i = this.pos; + while (this.isCommentToken(this.tokens[i])) i++; + return this.tokens[i] || { type: "EOF", value: null }; + } + peekN(n) { + let i = this.pos; + let count = 0; + while (i < this.tokens.length) { + const t = this.tokens[i]; + if (!this.isCommentToken(t)) { + if (count === n) return t; + count++; + } + i++; + } + return { type: "EOF", value: null }; + } + nextRaw() { return this.tokens[this.pos++] || { type: "EOF", value: null }; } + next() { + let t = this.nextRaw(); + while (this.isCommentToken(t)) t = this.nextRaw(); + return t || { type: "EOF", value: null }; + } isEOF() { return this.peek().type === "EOF"; } isSymbol(value) { const t = this.peek(); @@ -180,13 +508,14 @@ class CalciteParser { const keywords = new Set([ "FROM", "WHERE", "GROUP", "HAVING", "WINDOW", "QUALIFY", "ORDER", "LIMIT", "OFFSET", "FETCH", "UNION", "INTERSECT", "EXCEPT", - "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ASOF", - "SET", "USING", "ON", "WHEN", + "JOIN", "INNER", "LEFT", "RIGHT", "FULL", "CROSS", "ASOF", "NATURAL", + "SET", "USING", "ON", "WHEN", "FOR", + "MATCH_RECOGNIZE", "MATCH_CONDITION", "PIVOT", "UNPIVOT", "TABLESAMPLE", ]); return keywords.has(value); } isTableHintsStart() { - return this.isSymbol("/") && this.isSymbolAt("*", 1) && this.isSymbolAt("+", 2); + return this.peek().type === "HINT"; } expect(type) { const t = this.peek(); @@ -195,6 +524,39 
@@ class CalciteParser { } return this.next(); } + collectLineComments() { + const comments = []; + while (this.isCommentToken(this.peekRaw())) { + const t = this.nextRaw(); + comments.push(t.value || ""); + } + return comments; + } + ExpressionUntilKeyword(keyword) { + let depth = 0; + let idx = -1; + for (let i = this.pos; i < this.tokens.length; i++) { + const t = this.tokens[i]; + if (t.type === "SYMBOL") { + if (t.value === "(" || t.value === "[" || t.value === "{") depth++; + else if (t.value === ")" || t.value === "]" || t.value === "}") depth = Math.max(0, depth - 1); + } + if (depth === 0 && t.type === "IDENT" && String(t.value).toUpperCase() === keyword) { + idx = i; + break; + } + } + if (idx === -1) { + throw new Error(`Expected keyword ${keyword} after expression`); + } + const subTokens = this.tokens.slice(this.pos, idx); + subTokens.push({ type: "EOF", value: null }); + const subParser = new CalciteParser(subTokens); + const expr = subParser.Expression(); + subParser.expect("EOF"); + this.pos = idx; + return expr; + } notImplemented(rule) { const t = this.peek(); throw new Error(`Not implemented: ${rule} at token ${t.type}`); @@ -202,6 +564,7 @@ class CalciteParser { SqlStmtList() { const statements = []; + const leadingComments = this.collectLineComments(); if (!this.isEOF()) { statements.push(this.SqlStmt()); while (this.acceptSymbol(";")) { @@ -210,7 +573,7 @@ class CalciteParser { } } this.expect("EOF"); - return { type: "SqlStmtList", statements }; + return { type: "SqlStmtList", leadingComments, statements }; } SqlStmtEof() { @@ -397,6 +760,22 @@ class CalciteParser { if (this.isSymbol("(")) { columns = this.ParenthesizedCompoundIdentifierList(); } + const isSourceStart = + this.isKeyword("WITH") || + this.isKeyword("SELECT") || + this.isKeyword("VALUES") || + this.isKeyword("VALUE") || + this.isKeyword("TABLE") || + (this.isSymbol("(") && ( + this.isKeywordAt("WITH", 1) || + this.isKeywordAt("SELECT", 1) || + this.isKeywordAt("VALUES", 1) || + 
this.isKeywordAt("VALUE", 1) || + this.isKeywordAt("TABLE", 1) + )); + if (!isSourceStart) { + throw new Error("Invalid INSERT source"); + } const source = this.OrderedQueryOrExpr(); return { type: "SqlInsert", mode, keywords, table, hints, extend, columns, source }; } @@ -557,6 +936,16 @@ class CalciteParser { while (this.isKeyword("UNION") || this.isKeyword("INTERSECT") || this.isKeyword("EXCEPT")) { setOps.push(this.AddSetOpQuery()); } + if (setOps.length > 0) { + const isQueryLeaf = + leaf && + (leaf.type === "SqlSelect" || + leaf.type === "TableConstructor" || + leaf.type === "ExplicitTable"); + if (!isQueryLeaf) { + throw new Error("SETOP requires query operands"); + } + } return { type: "QueryOrExpr", withList, leaf, setOps }; } @@ -580,6 +969,15 @@ class CalciteParser { } else if (this.isKeyword("FETCH")) { fetch = this.FetchClause(); } + if (offset && !orderBy) { + throw new Error("OFFSET requires ORDER BY"); + } + if (fetch && !orderBy) { + throw new Error("FETCH requires ORDER BY"); + } + if (limit && fetch) { + throw new Error("LIMIT and FETCH cannot be combined"); + } return { type: "OrderByLimitOpt", orderBy, limit, offset, fetch }; } @@ -659,6 +1057,7 @@ class CalciteParser { SqlSelect() { this.expectKeyword("SELECT"); + const selectComments = this.collectLineComments(); let hints = null; if (this.isTableHintsStart()) { hints = this.TableHints(); @@ -671,6 +1070,13 @@ class CalciteParser { while (this.acceptSymbol(",")) { selectItems.push(this.AddSelectItem()); } + if (!this.isKeyword("FROM")) { + if (this.isKeyword("WHERE") || this.isKeyword("GROUP") || this.isKeyword("HAVING") || + this.isKeyword("WINDOW") || this.isKeyword("QUALIFY")) { + const t = this.peek(); + throw new Error(`Expected FROM before ${String(t.value).toUpperCase()}`); + } + } let from = null; let where = null; let groupBy = null; @@ -685,8 +1091,15 @@ class CalciteParser { if (this.isKeyword("WINDOW")) window = this.Window(); if (this.isKeyword("QUALIFY")) qualify = 
this.Qualify(); } + if (having && !groupBy) { + throw new Error("HAVING requires GROUP BY"); + } + if (qualify && !from) { + throw new Error("QUALIFY requires FROM"); + } return { type: "SqlSelect", + selectComments, hints, stream, setQuantifier, @@ -718,7 +1131,17 @@ class CalciteParser { WindowSpecification() { this.expectSymbol("("); let name = null; - if (this.peek().type === "IDENT") { + if ( + this.peek().type === "IDENT" && + !this.isKeyword("PARTITION") && + !this.isKeyword("ORDER") && + !this.isKeyword("ROWS") && + !this.isKeyword("RANGE") && + !this.isKeyword("GROUPS") && + !this.isKeyword("EXCLUDE") && + !this.isKeyword("ALLOW") && + !this.isKeyword("DISALLOW") + ) { name = this.SimpleIdentifier(); } let partitionBy = null; @@ -745,6 +1168,9 @@ class CalciteParser { const exclusion = this.WindowExclusion(); frame.exclusion = exclusion; } + if (frame && !orderBy) { + throw new Error("Window frame requires ORDER BY"); + } let partial = null; if (this.acceptKeyword("ALLOW") || this.acceptKeyword("DISALLOW")) { const mode = String(this.tokens[this.pos - 1].value).toUpperCase(); @@ -908,12 +1334,28 @@ class CalciteParser { const joinType = this.JoinType(); const table = this.TableRef(); let condition = null; - if (this.acceptKeyword("ON")) { + let matchCondition = null; + if (joinType === "ASOF JOIN" && this.acceptKeyword("MATCH_CONDITION")) { + matchCondition = this.ExpressionUntilKeyword("ON"); + this.expectKeyword("ON"); condition = { type: "On", expr: this.Expression() }; - } else if (this.acceptKeyword("USING")) { - condition = { type: "Using", columns: this.ParenthesizedSimpleIdentifierList() }; + } else { + if (this.acceptKeyword("ON")) { + condition = { type: "On", expr: this.Expression() }; + } else if (this.acceptKeyword("USING")) { + condition = { type: "Using", columns: this.ParenthesizedSimpleIdentifierList() }; + } } - return { type: "JoinTable", natural, joinType, table, condition }; + if (!natural && joinType !== "CROSS JOIN" && !condition) { 
+ throw new Error("JOIN requires ON or USING"); + } + if (natural && condition) { + throw new Error("NATURAL JOIN cannot use ON or USING"); + } + if (matchCondition && joinType !== "ASOF JOIN") { + throw new Error("MATCH_CONDITION is only valid for ASOF JOIN"); + } + return { type: "JoinTable", natural, joinType, table, matchCondition, condition }; } TableRef() { @@ -930,11 +1372,18 @@ class CalciteParser { TableRef3() { let base; + let hints = null; + let extend = null; + let over = null; + let snapshot = null; + let matchRecognize = null; if (this.acceptKeyword("LATERAL")) { if (this.isSymbol("(")) { this.expectSymbol("("); const query = this.OrderedQueryOrExpr(); this.expectSymbol(")"); + over = this.TableOverOpt(); + if (this.isKeyword("MATCH_RECOGNIZE")) matchRecognize = this.MatchRecognize(); base = { type: "LateralSubquery", query }; } else if (this.acceptKeyword("UNNEST")) { this.expectSymbol("("); @@ -949,6 +1398,8 @@ class CalciteParser { this.expectSymbol("("); const query = this.OrderedQueryOrExpr(); this.expectSymbol(")"); + over = this.TableOverOpt(); + if (this.isKeyword("MATCH_RECOGNIZE")) matchRecognize = this.MatchRecognize(); base = { type: "Subquery", query }; } else if (this.isKeyword("UNNEST")) { this.expectKeyword("UNNEST"); @@ -973,6 +1424,17 @@ class CalciteParser { this.expectSymbol(")"); base = { type: "ImplicitTableFunctionCall", name, args }; } else { + if (this.isTableHintsStart()) { + hints = this.TableHints(); + } + if (this.isKeyword("EXTEND")) { + extend = this.ExtendTable(); + } + over = this.TableOverOpt(); + if (this.isKeyword("FOR")) { + snapshot = this.Snapshot(); + } + if (this.isKeyword("MATCH_RECOGNIZE")) matchRecognize = this.MatchRecognize(); base = { type: "TableName", name }; } } else { @@ -991,7 +1453,10 @@ class CalciteParser { if (this.isSymbol("(")) { columns = this.ParenthesizedSimpleIdentifierList(); } - } else if (this.peek().type === "IDENT") { + } else if ( + this.peek().type === "IDENT" && + 
!this.isClauseKeyword(String(this.peek().value).toUpperCase()) + ) { alias = this.SimpleIdentifier(); if (this.isSymbol("(")) { columns = this.ParenthesizedSimpleIdentifierList(); @@ -1001,7 +1466,7 @@ class CalciteParser { if (this.isKeyword("TABLESAMPLE")) { tablesample = this.Tablesample(); } - return { type: "TableRef", base, pivot, unpivot, alias, columns, tablesample }; + return { type: "TableRef", base, hints, extend, over, snapshot, matchRecognize, pivot, unpivot, alias, columns, tablesample }; } Snapshot() { @@ -2208,11 +2673,15 @@ class CalciteParser { const t = this.peek(); if (t.type === "NUMBER") { this.next(); - return { type: "UnsignedNumericLiteral", value: t.value }; + const raw = String(t.value); + let kind = "INTEGER"; + if (/[eE]/.test(raw)) kind = "APPROX"; + else if (raw.includes(".")) kind = "DECIMAL"; + return { type: "UnsignedNumericLiteral", kind, value: t.value }; } if (this.acceptKeyword("DECIMAL")) { const literal = this.SimpleStringLiteral(); - return { type: "UnsignedNumericLiteral", value: { type: "DECIMAL", literal } }; + return { type: "UnsignedNumericLiteral", kind: "DECIMAL_STRING", value: { type: "DECIMAL", literal } }; } throw new Error("Invalid UnsignedNumericLiteral"); } @@ -2328,7 +2797,7 @@ class CalciteParser { AddSetOpQuery() { const op = this.BinaryQueryOperator(); - const right = this.LeafQueryOrExpr(); + const right = this.LeafQuery(); return { type: "AddSetOpQuery", op, right }; } @@ -2525,15 +2994,12 @@ class CalciteParser { } TableHints() { - this.expectSymbol("/"); - this.expectSymbol("*"); - this.expectSymbol("+"); - const hints = [this.AddHint()]; - while (this.acceptSymbol(",")) { - hints.push(this.AddHint()); + const t = this.peek(); + if (t.type !== "HINT") { + throw new Error(`Expected HINT but got ${t.type}:${t.value}`); } - this.expectSymbol("*"); - this.expectSymbol("/"); + this.next(); + const hints = t.value ? 
t.value.split(",").map((s) => s.trim()).filter(Boolean) : []; return { type: "TableHints", hints }; } @@ -3359,6 +3825,7 @@ class CalciteParser { PatternFactor() { const primary = this.PatternPrimary(); + if (!primary) return null; let quantifier = null; if (this.acceptSymbol("*")) quantifier = { kind: "*" }; else if (this.acceptSymbol("+")) quantifier = { kind: "+" }; @@ -3944,34 +4411,13 @@ class CalciteParser { } -module.exports = { - CalciteLexer, - CalciteParser, -}; - -if (require.main === module) { - const samples = [ - "SELECT 1", - "SELECT /*+ index(t) */ a AS x FROM t WHERE a IS NOT DISTINCT FROM b", - "WITH t AS (SELECT 1) SELECT * FROM t", - "EXPLAIN PLAN FOR SELECT 1", - "INSERT INTO t(a) VALUES (1)", - "UPDATE t SET a = 1 WHERE b = 2", - "DELETE FROM t WHERE a IN (1,2,3)", - "MERGE INTO t USING u ON t.id = u.id WHEN MATCHED THEN UPDATE SET a = 1", - "SELECT ARRAY_AGG(x) FROM t", - "SELECT JSON_VALUE(doc, '$.a' RETURNING VARCHAR) FROM t", - "SELECT DATE_DIFF(d1, d2, DAY) FROM t", - ]; - for (const src of samples) { - const lexer = new CalciteLexer(src); - const tokens = lexer.tokenize(); - const parser = new CalciteParser(tokens); - try { - parser.SqlStmtList(); - console.log(`OK: ${src}`); - } catch (e) { - console.error(`NG: ${src} -> ${e.message}`); - } - } +if (typeof module !== "undefined" && module.exports) { + module.exports = { + CalciteLexer, + CalciteParser, + }; +} +if (typeof window !== "undefined") { + window.CalciteLexer = CalciteLexer; + window.CalciteParser = CalciteParser; } diff --git a/sql-formatter-test.js b/sql-formatter-test.js new file mode 100644 index 0000000..2d4f0ff --- /dev/null +++ b/sql-formatter-test.js @@ -0,0 +1,99 @@ +'use strict'; + +const { formatSql } = require('./sql-formatter'); + +const cases = [ + { + name: 'basic-select', + sql: 'SELECT a FROM t', + expect: `SELECT + a +FROM + t`, + }, + { + name: 'select-where', + sql: 'SELECT a FROM t WHERE b = 1', + expect: `SELECT + a +FROM + t +WHERE + b = 1`, + }, + { + 
name: 'select-line-comment', + sql: "SELECT -- keep comment\n a\nFROM t", + expectIncludes: ['SELECT -- keep comment'], + }, + { + name: 'select-group-by', + sql: 'SELECT a, b FROM t GROUP BY a, b', + expect: `SELECT + a + , b +FROM + t +GROUP BY + a + , b`, + }, + { + name: 'select-order-by', + sql: 'SELECT a, b FROM t ORDER BY a DESC, b', + expect: `SELECT + a + , b +FROM + t +ORDER BY + a DESC + , b`, + }, + { + name: 'nested-subquery', + sql: 'SELECT u.name FROM (SELECT id, name FROM users WHERE active = 1) AS u', + expect: `SELECT + u.name +FROM + ( + SELECT + id + , name + FROM + users + WHERE + active = 1 + ) u`, + }, +]; + +let failed = 0; +for (const c of cases) { + try { + const out = formatSql(c.sql); + let ok = true; + if (c.expect !== undefined) { + ok = out.trim() === c.expect.trim(); + } else if (c.expectIncludes) { + ok = c.expectIncludes.every((frag) => out.includes(frag)); + } + if (!ok) { + failed++; + console.error(`NG ${c.name}: output missing expected fragments`); + console.error(out); + } else { + console.log(`OK ${c.name}`); + } + } catch (err) { + failed++; + console.error(`NG ${c.name}: ${err.message}`); + } +} + +if (failed > 0) { + console.error(`\nFAILED: ${failed}`); + process.exit(1); +} + +console.log(`\nALL OK: ${cases.length}`); diff --git a/sql-formatter.js b/sql-formatter.js new file mode 100644 index 0000000..edecbc9 --- /dev/null +++ b/sql-formatter.js @@ -0,0 +1,231 @@ +'use strict'; + +const { CalciteLexer, CalciteParser } = require('./spec/apache-calcite-Parser'); + +function formatSql(sql) { + const lexer = new CalciteLexer(sql); + const tokens = lexer.tokenize(); + const parser = new CalciteParser(tokens); + const ast = parser.SqlStmtList(); + return renderNode(ast, { indent: 0 }).trim(); +} + +function renderNode(node, ctx) { + if (!node) return ''; + switch (node.type) { + case 'SqlStmtList': + { + const lines = []; + if (node.leadingComments && node.leadingComments.length) { + node.leadingComments.forEach(c => 
lines.push(`-- ${c}`)); + } + lines.push(...node.statements.map(stmt => renderNode(stmt, ctx))); + return lines.filter(Boolean).join('\n'); + } + case 'OrderedQueryOrExpr': { + const withList = node.query && node.query.withList ? renderNode(node.query.withList, ctx) : null; + const base = node.query ? renderNode(node.query, ctx) : ''; + const lines = []; + if (withList) lines.push(withList); + if (base) lines.push(base); + if (node.orderByLimitOpt) lines.push(renderOrderByLimitOpt(node.orderByLimitOpt, ctx)); + return lines.filter(Boolean).join('\n'); + } + case 'QueryOrExpr': + return renderNode(node.leaf, ctx); + case 'SqlSelect': + return renderSelect(node, ctx); + case 'AddSelectItem': + return renderSelectItem(node, ctx); + case 'SelectExpression': + if (node.star) return '*'; + return renderNode(node.expr, ctx); + case 'TableRef': + return renderTableRef(node, ctx); + case 'Subquery': { + const inner = renderNode(node.query, { ...ctx, indent: 0 }); + const pad = indent(ctx, 1); + const body = inner ? inner.split('\n').map(line => pad + line).join('\n') : pad; + return `(\n${body}\n${indent(ctx)})`; + } + case 'LateralSubquery': { + const inner = renderNode(node.query, { ...ctx, indent: 0 }); + const pad = indent(ctx, 1); + const body = inner ? inner.split('\n').map(line => pad + line).join('\n') : pad; + return `LATERAL (\n${body}\n${indent(ctx)})`; + } + case 'TableName': + return renderNode(node.name, ctx); + case 'CompoundTableIdentifier': + return node.parts.map(p => (p.type === 'Identifier' ? 
(p.value || p.name) : '*')).join('.'); + case 'JoinTable': + return renderJoin(node, ctx); + case 'CommaJoin': + return `, ${renderNode(node.table, ctx)}`; + case 'FromClause': + return renderFrom(node, ctx); + case 'Where': + return `WHERE ${renderNode(node.expr, ctx)}`; + case 'GroupBy': + return renderGroupBy(node, ctx); + case 'OrderBy': + return renderOrderBy(node, ctx); + case 'OrderItemList': + return node.items.map(it => renderNode(it, ctx)).join('\n'); + case 'AddOrderItem': + return renderOrderItem(node, ctx); + case 'GroupingElementList': + return node.items.map(it => renderNode(it, ctx)).join('\n'); + case 'StringLiteral': + return `'${node.value}'`; + case 'NumericLiteral': + if (node.value && typeof node.value === 'object' && node.value.value !== undefined) { + return String(node.value.value); + } + return String(node.value); + case 'UnsignedNumericLiteral': + return String(node.value); + case 'Literal': + return String(node.value); + case 'Identifier': + return node.value || node.name; + case 'CompoundIdentifier': + return node.parts.map(p => (p.type === 'Identifier' ? 
(p.value || p.name) : '*')).join('.'); + case 'ParenthesizedExpression': + return `(${renderNode(node.node, ctx)})`; + case 'ParenExpression': + return `(${renderNode(node.node, ctx)})`; + case 'AddGroupingElement': + if (node.kind === 'EXPR') return renderNode(node.expr, ctx); + if (node.list) return renderNode(node.list, ctx); + return ''; + case 'Expression2b': + return renderExpression2b(node, ctx); + case 'BinaryExpression': + return `${renderNode(node.left, ctx)} ${renderBinaryOp(node.operator)} ${renderNode(node.right, ctx)}`; + default: + return `[${node.type}]`; + } +} + +function renderSelect(node, ctx) { + const lines = []; + if (node.selectComments && node.selectComments.length > 0) { + lines.push(`SELECT -- ${node.selectComments.join(' ')}`); + } else { + lines.push('SELECT'); + } + const items = node.selectItems || []; + items.forEach((item, idx) => { + const prefix = idx === 0 ? indent(ctx, 1) : `${indent(ctx, 1)}, `; + lines.push(prefix + renderNode(item, { ...ctx, indent: ctx.indent + 1 })); + }); + if (node.from) { + lines.push(`${indent(ctx)}FROM`); + lines.push(`${indent(ctx, 1)}${renderNode(node.from, { ...ctx, indent: ctx.indent + 1 })}`); + } + if (node.where) { + lines.push(`${indent(ctx)}WHERE`); + lines.push(`${indent(ctx, 1)}${renderNode(node.where.expr, { ...ctx, indent: ctx.indent + 1 })}`); + } + if (node.groupBy) { + lines.push(`${indent(ctx)}${renderNode(node.groupBy, ctx)}`); + } + if (node.having) { + lines.push(`${indent(ctx)}${renderNode(node.having, { ...ctx, indent: ctx.indent + 1 })}`); + } + if (node.orderBy) { + lines.push(`${indent(ctx)}${renderNode(node.orderBy, ctx)}`); + } + return lines.join('\n'); +} + +function renderSelectItem(node, ctx) { + const expr = renderNode(node.expr, ctx); + if (node.alias) return `${expr} AS ${node.alias.name || node.alias}`; + return expr; +} + +function renderFrom(node, ctx) { + const base = renderNode(node.first, ctx); + const joins = (node.joins || []).map(j => renderNode(j, ctx)); + 
return [base, ...joins].join('\n' + indent(ctx)); +} + +function renderTableRef(node, ctx) { + if (node.base) { + const base = renderNode(node.base, ctx); + const alias = node.alias ? (node.alias.value || node.alias.name || node.alias) : null; + return alias ? `${base} ${alias}` : base; + } + return '[TableRef]'; +} + +function renderGroupBy(node, ctx) { + const items = node.list.items.map(it => renderNode(it, ctx)); + if (items.length === 0) return 'GROUP BY'; + const lines = ['GROUP BY']; + lines.push(`${indent(ctx, 1)}${items[0]}`); + for (let i = 1; i < items.length; i++) { + lines.push(`${indent(ctx, 1)}, ${items[i]}`); + } + return lines.join('\n'); +} + +function renderOrderBy(node, ctx) { + const items = node.list.items.map(it => renderOrderItem(it, ctx)); + if (items.length === 0) return 'ORDER BY'; + const lines = ['ORDER BY']; + lines.push(`${indent(ctx, 1)}${items[0]}`); + for (let i = 1; i < items.length; i++) { + lines.push(`${indent(ctx, 1)}, ${items[i]}`); + } + return lines.join('\n'); +} + +function renderOrderItem(node, ctx) { + const expr = renderNode(node.expr, ctx); + const dir = node.direction ? 
` ${node.direction}` : ''; + return `${expr}${dir}`; +} + +function renderJoin(node, ctx) { + const base = `${node.joinType} ${renderNode(node.table, ctx)}`; + if (!node.condition) return base; + if (node.condition.type === 'On') { + const expr = renderNode(node.condition.expr, { ...ctx, indent: ctx.indent + 1 }); + return `${base}\n${indent(ctx, 1)}ON ${expr}`; + } + if (node.condition.type === 'Using') { + const cols = node.condition.columns.items.map(c => c.value || c.name).join(', '); + return `${base}\n${indent(ctx, 1)}USING (${cols})`; + } + return base; +} + +function renderOrderByLimitOpt(opt, ctx) { + if (!opt) return ''; + const lines = []; + if (opt.orderBy) lines.push(renderOrderBy(opt.orderBy, ctx)); + if (opt.limit) lines.push(`LIMIT ${renderNode(opt.limit.value, ctx)}`); + if (opt.offset) lines.push(`OFFSET ${renderNode(opt.offset.value, ctx)}`); + if (opt.fetch) lines.push(`FETCH ${opt.fetch.mode} ${renderNode(opt.fetch.value, ctx)} ${opt.fetch.rows} ONLY`); + return lines.filter(Boolean).join('\n'); +} + +function renderExpression2b(node, ctx) { + const base = renderNode(node.base, ctx); + return base; +} + +function renderBinaryOp(op) { + if (!op) return ''; + if (typeof op === 'string') return op; + return op.op || ''; +} + +function indent(ctx, extra = 0) { + return ' '.repeat((ctx.indent + extra) * 4); +} + +module.exports = { formatSql };