From 9b09ae006dfea6f63f05e493552841d9d77dd5ff Mon Sep 17 00:00:00 2001
From: Yasuhiro Matsumoto
Date: Fri, 29 Dec 2023 23:00:06 +0900
Subject: [PATCH 1/4] [WIP] format with semicolon and comments

---
 ast/ast.go                      | 6 ++++++
 internal/formatter/formatter.go | 9 +++++++++
 parser/parser.go                | 3 ---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/ast/ast.go b/ast/ast.go
index da9c27d..00af7c6 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -528,6 +528,9 @@ func (t *SQLToken) NoQuateString() string {
 	case *token.SQLWord:
 		return v.NoQuateString()
 	case string:
+		if t.Kind == token.Comment {
+			return "--" + v + "\n"
+		}
 		return v
 	default:
 		return " "
@@ -539,6 +542,9 @@ func (t *SQLToken) Render(opts *RenderOptions) string {
 	case *token.SQLWord:
 		return renderSQLWord(v, opts)
 	case string:
+		if t.Kind == token.Comment {
+			return "--" + v + "\n"
+		}
 		return v
 	default:
 		return " "
diff --git a/internal/formatter/formatter.go b/internal/formatter/formatter.go
index 42b264e..4bffe52 100644
--- a/internal/formatter/formatter.go
+++ b/internal/formatter/formatter.go
@@ -235,6 +235,15 @@ func formatItem(node ast.Node, env *formatEnvironment) ast.Node {
 		results = append(results, linebreakNode)
 		results = append(results, env.genIndent()...)
 	}
+	breakStatementAfterMatcher := astutil.NodeMatcher{
+		ExpectTokens: []token.Kind{
+			token.Semicolon,
+		},
+	}
+	if breakStatementAfterMatcher.IsMatch(node) {
+		results = append(results, linebreakNode)
+		env.indentLevelDown()
+	}
 
 	return &ast.ItemWith{Toks: results}
 }
diff --git a/parser/parser.go b/parser/parser.go
index 8215cef..bc18494 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -79,9 +79,6 @@ func NewParser(src io.Reader, d dialect.Dialect) (*Parser, error) {
 
 	parsed := []ast.Node{}
 	for _, tok := range tokens {
-		if tok.Kind == token.Comment {
-			continue
-		}
 		parsed = append(parsed, ast.NewItem(tok))
 	}
 

From 47c598eec13c739b792f76447c0f71d148cede23 Mon Sep 17 00:00:00 2001
From: Yasuhiro Matsumoto
Date: Sat, 30 Dec 2023 01:21:58 +0900
Subject: [PATCH 2/4] handle multiline comments

---
 ast/ast.go                      | 16 +++++++--
 internal/formatter/formatter.go |  9 ++++++
 parser/parser.go                | 14 +++++++-
 parser/parser_test.go           | 21 ++++++------
 token/kind.go                   |  2 ++
 token/kind_string.go            | 57 +++++++++++++++++----------------
 token/lexer.go                  |  2 +-
 token/lexer_test.go             |  2 +-
 8 files changed, 80 insertions(+), 43 deletions(-)

diff --git a/ast/ast.go b/ast/ast.go
index 00af7c6..c70886e 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -517,6 +517,12 @@ func (t *SQLToken) String() string {
 	case *token.SQLWord:
 		return v.String()
 	case string:
+		if t.Kind == token.Comment {
+			return "--" + v
+		}
+		if t.Kind == token.MultilineComment {
+			return "/*" + v + "*/"
+		}
 		return v
 	default:
 		return " "
@@ -529,7 +535,10 @@ func (t *SQLToken) NoQuateString() string {
 		return v.NoQuateString()
 	case string:
 		if t.Kind == token.Comment {
-			return "--" + v + "\n"
+			return "--" + v
+		}
+		if t.Kind == token.MultilineComment {
+			return "/*" + v + "*/"
 		}
 		return v
 	default:
@@ -543,7 +552,10 @@ func (t *SQLToken) Render(opts *RenderOptions) string {
 		return renderSQLWord(v, opts)
 	case string:
 		if t.Kind == token.Comment {
-			return "--" + v + "\n"
+			return "--" + v
+		}
+		if t.Kind == token.MultilineComment {
+			return "/*" + v + "*/"
 		}
 		return v
 	default:
diff --git a/internal/formatter/formatter.go b/internal/formatter/formatter.go
index 4bffe52..f7df66c 100644
--- a/internal/formatter/formatter.go
+++ b/internal/formatter/formatter.go
@@ -235,6 +235,15 @@ func formatItem(node ast.Node, env *formatEnvironment) ast.Node {
 		results = append(results, linebreakNode)
 		results = append(results, env.genIndent()...)
 	}
+	commentAfterMatcher := astutil.NodeMatcher{
+		ExpectTokens: []token.Kind{
+			token.Comment,
+		},
+	}
+	if commentAfterMatcher.IsMatch(node) {
+		results = append(results, linebreakNode)
+	}
+
 	breakStatementAfterMatcher := astutil.NodeMatcher{
 		ExpectTokens: []token.Kind{
 			token.Semicolon,
diff --git a/parser/parser.go b/parser/parser.go
index bc18494..48a6fb8 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -526,6 +526,12 @@ func parseAliased(reader *astutil.NodeReader) ast.Node {
 	}
 }
 
+var commentInfixMatcher = astutil.NodeMatcher{
+	ExpectTokens: []token.Kind{
+		token.Comment,
+		token.MultilineComment,
+	},
+}
 var identifierListInfixMatcher = astutil.NodeMatcher{
 	ExpectTokens: []token.Kind{
 		token.Comma,
@@ -567,7 +573,7 @@ func parseIdentifierList(reader *astutil.NodeReader) ast.Node {
 		peekNode ast.Node
 	)
 	for {
-		if !tmpReader.PeekNodeIs(true, identifierListTargetMatcher) {
+		if !tmpReader.PeekNodeIs(true, identifierListTargetMatcher) && !tmpReader.PeekNodeIs(true, commentInfixMatcher) {
 			// Include white space after the comma
 			peekIndex, peekNode := tmpReader.PeekNode(true)
 			if peekNode != nil {
@@ -581,12 +587,18 @@ func parseIdentifierList(reader *astutil.NodeReader) ast.Node {
 			}
 			break
 		}
+		for tmpReader.PeekNodeIs(true, commentInfixMatcher) {
+			tmpReader.NextNode(true)
+		}
 
 		peekIndex, peekNode = tmpReader.PeekNode(true)
 		idents = append(idents, peekNode)
 		endIndex = peekIndex
 
 		tmpReader.NextNode(true)
+		//for tmpReader.PeekNodeIs(true, commentInfixMatcher) {
+		//tmpReader.NextNode(true)
+		//}
 		if !tmpReader.PeekNodeIs(true, identifierListInfixMatcher) {
 			break
 		}
diff --git a/parser/parser_test.go b/parser/parser_test.go
index f70300d..4823f7f 100644
--- a/parser/parser_test.go
+++ b/parser/parser_test.go
@@ -84,42 +84,43 @@ func TestParseComments(t *testing.T) {
 			name:    "line comment with identiger",
 			input:   "-- foo\nbar",
 			checkFn: func(t *testing.T, stmts []*ast.Statement, input string) {
-				testStatement(t, stmts[0], 2, "\nbar")
+				testStatement(t, stmts[0], 3, "-- foo\nbar")
 
 				list := stmts[0].GetTokens()
-				testItem(t, list[0], "\n")
-				testIdentifier(t, list[1], "bar")
+				testItem(t, list[0], "-- foo")
+				testIdentifier(t, list[2], "bar")
 			},
 		},
 		{
 			name:    "range comment with identiger",
 			input:   "/* foo */bar",
 			checkFn: func(t *testing.T, stmts []*ast.Statement, input string) {
-				testStatement(t, stmts[0], 1, "bar")
+				testStatement(t, stmts[0], 2, "/* foo */bar")
 
 				list := stmts[0].GetTokens()
-				testIdentifier(t, list[0], "bar")
+				testIdentifier(t, list[1], "bar")
 			},
 		},
 		{
 			name:    "range comment with identiger list",
 			input:   "foo, /* foo */bar",
 			checkFn: func(t *testing.T, stmts []*ast.Statement, input string) {
-				testStatement(t, stmts[0], 1, "foo, bar")
+				testStatement(t, stmts[0], 1, "foo, /* foo */bar")
 
 				list := stmts[0].GetTokens()
-				testIdentifierList(t, list[0], "foo, bar")
+				testIdentifierList(t, list[0], "foo, /* foo */bar")
 			},
 		},
 		{
 			name:    "multi line range comment with identiger",
 			input:   "/*\n * foo\n */\nbar",
 			checkFn: func(t *testing.T, stmts []*ast.Statement, input string) {
-				testStatement(t, stmts[0], 2, "\nbar")
+				testStatement(t, stmts[0], 3, "/*\n foo\n */\nbar")
 
 				list := stmts[0].GetTokens()
-				testItem(t, list[0], "\n")
-				testIdentifier(t, list[1], "bar")
+				testItem(t, list[0], "/*\n foo\n */")
+				testItem(t, list[1], "\n")
+				testIdentifier(t, list[2], "bar")
 			},
 		},
 	}
diff --git a/token/kind.go b/token/kind.go
index 87bb807..8fc1727 100644
--- a/token/kind.go
+++ b/token/kind.go
@@ -20,6 +20,8 @@ const (
 	Whitespace
 	// comment node
 	Comment
+	// multiline comment node
+	MultilineComment
 	// = operator
 	Eq
 	// != or <> operator
diff --git a/token/kind_string.go b/token/kind_string.go
index a42ad58..60c83ff 100644
--- a/token/kind_string.go
+++ b/token/kind_string.go
@@ -1,4 +1,4 @@
-// Code generated by "stringer -type Kind ./token/kind.go"; DO NOT EDIT.
+// Code generated by "stringer -type Kind kind.go"; DO NOT EDIT.
 
 package token
 
@@ -16,36 +16,37 @@ func _() {
 	_ = x[Comma-5]
 	_ = x[Whitespace-6]
 	_ = x[Comment-7]
-	_ = x[Eq-8]
-	_ = x[Neq-9]
-	_ = x[Lt-10]
-	_ = x[Gt-11]
-	_ = x[LtEq-12]
-	_ = x[GtEq-13]
-	_ = x[Plus-14]
-	_ = x[Minus-15]
-	_ = x[Mult-16]
-	_ = x[Div-17]
-	_ = x[Caret-18]
-	_ = x[Mod-19]
-	_ = x[LParen-20]
-	_ = x[RParen-21]
-	_ = x[Period-22]
-	_ = x[Colon-23]
-	_ = x[DoubleColon-24]
-	_ = x[Semicolon-25]
-	_ = x[Backslash-26]
-	_ = x[LBracket-27]
-	_ = x[RBracket-28]
-	_ = x[Ampersand-29]
-	_ = x[LBrace-30]
-	_ = x[RBrace-31]
-	_ = x[ILLEGAL-32]
+	_ = x[MultilineComment-8]
+	_ = x[Eq-9]
+	_ = x[Neq-10]
+	_ = x[Lt-11]
+	_ = x[Gt-12]
+	_ = x[LtEq-13]
+	_ = x[GtEq-14]
+	_ = x[Plus-15]
+	_ = x[Minus-16]
+	_ = x[Mult-17]
+	_ = x[Div-18]
+	_ = x[Caret-19]
+	_ = x[Mod-20]
+	_ = x[LParen-21]
+	_ = x[RParen-22]
+	_ = x[Period-23]
+	_ = x[Colon-24]
+	_ = x[DoubleColon-25]
+	_ = x[Semicolon-26]
+	_ = x[Backslash-27]
+	_ = x[LBracket-28]
+	_ = x[RBracket-29]
+	_ = x[Ampersand-30]
+	_ = x[LBrace-31]
+	_ = x[RBrace-32]
+	_ = x[ILLEGAL-33]
 }
 
-const _Kind_name = "SQLKeywordNumberCharSingleQuotedStringNationalStringLiteralCommaWhitespaceCommentEqNeqLtGtLtEqGtEqPlusMinusMultDivCaretModLParenRParenPeriodColonDoubleColonSemicolonBackslashLBracketRBracketAmpersandLBraceRBraceILLEGAL"
+const _Kind_name = "SQLKeywordNumberCharSingleQuotedStringNationalStringLiteralCommaWhitespaceCommentMultilineCommentEqNeqLtGtLtEqGtEqPlusMinusMultDivCaretModLParenRParenPeriodColonDoubleColonSemicolonBackslashLBracketRBracketAmpersandLBraceRBraceILLEGAL"
 
-var _Kind_index = [...]uint8{0, 10, 16, 20, 38, 59, 64, 74, 81, 83, 86, 88, 90, 94, 98, 102, 107, 111, 114, 119, 122, 128, 134, 140, 145, 156, 165, 174, 182, 190, 199, 205, 211, 218}
+var _Kind_index = [...]uint8{0, 10, 16, 20, 38, 59, 64, 74, 81, 97, 99, 102, 104, 106, 110, 114, 118, 123, 127, 130, 135, 138, 144, 150, 156, 161, 172, 181, 190, 198, 206, 215, 221, 227, 234}
 
 func (i Kind) String() string {
 	if i < 0 || i >= Kind(len(_Kind_index)-1) {
diff --git a/token/lexer.go b/token/lexer.go
index e1af7a1..e9c2fca 100644
--- a/token/lexer.go
+++ b/token/lexer.go
@@ -274,7 +274,7 @@ func (t *Tokenizer) next() (Kind, interface{}, error) {
 			if err != nil {
 				return ILLEGAL, str, err
 			}
-			return Comment, str, nil
+			return MultilineComment, str, nil
 		}
 		t.Col++
 		return Div, "/", nil
diff --git a/token/lexer_test.go b/token/lexer_test.go
index 474fe8b..8ddf7c0 100644
--- a/token/lexer_test.go
+++ b/token/lexer_test.go
@@ -462,7 +462,7 @@ multiline
 comment */`,
 			out: []*Token{
 				{
-					Kind:  Comment,
+					Kind:  MultilineComment,
 					Value: " test\nmultiline\ncomment ",
 					From:  Pos{Line: 0, Col: 0},
 					To:    Pos{Line: 2, Col: 10},

From cc185335648b625401abdde8e16f4a3af1190efe Mon Sep 17 00:00:00 2001
From: Yasuhiro Matsumoto
Date: Sat, 30 Dec 2023 13:18:00 +0900
Subject: [PATCH 3/4] remove debug comment

---
 parser/parser.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/parser/parser.go b/parser/parser.go
index 48a6fb8..84173b3 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -596,9 +596,6 @@ func parseIdentifierList(reader *astutil.NodeReader) ast.Node {
 		endIndex = peekIndex
 
 		tmpReader.NextNode(true)
-		//for tmpReader.PeekNodeIs(true, commentInfixMatcher) {
-		//tmpReader.NextNode(true)
-		//}
 		if !tmpReader.PeekNodeIs(true, identifierListInfixMatcher) {
 			break
 		}

From d9d5dc89a8635c9895ab47d9f29922022d677cf6 Mon Sep 17 00:00:00 2001
From: Yasuhiro Matsumoto
Date: Sat, 30 Dec 2023 15:10:38 +0900
Subject: [PATCH 4/4] fix lint error

---
 dialect/keyword.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dialect/keyword.go b/dialect/keyword.go
index beb1328..b5cc38a 100644
--- a/dialect/keyword.go
+++ b/dialect/keyword.go
@@ -436,6 +436,8 @@ func DataBaseFunctions(driver DatabaseDriver) []string {
 		return []string{}
 	case DatabaseDriverOracle:
 		return oracleReservedWords
+	case DatabaseDriverH2:
+		return []string{}
 	case DatabaseDriverVertica:
 		return verticaReservedWords
 	default:
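Note for reviewers: the behavioural core of the series is the comment round-tripping added in PATCH 2. The lexer stores only the comment body (see the lexer_test.go hunk, where Value is " test\nmultiline\ncomment "), so ast.SQLToken now re-attaches the "--" prefix for line comments and the "/*" and "*/" delimiters for multiline comments when a token is rendered. The standalone Go sketch below mirrors that switch; the Kind constants here are simplified stand-ins rather than the real token package, so treat it as an illustration of the intended output, not as code from the series.

// Standalone sketch of the comment-rendering rule introduced in the ast.go
// hunk of PATCH 2: line comments get their "--" prefix back, multiline
// comments get their "/*" and "*/" delimiters back, everything else passes
// through unchanged. This file does not import the sqls packages.
package main

import "fmt"

type Kind int

const (
	Comment Kind = iota
	MultilineComment
	Other
)

// renderTokenValue mirrors the string case of the SQLToken switch after PATCH 2.
func renderTokenValue(kind Kind, v string) string {
	switch kind {
	case Comment:
		return "--" + v
	case MultilineComment:
		return "/*" + v + "*/"
	default:
		return v
	}
}

func main() {
	fmt.Println(renderTokenValue(Comment, " foo"))           // prints: -- foo
	fmt.Println(renderTokenValue(MultilineComment, " foo ")) // prints: /* foo */
	fmt.Println(renderTokenValue(Other, "bar"))              // prints: bar
}

The printed strings match what the updated parser tests assert, e.g. testItem(t, list[0], "-- foo") and the "/* foo */bar" statement text.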