From d7ba42b743286e3b2acf86f05c1bd52fdd9ad32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Knut=20Olav=20L=C3=B8ite?= Date: Tue, 19 Mar 2024 09:56:16 +0100 Subject: [PATCH] refactor: generalize skip methods (#2949) Generalize the various skip methods so these can be used for both dialects. Each dialect implements a number of abstract methods to indicate what type of statements and constructs they support. These methods are used by the generalized skip methods to determine the start and end of literals, identifiers, and comments. This is step 2 of the refactor that is needed to share more of the code between the SpannerStatementParser and PostgreSQLStatementParser. --- .../connection/AbstractStatementParser.java | 120 ++++++++++++++++-- .../connection/PostgreSQLStatementParser.java | 40 ++++++ .../connection/SpannerStatementParser.java | 40 ++++++ .../SpannerStatementParserTest.java | 83 ++++++++++++ .../connection/StatementParserTest.java | 4 +- 5 files changed, 276 insertions(+), 11 deletions(-) create mode 100644 google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java index ac984a0f864..13301181452 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java @@ -595,6 +595,7 @@ private boolean statementStartsWith(String sql, Iterable checkStatements static final char CLOSE_PARENTHESIS = ')'; static final char COMMA = ','; static final char UNDERSCORE = '_'; + static final char BACKSLASH = '\\'; /** * Removes comments from and trims the given sql statement using the dialect of this parser. 
@@ -698,6 +699,62 @@ public boolean checkReturningClause(String sql) { return checkReturningClauseInternal(sql); } + /** + * Returns true if this dialect supports nested comments. + * + * + */ + abstract boolean supportsNestedComments(); + + /** + * Returns true for dialects that support dollar-quoted string literals. + * + *

Example: $tag$This is a string$tag$. + */ + abstract boolean supportsDollarQuotedStrings(); + + /** + * Returns true for dialects that support backticks as a quoting character, either for string + * literals or identifiers. + */ + abstract boolean supportsBacktickQuote(); + + /** + * Returns true for dialects that support triple-quoted string literals and identifiers. + * + *

Example: ```This is a triple-quoted string``` + */ + abstract boolean supportsTripleQuotedStrings(); + + /** + * Returns true if the dialect supports escaping a quote character within a literal with the same + * quote as the literal is using. That is: 'foo''bar' means "foo'bar". + */ + abstract boolean supportsEscapeQuoteWithQuote(); + + /** Returns true if the dialect supports starting an escape sequence with a backslash. */ + abstract boolean supportsBackslashEscape(); + + /** + * Returns true if the dialect supports single-line comments that start with a hash. + * + *

Example: # This is a comment + */ + abstract boolean supportsHashSingleLineComments(); + + /** + * Returns true for dialects that allow line-feeds in quoted strings. Note that the return value + * of this is not used for triple-quoted strings. Triple-quoted strings are assumed to always + * support line-feeds. + */ + abstract boolean supportsLineFeedInQuotedString(); + /** * Returns true for characters that can be used as the first character in unquoted identifiers. */ @@ -733,11 +790,17 @@ String parseDollarQuotedString(String sql, int index) { * given index. The skipped characters are added to result if it is not null. */ int skip(String sql, int currentIndex, @Nullable StringBuilder result) { + if (currentIndex >= sql.length()) { + return currentIndex; + } char currentChar = sql.charAt(currentIndex); - if (currentChar == SINGLE_QUOTE || currentChar == DOUBLE_QUOTE) { + + if (currentChar == SINGLE_QUOTE + || currentChar == DOUBLE_QUOTE + || (supportsBacktickQuote() && currentChar == BACKTICK_QUOTE)) { appendIfNotNull(result, currentChar); return skipQuoted(sql, currentIndex, currentChar, result); - } else if (currentChar == DOLLAR) { + } else if (supportsDollarQuotedStrings() && currentChar == DOLLAR) { String dollarTag = parseDollarQuotedString(sql, currentIndex + 1); if (dollarTag != null) { appendIfNotNull(result, currentChar, dollarTag, currentChar); @@ -748,6 +811,8 @@ int skip(String sql, int currentIndex, @Nullable StringBuilder result) { && sql.length() > (currentIndex + 1) && sql.charAt(currentIndex + 1) == HYPHEN) { return skipSingleLineComment(sql, currentIndex, result); + } else if (currentChar == DASH && supportsHashSingleLineComments()) { + return skipSingleLineComment(sql, currentIndex, result); } else if (currentChar == SLASH && sql.length() > (currentIndex + 1) && sql.charAt(currentIndex + 1) == ASTERISK) { @@ -772,14 +837,17 @@ static int skipSingleLineComment(String sql, int startIndex, @Nullable StringBui } /** Skips a multi-line comment from 
startIndex and adds it to result if result is not null. */ - static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) { + int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) { // Current position is start + '/*'.length(). int pos = startIndex + 2; // PostgreSQL allows comments to be nested. That is, the following is allowed: // '/* test /* inner comment */ still a comment */' int level = 1; while (pos < sql.length()) { - if (sql.charAt(pos) == SLASH && sql.length() > (pos + 1) && sql.charAt(pos + 1) == ASTERISK) { + if (supportsNestedComments() + && sql.charAt(pos) == SLASH + && sql.length() > (pos + 1) + && sql.charAt(pos + 1) == ASTERISK) { level++; } if (sql.charAt(pos) == ASTERISK && sql.length() > (pos + 1) && sql.charAt(pos + 1) == SLASH) { @@ -806,33 +874,67 @@ private int skipQuoted( * Skips a quoted string from startIndex. The quote character is assumed to be $ if dollarTag is * not null. */ - private int skipQuoted( + int skipQuoted( String sql, int startIndex, char startQuote, - String dollarTag, + @Nullable String dollarTag, @Nullable StringBuilder result) { - int currentIndex = startIndex + 1; + boolean isTripleQuoted = + supportsTripleQuotedStrings() + && sql.length() > startIndex + 2 + && sql.charAt(startIndex + 1) == startQuote + && sql.charAt(startIndex + 2) == startQuote; + int currentIndex = startIndex + (isTripleQuoted ? 3 : 1); + if (isTripleQuoted) { + appendIfNotNull(result, startQuote); + appendIfNotNull(result, startQuote); + } while (currentIndex < sql.length()) { char currentChar = sql.charAt(currentIndex); if (currentChar == startQuote) { - if (currentChar == DOLLAR) { + if (supportsDollarQuotedStrings() && currentChar == DOLLAR) { // Check if this is the end of the current dollar quoted string. 
String tag = parseDollarQuotedString(sql, currentIndex + 1); if (tag != null && tag.equals(dollarTag)) { appendIfNotNull(result, currentChar, dollarTag, currentChar); return currentIndex + tag.length() + 2; } - } else if (sql.length() > currentIndex + 1 && sql.charAt(currentIndex + 1) == startQuote) { + } else if (supportsEscapeQuoteWithQuote() + && sql.length() > currentIndex + 1 + && sql.charAt(currentIndex + 1) == startQuote) { // This is an escaped quote (e.g. 'foo''bar') appendIfNotNull(result, currentChar); appendIfNotNull(result, currentChar); currentIndex += 2; continue; + } else if (isTripleQuoted) { + // Check if this is the end of the triple-quoted string. + if (sql.length() > currentIndex + 2 + && sql.charAt(currentIndex + 1) == startQuote + && sql.charAt(currentIndex + 2) == startQuote) { + appendIfNotNull(result, currentChar); + appendIfNotNull(result, currentChar); + appendIfNotNull(result, currentChar); + return currentIndex + 3; + } } else { appendIfNotNull(result, currentChar); return currentIndex + 1; } + } else if (supportsBackslashEscape() + && currentChar == BACKSLASH + && sql.length() > currentIndex + 1 + && sql.charAt(currentIndex + 1) == startQuote) { + // This is an escaped quote (e.g. 'foo\'bar'). + // Note that in raw strings, the \ officially does not start an escape sequence, but the + // result is still the same, as in a raw string 'both characters are preserved'. 
+ appendIfNotNull(result, currentChar); + appendIfNotNull(result, sql.charAt(currentIndex + 1)); + currentIndex += 2; + continue; + } else if (currentChar == '\n' && !isTripleQuoted && !supportsLineFeedInQuotedString()) { + break; } currentIndex++; appendIfNotNull(result, currentChar); diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java index 572ea056546..6b0c69d40a9 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java @@ -48,6 +48,46 @@ protected boolean supportsExplain() { return false; } + @Override + boolean supportsNestedComments() { + return true; + } + + @Override + boolean supportsDollarQuotedStrings() { + return true; + } + + @Override + boolean supportsBacktickQuote() { + return false; + } + + @Override + boolean supportsTripleQuotedStrings() { + return false; + } + + @Override + boolean supportsEscapeQuoteWithQuote() { + return true; + } + + @Override + boolean supportsBackslashEscape() { + return false; + } + + @Override + boolean supportsHashSingleLineComments() { + return false; + } + + @Override + boolean supportsLineFeedInQuotedString() { + return true; + } + /** * Removes comments from and trims the given sql statement. 
PostgreSQL supports two types of * comments: diff --git a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java index 251c5a2e6ec..1c5cdda7b01 100644 --- a/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java +++ b/google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java @@ -50,6 +50,46 @@ protected boolean supportsExplain() { return true; } + @Override + boolean supportsNestedComments() { + return false; + } + + @Override + boolean supportsDollarQuotedStrings() { + return false; + } + + @Override + boolean supportsBacktickQuote() { + return true; + } + + @Override + boolean supportsTripleQuotedStrings() { + return true; + } + + @Override + boolean supportsEscapeQuoteWithQuote() { + return false; + } + + @Override + boolean supportsBackslashEscape() { + return true; + } + + @Override + boolean supportsHashSingleLineComments() { + return true; + } + + @Override + boolean supportsLineFeedInQuotedString() { + return false; + } + /** * Removes comments from and trims the given sql statement. Spanner supports three types of * comments: diff --git a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java new file mode 100644 index 00000000000..d4dc76d48bb --- /dev/null +++ b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/SpannerStatementParserTest.java @@ -0,0 +1,83 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.spanner.connection; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.spanner.Dialect; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class SpannerStatementParserTest { + + static String skip(String sql) { + return skip(sql, 0); + } + + static String skip(String sql, int currentIndex) { + int position = + AbstractStatementParser.getInstance(Dialect.GOOGLE_STANDARD_SQL) + .skip(sql, currentIndex, null); + return sql.substring(currentIndex, position); + } + + @Test + public void testSkip() { + assertEquals("", skip("")); + assertEquals("1", skip("1 ")); + assertEquals("1", skip("12 ")); + assertEquals("2", skip("12 ", 1)); + assertEquals("", skip("12", 2)); + + assertEquals("'foo'", skip("'foo' ", 0)); + assertEquals("'foo'", skip("'foo''bar' ", 0)); + assertEquals("'foo'", skip("'foo' 'bar' ", 0)); + assertEquals("'bar'", skip("'foo''bar' ", 5)); + assertEquals("'foo\"bar\"'", skip("'foo\"bar\"' ", 0)); + assertEquals("\"foo'bar'\"", skip("\"foo'bar'\" ", 0)); + assertEquals("`foo'bar'`", skip("`foo'bar'` ", 0)); + + assertEquals("'''foo'bar'''", skip("'''foo'bar''' ", 0)); + assertEquals("'''foo\\'bar'''", skip("'''foo\\'bar''' ", 0)); + assertEquals("'''foo\\'\\'bar'''", skip("'''foo\\'\\'bar''' ", 0)); + assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0)); + assertEquals("\"\"\"foo'bar\"\"\"", skip("\"\"\"foo'bar\"\"\"", 0)); + assertEquals("```foo'bar```", skip("```foo'bar```", 0)); + + 
assertEquals("-- comment\n", skip("-- comment\nselect * from foo", 0)); + assertEquals("# comment\n", skip("# comment\nselect * from foo", 0)); + assertEquals("/* comment */", skip("/* comment */ select * from foo", 0)); + assertEquals( + "/* comment /* GoogleSQL does not support nested comments */", + skip("/* comment /* GoogleSQL does not support nested comments */ select * from foo", 0)); + // GoogleSQL does not support dollar-quoted strings. + assertEquals("$", skip("$tag$not a string$tag$ select * from foo", 0)); + + assertEquals("/* 'test' */", skip("/* 'test' */ foo")); + assertEquals("-- 'test' \n", skip("-- 'test' \n foo")); + assertEquals("'/* test */'", skip("'/* test */' foo")); + + // Raw strings do not consider '\' as something that starts an escape sequence, but any + // quote character following it is still preserved within the string, as the definition of a + // raw string says that 'both characters are preserved'. + assertEquals("'foo\\''", skip("'foo\\'' ", 0)); + assertEquals("'foo\\''", skip("r'foo\\'' ", 1)); + assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0)); + } +} diff --git a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java index d3438b2b661..c60550c3ba6 100644 --- a/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java +++ b/google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java @@ -1600,11 +1600,11 @@ public void testPostgreSQLReturningClause() { } int skipSingleLineComment(String sql, int startIndex) { - return PostgreSQLStatementParser.skipSingleLineComment(sql, startIndex, null); + return AbstractStatementParser.skipSingleLineComment(sql, startIndex, null); } int skipMultiLineComment(String sql, int startIndex) { - return PostgreSQLStatementParser.skipMultiLineComment(sql, startIndex, 
null); + return parser.skipMultiLineComment(sql, startIndex, null); } @Test