Skip to content

Commit 81ec3e0

Browse files
authored
refactor: generalize skip methods (googleapis#2949)
Generalize the various skip methods so these can be used for both dialects. Each dialect implements a number of abstract methods to indicate what type of statements and constructs they support. These methods are used by the generalized skip methods to determine the start and end of literals, identifiers, and comments. This is step 2 of the refactor that is needed to share more of the code between the SpannerStatementParser and PostgreSQLStatementParser.
1 parent 6e937ab commit 81ec3e0

File tree

5 files changed

+276
-11
lines changed

5 files changed

+276
-11
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/AbstractStatementParser.java

Lines changed: 111 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,7 @@ private boolean statementStartsWith(String sql, Iterable<String> checkStatements
595595
static final char CLOSE_PARENTHESIS = ')';
596596
static final char COMMA = ',';
597597
static final char UNDERSCORE = '_';
598+
static final char BACKSLASH = '\\';
598599

599600
/**
600601
* Removes comments from and trims the given sql statement using the dialect of this parser.
@@ -698,6 +699,62 @@ public boolean checkReturningClause(String sql) {
698699
return checkReturningClauseInternal(sql);
699700
}
700701

702+
/**
703+
* Returns true if this dialect supports nested comments.
704+
*
705+
* <ul>
706+
* <li>This method should return false for dialects that consider this to be a valid comment:
707+
* <code>/* A comment /* still a comment *&#47;</code>.
708+
* <li>This method should return true for dialects that require all comment start sequences to
709+
* be balanced with a comment end sequence: <code>
710+
* /* A comment /* still a comment *&#47; Also still a comment *&#47;</code>.
711+
* </ul>
712+
*/
713+
abstract boolean supportsNestedComments();
714+
715+
/**
716+
* Returns true for dialects that support dollar-quoted string literals.
717+
*
718+
* <p>Example: <code>$tag$This is a string$tag$</code>.
719+
*/
720+
abstract boolean supportsDollarQuotedStrings();
721+
722+
/**
723+
* Returns true for dialects that support backticks as a quoting character, either for string
724+
* literals or identifiers.
725+
*/
726+
abstract boolean supportsBacktickQuote();
727+
728+
/**
729+
* Returns true for dialects that support triple-quoted string literals and identifiers.
730+
*
731+
* <p>Example: ```This is a triple-quoted string```
732+
*/
733+
abstract boolean supportsTripleQuotedStrings();
734+
735+
/**
736+
* Returns true if the dialect supports escaping a quote character within a literal with the same
737+
* quote as the literal is using. That is: 'foo''bar' means "foo'bar".
738+
*/
739+
abstract boolean supportsEscapeQuoteWithQuote();
740+
741+
/** Returns true if the dialect supports starting an escape sequence with a backslash. */
742+
abstract boolean supportsBackslashEscape();
743+
744+
/**
745+
* Returns true if the dialect supports single-line comments that start with a hash (#).
746+
*
747+
* <p>Example: # This is a comment
748+
*/
749+
abstract boolean supportsHashSingleLineComments();
750+
751+
/**
752+
* Returns true for dialects that allow line-feeds in quoted strings. Note that the return value
753+
* of this is not used for triple-quoted strings. Triple-quoted strings are assumed to always
754+
* support line-feeds.
755+
*/
756+
abstract boolean supportsLineFeedInQuotedString();
757+
701758
/**
702759
* Returns true for characters that can be used as the first character in unquoted identifiers.
703760
*/
@@ -733,11 +790,17 @@ String parseDollarQuotedString(String sql, int index) {
733790
* given index. The skipped characters are added to result if it is not null.
734791
*/
735792
int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
793+
if (currentIndex >= sql.length()) {
794+
return currentIndex;
795+
}
736796
char currentChar = sql.charAt(currentIndex);
737-
if (currentChar == SINGLE_QUOTE || currentChar == DOUBLE_QUOTE) {
797+
798+
if (currentChar == SINGLE_QUOTE
799+
|| currentChar == DOUBLE_QUOTE
800+
|| (supportsBacktickQuote() && currentChar == BACKTICK_QUOTE)) {
738801
appendIfNotNull(result, currentChar);
739802
return skipQuoted(sql, currentIndex, currentChar, result);
740-
} else if (currentChar == DOLLAR) {
803+
} else if (supportsDollarQuotedStrings() && currentChar == DOLLAR) {
741804
String dollarTag = parseDollarQuotedString(sql, currentIndex + 1);
742805
if (dollarTag != null) {
743806
appendIfNotNull(result, currentChar, dollarTag, currentChar);
@@ -748,6 +811,8 @@ int skip(String sql, int currentIndex, @Nullable StringBuilder result) {
748811
&& sql.length() > (currentIndex + 1)
749812
&& sql.charAt(currentIndex + 1) == HYPHEN) {
750813
return skipSingleLineComment(sql, currentIndex, result);
814+
} else if (currentChar == DASH && supportsHashSingleLineComments()) {
815+
return skipSingleLineComment(sql, currentIndex, result);
751816
} else if (currentChar == SLASH
752817
&& sql.length() > (currentIndex + 1)
753818
&& sql.charAt(currentIndex + 1) == ASTERISK) {
@@ -772,14 +837,17 @@ static int skipSingleLineComment(String sql, int startIndex, @Nullable StringBui
772837
}
773838

774839
/** Skips a multi-line comment from startIndex and adds it to result if result is not null. */
775-
static int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
840+
int skipMultiLineComment(String sql, int startIndex, @Nullable StringBuilder result) {
776841
// Current position is start + '/*'.length().
777842
int pos = startIndex + 2;
778843
// Dialects that support nested comments (e.g. PostgreSQL) allow the following:
779844
// '/* test /* inner comment */ still a comment */'
780845
int level = 1;
781846
while (pos < sql.length()) {
782-
if (sql.charAt(pos) == SLASH && sql.length() > (pos + 1) && sql.charAt(pos + 1) == ASTERISK) {
847+
if (supportsNestedComments()
848+
&& sql.charAt(pos) == SLASH
849+
&& sql.length() > (pos + 1)
850+
&& sql.charAt(pos + 1) == ASTERISK) {
783851
level++;
784852
}
785853
if (sql.charAt(pos) == ASTERISK && sql.length() > (pos + 1) && sql.charAt(pos + 1) == SLASH) {
@@ -806,33 +874,67 @@ private int skipQuoted(
806874
* Skips a quoted string from startIndex. The quote character is assumed to be $ if dollarTag is
807875
* not null.
808876
*/
809-
private int skipQuoted(
877+
int skipQuoted(
810878
String sql,
811879
int startIndex,
812880
char startQuote,
813-
String dollarTag,
881+
@Nullable String dollarTag,
814882
@Nullable StringBuilder result) {
815-
int currentIndex = startIndex + 1;
883+
boolean isTripleQuoted =
884+
supportsTripleQuotedStrings()
885+
&& sql.length() > startIndex + 2
886+
&& sql.charAt(startIndex + 1) == startQuote
887+
&& sql.charAt(startIndex + 2) == startQuote;
888+
int currentIndex = startIndex + (isTripleQuoted ? 3 : 1);
889+
if (isTripleQuoted) {
890+
appendIfNotNull(result, startQuote);
891+
appendIfNotNull(result, startQuote);
892+
}
816893
while (currentIndex < sql.length()) {
817894
char currentChar = sql.charAt(currentIndex);
818895
if (currentChar == startQuote) {
819-
if (currentChar == DOLLAR) {
896+
if (supportsDollarQuotedStrings() && currentChar == DOLLAR) {
820897
// Check if this is the end of the current dollar quoted string.
821898
String tag = parseDollarQuotedString(sql, currentIndex + 1);
822899
if (tag != null && tag.equals(dollarTag)) {
823900
appendIfNotNull(result, currentChar, dollarTag, currentChar);
824901
return currentIndex + tag.length() + 2;
825902
}
826-
} else if (sql.length() > currentIndex + 1 && sql.charAt(currentIndex + 1) == startQuote) {
903+
} else if (supportsEscapeQuoteWithQuote()
904+
&& sql.length() > currentIndex + 1
905+
&& sql.charAt(currentIndex + 1) == startQuote) {
827906
// This is an escaped quote (e.g. 'foo''bar')
828907
appendIfNotNull(result, currentChar);
829908
appendIfNotNull(result, currentChar);
830909
currentIndex += 2;
831910
continue;
911+
} else if (isTripleQuoted) {
912+
// Check if this is the end of the triple-quoted string.
913+
if (sql.length() > currentIndex + 2
914+
&& sql.charAt(currentIndex + 1) == startQuote
915+
&& sql.charAt(currentIndex + 2) == startQuote) {
916+
appendIfNotNull(result, currentChar);
917+
appendIfNotNull(result, currentChar);
918+
appendIfNotNull(result, currentChar);
919+
return currentIndex + 3;
920+
}
832921
} else {
833922
appendIfNotNull(result, currentChar);
834923
return currentIndex + 1;
835924
}
925+
} else if (supportsBackslashEscape()
926+
&& currentChar == BACKSLASH
927+
&& sql.length() > currentIndex + 1
928+
&& sql.charAt(currentIndex + 1) == startQuote) {
929+
// This is an escaped quote (e.g. 'foo\'bar').
930+
// Note that in raw strings, the \ officially does not start an escape sequence, but the
931+
// result is still the same, as in a raw string 'both characters are preserved'.
932+
appendIfNotNull(result, currentChar);
933+
appendIfNotNull(result, sql.charAt(currentIndex + 1));
934+
currentIndex += 2;
935+
continue;
936+
} else if (currentChar == '\n' && !isTripleQuoted && !supportsLineFeedInQuotedString()) {
937+
break;
836938
}
837939
currentIndex++;
838940
appendIfNotNull(result, currentChar);

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/PostgreSQLStatementParser.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,46 @@ protected boolean supportsExplain() {
4848
return false;
4949
}
5050

51+
@Override
52+
boolean supportsNestedComments() {
53+
return true;
54+
}
55+
56+
@Override
57+
boolean supportsDollarQuotedStrings() {
58+
return true;
59+
}
60+
61+
@Override
62+
boolean supportsBacktickQuote() {
63+
return false;
64+
}
65+
66+
@Override
67+
boolean supportsTripleQuotedStrings() {
68+
return false;
69+
}
70+
71+
@Override
72+
boolean supportsEscapeQuoteWithQuote() {
73+
return true;
74+
}
75+
76+
@Override
77+
boolean supportsBackslashEscape() {
78+
return false;
79+
}
80+
81+
@Override
82+
boolean supportsHashSingleLineComments() {
83+
return false;
84+
}
85+
86+
@Override
87+
boolean supportsLineFeedInQuotedString() {
88+
return true;
89+
}
90+
5191
/**
5292
* Removes comments from and trims the given sql statement. PostgreSQL supports two types of
5393
* comments:

google-cloud-spanner/src/main/java/com/google/cloud/spanner/connection/SpannerStatementParser.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,46 @@ protected boolean supportsExplain() {
5050
return true;
5151
}
5252

53+
@Override
54+
boolean supportsNestedComments() {
55+
return false;
56+
}
57+
58+
@Override
59+
boolean supportsDollarQuotedStrings() {
60+
return false;
61+
}
62+
63+
@Override
64+
boolean supportsBacktickQuote() {
65+
return true;
66+
}
67+
68+
@Override
69+
boolean supportsTripleQuotedStrings() {
70+
return true;
71+
}
72+
73+
@Override
74+
boolean supportsEscapeQuoteWithQuote() {
75+
return false;
76+
}
77+
78+
@Override
79+
boolean supportsBackslashEscape() {
80+
return true;
81+
}
82+
83+
@Override
84+
boolean supportsHashSingleLineComments() {
85+
return true;
86+
}
87+
88+
@Override
89+
boolean supportsLineFeedInQuotedString() {
90+
return false;
91+
}
92+
5393
/**
5494
* Removes comments from and trims the given sql statement. Spanner supports three types of
5595
* comments:
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.connection;
18+
19+
import static org.junit.Assert.assertEquals;
20+
21+
import com.google.cloud.spanner.Dialect;
22+
import org.junit.Test;
23+
import org.junit.runner.RunWith;
24+
import org.junit.runners.JUnit4;
25+
26+
@RunWith(JUnit4.class)
27+
public class SpannerStatementParserTest {
28+
29+
static String skip(String sql) {
30+
return skip(sql, 0);
31+
}
32+
33+
static String skip(String sql, int currentIndex) {
34+
int position =
35+
AbstractStatementParser.getInstance(Dialect.GOOGLE_STANDARD_SQL)
36+
.skip(sql, currentIndex, null);
37+
return sql.substring(currentIndex, position);
38+
}
39+
40+
@Test
41+
public void testSkip() {
42+
assertEquals("", skip(""));
43+
assertEquals("1", skip("1 "));
44+
assertEquals("1", skip("12 "));
45+
assertEquals("2", skip("12 ", 1));
46+
assertEquals("", skip("12", 2));
47+
48+
assertEquals("'foo'", skip("'foo' ", 0));
49+
assertEquals("'foo'", skip("'foo''bar' ", 0));
50+
assertEquals("'foo'", skip("'foo' 'bar' ", 0));
51+
assertEquals("'bar'", skip("'foo''bar' ", 5));
52+
assertEquals("'foo\"bar\"'", skip("'foo\"bar\"' ", 0));
53+
assertEquals("\"foo'bar'\"", skip("\"foo'bar'\" ", 0));
54+
assertEquals("`foo'bar'`", skip("`foo'bar'` ", 0));
55+
56+
assertEquals("'''foo'bar'''", skip("'''foo'bar''' ", 0));
57+
assertEquals("'''foo\\'bar'''", skip("'''foo\\'bar''' ", 0));
58+
assertEquals("'''foo\\'\\'bar'''", skip("'''foo\\'\\'bar''' ", 0));
59+
assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0));
60+
assertEquals("\"\"\"foo'bar\"\"\"", skip("\"\"\"foo'bar\"\"\"", 0));
61+
assertEquals("```foo'bar```", skip("```foo'bar```", 0));
62+
63+
assertEquals("-- comment\n", skip("-- comment\nselect * from foo", 0));
64+
assertEquals("# comment\n", skip("# comment\nselect * from foo", 0));
65+
assertEquals("/* comment */", skip("/* comment */ select * from foo", 0));
66+
assertEquals(
67+
"/* comment /* GoogleSQL does not support nested comments */",
68+
skip("/* comment /* GoogleSQL does not support nested comments */ select * from foo", 0));
69+
// GoogleSQL does not support dollar-quoted strings.
70+
assertEquals("$", skip("$tag$not a string$tag$ select * from foo", 0));
71+
72+
assertEquals("/* 'test' */", skip("/* 'test' */ foo"));
73+
assertEquals("-- 'test' \n", skip("-- 'test' \n foo"));
74+
assertEquals("'/* test */'", skip("'/* test */' foo"));
75+
76+
// Raw strings do not consider '\' as something that starts an escape sequence, but any
77+
// quote character following it is still preserved within the string, as the definition of a
78+
// raw string says that 'both characters are preserved'.
79+
assertEquals("'foo\\''", skip("'foo\\'' ", 0));
80+
assertEquals("'foo\\''", skip("r'foo\\'' ", 1));
81+
assertEquals("'''foo\\'\\'\\'bar'''", skip("'''foo\\'\\'\\'bar''' ", 0));
82+
}
83+
}

google-cloud-spanner/src/test/java/com/google/cloud/spanner/connection/StatementParserTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,11 +1600,11 @@ public void testPostgreSQLReturningClause() {
16001600
}
16011601

16021602
int skipSingleLineComment(String sql, int startIndex) {
1603-
return PostgreSQLStatementParser.skipSingleLineComment(sql, startIndex, null);
1603+
return AbstractStatementParser.skipSingleLineComment(sql, startIndex, null);
16041604
}
16051605

16061606
int skipMultiLineComment(String sql, int startIndex) {
1607-
return PostgreSQLStatementParser.skipMultiLineComment(sql, startIndex, null);
1607+
return parser.skipMultiLineComment(sql, startIndex, null);
16081608
}
16091609

16101610
@Test

0 commit comments

Comments
 (0)