@@ -44,6 +44,13 @@ class MySQLLexer {
4444 const SQL_MODE_IGNORE_SPACE = 1 << 3 ;
4545 const SQL_MODE_NO_BACKSLASH_ESCAPES = 1 << 4 ;
4646
/**
 * Regular-expression fragment matching a MySQL unquoted identifier.
 *
 * Reference: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
 *
 * Rules:
 *   1. Allowed characters are ASCII a-z, A-Z, 0-9, $, _ and Unicode \x{0080}-\x{ffff}.
 *   2. Unquoted identifiers may begin with a digit but may not consist solely of digits.
 *
 * Notes on the pattern:
 *   - The fragment carries no delimiters or anchors; the caller wraps it
 *     (e.g. '/\G' . self::PATTERN_UNQUOTED_IDENTIFIER . '/u').
 *   - It must be compiled with the "u" modifier, because the \x{80}-\x{ffff}
 *     range is only valid in UTF-8 mode.
 *   - The character class is spelled out with explicit ASCII ranges instead of
 *     \w, and the lookahead uses [0-9] instead of \D. Under the "u" modifier
 *     PHP evaluates \w and \D with Unicode properties, which would accept
 *     letters above U+FFFF and reject identifiers starting with a non-ASCII
 *     digit such as U+0660 - both contrary to rule 1.
 *   - The negative lookahead makes the fragment refuse input that starts with
 *     an ASCII digit. Digit-leading identifiers (rule 2) are therefore NOT
 *     matched here; presumably they are tokenized by the lexer's numeric
 *     branch - verify against the caller.
 */
const PATTERN_UNQUOTED_IDENTIFIER = '(?![0-9])[A-Za-z0-9_$\x{80}-\x{ffff}]+';
53+
4754 // Constants for token types.
4855 // Operators
4956 public const EQUAL_OPERATOR = 1 ;
@@ -1129,7 +1136,11 @@ private function nextToken()
11291136 $ this ->NUMBER ();
11301137 } elseif (($ la === 'x ' || $ la === 'X ' || $ la === 'b ' || $ la === 'B ' ) && $ this ->LA (2 ) === "' " ) {
11311138 $ this ->NUMBER ();
1132- } elseif (safe_ctype_alpha ($ la )) {
1139+ } elseif (preg_match ('/\G ' . self ::PATTERN_UNQUOTED_IDENTIFIER . '/u ' , $ this ->input , $ matches , 0 , $ this ->position )) {
1140+ $ this ->text = $ matches [0 ];
1141+ $ this ->position += strlen ($ this ->text );
1142+ $ this ->c = $ this ->input [$ this ->position ] ?? null ;
1143+ $ this ->n = $ this ->input [$ this ->position + 1 ] ?? null ;
11331144 $ this ->IDENTIFIER_OR_KEYWORD ();
11341145 } elseif ($ la === null ) {
11351146 $ this ->matchEOF ();
@@ -3090,11 +3101,7 @@ protected function emitDot(): void
30903101
30913102 protected function IDENTIFIER_OR_KEYWORD ()
30923103 {
3093- // Match the longest possible keyword.
3094- while (safe_ctype_alnum ($ this ->LA (1 )) || $ this ->LA (1 ) === '_ ' || $ this ->LA (1 ) === '$ ' ) {
3095- $ this ->consume ();
3096- }
3097- $ text = strtoupper ($ this ->getText ());
3104+ $ text = strtoupper ($ this ->getText ());
30983105
30993106 // Lookup the string in the token table.
31003107 $ this ->type = self ::TOKENS [$ text ] ?? self ::IDENTIFIER ;
0 commit comments