@@ -880,8 +880,13 @@ multi-line-string-body := (('"' | '""')? string-character)*
880
880
string-character := '\\' (["\\bfnrts] | 'u{' hex-unicode '}') | ws-escape | [^\\"] - disallowed-literal-code-points
881
881
ws-escape := '\\' (unicode-space | newline)+
882
882
hex-digit := [0-9a-fA-F]
883
- hex-unicode := [\u{0}-\u{10FFFF}] - surrogate // Unicode Scalar Value₁₆, leading 0s allowed as long as length ≤ 6
884
- surrogate := [\u{D800}-\u{DFFF}]
883
+ hex-unicode := hex-digit{1, 6} - surrogate - above-max-scalar // Unicode Scalar Value in hexadecimal; leading 0s are allowed as long as the total length is ≤ 6
884
+ surrogate := [0]{0,2}[dD][8-9a-fA-F]hex-digit{2}
885
+ // U+D800-DFFF: D 8 00
886
+ // D F FF
887
+ above-max-scalar := [2-9a-fA-F]hex-digit{5} | [1][1-9a-fA-F]hex-digit{4}
888
+ // >U+10FFFF: >1 _____ 1 >0 ____
889
+
885
890
886
891
raw-string := '#' raw-string-quotes '#' | '#' raw-string '#'
887
892
raw-string-quotes := '"' single-line-raw-string-body '"' | '"""' newline multi-line-raw-string-body newline unicode-space* '"""'
@@ -941,8 +946,9 @@ Specifically:
941
946
string is used for escaping other single-quotes, for initiating unicode
942
947
characters using hex values (` \u{FEFF} ` ), and for escaping ` \ ` itself
943
948
(` \\ ` ).
944
- * ` * ` is used for "zero or more", ` + ` is used for "one or more", and ` ? ` is
945
- used for "zero or one". Per standard regex semantics, ` * ` and ` + ` are * greedy* ;
949
+ * ` * ` is used for "zero or more", ` + ` is used for "one or more", ` ? ` is used for "zero or one",
950
+ ` {3} ` is used for "exactly 3", and ` {0, 4} ` is used for "from 0 to 4" (inclusive range).
951
+ Per standard regex semantics, ` * ` and ` + ` are * greedy* ;
946
952
they match as many instances as possible without failing the match.
947
953
* ` *? ` (used only in raw strings) indicates a * non-greedy* match;
948
954
it matches as * few* instances as possible without failing the match.
0 commit comments