@@ -144,6 +144,15 @@ impl From<UnicodeError> for ParsingError {
     }
 }
 
+enum Token {
+    Operator(String),
+    String_(String),
+    Integer(AppropriateInt),
+    Float(f64),
+    Boolean(bool),
+    Null,
+}
+
 #[pymethods]
 impl RustTokenizer {
     #[new]
@@ -196,7 +205,7 @@ impl RustTokenizer {
             }
             match slf.c {
                 Some(c) => {
-                    match RustTokenizer::process_char(slf.borrow_mut(), py, Char(c)) {
+                    match RustTokenizer::process_char_py(slf.borrow_mut(), py, Char(c)) {
                         Ok(tok) => {
                             now_token = tok;
                             slf.state = slf.next_state.clone();
@@ -218,7 +227,7 @@ impl RustTokenizer {
                 }
             }
         }
-        match RustTokenizer::process_char(slf.borrow_mut(), py, Eof) {
+        match RustTokenizer::process_char_py(slf.borrow_mut(), py, Eof) {
             Ok(tok) => {
                 now_token = tok;
             }
@@ -283,11 +292,24 @@ impl RustTokenizer {
 }
 
 impl RustTokenizer {
-    fn process_char<'a>(
+    fn process_char_py<'a>(
         slf: &mut Self,
         py: Python<'_>,
         c: CharOrEof,
     ) -> Result<Option<(TokenType, Option<PyObject>)>, ParsingError> {
+        match RustTokenizer::process_char(slf.borrow_mut(), c) {
+            Ok(Some(Token::Operator(s))) => Ok(Some((TokenType::Operator, Some(s.into_py(py))))),
+            Ok(Some(Token::String_(s))) => Ok(Some((TokenType::String_, Some(s.into_py(py))))),
+            Ok(Some(Token::Integer(n))) => Ok(Some((TokenType::Number, Some(n.into_py(py))))),
+            Ok(Some(Token::Float(f))) => Ok(Some((TokenType::Number, Some(f.into_py(py))))),
+            Ok(Some(Token::Boolean(b))) => Ok(Some((TokenType::Boolean, Some(b.into_py(py))))),
+            Ok(Some(Token::Null)) => Ok(Some((TokenType::Null, None))),
+            Ok(None) => Ok(None),
+            Err(e) => Err(e),
+        }
+    }
+
+    fn process_char<'a>(slf: &mut Self, c: CharOrEof) -> Result<Option<Token>, ParsingError> {
         slf.advance = true;
         slf.next_state = slf.state.clone();
         let mut now_token = None;
@@ -298,27 +320,27 @@ impl RustTokenizer {
             State::Whitespace => match c {
                 Char('{') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some("{".into_py(py))));
+                    now_token = Some(Token::Operator("{".to_owned()));
                 }
                 Char('}') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some("}".into_py(py))));
+                    now_token = Some(Token::Operator("}".to_owned()));
                 }
                 Char('[') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some("[".into_py(py))));
+                    now_token = Some(Token::Operator("[".to_owned()));
                 }
                 Char(']') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some("]".into_py(py))));
+                    now_token = Some(Token::Operator("]".to_owned()));
                 }
                 Char(',') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some(",".into_py(py))));
+                    now_token = Some(Token::Operator(",".to_owned()));
                 }
                 Char(':') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::Operator, Some(":".into_py(py))));
+                    now_token = Some(Token::Operator(":".to_owned()));
                 }
                 Char('"') => {
                     slf.next_state = State::String_;
@@ -370,7 +392,7 @@ impl RustTokenizer {
                     slf.completed = true;
                     match AppropriateInt::from_str(&slf.token) {
                         Ok(parsed_num) => {
-                            now_token = Some((TokenType::Number, Some(parsed_num.into_py(py))));
+                            now_token = Some(Token::Integer(parsed_num));
                         }
                         Err(ParseIntError::General(e)) => {
                             return Err(ParsingError::InvalidJson(format!(
@@ -403,7 +425,7 @@ impl RustTokenizer {
                 _ if is_delimiter(c) => {
                     slf.next_state = State::Whitespace;
                     slf.completed = true;
-                    now_token = Some((TokenType::Number, Some(0.into_py(py))));
+                    now_token = Some(Token::Integer(AppropriateInt::Normal(0)));
                     slf.advance = false;
                 }
                 _ => {
@@ -444,10 +466,7 @@ impl RustTokenizer {
                 }
                 _ if is_delimiter(c) => {
                     slf.completed = true;
-                    now_token = Some((
-                        TokenType::Number,
-                        Some(slf.token.parse::<f64>()?.into_py(py)),
-                    ));
+                    now_token = Some(Token::Float(slf.token.parse::<f64>()?));
                     slf.next_state = State::Whitespace;
                     slf.advance = false;
                 }
@@ -467,10 +486,7 @@ impl RustTokenizer {
                 }
                 _ if is_delimiter(c) => {
                     slf.completed = true;
-                    now_token = Some((
-                        TokenType::Number,
-                        Some(slf.token.parse::<f64>()?.into_py(py)),
-                    ));
+                    now_token = Some(Token::Float(slf.token.parse::<f64>()?));
                     slf.next_state = State::Whitespace;
                     slf.advance = false;
                 }
@@ -525,7 +541,7 @@ impl RustTokenizer {
                 Char('e') => {
                     slf.next_state = State::Whitespace;
                     slf.completed = true;
-                    now_token = Some((TokenType::Boolean, Some(false.into_py(py))));
+                    now_token = Some(Token::Boolean(false));
                 }
                 _ => {
                     return Err(ParsingError::InvalidJson(format!(
@@ -557,7 +573,7 @@ impl RustTokenizer {
                 Char('e') => {
                     slf.next_state = State::Whitespace;
                     slf.completed = true;
-                    now_token = Some((TokenType::Boolean, Some(true.into_py(py))));
+                    now_token = Some(Token::Boolean(true));
                 }
                 _ => {
                     return Err(ParsingError::InvalidJson(format!(
@@ -589,7 +605,7 @@ impl RustTokenizer {
                 Char('l') => {
                     slf.next_state = State::Whitespace;
                     slf.completed = true;
-                    now_token = Some((TokenType::Null, None));
+                    now_token = Some(Token::Null);
                 }
                 _ => {
                     return Err(ParsingError::InvalidJson(format!(
@@ -600,7 +616,7 @@ impl RustTokenizer {
             State::String_ => match c {
                 Char('\"') => {
                     slf.completed = true;
-                    now_token = Some((TokenType::String_, Some(slf.token.clone().into_py(py))));
+                    now_token = Some(Token::String_(slf.token.clone()));
                     slf.next_state = State::StringEnd;
                 }
                 Char('\\') => {