@@ -89,6 +89,9 @@ enum owl_error {
8989 // The input is valid so far, but incomplete; more tokens could be added to
9090 // complete it.
9191 ERROR_MORE_INPUT_NEEDED ,
92+
93+ // A call to a system allocator returned NULL.
94+ ERROR_ALLOCATION_FAILURE ,
9295};
9396// Returns an error code, or ERROR_NONE if there wasn't an error.
9497// The error_range parameter can be null.
@@ -252,6 +255,10 @@ struct parsed_integer parsed_integer_get(struct owl_ref);
252255#else
253256#define OWL_DONT_INLINE
254257#endif
258+ // Maximum number of tokens stored in a single token run.  It sizes the
// per-run tokens/states arrays (and, doubled, the lengths array), so it
// controls how much memory each run allocation requests.
// This can be overridden to reduce the amount Owl allocates at once: define
// OWL_TOKEN_RUN_LENGTH before this point and the default below is skipped.
// NOTE(review): per-run counts (number_of_tokens, lengths_size) are held in
// uint16_t variables, so an override presumably has to stay small enough for
// those counters -- confirm before raising it above the default.
259+ #ifndef OWL_TOKEN_RUN_LENGTH
260+ #define OWL_TOKEN_RUN_LENGTH 4096
261+ #endif
255262
256263struct owl_tree {
257264 const char * string ;
@@ -272,7 +279,7 @@ static inline uint64_t read_tree(size_t *offset, struct owl_tree *tree) {
272279 uint8_t * parse_tree = tree -> parse_tree ;
273280 size_t parse_tree_size = tree -> parse_tree_size ;
274281 size_t i = * offset ;
275- if (i + RESERVATION_AMOUNT >= parse_tree_size )
282+ if (i + RESERVATION_AMOUNT > parse_tree_size )
276283 return 0 ;
277284 uint64_t result = 0 ;
278285 int shift_amount = 0 ;
@@ -289,7 +296,7 @@ static inline uint64_t read_tree(size_t *offset, struct owl_tree *tree) {
289296static bool grow_tree (struct owl_tree * tree , size_t size )
290297{
291298 size_t n = tree -> parse_tree_size ;
292- while (n < size || n < 4096 )
299+ while (n < size )
293300 n = (n + 1 ) * 3 / 2 ;
294301 uint8_t * parse_tree = realloc (tree -> parse_tree , n );
295302 if (!parse_tree )
@@ -935,6 +942,9 @@ static void check_for_error(struct owl_tree *tree) {
935942 case ERROR_MORE_INPUT_NEEDED :
936943 fprintf (stderr , "more input needed\n" );
937944 break ;
945+ case ERROR_ALLOCATION_FAILURE :
946+ fprintf (stderr , "allocation failure\n" );
947+ break ;
938948 default :
939949 break ;
940950 }
@@ -1345,9 +1355,9 @@ struct owl_token_run {
13451355 struct owl_token_run * prev ;
13461356 uint16_t number_of_tokens ;
13471357 uint16_t lengths_size ;
1348- uint8_t lengths [4096 * 2 ];
1349- uint32_t tokens [4096 ];
1350- uint32_t states [4096 ];
1358+ uint8_t lengths [OWL_TOKEN_RUN_LENGTH * 2 ];
1359+ uint32_t tokens [OWL_TOKEN_RUN_LENGTH ];
1360+ uint32_t states [OWL_TOKEN_RUN_LENGTH ];
13511361};
13521362struct owl_default_tokenizer {
13531363 const char * text ;
@@ -1357,6 +1367,7 @@ struct owl_default_tokenizer {
13571367 uint32_t number_token ;
13581368 uint32_t string_token ;
13591369 void * info ;
1370+ bool allocation_failed ;
13601371};
13611372static bool char_is_whitespace (char c ) {
13621373 switch (c ) {
@@ -1422,14 +1433,18 @@ static size_t decode_token_length(struct owl_token_run *run, uint16_t *length_of
14221433 return length ;
14231434}
14241435static bool OWL_DONT_INLINE owl_default_tokenizer_advance (struct owl_default_tokenizer * tokenizer , struct owl_token_run * * previous_run ) {
1436+ if (tokenizer -> text [tokenizer -> offset ] == '\0' ) return false;
14251437 struct owl_token_run * run = malloc (sizeof (struct owl_token_run ));
1426- if (!run ) return false;
1438+ if (!run ) {
1439+ tokenizer -> allocation_failed = true;
1440+ return false;
1441+ }
14271442 uint16_t number_of_tokens = 0 ;
14281443 uint16_t lengths_size = 0 ;
14291444 const char * text = tokenizer -> text ;
14301445 size_t whitespace = tokenizer -> whitespace ;
14311446 size_t offset = tokenizer -> offset ;
1432- while (number_of_tokens < 4096 ) {
1447+ while (number_of_tokens < OWL_TOKEN_RUN_LENGTH ) {
14331448 char c = text [offset ];
14341449 if (c == '\0' ) break ;
14351450 size_t whitespace_length = read_whitespace (text + offset , tokenizer -> info );
@@ -1554,7 +1569,7 @@ static bool OWL_DONT_INLINE owl_default_tokenizer_advance(struct owl_default_tok
15541569 free (run );
15551570 return false;
15561571 }
1557- if (end_token && number_of_tokens + 1 >= 4096 ) break ;
1572+ if (end_token && number_of_tokens + 1 >= OWL_TOKEN_RUN_LENGTH ) break ;
15581573 if (!encode_token_length (run , & lengths_size , token_length , whitespace )) break ;
15591574 if (token == 27 ) {
15601575 write_identifier_token (offset , token_length , tokenizer -> info );
@@ -1585,8 +1600,11 @@ static bool OWL_DONT_INLINE owl_default_tokenizer_advance(struct owl_default_tok
15851600 for (i = 0 ;
15861601 i < content_length ;
15871602 ++ i ) {
1588- if (text [content_offset + i ] == '\\' ) i ++ ;
1589- unescaped [j ++ ] = ESCAPE_CHAR (text [content_offset + i ], tokenizer -> info );
1603+ if (text [content_offset + i ] == '\\' && i + 1 < content_length ) {
1604+ i ++ ;
1605+ unescaped [j ++ ] = ESCAPE_CHAR (text [content_offset + i ], tokenizer -> info );
1606+ }
1607+ else unescaped [j ++ ] = text [content_offset + i ];
15901608 }
15911609 string = unescaped ;
15921610 }
@@ -1600,7 +1618,7 @@ static bool OWL_DONT_INLINE owl_default_tokenizer_advance(struct owl_default_tok
16001618 number_of_tokens ++ ;
16011619 offset += token_length ;
16021620 if (end_token ) {
1603- assert (number_of_tokens < 4096 );
1621+ assert (number_of_tokens < OWL_TOKEN_RUN_LENGTH );
16041622 run -> tokens [number_of_tokens ] = 4294967295U ;
16051623 number_of_tokens ++ ;
16061624 }
@@ -2972,6 +2990,11 @@ static void parse_string(struct owl_tree *tree, const char *string) {
29722990 }
29732991 struct fill_run_state top = c .stack [c .top_index ];
29742992 free (c .stack );
2993+ if (tokenizer .allocation_failed ) {
2994+ tree -> error = ERROR_ALLOCATION_FAILURE ;
2995+ free_token_runs (& token_run );
2996+ return ;
2997+ }
29752998 if (string [tokenizer .offset ] != '\0' ) {
29762999 tree -> error = ERROR_INVALID_TOKEN ;
29773000 estimate_next_token_range (& tokenizer , & tree -> error_range .start , & tree -> error_range .end );
0 commit comments