Skip to content

Commit babf7e7

Browse files
committed
implement parser_fork.c
1 parent 587a12b commit babf7e7

File tree

3 files changed

+412
-0
lines changed

3 files changed

+412
-0
lines changed

src/cmark-gfm.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,13 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
594594
CMARK_GFM_EXPORT
595595
cmark_node *cmark_parser_finish(cmark_parser *parser);
596596

597+
/**
598+
* Create a deep copy of 'parser' and return the new parser object,
599+
* which is mainly intended for use in streaming mode. Returns NULL if
* 'parser' is NULL or the copy cannot be created.
600+
*/
601+
CMARK_GFM_EXPORT
602+
cmark_parser *cmark_parser_fork(cmark_parser *parser);
603+
597604
/** Parse a CommonMark document in 'buffer' of length 'len'.
598605
* Returns a pointer to a tree of nodes. The memory allocated for
599606
* the node tree should be released using 'cmark_node_free'

src/parser_fork.c

Lines changed: 383 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
#include <string.h>
2+
#include "parser.h"
3+
#include "node.h"
4+
#include "buffer.h"
5+
#include "chunk.h"
6+
#include "cmark-gfm.h"
7+
#include "references.h"
8+
#include "map.h"
9+
#include "syntax_extension.h"
10+
11+
static cmark_node *copy_node(cmark_mem *mem, cmark_node *src)
12+
{
13+
if (!src)
14+
return NULL;
15+
16+
cmark_node *dst = (cmark_node *)mem->calloc(1, sizeof(*dst));
17+
if (!dst)
18+
return NULL;
19+
20+
dst->type = src->type;
21+
dst->flags = src->flags;
22+
dst->start_line = src->start_line;
23+
dst->start_column = src->start_column;
24+
dst->end_line = src->end_line;
25+
dst->end_column = src->end_column;
26+
dst->internal_offset = src->internal_offset;
27+
dst->backtick_count = src->backtick_count;
28+
// dont copy extension/ancestor_extension as they may point to original parser state
29+
dst->extension = src->extension; // Copy extension reference for proper node handling
30+
dst->ancestor_extension = NULL;
31+
// dont copy user_data/user_data_free_func as they may contain callbacks to original parser
32+
dst->user_data = NULL;
33+
dst->user_data_free_func = NULL;
34+
dst->footnote = src->footnote;
35+
// dont copy parent_footnote_def as it will be set during tree construction
36+
dst->parent_footnote_def = NULL;
37+
38+
cmark_strbuf_init(mem, &dst->content, 0);
39+
if (src->content.ptr && src->content.size > 0)
40+
{
41+
cmark_strbuf_put(&dst->content, src->content.ptr, src->content.size);
42+
}
43+
44+
switch (src->type)
45+
{
46+
case CMARK_NODE_CODE_BLOCK:
47+
dst->as.code = src->as.code;
48+
if (src->as.code.info.data)
49+
{
50+
dst->as.code.info = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.code.info));
51+
}
52+
if (src->as.code.literal.data)
53+
{
54+
dst->as.code.literal = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.code.literal));
55+
}
56+
break;
57+
58+
case CMARK_NODE_HEADING:
59+
dst->as.heading = src->as.heading;
60+
break;
61+
62+
case CMARK_NODE_LIST:
63+
dst->as.list = src->as.list;
64+
break;
65+
66+
case CMARK_NODE_LINK:
67+
case CMARK_NODE_IMAGE:
68+
dst->as.link = src->as.link;
69+
if (src->as.link.url.data)
70+
{
71+
dst->as.link.url = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.link.url));
72+
}
73+
if (src->as.link.title.data)
74+
{
75+
dst->as.link.title = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.link.title));
76+
}
77+
break;
78+
79+
case CMARK_NODE_CUSTOM_BLOCK:
80+
case CMARK_NODE_CUSTOM_INLINE:
81+
dst->as.custom = src->as.custom;
82+
if (src->as.custom.on_enter.data)
83+
{
84+
dst->as.custom.on_enter = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.custom.on_enter));
85+
}
86+
if (src->as.custom.on_exit.data)
87+
{
88+
dst->as.custom.on_exit = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.custom.on_exit));
89+
}
90+
break;
91+
92+
case CMARK_NODE_HTML_BLOCK:
93+
dst->as.html_block_type = src->as.html_block_type;
94+
break;
95+
96+
case CMARK_NODE_TEXT:
97+
case CMARK_NODE_HTML_INLINE:
98+
case CMARK_NODE_CODE:
99+
case CMARK_NODE_FOOTNOTE_REFERENCE:
100+
case CMARK_NODE_FOOTNOTE_DEFINITION:
101+
dst->as.literal = src->as.literal;
102+
if (src->as.literal.data)
103+
{
104+
dst->as.literal = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.literal));
105+
}
106+
break;
107+
108+
case CMARK_NODE_ATTRIBUTE:
109+
dst->as.attribute = src->as.attribute;
110+
if (src->as.attribute.attributes.data)
111+
{
112+
dst->as.attribute.attributes = cmark_chunk_literal(cmark_chunk_to_cstr(mem, &src->as.attribute.attributes));
113+
}
114+
break;
115+
116+
default:
117+
dst->as = src->as;
118+
break;
119+
}
120+
121+
dst->parent = NULL;
122+
dst->first_child = NULL;
123+
dst->last_child = NULL;
124+
dst->prev = NULL;
125+
dst->next = NULL;
126+
127+
return dst;
128+
}
129+
130+
struct node_mapping
131+
{
132+
cmark_node *src;
133+
cmark_node *dst;
134+
struct node_mapping *next;
135+
};
136+
137+
/*
 * Record a (src, dst) pair by pushing a new entry onto the front of the
 * list referenced by 'mappings'.
 *
 * Returns 1 on success, 0 if the entry could not be allocated (in which
 * case the list is left unchanged).
 */
static int add_node_mapping(cmark_mem *mem, struct node_mapping **mappings,
                            cmark_node *src, cmark_node *dst)
{
  struct node_mapping *entry = mem->calloc(1, sizeof *entry);

  if (entry != NULL) {
    entry->src = src;
    entry->dst = dst;
    entry->next = *mappings;
    *mappings = entry;
  }

  return entry != NULL;
}
150+
151+
/*
 * Look up the copy that was recorded for 'src'.
 *
 * Performs a linear scan of the mapping list and returns the 'dst' of the
 * first entry whose 'src' pointer equals the argument, or NULL if there is
 * no such entry.
 */
static cmark_node *find_mapped_node(struct node_mapping *mappings, cmark_node *src)
{
  for (struct node_mapping *it = mappings; it != NULL; it = it->next) {
    if (it->src == src) {
      return it->dst;
    }
  }
  return NULL;
}
164+
165+
/*
 * Release every entry of a mapping list. Only the list entries themselves
 * are freed; the nodes they point to are left untouched.
 */
static void free_node_mappings(cmark_mem *mem, struct node_mapping *mappings)
{
  struct node_mapping *entry = mappings;

  while (entry != NULL) {
    struct node_mapping *following = entry->next;
    mem->free(entry);
    entry = following;
  }
}
174+
175+
/*
 * Recursively copy the subtree rooted at 'src' and record, for every node
 * copied, a (source, copy) pair in the list referenced by 'mappings'.
 *
 * The copy either succeeds as a whole or fails as a whole: if a node or a
 * mapping entry cannot be allocated, or a copied child cannot be attached
 * to its copied parent, the partially built subtree is freed and NULL is
 * returned. A silently truncated tree would otherwise leave mapping entries
 * pointing at nodes that are not part of the result.
 *
 * On a NULL return the mapping list may still hold entries whose 'dst'
 * points at already freed copies; the caller must only release the list
 * (free_node_mappings) and must not look nodes up in it.
 *
 * Returns the root of the copied subtree, or NULL if 'src' is NULL or the
 * copy failed.
 */
static cmark_node *copy_node_tree_with_mapping(cmark_mem *mem, cmark_node *src,
                                               struct node_mapping **mappings)
{
  if (!src) {
    return NULL;
  }

  cmark_node *dst = copy_node(mem, src);
  if (!dst) {
    return NULL;
  }

  if (!add_node_mapping(mem, mappings, src, dst)) {
    cmark_node_free(dst);
    return NULL;
  }

  for (cmark_node *child = src->first_child; child; child = child->next) {
    cmark_node *child_copy = copy_node_tree_with_mapping(mem, child, mappings);
    if (!child_copy) {
      /* Frees dst together with the children attached so far. */
      cmark_node_free(dst);
      return NULL;
    }

    if (!cmark_node_append_child(dst, child_copy)) {
      /* The child was not linked in, so it has to be released separately. */
      cmark_node_free(child_copy);
      cmark_node_free(dst);
      return NULL;
    }
  }

  return dst;
}
200+
201+
/*
 * Copy the subtree rooted at 'src' when the caller has no use for the
 * source-to-copy mapping: a temporary mapping list is built during the copy
 * and discarded before returning.
 *
 * Returns whatever copy_node_tree_with_mapping() returns for 'src'.
 */
static cmark_node *copy_node_tree(cmark_mem *mem, cmark_node *src)
{
  struct node_mapping *scratch = NULL;
  cmark_node *copy = copy_node_tree_with_mapping(mem, src, &scratch);

  free_node_mappings(mem, scratch);
  return copy;
}
208+
209+
/*
 * Build a new reference map holding a copy of every labelled entry of 'src'.
 *
 * Entries are re-created through cmark_reference_create() /
 * cmark_reference_create_attributes(), so the new map owns its own storage.
 * The source list is replayed from its last element to its first so that
 * the entries are inserted into the copy in the same relative order in
 * which they were inserted into 'src'.
 *
 * NOTE(review): this assumes the reference list is kept newest-first and
 * that duplicate labels are resolved by insertion order - confirm against
 * references.c / map.c. Replaying head-to-tail would reverse that order and
 * could change which of several duplicate definitions wins.
 *
 * NOTE(review): the creation functions receive already stored url/title
 * values; if they normalise or unescape their input again, values that
 * still contain escapes after the first pass may change - confirm.
 *
 * Returns the new map, or NULL if 'src' is NULL or an allocation fails.
 */
static cmark_map *copy_reference_map(cmark_mem *mem, cmark_map *src)
{
  if (!src) {
    return NULL;
  }

  cmark_map *dst = cmark_reference_map_new(mem);
  if (!dst) {
    return NULL;
  }

  dst->max_ref_size = src->max_ref_size;

  /* Count by traversal rather than trusting a stored size field. */
  size_t count = 0;
  for (cmark_map_entry *e = src->refs; e; e = e->next) {
    count++;
  }
  if (count == 0) {
    return dst;
  }

  cmark_map_entry **entries =
      (cmark_map_entry **)mem->calloc(count, sizeof(*entries));
  if (!entries) {
    cmark_map_free(dst);
    return NULL;
  }

  size_t filled = 0;
  for (cmark_map_entry *e = src->refs; e; e = e->next) {
    entries[filled++] = e;
  }

  for (size_t i = count; i-- > 0;) {
    cmark_map_entry *entry = entries[i];
    if (!entry->label) {
      continue;
    }

    /* references.c casts map entries to cmark_reference in the same way,
     * so every entry of a reference map is assumed to be one. */
    cmark_reference *ref = (cmark_reference *)entry;

    cmark_chunk label_chunk = cmark_chunk_literal((char *)entry->label);

    if (ref->is_attributes_reference) {
      cmark_chunk attributes_chunk = {0};
      if (ref->attributes.data) {
        attributes_chunk = cmark_chunk_literal((char *)ref->attributes.data);
      }
      cmark_reference_create_attributes(dst, &label_chunk, &attributes_chunk);
    } else {
      cmark_chunk url_chunk = {0};
      cmark_chunk title_chunk = {0};
      if (ref->url.data) {
        url_chunk = cmark_chunk_literal((char *)ref->url.data);
      }
      if (ref->title.data) {
        title_chunk = cmark_chunk_literal((char *)ref->title.data);
      }
      cmark_reference_create(dst, &label_chunk, &url_chunk, &title_chunk);
    }
  }

  mem->free(entries);
  return dst;
}
261+
262+
/*
 * Build a new list containing the same extension pointers as 'src', in the
 * same order. Only the list cells are new: the extension objects themselves
 * are shared with the source list, and cells whose data pointer is NULL are
 * skipped.
 *
 * Returns the head of the new list, or NULL when nothing was appended.
 */
static cmark_llist *copy_syntax_extensions(cmark_mem *mem, cmark_llist *src)
{
  cmark_llist *copy = NULL;

  for (cmark_llist *cell = src; cell != NULL; cell = cell->next) {
    cmark_syntax_extension *ext = (cmark_syntax_extension *)cell->data;
    if (ext == NULL) {
      continue;
    }
    copy = cmark_llist_append(mem, copy, ext);
  }

  return copy;
}
283+
284+
cmark_parser *cmark_parser_fork(cmark_parser *parser)
285+
{
286+
if (!parser)
287+
return NULL;
288+
289+
cmark_mem *mem = parser->mem;
290+
cmark_parser *fork = (cmark_parser *)mem->calloc(1, sizeof(*fork));
291+
if (!fork)
292+
return NULL;
293+
294+
fork->mem = parser->mem;
295+
fork->refmap = copy_reference_map(mem, parser->refmap);
296+
297+
struct node_mapping *mappings = NULL;
298+
fork->root = copy_node_tree_with_mapping(mem, parser->root, &mappings);
299+
300+
if (!fork->root)
301+
{
302+
if (fork->refmap)
303+
{
304+
cmark_map_free(fork->refmap);
305+
}
306+
mem->free(fork);
307+
free_node_mappings(mem, mappings);
308+
return NULL;
309+
}
310+
311+
if (parser->current)
312+
{
313+
fork->current = find_mapped_node(mappings, parser->current);
314+
if (!fork->current)
315+
{
316+
fork->current = fork->root;
317+
}
318+
}
319+
else
320+
{
321+
fork->current = fork->root;
322+
}
323+
324+
free_node_mappings(mem, mappings);
325+
326+
if (!fork->current)
327+
{
328+
fork->current = fork->root;
329+
}
330+
331+
fork->line_number = parser->line_number;
332+
fork->offset = parser->offset;
333+
fork->column = parser->column;
334+
fork->first_nonspace = parser->first_nonspace;
335+
fork->first_nonspace_column = parser->first_nonspace_column;
336+
fork->thematic_break_kill_pos = parser->thematic_break_kill_pos;
337+
fork->indent = parser->indent;
338+
fork->blank = parser->blank;
339+
fork->partially_consumed_tab = parser->partially_consumed_tab;
340+
fork->last_line_length = parser->last_line_length;
341+
fork->options = parser->options;
342+
fork->last_buffer_ended_with_cr = parser->last_buffer_ended_with_cr;
343+
fork->total_size = parser->total_size;
344+
fork->backslash_ispunct = parser->backslash_ispunct;
345+
346+
cmark_strbuf_init(mem, &fork->curline, 0);
347+
if (parser->curline.ptr && parser->curline.size > 0)
348+
{
349+
cmark_strbuf_put(&fork->curline, parser->curline.ptr, parser->curline.size);
350+
}
351+
352+
cmark_strbuf_init(mem, &fork->linebuf, 0);
353+
if (parser->linebuf.ptr && parser->linebuf.size > 0)
354+
{
355+
cmark_strbuf_put(&fork->linebuf, parser->linebuf.ptr, parser->linebuf.size);
356+
}
357+
358+
fork->syntax_extensions = copy_syntax_extensions(mem, parser->syntax_extensions);
359+
fork->inline_syntax_extensions = copy_syntax_extensions(mem, parser->inline_syntax_extensions);
360+
361+
if (parser->skip_chars)
362+
{
363+
// parser->skip_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256);
364+
size_t table_size = 256 * sizeof(int8_t);
365+
fork->skip_chars = (int8_t *)mem->calloc(1, table_size);
366+
if (fork->skip_chars)
367+
{
368+
memcpy(fork->skip_chars, parser->skip_chars, table_size);
369+
}
370+
}
371+
if (parser->special_chars)
372+
{
373+
// parser->special_chars = (int8_t *)parser->mem->calloc(sizeof(int8_t), 256);
374+
size_t table_size = 256 * sizeof(int8_t);
375+
fork->special_chars = (int8_t *)mem->calloc(1, table_size);
376+
if (fork->special_chars)
377+
{
378+
memcpy(fork->special_chars, parser->special_chars, table_size);
379+
}
380+
}
381+
382+
return fork;
383+
}

0 commit comments

Comments
 (0)