Skip to content

Commit c7ef25a

Browse files
committed
Attach magic comments to the parse result
1 parent 2b3d59f commit c7ef25a

File tree

12 files changed, with 149 additions (+149) and 16 deletions (-16).

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,6 @@ a.out
4343

4444
compile_commands.json
4545
.cache/
46+
.vscode/
4647

4748
tags

bin/parse

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ value = value.accept(Prism::DesugarCompiler.new) if ENV["DESUGAR"]
2222

2323
parts = {}
2424
parts["Comments"] = result.comments if result.comments.any?
25+
parts["Magic comments"] = result.magic_comments if result.magic_comments.any?
2526
parts["Warnings"] = result.warnings if result.warnings.any?
2627
parts["Errors"] = result.errors if result.errors.any?
2728

docs/serialization.md

+10
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ This drastically cuts down on the size of the serialized string, especially when
3131
### comment
3232

3333
The comment type is one of:
34+
3435
* 0=`INLINE` (`# comment`)
3536
* 1=`EMBEDDED_DOCUMENT` (`=begin`/`=end`)
3637
* 2=`__END__` (after `__END__`)
@@ -40,6 +41,13 @@ The comment type is one of:
4041
| `1` | comment type |
4142
| location | the location in the source of this comment |
4243

44+
### magic comment
45+
46+
| # bytes | field |
47+
| --- | --- |
48+
| location | the location of the key of the magic comment |
49+
| location | the location of the value of the magic comment |
50+
4351
### diagnostic
4452

4553
| # bytes | field |
@@ -66,6 +74,8 @@ The header is structured like the following table:
6674
| string | the encoding name |
6775
| varint | number of comments |
6876
| comment* | comments |
77+
| varint | number of magic comments |
78+
| magic comment* | magic comments |
6979
| varint | number of errors |
7080
| diagnostic* | errors |
7181
| varint | number of warnings |

ext/prism/extension.c

+35-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ VALUE rb_cPrismToken;
1010
VALUE rb_cPrismLocation;
1111

1212
VALUE rb_cPrismComment;
13+
VALUE rb_cPrismMagicComment;
1314
VALUE rb_cPrismParseError;
1415
VALUE rb_cPrismParseWarning;
1516
VALUE rb_cPrismParseResult;
@@ -153,6 +154,35 @@ parser_comments(pm_parser_t *parser, VALUE source) {
153154
return comments;
154155
}
155156

157+
// Extract the magic comments out of the parser into an array.
158+
static VALUE
159+
parser_magic_comments(pm_parser_t *parser, VALUE source) {
160+
VALUE magic_comments = rb_ary_new();
161+
162+
for (pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) parser->magic_comment_list.head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
163+
VALUE key_loc_argv[] = {
164+
source,
165+
LONG2FIX(magic_comment->key_start - parser->start),
166+
LONG2FIX(magic_comment->key_length)
167+
};
168+
169+
VALUE value_loc_argv[] = {
170+
source,
171+
LONG2FIX(magic_comment->value_start - parser->start),
172+
LONG2FIX(magic_comment->value_length)
173+
};
174+
175+
VALUE magic_comment_argv[] = {
176+
rb_class_new_instance(3, key_loc_argv, rb_cPrismLocation),
177+
rb_class_new_instance(3, value_loc_argv, rb_cPrismLocation)
178+
};
179+
180+
rb_ary_push(magic_comments, rb_class_new_instance(2, magic_comment_argv, rb_cPrismMagicComment));
181+
}
182+
183+
return magic_comments;
184+
}
185+
156186
// Extract the errors out of the parser into an array.
157187
static VALUE
158188
parser_errors(pm_parser_t *parser, rb_encoding *encoding, VALUE source) {
@@ -297,14 +327,15 @@ parse_lex_input(pm_string_t *input, const char *filepath, bool return_nodes) {
297327
VALUE result_argv[] = {
298328
value,
299329
parser_comments(&parser, source),
330+
parser_magic_comments(&parser, source),
300331
parser_errors(&parser, parse_lex_data.encoding, source),
301332
parser_warnings(&parser, parse_lex_data.encoding, source),
302333
source
303334
};
304335

305336
pm_node_destroy(&parser, node);
306337
pm_parser_free(&parser);
307-
return rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
338+
return rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
308339
}
309340

310341
// Return an array of tokens corresponding to the given string.
@@ -351,12 +382,13 @@ parse_input(pm_string_t *input, const char *filepath) {
351382
VALUE result_argv[] = {
352383
pm_ast_new(&parser, node, encoding),
353384
parser_comments(&parser, source),
385+
parser_magic_comments(&parser, source),
354386
parser_errors(&parser, encoding, source),
355387
parser_warnings(&parser, encoding, source),
356388
source
357389
};
358390

359-
VALUE result = rb_class_new_instance(5, result_argv, rb_cPrismParseResult);
391+
VALUE result = rb_class_new_instance(6, result_argv, rb_cPrismParseResult);
360392

361393
pm_node_destroy(&parser, node);
362394
pm_parser_free(&parser);
@@ -547,6 +579,7 @@ Init_prism(void) {
547579
rb_cPrismToken = rb_define_class_under(rb_cPrism, "Token", rb_cObject);
548580
rb_cPrismLocation = rb_define_class_under(rb_cPrism, "Location", rb_cObject);
549581
rb_cPrismComment = rb_define_class_under(rb_cPrism, "Comment", rb_cObject);
582+
rb_cPrismMagicComment = rb_define_class_under(rb_cPrism, "MagicComment", rb_cObject);
550583
rb_cPrismParseError = rb_define_class_under(rb_cPrism, "ParseError", rb_cObject);
551584
rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject);
552585
rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject);

include/prism/parser.h

+11
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,16 @@ typedef struct pm_comment {
250250
pm_comment_type_t type;
251251
} pm_comment_t;
252252

253+
// This is a node in the linked list of magic comments that we've found while
254+
// parsing.
255+
typedef struct {
256+
pm_list_node_t node;
257+
const uint8_t *key_start;
258+
const uint8_t *value_start;
259+
uint32_t key_length;
260+
uint32_t value_length;
261+
} pm_magic_comment_t;
262+
253263
// When the encoding that is being used to parse the source is changed by prism,
254264
// we provide the ability here to call out to a user-defined function.
255265
typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
@@ -353,6 +363,7 @@ struct pm_parser {
353363
const uint8_t *heredoc_end;
354364

355365
pm_list_t comment_list; // the list of comments that have been found while parsing
366+
pm_list_t magic_comment_list; // the list of magic comments that have been found while parsing.
356367
pm_list_t warning_list; // the list of warnings that have been found while parsing
357368
pm_list_t error_list; // the list of errors that have been found while parsing
358369
pm_scope_t *current_scope; // the current local scope

lib/prism/ffi.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,11 @@ def self.parse_lex(code, filepath = nil)
234234
loader = Serialize::Loader.new(source, buffer.read)
235235

236236
tokens = loader.load_tokens
237-
node, comments, errors, warnings = loader.load_nodes
237+
node, comments, magic_comments, errors, warnings = loader.load_nodes
238238

239239
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
240240

241-
ParseResult.new([node, tokens], comments, errors, warnings, source)
241+
ParseResult.new([node, tokens], comments, magic_comments, errors, warnings, source)
242242
end
243243
end
244244

lib/prism/lex_compat.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,7 @@ def result
825825
# We sort by location to compare against Ripper's output
826826
tokens.sort_by!(&:location)
827827

828-
ParseResult.new(tokens, result.comments, result.errors, result.warnings, [])
828+
ParseResult.new(tokens, result.comments, result.magic_comments, result.errors, result.warnings, [])
829829
end
830830
end
831831

lib/prism/parse_result.rb

+31-4
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def join(other)
137137
end
138138

139139
def self.null
140-
new(0, 0)
140+
new(nil, 0, 0)
141141
end
142142
end
143143

@@ -166,6 +166,32 @@ def inspect
166166
end
167167
end
168168

169+
# This represents a magic comment that was encountered during parsing.
170+
class MagicComment
171+
attr_reader :key_loc, :value_loc
172+
173+
def initialize(key_loc, value_loc)
174+
@key_loc = key_loc
175+
@value_loc = value_loc
176+
end
177+
178+
def key
179+
key_loc.slice
180+
end
181+
182+
def value
183+
value_loc.slice
184+
end
185+
186+
def deconstruct_keys(keys)
187+
{ key_loc: key_loc, value_loc: value_loc }
188+
end
189+
190+
def inspect
191+
"#<Prism::MagicComment @key=#{key.inspect} @value=#{value.inspect}>"
192+
end
193+
end
194+
169195
# This represents an error that was encountered during parsing.
170196
class ParseError
171197
attr_reader :message, :location
@@ -206,18 +232,19 @@ def inspect
206232
# the AST, any comments that were encountered, and any errors that were
207233
# encountered.
208234
class ParseResult
209-
attr_reader :value, :comments, :errors, :warnings, :source
235+
attr_reader :value, :comments, :magic_comments, :errors, :warnings, :source
210236

211-
def initialize(value, comments, errors, warnings, source)
237+
def initialize(value, comments, magic_comments, errors, warnings, source)
212238
@value = value
213239
@comments = comments
240+
@magic_comments = magic_comments
214241
@errors = errors
215242
@warnings = warnings
216243
@source = source
217244
end
218245

219246
def deconstruct_keys(keys)
220-
{ value: value, comments: comments, errors: errors, warnings: warnings }
247+
{ value: value, comments: comments, magic_comments: magic_comments, errors: errors, warnings: warnings }
221248
end
222249

223250
def success?

rakelib/check_manifest.rake

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ task :check_manifest => [:templates] do
99
.github
1010
.cache
1111
.ruby-lsp
12+
.vscode
1213
autom4te.cache
1314
bin
1415
build

src/prism.c

+25
Original file line numberDiff line numberDiff line change
@@ -5443,6 +5443,16 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
54435443
// When we're done, we want to free the string in case we had to
54445444
// allocate memory for it.
54455445
pm_string_free(&key);
5446+
5447+
// Allocate a new magic comment node to append to the parser's list.
5448+
pm_magic_comment_t *magic_comment;
5449+
if ((magic_comment = (pm_magic_comment_t *) malloc(sizeof(pm_magic_comment_t))) != NULL) {
5450+
magic_comment->key_start = key_start;
5451+
magic_comment->value_start = value_start;
5452+
magic_comment->key_length = (uint32_t) key_length;
5453+
magic_comment->value_length = (uint32_t) (value_end - value_start);
5454+
pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
5455+
}
54465456
}
54475457
}
54485458

@@ -15257,6 +15267,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
1525715267
.next_start = NULL,
1525815268
.heredoc_end = NULL,
1525915269
.comment_list = PM_LIST_EMPTY,
15270+
.magic_comment_list = PM_LIST_EMPTY,
1526015271
.warning_list = PM_LIST_EMPTY,
1526115272
.error_list = PM_LIST_EMPTY,
1526215273
.current_scope = NULL,
@@ -15351,13 +15362,27 @@ pm_comment_list_free(pm_list_t *list) {
1535115362
}
1535215363
}
1535315364

15365+
// Free all of the memory associated with the magic comment list.
15366+
static inline void
15367+
pm_magic_comment_list_free(pm_list_t *list) {
15368+
pm_list_node_t *node, *next;
15369+
15370+
for (node = list->head; node != NULL; node = next) {
15371+
next = node->next;
15372+
15373+
pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
15374+
free(magic_comment);
15375+
}
15376+
}
15377+
1535415378
// Free any memory associated with the given parser.
1535515379
PRISM_EXPORTED_FUNCTION void
1535615380
pm_parser_free(pm_parser_t *parser) {
1535715381
pm_string_free(&parser->filepath_string);
1535815382
pm_diagnostic_list_free(&parser->error_list);
1535915383
pm_diagnostic_list_free(&parser->warning_list);
1536015384
pm_comment_list_free(&parser->comment_list);
15385+
pm_magic_comment_list_free(&parser->magic_comment_list);
1536115386
pm_constant_pool_free(&parser->constant_pool);
1536215387
pm_newline_list_free(&parser->newline_list);
1536315388

templates/lib/prism/serialize.rb.erb

+8-7
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ module Prism
5555

5656
def load_metadata
5757
comments = load_varint.times.map { Comment.new(Comment::TYPES.fetch(load_varint), load_location) }
58+
magic_comments = load_varint.times.map { MagicComment.new(load_location, load_location) }
5859
errors = load_varint.times.map { ParseError.new(load_embedded_string, load_location) }
5960
warnings = load_varint.times.map { ParseWarning.new(load_embedded_string, load_location) }
60-
[comments, errors, warnings]
61+
[comments, magic_comments, errors, warnings]
6162
end
6263

6364
def load_tokens
@@ -76,14 +77,14 @@ module Prism
7677
def load_tokens_result
7778
tokens = load_tokens
7879
encoding = load_encoding
79-
comments, errors, warnings = load_metadata
80+
comments, magic_comments, errors, warnings = load_metadata
8081

8182
if encoding != @encoding
8283
tokens.each { |token,| token.value.force_encoding(encoding) }
8384
end
8485

8586
raise "Expected to consume all bytes while deserializing" unless @io.eof?
86-
Prism::ParseResult.new(tokens, comments, errors, warnings, @source)
87+
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
8788
end
8889

8990
def load_nodes
@@ -97,17 +98,17 @@ module Prism
9798
@encoding = load_encoding
9899
@input = input.force_encoding(@encoding).freeze
99100

100-
comments, errors, warnings = load_metadata
101+
comments, magic_comments, errors, warnings = load_metadata
101102

102103
@constant_pool_offset = io.read(4).unpack1("L")
103104
@constant_pool = Array.new(load_varint, nil)
104105

105-
[load_node, comments, errors, warnings]
106+
[load_node, comments, magic_comments, errors, warnings]
106107
end
107108

108109
def load_result
109-
node, comments, errors, warnings = load_nodes
110-
Prism::ParseResult.new(node, comments, errors, warnings, @source)
110+
node, comments, magic_comments, errors, warnings = load_nodes
111+
Prism::ParseResult.new(node, comments, magic_comments, errors, warnings, @source)
111112
end
112113

113114
private

templates/src/serialize.c.erb

+23
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,27 @@ pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buf
146146
}
147147
}
148148

149+
static void
150+
pm_serialize_magic_comment(pm_parser_t *parser, pm_magic_comment_t *magic_comment, pm_buffer_t *buffer) {
151+
// serialize key location
152+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_start - parser->start));
153+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->key_length));
154+
155+
// serialize value location
156+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_start - parser->start));
157+
pm_buffer_append_u32(buffer, pm_ptrdifft_to_u32(magic_comment->value_length));
158+
}
159+
160+
static void
161+
pm_serialize_magic_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer) {
162+
pm_buffer_append_u32(buffer, pm_sizet_to_u32(pm_list_size(list)));
163+
164+
pm_magic_comment_t *magic_comment;
165+
for (magic_comment = (pm_magic_comment_t *) list->head; magic_comment != NULL; magic_comment = (pm_magic_comment_t *) magic_comment->node.next) {
166+
pm_serialize_magic_comment(parser, magic_comment, buffer);
167+
}
168+
}
169+
149170
static void
150171
pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buffer_t *buffer) {
151172
// serialize message
@@ -180,6 +201,7 @@ void
180201
pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
181202
pm_serialize_encoding(&parser->encoding, buffer);
182203
pm_serialize_comment_list(parser, &parser->comment_list, buffer);
204+
pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
183205
pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
184206
pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
185207

@@ -268,6 +290,7 @@ pm_lex_serialize(const uint8_t *source, size_t size, const char *filepath, pm_bu
268290

269291
pm_serialize_encoding(&parser.encoding, buffer);
270292
pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
293+
pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
271294
pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
272295
pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
273296

0 commit comments

Comments
 (0)