Skip to content

Commit 8bf55e1

Browse files
authored
Generalize parsing and add handling for byte_string_literal and file_header (#23)
* Add 'warn_on_unhandled_tokens' to Config - Add a new bool to control printing a warning if we don't recognize a token during parsing - Update the call hierarchy to pass a reference to a Config rather than the KeyboardLayout alone. * Handle 'file_version' * Add byte_string_literal and generalize more parsing - Adds support for byte_string_literal which is exposed by tree-sitter as just text for the most part - Consolidate Node & Property since handling of their top-level token is the same in the grammar from the perspective of being a parent with some child nodes to traverse now that identifier handling was added in a previous commit - Identifiers, string_literals, and unit-address can all now be handled generically - Consolidated Zephyr syntax handling slightly
1 parent d4a0c38 commit 8bf55e1

File tree

7 files changed

+152
-59
lines changed

7 files changed

+152
-59
lines changed

src/config/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ mod constants;
1111
pub struct Config {
1212
#[serde(default)]
1313
pub layout: KeyboardLayoutType,
14+
15+
#[serde(default)]
16+
pub warn_on_unhandled_tokens: bool,
1417
}
1518

1619
impl Config {

src/context.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
use crate::layouts::KeyboardLayout;
1+
use crate::config::Config;
22

33
pub struct Context<'a> {
44
pub indent: usize,
55
pub keymap: bool,
66
pub bindings: bool,
7-
pub layout: &'a KeyboardLayout,
7+
pub config: &'a Config,
88
}
99

1010
impl Context<'_> {
11-
pub fn with_indent(&self, indent: usize) -> Self {
12-
Self { indent, ..*self }
13-
}
14-
1511
pub fn inc(&self, increment: usize) -> Self {
1612
Self { indent: self.indent + increment, ..*self }
1713
}
@@ -27,4 +23,11 @@ impl Context<'_> {
2723
pub fn bindings(&self) -> Self {
2824
Self { bindings: true, ..*self }
2925
}
26+
27+
// If a node named 'bindings' has a parent node named 'keymap' then we've
28+
// encountered a Zephyr keymap that will be handled as a special case by the
29+
// printer.
30+
pub fn has_zephyr_syntax(&self) -> bool {
31+
self.bindings && self.keymap
32+
}
3033
}

src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ fn format(
169169
config: &Config,
170170
check: bool,
171171
) -> FormattingStatus {
172-
let output = dtsfmt::printer::print(&source, &config.layout);
172+
let output = dtsfmt::printer::print(&source, config);
173173
let result = FormattedFile {
174174
filename: &filename,
175175
original_text: &source,

src/printer.rs

Lines changed: 69 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ use std::collections::VecDeque;
22

33
use tree_sitter::TreeCursor;
44

5+
use crate::config::Config;
56
use crate::context::Context;
6-
use crate::layouts::{self, KeyboardLayoutType};
7+
use crate::layouts;
78
use crate::parser::parse;
89
use crate::utils::{
910
get_text,
@@ -30,6 +31,9 @@ fn traverse(
3031
let node = cursor.node();
3132

3233
match node.kind() {
34+
"file_version" => {
35+
writer.push_str(&format!("{}\n\n", get_text(source, cursor)));
36+
}
3337
"comment" => {
3438
// Add a newline before the comment if the previous node is not a
3539
// comment
@@ -156,19 +160,12 @@ fn traverse(
156160
writer.push('\n');
157161
}
158162
}
159-
"identifier" => {
163+
"identifier" | "string_literal" | "unit_address" => {
160164
writer.push_str(get_text(source, cursor));
161-
// Identifier itself only contains the token string so we need to
162-
// peek forward to see if we're a label or a node name.
163-
if let Some(n) = lookahead(cursor) {
164-
match n.kind() {
165-
":" => writer.push_str(": "),
166-
"{" => writer.push_str(" {\n"),
167-
_ => (),
168-
};
169-
}
170165
}
171-
"node" => {
166+
// This is a general handler for any type that just needs to traverse
167+
// its children.
168+
"node" | "property" => {
172169
// A node will typically have children in a format of:
173170
// [<identifier>:] [&]<identifier> { [nodes and properties] }
174171
cursor.goto_first_child();
@@ -185,6 +182,7 @@ fn traverse(
185182
let ctx = ctx.inc(1);
186183
let ctx = match get_text(source, cursor) {
187184
"keymap" => ctx.keymap(),
185+
"bindings" => ctx.bindings(),
188186
_ => ctx,
189187
};
190188

@@ -195,49 +193,47 @@ fn traverse(
195193
}
196194
}
197195

198-
// Return to the "node"'s node to continue traversal.
196+
// Return to the "node"'s node element to continue traversal.
199197
cursor.goto_parent();
200-
}
201-
"property" => {
202-
cursor.goto_first_child();
203-
print_indent(writer, ctx);
204-
205-
let name = get_text(source, cursor);
206-
writer.push_str(name);
207198

208-
while cursor.goto_next_sibling() {
209-
// When we are inside a bindings node, we want to increase the
210-
// indentation level and print the bindings according to the
211-
// keyboard layout.
212-
let ctx = match name {
213-
"bindings" => ctx.inc(1).bindings(),
214-
_ => ctx.with_indent(0),
215-
};
216-
217-
match cursor.node().kind() {
218-
"," => writer.push_str(", "),
219-
"=" => writer.push_str(" = "),
220-
";" => break,
221-
_ => traverse(writer, source, cursor, &ctx),
222-
}
223-
}
224-
225-
writer.push_str(";\n");
226-
cursor.goto_parent();
227-
228-
// Add a newline if the next item is a node
229-
if lookahead(cursor).is_some_and(|n| n.kind() == "node") {
199+
// Place a newline before node siblings if they follow a property.
200+
if node.kind() == "property"
201+
&& lookahead(cursor).is_some_and(|n| n.kind() == "node")
202+
{
230203
writer.push('\n');
231204
}
232205
}
233-
"string_literal" => {
234-
writer.push_str(get_text(source, cursor));
206+
"byte_string_literal" => {
207+
let hex_string = get_text(source, cursor);
208+
// Trim the [ and ] off of the source string we obtained.
209+
let hex_bytes = hex_string[1..hex_string.len() - 1]
210+
.split_whitespace()
211+
.collect::<Vec<&str>>();
212+
let hex_chunks = hex_bytes.chunks(16).collect::<Vec<&[&str]>>();
213+
214+
// For smaller byte chunks it reads better if we just one line
215+
// everything, but for anything beyond 16 bytes we split it into
216+
// multiple lines.
217+
if hex_chunks.len() == 1 {
218+
writer.push_str(&format!("[{}]", hex_chunks[0].join(" ")));
219+
} else {
220+
writer.push_str("[\n");
221+
for (i, &line) in hex_chunks.iter().enumerate() {
222+
print_indent(writer, ctx);
223+
writer.push_str(&format!("{}\n", &line.join(" ")));
224+
if i == hex_chunks.len() - 1 {
225+
print_indent(writer, &ctx.dec(1));
226+
writer.push(']');
227+
}
228+
}
229+
}
235230
}
231+
236232
"integer_cells" => {
237233
cursor.goto_first_child();
238234

239235
// Keymap bindings are a special snowflake
240-
if ctx.keymap && ctx.bindings {
236+
if ctx.has_zephyr_syntax() {
241237
print_bindings(writer, source, cursor, ctx);
242238
return;
243239
}
@@ -263,14 +259,38 @@ fn traverse(
263259
writer.push('>');
264260
cursor.goto_parent();
265261
}
262+
// All the non-named grammatical tokens that are emitted but handled
263+
// simply with some output structure.
266264
"}" => {
267265
print_indent(writer, &ctx.dec(1));
268266
writer.push('}');
269267
}
268+
"{" => {
269+
writer.push_str(" {\n");
270+
}
271+
":" => {
272+
writer.push_str(": ");
273+
}
270274
";" => {
271275
writer.push_str(";\n");
272276
}
277+
"," => {
278+
writer.push_str(", ");
279+
}
280+
"=" => {
281+
writer.push_str(" = ");
282+
}
273283
_ => {
284+
if ctx.config.warn_on_unhandled_tokens {
285+
eprintln!(
286+
"unhandled type '{}' ({} {}): {}",
287+
node.kind(),
288+
node.child_count(),
289+
if node.child_count() == 1 { "child" } else { "children" },
290+
get_text(source, cursor)
291+
);
292+
}
293+
// Since we're unsure of this node just traverse its children
274294
if cursor.goto_first_child() {
275295
traverse(writer, source, cursor, ctx);
276296

@@ -320,7 +340,7 @@ fn collect_bindings(
320340

321341
// Move the items from the temporary buffer into a new vector that contains
322342
// the empty key spaces.
323-
ctx.layout
343+
layouts::get_layout(&ctx.config.layout)
324344
.bindings
325345
.iter()
326346
.map(|is_key| match is_key {
@@ -361,7 +381,7 @@ fn print_bindings(
361381
writer.push('<');
362382

363383
let buf = collect_bindings(cursor, source, ctx);
364-
let row_size = ctx.layout.row_size();
384+
let row_size = layouts::get_layout(&ctx.config.layout).row_size();
365385
let sizes = calculate_sizes(&buf, row_size);
366386

367387
buf.iter().enumerate().for_each(|(i, item)| {
@@ -390,14 +410,12 @@ fn print_bindings(
390410
cursor.goto_parent();
391411
}
392412

393-
pub fn print(source: &String, layout: &KeyboardLayoutType) -> String {
413+
pub fn print(source: &String, config: &Config) -> String {
394414
let mut writer = String::new();
395415
let tree = parse(source.clone());
396416
let mut cursor = tree.walk();
397417

398-
let layout = layouts::get_layout(layout);
399-
let ctx =
400-
Context { indent: 0, bindings: false, keymap: false, layout: &layout };
418+
let ctx = Context { indent: 0, keymap: false, bindings: false, config };
401419

402420
// The first node is the root document node, so we have to traverse all its
403421
// children with the same indentation level.

src/test_utils/spec_helpers.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use console::Style;
55
use similar::{ChangeTag, TextDiff};
66

77
use super::get_specs_in_dir;
8+
use crate::config::Config;
89
use crate::layouts::KeyboardLayoutType;
910
use crate::printer::print;
1011

@@ -48,9 +49,13 @@ pub fn run_specs(directory_path: &Path) {
4849
let specs = get_specs_in_dir(directory_path);
4950
let test_count = specs.len();
5051
let mut failed_tests = Vec::new();
52+
let config = Config {
53+
layout: KeyboardLayoutType::Adv360,
54+
warn_on_unhandled_tokens: false,
55+
};
5156

5257
for (_, spec) in specs {
53-
let result = print(&spec.file_text, &KeyboardLayoutType::Adv360);
58+
let result = print(&spec.file_text, &config);
5459

5560
if result != spec.expected_text {
5661
failed_tests.push(FailedTestResult {

tests/specs/byte_literals.txt

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
== Single line byte literals ==
2+
name {
3+
byte_literal = [00 01 02 03 04 05 06 07 08 09 0A];
4+
};
5+
6+
[expect]
7+
name {
8+
byte_literal = [00 01 02 03 04 05 06 07 08 09 0A];
9+
};
10+
11+
== Wrap byte literals ==
12+
name {
13+
byte_literal = [00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F];
14+
};
15+
16+
[expect]
17+
name {
18+
byte_literal = [
19+
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
20+
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
21+
];
22+
};
23+
24+
== Nested byte literals wrapped to a closer indentation ==
25+
name {
26+
node1 {
27+
node2 {
28+
node3 {
29+
byte_literal = [00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F];
30+
};
31+
};
32+
};
33+
};
34+
35+
[expect]
36+
name {
37+
node1 {
38+
node2 {
39+
node3 {
40+
byte_literal = [
41+
00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
42+
10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F
43+
];
44+
};
45+
};
46+
};
47+
};

tests/specs/headers.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
== Ensure the header is retained ==
2+
/dts-v1/;
3+
4+
/ {
5+
model = "Something new";
6+
compatible = "dtsfmt devices";
7+
#size-cells = <0x02>;
8+
};
9+
10+
[expect]
11+
/dts-v1/;
12+
13+
/ {
14+
model = "Something new";
15+
compatible = "dtsfmt devices";
16+
#size-cells = <0x02>;
17+
};

0 commit comments

Comments
 (0)