Skip to content

Commit d2db872

Browse files
authored
tac: add regex flavor translation for compatibility and new test case (#10416)
1 parent e061f8e commit d2db872

File tree

2 files changed

+244
-2
lines changed

2 files changed

+244
-2
lines changed

src/uu/tac/src/tac.rs

Lines changed: 174 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,11 +223,99 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
223223
Ok(())
224224
}
225225

226+
/// Make the regex flavor compatible with the `regex` crate.
///
/// In the input flavor `\(`, `\)`, `\|`, `\{` and `\}` are operators while the
/// bare characters are literals; the `regex` crate uses the opposite
/// convention. Concretely, this function:
///
/// - Toggles the escaping of `(`, `)`, `|`, `{` and `}`.
/// - Escapes `^` unless it is at the start of the pattern or directly after a
///   group opener `\(` or an alternation `\|`.
/// - Escapes `$` unless it is at the end of the pattern or directly before a
///   group closer `\)` or an alternation `\|`.
/// - Copies bracket expressions (`[...]`) unchanged, treating a `]` that
///   directly follows `[` or `[^` as a member of the set, not its end.
/// - Copies any other escape sequence (`\[`, `\^`, `\\`, `\d`, ...) verbatim;
///   in particular an escaped `\[` does not start a bracket expression.
///
/// A trailing lone backslash is copied as-is. Nested POSIX classes such as
/// `[[:alpha:]]` are not tracked specially: the first `]` ends the bracket
/// expression.
fn translate_regex_flavor(regex: &str) -> String {
    let mut result = String::with_capacity(regex.len());
    let mut chars = regex.chars().peekable();
    let mut inside_brackets = false;
    // `^` is only an anchor at the very start of the pattern or right after
    // an operator that starts a new (sub)expression.
    let mut caret_is_anchor = true;

    while let Some(c) = chars.next() {
        if inside_brackets {
            // Bracket contents are passed through untouched.
            result.push(c);
            inside_brackets = c != ']';
            continue;
        }

        // Set by the arms below when the *next* `^` would be an anchor.
        let mut opens_anchor_context = false;

        match c {
            // The backslash and the character it escapes are handled as a
            // pair, so the escaped character is never re-interpreted.
            '\\' => match chars.next() {
                // Operator in the input flavor: emit it unescaped.
                Some(op @ ('(' | ')' | '|' | '{' | '}')) => {
                    result.push(op);
                    opens_anchor_context = matches!(op, '(' | '|');
                }
                // Any other escape keeps its meaning.
                Some(escaped) => {
                    result.push('\\');
                    result.push(escaped);
                }
                // Trailing backslash: nothing left to escape.
                None => result.push('\\'),
            },
            '[' => {
                result.push('[');
                // An optional negation and a leading `]` belong to the set.
                if chars.next_if_eq(&'^').is_some() {
                    result.push('^');
                }
                if chars.next_if_eq(&']').is_some() {
                    result.push(']');
                }
                inside_brackets = true;
            }
            // Literal in the input flavor: must be escaped for the crate.
            '(' | ')' | '|' | '{' | '}' => {
                result.push('\\');
                result.push(c);
            }
            '^' => {
                if !caret_is_anchor {
                    result.push('\\');
                }
                result.push('^');
            }
            '$' => {
                // Look ahead without consuming and without allocating.
                let mut lookahead = chars.clone();
                let is_anchor = match lookahead.next() {
                    None => true,
                    Some('\\') => matches!(lookahead.next(), Some(')' | '|')),
                    Some(_) => false,
                };
                if !is_anchor {
                    result.push('\\');
                }
                result.push('$');
            }
            _ => result.push(c),
        }

        caret_is_anchor = opens_anchor_context;
    }

    result
}
310+
226311
#[allow(clippy::cognitive_complexity)]
227312
fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &str) -> UResult<()> {
228313
// Compile the regular expression pattern if it is provided.
229314
let maybe_pattern = if regex {
230-
match regex::bytes::Regex::new(separator) {
315+
match regex::bytes::RegexBuilder::new(&translate_regex_flavor(separator))
316+
.multi_line(true)
317+
.build()
318+
{
231319
Ok(p) => Some(p),
232320
Err(e) => return Err(TacError::InvalidRegex(e).into()),
233321
}
@@ -359,3 +447,88 @@ fn try_mmap_path(path: &Path) -> Option<Mmap> {
359447

360448
Some(mmap)
361449
}
450+
451+
#[cfg(test)]
mod tests_hybrid_flavor {
    use super::translate_regex_flavor;

    /// Checks each `(input, expected)` pair against `translate_regex_flavor`.
    fn assert_translations(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(translate_regex_flavor(input), expected);
        }
    }

    /// Grouping and alternation characters have their escaping toggled.
    #[test]
    fn test_grouping_and_alternation() {
        assert_translations(&[
            (r"\(abc\)", r"(abc)"),
            (r"(abc)", r"\(abc\)"),
            (r"a\|b", r"a|b"),
            (r"a|b", r"a\|b"),
        ]);
    }

    /// `+`, `*` and `?` pass through unchanged, escaped or not.
    #[test]
    fn test_quantifiers() {
        assert_translations(&[
            ("a+", "a+"),
            ("a*", "a*"),
            ("a?", "a?"),
            (r"a\+", r"a\+"),
            (r"a\*", r"a\*"),
            (r"a\?", r"a\?"),
        ]);
    }

    /// Interval braces have their escaping toggled.
    #[test]
    fn test_intervals() {
        assert_translations(&[(r"a\{1,3\}", r"a{1,3}"), (r"a{1,3}", r"a\{1,3\}")]);
    }

    /// `^` and `$` stay anchors at pattern edges and next to `\(`, `\)`, `\|`;
    /// elsewhere they are escaped.
    #[test]
    fn test_anchors_context() {
        assert_translations(&[
            (r"^abc$", r"^abc$"),
            (r"a^b", r"a\^b"),
            (r"a$b", r"a\$b"),
            // Anchors inside groups (reset by \(...\) regardless of position)
            (r"\(^abc\)", r"(^abc)"),
            (r"z\(^abc\)", r"z(^abc)"),
            (r"\(abc$\)", r"(abc$)"),
            (r"\(abc$\)z", r"(abc$)z"),
            // Anchors inside alternation (reset by \| regardless of position)
            (r"^a\|^b", r"^a|^b"),
            (r"x\|^b", r"x|^b"),
            (r"a$\|b$", r"a$|b$"),
        ]);
    }

    /// Bracket expressions come out exactly as they went in.
    #[test]
    fn test_character_classes() {
        assert_translations(&[
            (r"[a-z]", r"[a-z]"),
            (r"[.]", r"[.]"),
            (r"[+]", r"[+]"),
            (r"[]abc]", r"[]abc]"),
            (r"[^]abc]", r"[^]abc]"),
        ]);
    }

    /// Patterns mixing groups, escapes, brackets and intervals.
    #[test]
    fn test_complex_strings() {
        assert_translations(&[
            (r"(\d+)[+*]", r"\(\d+\)[+*]"),
            (r"\(\d+\)\{2\}", r"(\d+){2}"),
        ]);
    }

    /// Trailing backslash, escaped backslash and escaped caret are kept.
    #[test]
    fn test_edge_cases() {
        assert_translations(&[(r"abc\", r"abc\"), (r"\\", r"\\"), (r"\^", r"\^")]);
    }
}

tests/by-util/test_tac.rs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// For the full copyright and license information, please view the LICENSE
44
// file that was distributed with this source code.
5-
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa
5+
// spell-checker:ignore axxbxx bxxaxx axxx axxxx xxaxx xxax xxxxa axyz zyax zyxa bbaaa aaabc bcdddd cddddaaabc xyzabc abcxyzabc nbbaaa
66
#[cfg(target_os = "linux")]
77
use uutests::at_and_ucmd;
88
use uutests::new_ucmd;
@@ -347,3 +347,72 @@ fn test_stdin_bad_tmpdir_fallback() {
347347
.succeeds()
348348
.stdout_is("c\nb\na\n");
349349
}
350+
351+
/// `\|` in a `-r` separator acts as alternation: with `[^x]\|x` as the
/// separator, the input `abc` is expected to come out as `cba`.
#[test]
fn test_regex_or_operator() {
    let separator = r"[^x]\|x";
    let input = "abc";
    let expected = "cba";

    new_ucmd!()
        .args(&["-r", "-s", separator])
        .pipe_in(input)
        .succeeds()
        .stdout_is(expected);
}
359+
360+
/// An unescaped `^` or `$` in the middle of a `-r` separator is matched as a
/// literal character.
#[test]
fn test_unescaped_middle_anchor() {
    // (separator, stdin, expected stdout)
    let cases = [
        (r"1^2", "111^222", "22111^2"),
        (r"a$b", "aaa$bbb", "bbaaa$b"),
    ];

    for (separator, input, expected) in cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
374+
375+
/// An explicitly escaped `\^` or `\$` in the middle of a `-r` separator is
/// matched as a literal character.
#[test]
fn test_escaped_middle_anchor() {
    // (separator, stdin, expected stdout)
    let cases = [
        (r"c\^b", "aaabc^bcdddd", "cddddaaabc^b"),
        (r"c\$b", "aaabc$bcdddd", "cddddaaabc$b"),
    ];

    for (separator, input, expected) in cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
389+
390+
/// A leading `^` in a `-r` separator anchors at line starts: the first input
/// is expected back unchanged, the second is split at the line starting with `b`.
#[test]
fn test_regular_start_anchor() {
    // (separator, stdin, expected stdout)
    let cases = [
        (r"^abc", "xyzabc123abc", "xyzabc123abc"),
        (r"^b", "aaa\nbbb\nccc\n", "bb\nccc\naaa\nb"),
    ];

    for (separator, input, expected) in cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}
404+
405+
/// A trailing `$` in a `-r` separator anchors at line ends: the first input
/// is expected back unchanged, the second is split at the line ending with `b`.
#[test]
fn test_regular_end_anchor() {
    // (separator, stdin, expected stdout)
    let cases = [
        (r"abc$", "123abcxyzabc", "123abcxyzabc"),
        (r"b$", "aaa\nbbb\nccc\n", "\nccc\nbbaaa\nb"),
    ];

    for (separator, input, expected) in cases {
        new_ucmd!()
            .args(&["-r", "-s", separator])
            .pipe_in(input)
            .succeeds()
            .stdout_is(expected);
    }
}

0 commit comments

Comments
 (0)