diff --git a/Cargo.toml b/Cargo.toml index 90cd38c..bc9a656 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ documentation = "https://docs.rs/crate/sumcol/latest" [dependencies] clap = { version = "4.4.7", features = ["derive"] } +colored = "2.1.0" env_logger = "0.10.1" log = "0.4.20" regex = "1.10.2" diff --git a/README.md b/README.md index 2854696..37793bc 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Options: -f, --field The field to sum. If not specified, uses the full line [default: 0] -x, --hex Treat all numbers as hex, not just those with a leading 0x -d, --delimiter The regex on which to split fields [default: \s+] + -v, --verbose Print each number that's being summed, along with some metadata -h, --help Print help -V, --version Print version ``` @@ -191,31 +192,30 @@ silently parsed them correctly and omitted the warning. ## Debugging If `sumcol` doesn't seem to be working right, feel free to look at the code on -github (it's pretty straight forward), or run it with the `RUST_LOG=debug` -environment variable set. For example: - -```console -$ ls -l /etc/ | sumcol -f3 -0 +GitHub (it's pretty straightforward), or run it with the `-v` or `--verbose` +flag, or even set the `RUST_LOG=debug` environment variable. For +example: + +```console +$ printf "1\n2.5\nOOPS\n3" | sumcol -v +1 # n=Integer(1) sum=Integer(1) cnt=1 radix=10 raw_str="1" +2.5 # n=Float(2.5) sum=Float(3.5) cnt=2 radix=10 raw_str="2.5" +0 # n=Integer(0) sum=Float(3.5) cnt=2 radix=10 raw_str="OOPS" err="ParseFloatError { kind: Invalid }" +3 # n=Integer(3) sum=Float(6.5) cnt=3 radix=10 raw_str="3" +== +6.5 ``` -Zero? Hmm. That's weird. Let's debug. -```console -$ ls -l /etc/ | RUST_LOG=debug sumcol -f3 -[2023-11-10T21:13:46Z DEBUG sumcol] args=Args { field: 3, hex: false, delimiter: Regex("\\s+"), files: [] } -[2023-11-10T21:13:46Z DEBUG sumcol] 0: line="total 840" -[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: Empty }, col="". Using 0 instead. 
-[2023-11-10T21:13:46Z DEBUG sumcol] 0: col="", n=0, sum=0 -[2023-11-10T21:13:46Z DEBUG sumcol] 1: line="-rw-r--r-- 1 root wheel 515 Sep 16 09:28 afpovertcp.cfg" -[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead. -[2023-11-10T21:13:46Z DEBUG sumcol] 1: col="root", n=0, sum=0 -[2023-11-10T21:13:46Z DEBUG sumcol] 2: line="lrwxr-xr-x 1 root wheel 15 Sep 16 09:28 aliases -> postfix/aliases" -[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead. -[2023-11-10T21:13:46Z DEBUG sumcol] 2: col="root", n=0, sum=0 -[2023-11-10T21:13:46Z DEBUG sumcol] 3: line="-rw-r----- 1 root wheel 16384 Sep 16 09:28 aliases.db" -[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead. -... -``` -And we can see here that it's trying to parse things like `col="root"` as a -number, which doesn't make sense. The problem is that we are trying to sum -column three (the file owner) rather than column 5 (the file size). \ No newline at end of file +The metadata that's displayed on each line is + +| Name | Description | +|------|-------------| +| `n` | The parsed numeric value | +| `sum` | The running sum up to and including the current `n` | +| `cnt` | The running count of _successfully_ parsed numbers. If a number fails to parse and 0 is used instead, it will not be included in `cnt` | +| `radix` | The radix used when trying to parse the number as an integer | +| `raw_str` | The raw string data that was parsed | +| `err` | If present, this shows the error from trying to parse the string into a number | + +This should be enough to help you debug the problem you're seeing. However, if +that's not enough, give it a try with `RUST_LOG=debug`. 
\ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 514ee76..2595736 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use clap::Parser; +use colored::Colorize; use env_logger::Env; use regex::Regex; use std::fs; @@ -20,11 +21,23 @@ struct Args { #[arg(long, short, default_value(r"\s+"))] delimiter: Regex, + /// Print each number that's being summed, along with some metadata + #[arg(long, short = 'v')] + verbose: bool, + /// Files to read input from, otherwise uses stdin. #[arg(trailing_var_arg = true)] pub files: Vec, } +fn fmt_sum(sum: Sum, is_hex: bool) -> String { + if is_hex { + format!("{sum:#X}") + } else { + format!("{sum}") + } +} + fn main() -> std::result::Result<(), Box> { env_logger::Builder::from_env(Env::default().default_filter_or("warn")).init(); let args = Args::parse(); @@ -43,6 +56,7 @@ fn main() -> std::result::Result<(), Box> { }; let mut sum = Sum::Integer(0); + let mut cnt = 0; // Count of numbers we parse successfully. for reader in readers { for (i, line) in reader.lines().enumerate() { let line = line?.trim().to_string(); @@ -50,44 +64,71 @@ fn main() -> std::result::Result<(), Box> { if line.is_empty() { continue; } - let col = match args.field { + let raw_str = match args.field { 0 => line.as_str(), f => args.delimiter.split(&line).nth(f - 1).unwrap_or_default(), }; - let default_radix = if args.hex { 16 } else { 10 }; - let (col, radix) = match col.strip_prefix("0x") { + // Trim and remove commas. This may break localized numbers. + let clean_str = raw_str.trim().replace(',', ""); + let (clean_str, radix) = match clean_str.strip_prefix("0x") { Some(s) => (s, 16), - None => (col, default_radix), + None => (&clean_str as &str, if args.hex { 16 } else { 10 }), }; - let n = match i128::from_str_radix(col, radix) { - Ok(n) => Sum::Integer(n), + // Holds an optional error string from parsing that we may display in verbose output. 
+ let mut err = None; + let n = match i128::from_str_radix(clean_str, radix) { + Ok(n) => { + cnt += 1; + Sum::Integer(n) + } Err(e) => { - log::info!("Not integer. {e:?}, col={col:?}, radix={radix:?}."); + log::info!("Not integer. {e:?}, clean={clean_str:?}, radix={radix:?}."); // Try parsing as a float - match col.parse::() { - Ok(n) => Sum::Float(n), + match clean_str.parse::() { + Ok(n) => { + cnt += 1; + Sum::Float(n) + } Err(e) => { - log::info!("Not float. {e:?}, col={col:?}."); + log::info!("Not float. {e:?}, clean={clean_str:?}."); // If it parses as hex, warn the user that they may want to use -x. - if i128::from_str_radix(col, 16).is_ok() { + if i128::from_str_radix(clean_str, 16).is_ok() { log::warn!( - "Failed to parse {col:?}, but it may be hex. Consider using -x" + "Failed to parse {clean_str:?}, but it may be hex. Consider using -x" ); } + err = Some(format!("{e:?}")); Sum::Integer(0) } } } }; sum += n; - log::debug!("{i}: col={col:?}, n={n:?}, sum={sum:?}"); + if args.verbose { + // Print each number that we're summing, along with some metadata. 
+ let mut metadata = vec![]; + metadata.push(format!("n={}", format!("{:?}", n).bold()).cyan()); + metadata.push(format!("sum={}", format!("{:?}", sum).bold()).cyan()); + metadata.push(format!("cnt={}", format!("{cnt}").bold()).cyan()); + metadata.push(format!("radix={}", format!("{radix}").bold()).cyan()); + metadata.push(format!("raw_str={}", format!("{raw_str:?}").bold()).cyan()); + if let Some(err) = err { + metadata.push(format!("err={}", format!("{err:?}").bold()).red()); + } + print!("{}\t", fmt_sum(n, args.hex)); + ["#".cyan()] + .into_iter() + .chain(metadata.into_iter()) + .for_each(|x| print!(" {}", x)); + println!(); + } } } - if args.hex { - println!("{sum:#X}"); - } else { - println!("{sum}"); + if args.verbose { + println!("{}", "==".cyan()); } + println!("{}", fmt_sum(sum, args.hex)); + Ok(()) } diff --git a/tests/cli.rs b/tests/cli.rs index b26d506..b9f28ed 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -210,3 +210,35 @@ fn sum_float_hex_flag() -> TestResult { .stdout(predicate::str::contains("15.2")); Ok(()) } + +#[test] +fn sum_verbose_flag() -> TestResult { + let mut cmd = Command::cargo_bin("sumcol")?; + let input = r" + hello 2 blah + hello OOPS blah + hello 1.0 foo + hello 2.2 oo + "; + + // OOPS in the second line fails to parse, so it is summed as 0 and not counted in cnt. + cmd.write_stdin(input) + .args(["-f=2", "-v"]) + .assert() + .success() + .stdout(predicate::str::contains( + r#"n=Integer(2) sum=Integer(2) cnt=1 radix=10 raw_str="2""#, + )) + .stdout(predicate::str::contains( + r#"n=Integer(0) sum=Integer(2) cnt=1 radix=10 raw_str="OOPS" err="ParseFloatError { kind: Invalid }""#, + )) + .stdout(predicate::str::contains( + r#"n=Float(2.2) sum=Float(5.2) cnt=3 radix=10 raw_str="2.2""# + )) + .stdout(predicate::str::contains( + r#"=="#)) + .stdout(predicate::str::contains( + r#"5.2"# + )); + Ok(()) +}