Skip to content

Commit

Permalink
Added support for -v flag
Browse files Browse the repository at this point in the history
devjgm committed Dec 18, 2023
1 parent d34253e commit 6e2c55d
Showing 4 changed files with 117 additions and 43 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ documentation = "https://docs.rs/crate/sumcol/latest"

[dependencies]
clap = { version = "4.4.7", features = ["derive"] }
colored = "2.1.0"
env_logger = "0.10.1"
log = "0.4.20"
regex = "1.10.2"
52 changes: 26 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@ Options:
-f, --field <FIELD> The field to sum. If not specified, uses the full line [default: 0]
-x, --hex Treat all numbers as hex, not just those with a leading 0x
-d, --delimiter <DELIMITER> The regex on which to split fields [default: \s+]
-v, --verbose Print each number that's being summed, along with some metadata
-h, --help Print help
-V, --version Print version
```
@@ -191,31 +192,30 @@ silently parsed them correctly and omitted the warning.
## Debugging

If `sumcol` doesn't seem to be working right, feel free to look at the code on
github (it's pretty straight forward), or run it with the `RUST_LOG=debug`
environment variable set. For example:

```console
$ ls -l /etc/ | sumcol -f3
0
GitHub (it's pretty straightforward), or run it with the `-v` or `--verbose`
flag, or even set the `RUST_LOG=debug` environment variable. For
example:

```console
$ printf "1\n2.5\nOOPS\n3" | sumcol -v
1 # n=Integer(1) sum=Integer(1) cnt=1 radix=10 raw_str="1"
2.5 # n=Float(2.5) sum=Float(3.5) cnt=2 radix=10 raw_str="2.5"
0 # n=Integer(0) sum=Float(3.5) cnt=2 radix=10 raw_str="OOPS" err="ParseFloatError { kind: Invalid }"
3 # n=Integer(3) sum=Float(6.5) cnt=3 radix=10 raw_str="3"
==
6.5
```
Zero? Hmm. That's weird. Let's debug.

```console
$ ls -l /etc/ | RUST_LOG=debug sumcol -f3
[2023-11-10T21:13:46Z DEBUG sumcol] args=Args { field: 3, hex: false, delimiter: Regex("\\s+"), files: [] }
[2023-11-10T21:13:46Z DEBUG sumcol] 0: line="total 840"
[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: Empty }, col="". Using 0 instead.
[2023-11-10T21:13:46Z DEBUG sumcol] 0: col="", n=0, sum=0
[2023-11-10T21:13:46Z DEBUG sumcol] 1: line="-rw-r--r-- 1 root wheel 515 Sep 16 09:28 afpovertcp.cfg"
[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead.
[2023-11-10T21:13:46Z DEBUG sumcol] 1: col="root", n=0, sum=0
[2023-11-10T21:13:46Z DEBUG sumcol] 2: line="lrwxr-xr-x 1 root wheel 15 Sep 16 09:28 aliases -> postfix/aliases"
[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead.
[2023-11-10T21:13:46Z DEBUG sumcol] 2: col="root", n=0, sum=0
[2023-11-10T21:13:46Z DEBUG sumcol] 3: line="-rw-r----- 1 root wheel 16384 Sep 16 09:28 aliases.db"
[2023-11-10T21:13:46Z INFO sumcol] ParseIntError { kind: InvalidDigit }, col="root". Using 0 instead.
...
```
And we can see here that it's trying to parse things like `col="root"` as a
number, which doesn't make sense. The problem is that we are trying to sum
column three (the file owner) rather than column 5 (the file size).
The metadata that's displayed on each line is:

| Name | Description |
|------|-------------|
| `n` | The parsed numeric value |
| `sum` | The running sum up to and including the current `n` |
| `cnt` | The running count of _successfully_ parsed numbers. If a number fails to parse and 0 is used instead, it will not be included in `cnt` |
| `radix` | The radix used when trying to parse the number as an integer |
| `raw_str` | The raw string data that was parsed |
| `err` | If present, this shows the error from trying to parse the string into a number |

This should be enough to help you debug the problem you're seeing. However, if
that's not enough, give it a try with `RUST_LOG=debug`.
75 changes: 58 additions & 17 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use clap::Parser;
use colored::Colorize;
use env_logger::Env;
use regex::Regex;
use std::fs;
@@ -20,11 +21,23 @@ struct Args {
#[arg(long, short, default_value(r"\s+"))]
delimiter: Regex,

/// Print each number that's being summed, along with some metadata
#[arg(long, short = 'v')]
verbose: bool,

/// Files to read input from, otherwise uses stdin.
#[arg(trailing_var_arg = true)]
pub files: Vec<String>,
}

/// Renders a [`Sum`] as the text that gets printed.
///
/// When `is_hex` is true the value is formatted with the alternate upper-case
/// hex specifier (`{:#X}`); otherwise its `Display` form is returned.
fn fmt_sum(sum: Sum, is_hex: bool) -> String {
    // Decimal is the common case, so handle it first and bail out early.
    if !is_hex {
        return sum.to_string();
    }
    format!("{sum:#X}")
}

fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
env_logger::Builder::from_env(Env::default().default_filter_or("warn")).init();
let args = Args::parse();
@@ -43,51 +56,79 @@ fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
};

let mut sum = Sum::Integer(0);
let mut cnt = 0; // Count of numbers we parse successfully.
for reader in readers {
for (i, line) in reader.lines().enumerate() {
let line = line?.trim().to_string();
log::debug!("{i}: line={line:?}");
if line.is_empty() {
continue;
}
let col = match args.field {
let raw_str = match args.field {
0 => line.as_str(),
f => args.delimiter.split(&line).nth(f - 1).unwrap_or_default(),
};
let default_radix = if args.hex { 16 } else { 10 };
let (col, radix) = match col.strip_prefix("0x") {
// Trim and remove commas. This may break localized numbers.
let clean_str = raw_str.trim().replace(',', "");
let (clean_str, radix) = match clean_str.strip_prefix("0x") {
Some(s) => (s, 16),
None => (col, default_radix),
None => (&clean_str as &str, if args.hex { 16 } else { 10 }),
};
let n = match i128::from_str_radix(col, radix) {
Ok(n) => Sum::Integer(n),
// Holds an optional error string from parsing that we may display in verbose output.
let mut err = None;
let n = match i128::from_str_radix(clean_str, radix) {
Ok(n) => {
cnt += 1;
Sum::Integer(n)
}
Err(e) => {
log::info!("Not integer. {e:?}, col={col:?}, radix={radix:?}.");
log::info!("Not integer. {e:?}, clean={clean_str:?}, radix={radix:?}.");
// Try parsing as a float
match col.parse::<f64>() {
Ok(n) => Sum::Float(n),
match clean_str.parse::<f64>() {
Ok(n) => {
cnt += 1;
Sum::Float(n)
}
Err(e) => {
log::info!("Not float. {e:?}, col={col:?}.");
log::info!("Not float. {e:?}, clean={clean_str:?}.");
// If it parses as hex, warn the user that they may want to use -x.
if i128::from_str_radix(col, 16).is_ok() {
if i128::from_str_radix(clean_str, 16).is_ok() {
log::warn!(
"Failed to parse {col:?}, but it may be hex. Consider using -x"
"Failed to parse {clean_str:?}, but it may be hex. Consider using -x"
);
}
err = Some(format!("{e:?}"));
Sum::Integer(0)
}
}
}
};
sum += n;
log::debug!("{i}: col={col:?}, n={n:?}, sum={sum:?}");
if args.verbose {
// Print each number that we're summing, along with some metadata.
let mut metadata = vec![];
metadata.push(format!("n={}", format!("{:?}", n).bold()).cyan());
metadata.push(format!("sum={}", format!("{:?}", sum).bold()).cyan());
metadata.push(format!("cnt={}", format!("{cnt}").bold()).cyan());
metadata.push(format!("radix={}", format!("{radix}").bold()).cyan());
metadata.push(format!("raw_str={}", format!("{raw_str:?}").bold()).cyan());
if let Some(err) = err {
metadata.push(format!("err={}", format!("{err:?}").bold()).red());
}
print!("{}\t", fmt_sum(n, args.hex));
["#".cyan()]
.into_iter()
.chain(metadata.into_iter())
.for_each(|x| print!(" {}", x));
println!();
}
}
}

if args.hex {
println!("{sum:#X}");
} else {
println!("{sum}");
if args.verbose {
println!("{}", "==".cyan());
}
println!("{}", fmt_sum(sum, args.hex));

Ok(())
}
32 changes: 32 additions & 0 deletions tests/cli.rs
Original file line number Diff line number Diff line change
@@ -210,3 +210,35 @@ fn sum_float_hex_flag() -> TestResult {
.stdout(predicate::str::contains("15.2"));
Ok(())
}

/// Checks that `-v` prints per-number metadata, the `==` separator, and the final sum.
#[test]
fn sum_verbose_flag() -> TestResult {
    // Field 2 holds an integer, an unparseable token, and two floats.
    let input = r"
hello 2 blah
hello OOPS blah
hello 1.0 foo
hello 2.2 oo
";

    // Fragments that must each appear somewhere in stdout.
    let expected = [
        r#"n=Integer(2) sum=Integer(2) cnt=1 radix=10 raw_str="2""#,
        // "OOPS" fails to parse: 0 is used instead and `cnt` stays at 1.
        r#"n=Integer(0) sum=Integer(2) cnt=1 radix=10 raw_str="OOPS" err="ParseFloatError { kind: Invalid }""#,
        r#"n=Float(2.2) sum=Float(5.2) cnt=3 radix=10 raw_str="2.2""#,
        // Separator printed before the final total in verbose mode.
        r#"=="#,
        // The total: 2 + 0 + 1.0 + 2.2.
        r#"5.2"#,
    ];

    let mut cmd = Command::cargo_bin("sumcol")?;
    let mut assertion = cmd
        .write_stdin(input)
        .args(["-f=2", "-v"])
        .assert()
        .success();
    for fragment in expected {
        assertion = assertion.stdout(predicate::str::contains(fragment));
    }
    Ok(())
}

0 comments on commit 6e2c55d

Please sign in to comment.