Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for some format variants #12

Merged
merged 6 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cli/src/command/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ pub(crate) fn cli() -> Command {

pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let options = DataReaderOptions::ENABLE_READING_BODY;
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR
| DataReaderOptions::ENABLE_READING_BODY;
let options = if args.get_flag("ignore-size") {
options.union(DataReaderOptions::IGNORE_DATA_SIZE_FIELD)
} else {
Expand Down
4 changes: 3 additions & 1 deletion cli/src/command/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ pub(crate) fn cli() -> Command {
pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let n_bytes = args.get_one::<usize>("N").unwrap();
let options = DataReaderOptions::default();
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR;
let (_, header, _) = read_from_source(fname, Some(n_bytes), options).await?;

println!("{}", HeaderDisplay(&header));
Expand Down
4 changes: 3 additions & 1 deletion cli/src/command/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ pub(crate) fn cli() -> Command {
pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let n_bytes = args.get_one::<usize>("N").unwrap();
let options = DataReaderOptions::default();
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR;
let (schema, _, _) = read_from_source(fname, Some(n_bytes), options).await?;

if args.get_flag("tree") {
Expand Down
5 changes: 3 additions & 2 deletions cli/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ fn prettify_special_field_name(name: &str) -> &str {

#[cfg(test)]
mod tests {
use rrr::Schema;
use rrr::{parse, DataReaderOptions};

use super::*;

Expand All @@ -205,7 +205,8 @@ mod tests {
#[test]
fn $name() {
let input = $input;
let schema = input.parse::<Schema>().unwrap();
let options = DataReaderOptions::default();
let schema = parse(input.as_bytes(), options).unwrap();
let actual = format!("{}", SchemaTreeDisplay(&schema.ast));
let actual = console::strip_ansi_codes(&actual);
let expected = $expected;
Expand Down
172 changes: 140 additions & 32 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,18 @@
use std::str::FromStr;
use crate::{param::ParamStack, DataReaderOptions};

use crate::param::ParamStack;
pub fn parse(bytes: &[u8], options: DataReaderOptions) -> Result<Schema, crate::Error> {
let parser = SchemaParser::new(bytes, options);
parser
.parse()
.map_err(|e| crate::Error::Schema(e, bytes.to_vec()))
}

/// A parsed schema, as returned by the schema parser.
#[derive(Debug, PartialEq, Eq)]
pub struct Schema {
/// Syntax tree of the schema.
pub ast: Ast,
/// NOTE(review): presumably the parameter stack accumulated by the parser
/// while reading the schema — confirm against `SchemaParser::parse`.
pub params: ParamStack,
}

/// Conversion from raw schema bytes into a [`Schema`].
impl TryFrom<&[u8]> for Schema {
type Error = crate::Error;

/// Builds a `SchemaParser` over `bytes` and runs it.
///
/// A parse failure is wrapped in `crate::Error::Schema` together with a
/// copy of the input bytes.
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
let parser = SchemaParser::new(bytes);
parser
.parse()
.map_err(|e| crate::Error::Schema(e, bytes.to_vec()))
}
}

/// Allows `"...".parse::<Schema>()` on schema text.
impl FromStr for Schema {
type Err = crate::Error;

/// Delegates to the `TryFrom<&[u8]>` implementation using the string's bytes.
fn from_str(s: &str) -> Result<Self, Self::Err> {
<Self>::try_from(s.as_bytes())
}
}

#[derive(Debug, PartialEq, Eq)]
pub struct Ast {
pub kind: AstKind,
Expand Down Expand Up @@ -86,19 +72,35 @@ struct SchemaParser<'b> {
lexer: std::iter::Peekable<SchemaLexer<'b>>,
location: Location,
params: ParamStack,
options: DataReaderOptions,
}

impl<'b> SchemaParser<'b> {
fn new(input: &'b [u8]) -> Self {
fn new(input: &'b [u8], options: DataReaderOptions) -> Self {
Self {
lexer: SchemaLexer::new(input).peekable(),
location: Location(0, 0),
params: ParamStack::new(),
options,
}
}

fn parse(mut self) -> Result<Schema, SchemaParseError> {
let kind = self.parse_field_list()?;
let kind = if self
.options
.contains(DataReaderOptions::ALLOW_EMPTY_FIELD_NAME)
&& matches!(
self.lexer.peek(),
Some(Ok(Token {
kind: TokenKind::Colon,
..
}))
) {
self.parse_field_with_empty_name()?
} else {
self.parse_field_list()?
};

if let Some(result) = self.lexer.next() {
// should be TokenKind::RBracket
let token = result.unwrap();
Expand All @@ -116,6 +118,17 @@ impl<'b> SchemaParser<'b> {
Ok(schema)
}

/// Parses a schema that consists of a single field with an empty name,
/// i.e. input that starts directly with `:` followed by a type.
///
/// The leading colon is consumed, the type is parsed, and the resulting
/// unnamed field is returned as the only member of a struct node.
fn parse_field_with_empty_name(&mut self) -> Result<AstKind, SchemaParseError> {
    self.consume_symbol(TokenKind::Colon)?;

    let unnamed_field = Ast {
        kind: self.parse_type()?,
        name: String::new(),
    };
    Ok(AstKind::Struct(vec![unnamed_field]))
}

fn parse_field_list(&mut self) -> Result<AstKind, SchemaParseError> {
let mut members = Vec::new();

Expand Down Expand Up @@ -148,6 +161,20 @@ impl<'b> SchemaParser<'b> {
if self.next_token()?.kind != TokenKind::Comma {
return Err(self.err_unexpected_token());
}

if self
.options
.contains(DataReaderOptions::ALLOW_TRAILING_COMMA)
&& matches!(
self.lexer.peek(),
None | Some(Ok(Token {
kind: TokenKind::RBracket,
..
}))
)
{
break;
}
}

if members.is_empty() {
Expand Down Expand Up @@ -201,7 +228,12 @@ impl<'b> SchemaParser<'b> {
self.consume_symbol(TokenKind::RAngleBracket)?;

if let TokenKind::Ident(s) = self.next_token()?.kind {
if s.as_str() != "NSTR" {
if !(s.as_str() == "NSTR"
|| (self
.options
.contains(DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR)
&& s.as_str() == "STR"))
{
return Err(self.err_unexpected_token());
}
} else {
Expand Down Expand Up @@ -460,7 +492,7 @@ mod tests {
#[test]
fn parse_single_field() {
let input = "fld1:INT16";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand All @@ -480,7 +512,7 @@ mod tests {
#[test]
fn parse_single_struct() {
let input = "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -513,7 +545,7 @@ mod tests {
#[test]
fn parse_nested_struct() {
let input = "fld1:[sfld1:[ssfld1:<4>NSTR,ssfld2:STR,ssfld3:INT32]]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -549,7 +581,7 @@ mod tests {
#[test]
fn parse_single_fixed_length_builtin_type_array() {
let input = "fld1:{3}INT8";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand All @@ -575,7 +607,7 @@ mod tests {
#[test]
fn parse_single_fixed_length_struct_array() {
let input = "fld1:{3}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -614,7 +646,7 @@ mod tests {
#[test]
fn parse_single_variable_length_struct_array() {
let input = "fld1:INT8,fld2:{fld1}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -662,7 +694,7 @@ mod tests {
#[test]
fn parse_single_unlimited_length_struct_array() {
let input = "fld1:+[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -698,12 +730,88 @@ mod tests {
assert_eq!(actual, expected);
}

// Generates one `#[test]` per tuple `(name, input, options, success_expected)`.
// Each test parses `input` with a `SchemaParser` configured with `options`
// and checks only whether parsing succeeded, not the resulting AST.
macro_rules! test_format_options_support {
    ($(($name:ident, $input:expr, $options:expr, $success_expected:expr),)*) => ($(
        #[test]
        fn $name() {
            let schema_text = $input;
            let succeeded = SchemaParser::new(schema_text.as_bytes(), $options)
                .parse()
                .is_ok();

            assert_eq!(succeeded, $success_expected);
        }
    )*);
}

// Cases for the relaxed-syntax options. Each tuple is
// (test name, schema text, reader options, whether parsing must succeed).
test_format_options_support! {
// --- ALLOW_TRAILING_COMMA ---
(
trailing_comma_not_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],",
DataReaderOptions::default(),
false
),
(
trailing_comma_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],",
DataReaderOptions::ALLOW_TRAILING_COMMA,
true
),
// Only a single trailing comma is tolerated; repeated commas stay errors.
(
multiple_trailing_commas_not_allowed_even_when_trailing_comma_is_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,,],,",
DataReaderOptions::ALLOW_TRAILING_COMMA,
false
),
(
double_commas_not_allowed_even_when_trailing_comma_is_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,,sfld3:INT32]",
DataReaderOptions::ALLOW_TRAILING_COMMA,
false
),
// --- ALLOW_EMPTY_FIELD_NAME ---
(
empty_field_name_not_allowed,
":+UINT8",
DataReaderOptions::default(),
false
),
(
empty_field_name_allowed,
":+UINT8",
DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
true
),
// The empty-name form is accepted only when it is the sole field.
(
empty_field_name_not_allowed_when_there_are_other_fields,
":UINT8,fld1:INT8",
DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
false
),
(
empty_field_name_not_allowed_when_trailing_comma_exists,
":UINT8,",
DataReaderOptions::ALLOW_TRAILING_COMMA | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
false
),
// --- ALLOW_STR_INSTEAD_OF_NSTR ---
(
str_instead_of_nstr_not_allowed,
"fld1:<4>NSTR,fld2:<4>STR",
DataReaderOptions::default(),
false
),
(
str_instead_of_nstr_allowed,
"fld1:<4>NSTR,fld2:<4>STR",
DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR,
true
),
}

macro_rules! test_parse_errors {
($(($name:ident, $input:expr, $kind:ident, $start:expr, $end:expr),)*) => ($(
#[test]
fn $name() {
let input = $input;
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected = Err(SchemaParseError {
kind: SchemaParseErrorKind::$kind,
Expand Down
10 changes: 6 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod walker;
use std::borrow::Cow;

pub use crate::{
ast::{Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind},
ast::{parse, Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind},
reader::{DataReader, DataReaderOptions},
utils::json_escape_str,
visitor::{AstVisitor, JsonDisplay, JsonFormattingStyle, SchemaOnelineDisplay},
Expand Down Expand Up @@ -100,21 +100,23 @@ mod tests {

use super::*;
use crate::{
ast::{Schema, Size},
ast::{parse, Schema, Size},
value::{Number, Value, ValueTree},
walker::BufWalker,
};

fn schema_without_str() -> Result<Schema, Error> {
let options = DataReaderOptions::default();
let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\
data:{4}[loc:<4>NSTR,temp:INT16,rhum:UINT16],comment:<16>NSTR";
ast.parse()
parse(ast.as_bytes(), options)
}

fn schema_with_str() -> Result<Schema, Error> {
let options = DataReaderOptions::default();
let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\
data:{4}[loc:STR,temp:INT16,rhum:UINT16],comment:<16>NSTR";
ast.parse()
parse(ast.as_bytes(), options)
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use std::{
use flate2::read::GzDecoder;
pub use options::DataReaderOptions;

use crate::{ast::Schema, Error};
use crate::{
ast::{parse, Schema},
Error,
};

mod options;

Expand Down Expand Up @@ -36,7 +39,7 @@ where
let map = self.read_header_fields()?;

let schema = map.get_required_field("format")?;
let schema: Schema = schema.as_slice().try_into()?;
let schema = parse(schema.as_slice(), self.options)?;

let body = if self
.options
Expand Down
Loading
Loading