Skip to content

Commit

Permalink
Bump version, format.
Browse files Browse the repository at this point in the history
  • Loading branch information
toots committed Mar 29, 2022
1 parent e35a026 commit d8eb183
Show file tree
Hide file tree
Showing 22 changed files with 12,967 additions and 3,473 deletions.
10 changes: 10 additions & 0 deletions .ocamlformat
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version=0.19.0
profile = conventional
break-separators = after
space-around-lists = false
doc-comments = before
match-indent = 2
match-indent-nested = always
parens-ite
exp-grouping = preserve
module-item-spacing = compact
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2.6:
* Adapted to ppxlib `0.26`, thanks to @pitag-ha

2.5:
* Fix exponential compilation time, thanks to @mnxn for reporting in #97
and @fangyi-zhou for fixing in #106
Expand Down
4 changes: 2 additions & 2 deletions dune-project
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(lang dune 2.8)
(version 2.5)
(version 2.6)
(name sedlex)
(source (github ocaml-community/sedlex))
(license MIT)
Expand All @@ -20,6 +20,6 @@ extension.")
(depends
(ocaml (>= 4.04))
dune
(ppxlib (>= 0.18.0))
(ppxlib (>= 0.26.0))
gen
uchar))
20 changes: 13 additions & 7 deletions examples/complement.ml
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
(* Named sedlex regexps for the ASCII upper-case ('A'..'Z') and lower-case
 * ('a'..'z') letters. *)
let ucase = [%sedlex.regexp? 'A'..'Z']
let lcase = [%sedlex.regexp? 'a'..'z']
let ucase = [%sedlex.regexp? 'A' .. 'Z']
let lcase = [%sedlex.regexp? 'a' .. 'z']

(* Recursively lexes [buf] until end of input, printing one character per
 * token: 'L' for a match of [lcase], 'U' for a match of [ucase] and '?' for
 * a match of [Compl (ucase | lcase)]. At [eof] it prints "." followed by a
 * newline and stops. The final [_] case is presumably unreachable, hence
 * [assert false]. *)
let rec token buf =
match%sedlex buf with
| lcase -> print_char 'L';token buf
| Compl (ucase | lcase) -> print_char '?'; token buf
| ucase -> print_char 'U';token buf
| eof -> print_endline "."
| _ -> assert false
| lcase ->
print_char 'L';
token buf
| Compl (ucase | lcase) ->
print_char '?';
token buf
| ucase ->
print_char 'U';
token buf
| eof -> print_endline "."
| _ -> assert false

let () =
let lexbuf = Sedlexing.Latin1.from_string "Abc::DefG" in
Expand Down
2 changes: 1 addition & 1 deletion examples/dune
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
(libraries sedlex sedlex_ppx)
(preprocess
(pps sedlex.ppx))
(flags :standard -w +39))
(flags :standard -w +39))

(rule
(alias runtest)
Expand Down
17 changes: 9 additions & 8 deletions examples/performance.ml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
(* Consumes [buf] one [any] match at a time, recursing until [eof], where it
 * returns unit. It produces no output. *)
let rec token buf =
match%sedlex buf with
| any -> token buf
| eof -> ()
| _ -> assert false
match%sedlex buf with any -> token buf | eof -> () | _ -> assert false

(* [time f x] calls [f x] eleven times in total: the local [acc] discards the
 * result of the first ten calls and returns the result of the last one. The
 * difference between the two [Sys.time] readings taken around all of these
 * calls is printed as "Execution time: %fs", and the last result is
 * returned. *)
let time f x =
let rec acc f x = function
| 0 -> f x
| n -> f x|>ignore; acc f x (n-1) in
let t = Sys.time() in
| n ->
f x |> ignore;
acc f x (n - 1)
in
let t = Sys.time () in
let fx = acc f x 10 in
Printf.printf "Execution time: %fs\n" (Sys.time() -. t);
Printf.printf "Execution time: %fs\n" (Sys.time () -. t);
fx

let () =
Expand All @@ -19,4 +19,5 @@ let () =
let lexbuf = Sedlexing.Latin1.from_string long_str in
(* let () = Sedlexing.set_curr_p lexbuf Lexing.dummy_pos in *)
token lexbuf
in time token_from long_str
in
time token_from long_str
47 changes: 24 additions & 23 deletions examples/regressions.ml
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
(* This tests that unicode_old.ml is a strict subset of
 * the new unicode.ml. *)

(* Pair (old version, new version) of the Unicode tables this test was
 * written for; the entry point compares it against the versions found. *)
let test_versions = ("13.0.0","14.0.0")

let test_versions = ("13.0.0", "14.0.0")
(* Association list from a name to a list of inclusive (start, end) code
 * point ranges; [test_exception] looks names up here. Currently empty. *)
let regressions = []

let interval s e =
Array.to_list
(Array.init (e-s) (fun pos -> s + pos))
(* [interval s e] lists every code point of the inclusive range [s..e] in
 * increasing order.
 *
 * Ranges are treated as inclusive everywhere else in this file (membership
 * is tested with [s <= x && x <= e]), so the end point [e] must be
 * enumerated as well. Using [e - s] elements silently dropped [e] and made
 * every single-point range ([s = e]) expand to the empty list, so those
 * code points were never checked.
 *
 * Raises [Invalid_argument] (from [Array.init]) if [e < s - 1]. *)
let interval s e =
  Array.to_list (Array.init (e - s + 1) (fun pos -> s + pos))

(* Raised from inside [List.iter] to stop a range search as soon as a range
 * containing the code point is found. *)
exception Found

(* [test_exception name x] raises [Found] if [x] lies inside one of the
 * inclusive ranges listed for [name] in [regressions]. If [name] has no
 * entry there, [List.assoc] raises [Not_found], which is caught, and the
 * function returns unit. *)
let test_exception name x =
try
let l = List.assoc name regressions in
List.iter (fun (s,e) ->
if s<=x && x<=e then raise Found) l
List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) l
with Not_found -> ()

(* [compare name old_l new_l] expands every (start, end) range of [old_l]
 * into individual code points with [interval], then prints
 * "Code point 0x%x missing in %s!" for each one that is neither inside a
 * range of [new_l] nor exempted through [test_exception].
 *
 * NOTE: this definition shadows [Stdlib.compare] for the rest of the file. *)
let compare name old_l new_l =
let code_points = List.fold_left (fun res (s,e) ->
res@(interval s e)) [] old_l
let code_points =
List.fold_left (fun res (s, e) -> res @ interval s e) [] old_l
in
(* [test x] is true when [x] is exempted or covered by [new_l]; both cases
 * are signalled by [Found]. *)
let test x =
try
test_exception name x;
List.iter (fun (s,e) ->
if s<=x && x<=e then raise Found) new_l;
List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) new_l;
false
with Found -> true
in
List.iter (fun x ->
if not (test x) then
Printf.printf "Code point 0x%x missing in %s!\n" x name)
code_points
List.iter
(fun x ->
if not (test x) then
Printf.printf "Code point 0x%x missing in %s!\n" x name)
code_points

(* [test new_l (name, old_l)] runs [compare] on the old ranges [old_l] and
 * the ranges bound to [name] in the association list [new_l]. The name "cn"
 * is skipped. [List.assoc] raises [Not_found] if [name] is absent from
 * [new_l]. *)
let test new_l (name, old_l) =
(* Cn is for unassigned code points, which are allowed to be
 * used in future versions. *)
if name <> "cn" then
compare name old_l (List.assoc name new_l)
if name <> "cn" then compare name old_l (List.assoc name new_l)

(* Entry point: fails with [Failure] unless the pair of versions
 * ([Unicode_old.version], [Sedlex_ppx.Unicode.version]) equals
 * [test_versions]; otherwise prints a banner and runs [test] over every
 * entry of the old category list and the old property list.
 *
 * NOTE(review): the failure message says "Test written for versions" but
 * prints the versions actually found, not the expected [test_versions]. *)
let () =
if (Unicode_old.version,Sedlex_ppx.Unicode.version) <> test_versions then
failwith (Printf.sprintf "Test written for versions: %s => %s\n%!" Unicode_old.version Sedlex_ppx.Unicode.version);
Printf.printf "Testing Unicode regression: %s => %s\n%!" Unicode_old.version Sedlex_ppx.Unicode.version;
List.iter (test Sedlex_ppx.Unicode.Categories.list) Unicode_old.Categories.list;
List.iter (test Sedlex_ppx.Unicode.Properties.list) Unicode_old.Properties.list
if (Unicode_old.version, Sedlex_ppx.Unicode.version) <> test_versions then
failwith
(Printf.sprintf "Test written for versions: %s => %s\n%!"
Unicode_old.version Sedlex_ppx.Unicode.version);
Printf.printf "Testing Unicode regression: %s => %s\n%!" Unicode_old.version
Sedlex_ppx.Unicode.version;
List.iter
(test Sedlex_ppx.Unicode.Categories.list)
Unicode_old.Categories.list;
List.iter
(test Sedlex_ppx.Unicode.Properties.list)
Unicode_old.Properties.list
25 changes: 18 additions & 7 deletions examples/repeat.ml
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
(* Recursively lexes [buf], printing one line per token, to exercise the
 * [Rep] repetition operator:
 *  - [white_space] prints "\tWhitespace";
 *  - 'a' followed by [Rep (white_space, 1)] prints "a" and then
 *    "\tWhitespace";
 *  - [Rep ("bc", 2)] prints "bcbc";
 *  - [Rep ("d", 1 .. 1)] prints "d";
 *  - [Rep ("ef", 1 .. 3)] prints the matched lexeme;
 *  - [any] prints "Other".
 * At [eof] it prints "\tEnd" and stops. The final [_] case raises
 * [Failure]. *)
let rec token buf =
match%sedlex buf with
| white_space -> print_endline "\tWhitespace"; token buf
| 'a', Rep(white_space, 1) -> print_endline "a\n\tWhitespace"; token buf
| Rep("bc", 2) -> print_endline "bcbc"; token buf
| Rep("d", 1 .. 1) -> print_endline "d"; token buf
| Rep("ef", 1 .. 3) -> Printf.printf "%s\n" (Sedlexing.Utf8.lexeme buf); token buf
| white_space ->
print_endline "\tWhitespace";
token buf
| 'a', Rep (white_space, 1) ->
print_endline "a\n\tWhitespace";
token buf
| Rep ("bc", 2) ->
print_endline "bcbc";
token buf
| Rep ("d", 1 .. 1) ->
print_endline "d";
token buf
| Rep ("ef", 1 .. 3) ->
Printf.printf "%s\n" (Sedlexing.Utf8.lexeme buf);
token buf
| eof -> print_endline "\tEnd"
| any -> print_endline "Other"; token buf
| any ->
print_endline "Other";
token buf
| _ -> failwith "Internal failure: Reached impossible place"


(* Entry point: build a UTF-8 lexing buffer over a fixed sample string and
 * run [token] on it. *)
let () = "a bcbc d ef efef efefef" |> Sedlexing.Utf8.from_string |> token
21 changes: 15 additions & 6 deletions examples/subtraction.ml
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
(* Recursively lexes [buf], printing one line per token, to exercise the
 * character-set operators:
 *  - [white_space] prints "\tWhitespace";
 *  - [Sub (Chars "ab", "b")] prints "a";
 *  - [Chars "ab" | "c"] prints "abc";
 *  - [Intersect ("d", Chars "abd")] prints "d";
 *  - [any] prints "Other".
 * At [eof] it prints "\tEnd" and stops. The final [_] case raises
 * [Failure]. *)
let rec token buf =
match%sedlex buf with
| white_space -> print_endline "\tWhitespace"; token buf
| Sub (Chars "ab","b") -> print_endline "a"; token buf
| (Chars "ab"|"c") -> print_endline "abc"; token buf
| Intersect ("d", Chars "abd") -> print_endline "d"; token buf
| white_space ->
print_endline "\tWhitespace";
token buf
| Sub (Chars "ab", "b") ->
print_endline "a";
token buf
| Chars "ab" | "c" ->
print_endline "abc";
token buf
| Intersect ("d", Chars "abd") ->
print_endline "d";
token buf
| eof -> print_endline "\tEnd"
| any -> print_endline "Other"; token buf
| any ->
print_endline "Other";
token buf
| _ -> failwith "Internal failure: Reached impossible place"


(* Entry point: build a UTF-8 lexing buffer over a fixed sample string and
 * run [token] on it. *)
let () = "a b c d e" |> Sedlexing.Utf8.from_string |> token
24 changes: 15 additions & 9 deletions examples/tokenizer.ml
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
(* Named sedlex regexps: [digit] is one ASCII digit ('0'..'9') and [number]
 * is one or more digits ([Plus digit]). *)
let digit = [%sedlex.regexp? '0'..'9']
let digit = [%sedlex.regexp? '0' .. '9']
let number = [%sedlex.regexp? Plus digit]

(* Recursively tokenizes [buf], reading lexemes with
 * [Sedlexing.Latin1.lexeme]:
 *  - [number] prints "Number <lexeme>";
 *  - a [letter] followed by any run of letters or digits prints
 *    "Ident <lexeme>";
 *  - a run of [xml_blank] is skipped silently;
 *  - a run of the characters "+*-/" prints "Op <lexeme>";
 *  - a code point in 128 .. 255 prints "Non ASCII" and stops (no recursive
 *    call);
 *  - [eof] prints "EOF" and stops.
 * Anything else raises [Failure "Unexpected character"]. [letter] is a
 * regexp defined locally inside the function. *)
let rec token buf =
let letter = [%sedlex.regexp? 'a'..'z'|'A'..'Z'] in
let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z'] in
match%sedlex buf with
| number -> Printf.printf "Number %s\n" (Sedlexing.Latin1.lexeme buf); token buf
| letter, Star ('A'..'Z' | 'a'..'z' | digit) -> Printf.printf "Ident %s\n" (Sedlexing.Latin1.lexeme buf); token buf
| Plus xml_blank -> token buf
| Plus (Chars "+*-/") -> Printf.printf "Op %s\n" (Sedlexing.Latin1.lexeme buf); token buf
| 128 .. 255 -> print_endline "Non ASCII"
| eof -> print_endline "EOF"
| _ -> failwith "Unexpected character"
| number ->
Printf.printf "Number %s\n" (Sedlexing.Latin1.lexeme buf);
token buf
| letter, Star ('A' .. 'Z' | 'a' .. 'z' | digit) ->
Printf.printf "Ident %s\n" (Sedlexing.Latin1.lexeme buf);
token buf
| Plus xml_blank -> token buf
| Plus (Chars "+*-/") ->
Printf.printf "Op %s\n" (Sedlexing.Latin1.lexeme buf);
token buf
| 128 .. 255 -> print_endline "Non ASCII"
| eof -> print_endline "EOF"
| _ -> failwith "Unexpected character"

let () =
let lexbuf = Sedlexing.Latin1.from_string "foobar A123Bfoo ++123Xbar/foo" in
Expand Down
Loading

0 comments on commit d8eb183

Please sign in to comment.