From a36223b6b4ea15e6150b432b98ddab8f6722f53c Mon Sep 17 00:00:00 2001
From: Shon Feder
Date: Sun, 5 Jul 2020 22:37:52 -0400
Subject: [PATCH] Add Tyxml backend for all element types
---
omd_tyxml/omd_tyxml.ml | 130 ++++++++++++++++++++++-------------------
tests/dune | 19 +-----
tests/extract_tests.ml | 4 +-
tests/omd.ml | 51 ++++++++++++++--
4 files changed, 119 insertions(+), 85 deletions(-)
diff --git a/omd_tyxml/omd_tyxml.ml b/omd_tyxml/omd_tyxml.ml
index 54b739d4..f622d648 100644
--- a/omd_tyxml/omd_tyxml.ml
+++ b/omd_tyxml/omd_tyxml.ml
@@ -1,6 +1,6 @@
open Tyxml
-let of_code c = Html.[code ~a:[] [txt c]]
+let of_code attrs c = Html.[code ~a:attrs [txt c]]
let of_attribute (attr, value) =
Html.Unsafe.string_attrib attr value
@@ -10,27 +10,14 @@ let of_attributes attrs =
exception Invalid_markdown of string
-module Attrs = struct
- let of_link_def : 'a Omd.link_def -> [< Html_types.a_attrib] Html.attrib list =
- fun l ->
- let href = Omd.Internal.escape_uri l.destination |> Html.a_href in
- match l.title with
- | Some t -> [href; Html.a_title t]
- | None -> [href]
-end
-(* let link_attribs _ *)
-(* let common_attributes : Omd.attributes -> Html_types.common =
- * fun ((attr, value) :: _) -> *)
-(* raise (Failure "TODO") *)
-
-(* let img_attributes : Omd.attributes -> [< Html_types.img_attrib] list =
- * let img_attrib (attr, value) =
- * match attr with
- * | "alt" -> Html.a_alt value
- * | _ -> raise (Failure "TODO img_attrib")
- * in
- * fun attrs -> List.map img_attrib attrs *)
+exception Unsupported_attribute of string
+let of_omd_attributes attrs =
+ List.map (fun (a, v) -> Html.Unsafe.string_attrib a v) attrs
+(* module Attrs = struct
+ * end *)
+
+(* TODO move into Omd module *)
let rec inline_to_plaintext : Omd.inline -> string =
fun il ->
match il.il_desc with
@@ -48,47 +35,39 @@ let rec inline_to_plaintext : Omd.inline -> string =
let rec of_inline ({il_attributes; il_desc} : Omd.inline) =
(* let a = of_attributes il_attributes in *)
(* TODO Attributes *)
- let _ = il_attributes in
+ let attrs = of_omd_attributes il_attributes in
match il_desc with
- | Code c -> of_code c
+ | Code c -> of_code attrs c
| Concat ls -> List.concat_map of_inline ls
| Emph e -> Html.[em ~a:[] (of_inline e)]
| Strong s -> Html.[strong ~a:[] (of_inline s)]
| Hard_break -> Html.[br ~a:[] ()]
(* TODO Add option for verified html ?*)
| Html raw -> Html.Unsafe.[data raw]
- | Image img -> [of_img img]
- | Link l -> Html.[a ~a:Attrs.(of_link_def l) (of_link_label l.label)]
+ | Image img -> [of_img attrs img]
+ | Link l -> [of_link attrs l]
| Soft_break -> Html.[txt "\n"]
| Text t -> Html.[txt t]
and of_link_label ({il_attributes; il_desc} as il : Omd.inline) =
let _attr = il_attributes in
match il_desc with
- (* | Code c -> of_code c
- * | Text t -> Html.[txt t]
- * | Concat l -> List.concat_map of_link_label l
- * | Emph e -> Html.[em ~a:[] (of_link_label e)]
- * | Strong e -> of_inline e |> List.map Html.Unsafe.coerce_elt
- * | Image i -> [Html.Unsafe.coerce_elt (of_img i )] *)
| Code _ | Text _ | Concat _
| Emph _ | Strong _ | Image _ -> List.map Html.Unsafe.coerce_elt (of_inline il)
- | _ -> raise (Failure "TODO invalid")
-(* | Emph _ -> (??)
- * | Strong _ -> (??)
- * | Hard_break -> (??)
- * | Soft_break -> (??)
- * | Link _ -> (??)
- * | Image _ -> (??)
- * | Html _ -> (??) *)
-
-and of_img (img : Omd.inline Omd.link_def) =
+ | _ -> raise (Failure "TODO invalid link label")
+
+and of_link attrs (l : Omd.link) =
+ let escaped_url = Omd.Internal.escape_uri l.destination in
+ let attrs = (Html.a_href escaped_url :: attrs) @ Option.to_list (Option.map Html.a_title l.title) in
+ Html.(a ~a:attrs (of_link_label l.label))
+
+and of_img attrs (img : Omd.link) =
let escaped_url = Omd.Internal.escape_uri img.destination in
- let attrs = Option.map Html.a_title img.title |> Option.to_list in
+ let attrs = attrs @ (Option.map Html.a_title img.title |> Option.to_list) in
let alt = inline_to_plaintext img.label in
Html.(img ~src:escaped_url ~alt ~a:attrs ())
-let of_heading n inline =
+let of_heading n attrs content =
let h =
let open Html in
match n with
@@ -100,37 +79,68 @@ let of_heading n inline =
| 6 -> h6
| m -> raise (Invalid_markdown (Printf.sprintf "heading number %d" m))
in
- h ~a:[] (of_inline inline)
+ h ~a:(of_omd_attributes attrs) (of_inline content)
-let of_link_block (ld : string Omd.link_def) =
- Html.(p [a ~a:Attrs.(of_link_def ld) [txt ld.label]])
+let of_code_block src attrs content =
+ let src_attr = match src with
+ | "" -> []
+ | _ -> [Html.a_class ["language-" ^ src]]
+ in
+ Html.(pre ~a:attrs [code ~a:src_attr [txt content]])
(* This function is partial because the Omd AST includes nodes which do not
correspond to any HTML element. *)
-let of_block : Omd.block -> _ Html.elt option =
+let rec of_block : Omd.block -> _ Html.elt =
fun block ->
(* FIXME *)
+ let attrs = of_omd_attributes block.bl_attributes in
try
match block.bl_desc with
- | Paragraph i -> Some (Html.p (of_inline i))
- | List _ -> raise (Failure "TODO of_block list")
- | Blockquote _ -> raise (Failure "TODO of_block blockquote")
- | Thematic_break -> raise (Failure "TODO of_block Thematic_break")
- | Heading (n, i) -> Some (of_heading n i)
- | Code_block (_, c) -> Some (Html.(pre [code ~a:[] [txt c]]))
- (* Html.pre ~a:[] (of_inline i) *)
- | Html_block _ -> raise (Failure "TODO of_block Html_bloc")
- | Link_def _ -> None
- | Definition_list _ -> raise (Failure "TODO of_block Definition_list")
- with (Failure err) ->
- Some (Html.(h1 [txt ("Error " ^ err)]))
+ | Paragraph i -> Html.p (of_inline i)
+ | List (typ, spacing, items) -> of_list typ spacing items
+ | Blockquote content -> Html.blockquote (List.map of_block content)
+ | Thematic_break -> Html.hr ()
+ | Heading (n, content) -> of_heading n block.bl_attributes content
+ | Code_block (src, c) -> of_code_block src attrs c
+ | Html_block html -> Html.Unsafe.data html
+ | Definition_list content -> of_definition_list content
+ with (Failure err) ->
+ Html.(h1 [txt ("Error " ^ err)])
+
+and of_list typ spacing items =
+ let of_list_block (bl : Omd.block) =
+ match bl.bl_desc, spacing with
+ | Paragraph il, Tight -> of_inline il |> List.map Html.Unsafe.coerce_elt
+ | _ -> [of_block bl]
+ in
+ let itemize i =
+ i |> List.concat_map of_list_block |> Html.li
+ in
+ let element =
+ match typ with
+ | Ordered (start, _) -> Html.ol ~a:(if start <> 1 then [Html.a_start start] else [])
+ | Bullet _ -> Html.ul ~a:[]
+ in
+ items
+ |> List.map itemize
+ |> element
+
+and of_definition_list defs =
+ let definiens d =
+ Html.dd (of_inline d |> List.map Html.Unsafe.coerce_elt)
+ in
+ let def ({term; defs} : Omd.def_elt) =
+ Html.(dt (of_inline term |> List.map Html.Unsafe.coerce_elt)
+ :: List.map definiens defs)
+ in
+ Html.dl (List.concat_map def defs)
let of_omd ?(title="") : Omd.doc -> Tyxml.Html.doc =
fun omd ->
let title' = title in
let body' =
try
- List.filter_map of_block omd
+ List.map of_block omd
with (Failure err) ->
Html.[h1 [txt ("Error " ^ err)]]
in
diff --git a/tests/dune b/tests/dune
index 634dd49b..f8f6743d 100644
--- a/tests/dune
+++ b/tests/dune
@@ -1,6 +1,6 @@
(executable
(name extract_tests)
- (libraries str)
+ (libraries str lambdasoup)
(modules extract_tests))
; Generate and run tests for the core omd package
@@ -14,26 +14,13 @@
(include dune.inc)
-(executable
- (name omd_tyxml)
- (libraries str omd tyxml omd_tyxml)
- (modules omd_tyxml))
-
-; Generate and run tests for the optional omd-tyxml package
-(rule
- (with-stdout-to dune-omd-tyxml.inc.new (run ./extract_tests.exe -write-dune-file-omd-tyxml)))
-
-(include dune-omd-tyxml.inc)
-
(executable
(name omd)
- (libraries str omd)
+ (libraries str omd omd_tyxml tyxml lambdasoup)
(modules omd))
; Generate the rules for diff-based tests
(rule
(alias gen)
(action
- (progn
- (diff dune.inc dune.inc.new)
- (diff dune-omd-tyxml.inc dune-omd-tyxml.inc.new))))
+ (diff dune.inc dune.inc.new)))
diff --git a/tests/extract_tests.ml b/tests/extract_tests.ml
index 757d2cc5..4a377bed 100644
--- a/tests/extract_tests.ml
+++ b/tests/extract_tests.ml
@@ -120,9 +120,7 @@ let write_dune_file test_specs tests =
let li_begin_re = Str.regexp_string "\n"
let li_end_re = Str.regexp_string "\n"
-let normalize_html s =
- Str.global_replace li_end_re ""
- (Str.global_replace li_begin_re "" s)
+let normalize_html s = Soup.(parse s |> pretty_print)
let generate_test_files tests =
let f {filename; example; markdown; html} =
diff --git a/tests/omd.ml b/tests/omd.ml
index e3bf48d4..03119d65 100644
--- a/tests/omd.ml
+++ b/tests/omd.ml
@@ -1,15 +1,54 @@
-let li_begin_re = Str.regexp_string "\n"
-let li_end_re = Str.regexp_string "\n"
+(* let li_begin_re = Str.regexp_string "\n"
+ * let li_end_re = Str.regexp_string "\n" *)
-let normalize_html s =
- Str.global_replace li_end_re ""
- (Str.global_replace li_begin_re "" s)
+(* let normalize_html s =
+ * Str.global_replace li_end_re ""
+ * (Str.global_replace li_begin_re "" s) *)
let with_open_in fn f =
let ic = open_in fn in
Fun.protect ~finally:(fun () -> close_in_noerr ic)
(fun () -> f ic)
+(* FIXME: Resolve preferred backend *)
+
+(* FIXME: This is getting rediculous. Probably better, imo, to programmatically
+ format the spec HTML and compare the ASTs of the HTMl instead of doing this
+ string munging *)
+
+let replacements =
+ [ Str.regexp_string "
", "
\n"
+ (* ; Str.regexp_string ">\n\n", ">\n" *)
+ (* ; Str.regexp "\n\n$", "\n" *)
+ (* Str.regexp "\"/>", "\" />"
+ * ; Str.regexp_string "
", "
\n"
+ * ; Str.regexp_string "", "\n"
+ * ; Str.regexp_string "
", "\n"
+ * ; Str.regexp_string "
", "\n"
+ * ; Str.regexp_string "
", "
\n"
+ * ; Str.regexp_string "
<", "\n<"
+ * ; Str.regexp_string "<", "\n<" *)
+ ]
+
+
+let tyxml_elt_to_string t =
+ Format.asprintf "%a" Tyxml.Html.(pp_elt ~indent:false ()) t
+
+let html_of_omd s =
+ s
+ |> List.map (fun b -> b |> Omd_tyxml.of_block |> tyxml_elt_to_string)
+ |> String.concat ""
+
+let normalize_html s = Soup.(parse s |> pretty_print)
+
+let denormalize_html str =
+ List.fold_left (fun s (re, rep) -> Str.global_replace re rep s) str replacements
+
let () =
with_open_in Sys.argv.(1) @@ fun ic ->
- print_string (normalize_html (Omd.to_html (Omd.of_channel ic)))
+ ic
+ |> Omd.of_channel
+ |> html_of_omd
+ |> normalize_html
+ |> denormalize_html
+ |> print_string