diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml index eae4590..593fcf8 100644 --- a/.github/workflows/dotnet.yml +++ b/.github/workflows/dotnet.yml @@ -10,7 +10,6 @@ on: - 'docs/**' - '**/*.md' pull_request: - branches: [ "dev" ] jobs: net8: diff --git a/CHANGELOG.md b/CHANGELOG.md index ef44323..bba08de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # Changelog +## 3.2.2 + +- Supports a feature to disable heading numbering #175 +- Support center image with margin auto #171 +- Support deprecated align attribute for block #171 +- Fix parsing of style attribute with a key with no value +- Improve parsing of style attribute to avoid an extra call to HtmlDecode +- Extend support of nested list for non-W3C compliant html #173 +- Change way to apply table 100% width +- Allow to apply percentage widths to cells + ## 3.2.1 - Fix indentation of numbering list #166 diff --git a/HtmlToOpenXml.sln b/HtmlToOpenXml.sln index d702ded..2a20d0c 100644 --- a/HtmlToOpenXml.sln +++ b/HtmlToOpenXml.sln @@ -14,33 +14,33 @@ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "HtmlToOpenXml.Tests", "test\HtmlToOpenXml.Tests\HtmlToOpenXml.Tests.csproj", "{CA0A68E0-45A0-4A01-A061-F951D93D6906}" EndProject Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {EF700F30-C9BB-49A6-912C-E3B77857B514}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {EF700F30-C9BB-49A6-912C-E3B77857B514}.Debug|Any CPU.Build.0 = Debug|Any CPU - {EF700F30-C9BB-49A6-912C-E3B77857B514}.Release|Any CPU.ActiveCfg = Release|Any CPU - {EF700F30-C9BB-49A6-912C-E3B77857B514}.Release|Any CPU.Build.0 = Release|Any CPU - {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Debug|Any CPU.Build.0 = Debug|Any CPU - 
{A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Release|Any CPU.ActiveCfg = Release|Any CPU - {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Release|Any CPU.Build.0 = Release|Any CPU - {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Debug|Any CPU.Build.0 = Debug|Any CPU - {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Release|Any CPU.ActiveCfg = Release|Any CPU - {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {EF700F30-C9BB-49A6-912C-E3B77857B514} = {58520A98-BA53-4BA4-AAE3-786AA21331D6} - {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F} = {84EA02ED-2E97-47D2-992E-32CC104A3A7A} - {CA0A68E0-45A0-4A01-A061-F951D93D6906} = {84EA02ED-2E97-47D2-992E-32CC104A3A7A} - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {14EE1026-6507-4295-9FEE-67A55C3849CE} - EndGlobalSection + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {EF700F30-C9BB-49A6-912C-E3B77857B514}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {EF700F30-C9BB-49A6-912C-E3B77857B514}.Debug|Any CPU.Build.0 = Debug|Any CPU + {EF700F30-C9BB-49A6-912C-E3B77857B514}.Release|Any CPU.ActiveCfg = Release|Any CPU + {EF700F30-C9BB-49A6-912C-E3B77857B514}.Release|Any CPU.Build.0 = Release|Any CPU + {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F}.Release|Any CPU.Build.0 = Release|Any CPU + {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + 
{CA0A68E0-45A0-4A01-A061-F951D93D6906}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CA0A68E0-45A0-4A01-A061-F951D93D6906}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {EF700F30-C9BB-49A6-912C-E3B77857B514} = {58520A98-BA53-4BA4-AAE3-786AA21331D6} + {A1ECC760-B9F7-4A00-AF5F-568B5FD6F09F} = {84EA02ED-2E97-47D2-992E-32CC104A3A7A} + {CA0A68E0-45A0-4A01-A061-F951D93D6906} = {84EA02ED-2E97-47D2-992E-32CC104A3A7A} + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {14EE1026-6507-4295-9FEE-67A55C3849CE} + EndGlobalSection EndGlobal diff --git a/examples/Demo/Program.cs b/examples/Demo/Program.cs index 0620d54..e794af4 100644 --- a/examples/Demo/Program.cs +++ b/examples/Demo/Program.cs @@ -74,4 +74,4 @@ static void AssertThatOpenXmlDocumentIsValid(WordprocessingDocument wpDoc) Console.ReadLine(); } } -} \ No newline at end of file +} diff --git a/examples/Demo/Resources/CompleteRunTest.html b/examples/Demo/Resources/CompleteRunTest.html index 0976ee1..9c98726 100644 --- a/examples/Demo/Resources/CompleteRunTest.html +++ b/examples/Demo/Resources/CompleteRunTest.html @@ -4,6 +4,25 @@ + + + + + + + + + + + + + + + + + +
Column 1Column 2Column 3
Row 1, Cell 1Row 1, Cell 2Row 1, Cell 3
Row 2, Cell 1Row 2, Cell 2Row 2, Cell 3
+

Heading 1

Heading 2

@@ -15,8 +34,11 @@
Heading 5
Small caps
Strike Line Through Overline Underline
Bold
- This is a bold - text + + This is a + bold + text + Bold
Bolder
Lighter
@@ -47,7 +69,8 @@
Heading 5
Red dot -

Heading 5 OfDEXhz/AIRbxtAkV1pYNrJHNC0qSbPlDKQDg8e3tVD4Q6BKPGmp67Y2lxbaJ5ckVo04IMgZ 1KjnqAByfXFezMiv95Q2PUZoCKOigY9BSkA9QDjmgqGBDAEHsaia0tnQxtbxMjdVKAg0W9pb Wilba3ihB6iNAufyoltLaZw8tvFIw6FkBNSgADA4FLRRRRRRRX//2Q== - ">

+ "> +

Smiley face @@ -102,79 +126,81 @@
Heading 5

-
-For 50 years, WWF has been protecting the future of nature. The world's leading conservation organization, WWF works in 100 countries and is supported by 1.2 million members in the United States and close to 5 million globally. -
+
+ For 50 years, WWF has been protecting the future of nature. The world's leading conservation organization, WWF works in 100 countries and is supported by 1.2 million members in the United States and close to 5 million globally. +
-
My Text +
My Text -

An ordered list:

+

An ordered list:

    -
  1. Coffee
  2. -
  3. Tea
  4. -
  5. Milk - -
  6. -
  7. Wine
  8. +
  9. Coffee
  10. +
  11. Tea
  12. +
  13. + Milk + +
  14. +
  15. Wine
-
Inside table
-
-

delta parameter (d)

- Looks how cool is Open Xml. -
- Now with HtmlToOpenXml, it never been so easy to convert html. -

- If you like it, add me a rating on github -

- - simple text -
- Hello ! - je suis du texte - écrit en oblique. - -
  public void SetContentType(System.Web.HttpRequest request, System.Web.HttpResponse response, String reportName)
+    
Inside table
+
+

delta parameter (d)

+ Look how cool Open Xml is. +
+ Now with HtmlToOpenXml, it has never been so easy to convert html. +

+ If you like it, add me a rating on github +

+ + simple text +
+ Hello ! + je suis du texte + écrit en oblique. + +
  public void SetContentType(System.Web.HttpRequest request, System.Web.HttpResponse response, String reportName)
 {
 	if (request.Browser.Browser.Contains("IE"))
 	{
-		// Replace the %20 to obtain a clean name when saving the file from Word.
+        // Replace the %20 to obtain a clean name when saving the file from Word.
 		encodedFilename =
 		  Uri.EscapeDataString(Path.GetFileNameWithoutExtension(encodedFilename)).Replace("%20", " ")
 			+ Path.GetExtension(encodedFilename);
 	}
 }
 
-
-
+        
+
 Some <Pre> starting one 
 line below! 
-
-

Header placeholder:

-
    -
  1. Item 1
  2. -
  3. Item 2
  4. -
-

Footer Placeholder

-
+
+

Header placeholder:

+
    +
  1. Item 1
  2. +
  3. Item 2
  4. +
+

Footer Placeholder

+
Lorem Ipsum
- diff --git a/src/Html2OpenXml/Collections/HtmlAttributeCollection.cs b/src/Html2OpenXml/Collections/HtmlAttributeCollection.cs index bdccb5f..0c1c686 100755 --- a/src/Html2OpenXml/Collections/HtmlAttributeCollection.cs +++ b/src/Html2OpenXml/Collections/HtmlAttributeCollection.cs @@ -20,8 +20,7 @@ namespace HtmlToOpenXml; /// sealed class HtmlAttributeCollection { - private static readonly Regex stripStyleAttributesRegex = new(@"(?.+?):\s*(?[^;]+);*\s*"); - + private static readonly Regex stripStyleAttributesRegex = new(@"(?[^;\s]+)\s?(&\#58;|:)\s?(?[^;&]+)\s?(;|&\#59;)*"); private readonly Dictionary attributes = []; @@ -37,13 +36,7 @@ public static HtmlAttributeCollection ParseStyle(string? htmlTag) // Encoded ':' and ';' characters are valid for browser but not handled by the regex (bug #13812 reported by robin391) // ex= - MatchCollection matches = stripStyleAttributesRegex.Matches( -#if NET5_0_OR_GREATER - System.Web.HttpUtility.HtmlDecode(htmlTag) -#else - HttpUtility.HtmlDecode(htmlTag) -#endif - ); + MatchCollection matches = stripStyleAttributesRegex.Matches(htmlTag); foreach (Match m in matches) collection.attributes[m.Groups["name"].Value] = m.Groups["val"].Value; diff --git a/src/Html2OpenXml/Expressions/BlockElementExpression.cs b/src/Html2OpenXml/Expressions/BlockElementExpression.cs index 7ae7026..62bc8ce 100644 --- a/src/Html2OpenXml/Expressions/BlockElementExpression.cs +++ b/src/Html2OpenXml/Expressions/BlockElementExpression.cs @@ -168,8 +168,8 @@ protected override void ComposeStyles (ParsingContext context) }; } - var attrValue = styleAttributes!["text-align"]; - JustificationValues? align = Converter.ToParagraphAlign(attrValue); + JustificationValues? 
align = Converter.ToParagraphAlign(styleAttributes!["text-align"]); + if (!align.HasValue) align = Converter.ToParagraphAlign(node.GetAttribute("align")); if (align.HasValue) { paraProperties.Justification = new() { Val = align }; diff --git a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs index 46fdc9a..62221ec 100644 --- a/src/Html2OpenXml/Expressions/HyperlinkExpression.cs +++ b/src/Html2OpenXml/Expressions/HyperlinkExpression.cs @@ -113,7 +113,7 @@ public override IEnumerable Interpret (ParsingContext context) h = new Hyperlink() { History = true, Anchor = "_top" }; } // is it an anchor? - else if (!context.Converter.ExcludeLinkAnchor && linkNode.Hash.Length > 1 && linkNode.Hash[0] == '#') + else if (context.Converter.SupportsAnchorLinks && linkNode.Hash.Length > 1 && linkNode.Hash[0] == '#') { h = new Hyperlink( ) { History = true, Anchor = linkNode.Hash.Substring(1) }; diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs index 3cf0422..06fb089 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpressionBase.cs @@ -12,7 +12,6 @@ using System.Collections.Generic; using System.Linq; using DocumentFormat.OpenXml; -using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using a = DocumentFormat.OpenXml.Drawing; @@ -26,6 +25,12 @@ namespace HtmlToOpenXml.Expressions; /// abstract class ImageExpressionBase(AngleSharp.Dom.IElement node) : HtmlDomExpression { + private readonly RunProperties runProperties = new(); + private readonly ParagraphProperties paraProperties = new(); + // some style attributes, such as borders, will convert this node to a framed container + private bool renderAsFramed; + + /// public override IEnumerable Interpret (ParsingContext context) { @@ -35,16 +40,17 @@ public override IEnumerable Interpret (ParsingContext 
context) return []; Run run = new(drawing); - Border border = ComposeStyles(); - if (border.Val?.Equals(BorderValues.None) == false) - { - run.RunProperties ??= new(); - run.RunProperties.Border = border; - } + ComposeStyles(); + + if (runProperties.HasChildren) + run.RunProperties = runProperties; + + if (renderAsFramed) + return [new Paragraph(paraProperties, run)]; return [run]; } - private Border ComposeStyles () + private void ComposeStyles () { var styleAttributes = node.GetStyles(); var border = new Border() { Val = BorderValues.None }; @@ -66,7 +72,22 @@ private Border ComposeStyles () border.Size = (uint) borderWidth.ValueInPx * 4; } } - return border; + + if (border.Val?.Equals(BorderValues.None) == false) + { + runProperties.Border = border; + } + + // if the layout is not inline and both left and right are auto, image appears centered + // https://developer.mozilla.org/en-US/docs/Web/CSS/margin-left + var margin = styleAttributes.GetMargin("margin"); + if (margin.Left.Type == UnitMetric.Auto + && margin.Right.Type == UnitMetric.Auto + && !AngleSharpExtensions.IsInlineLayout(styleAttributes["display"], "inline-block")) + { + paraProperties.Justification = new() { Val = JustificationValues.Center }; + renderAsFramed = true; + } } /// diff --git a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs index 2c70032..9a9f6ff 100644 --- a/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs +++ b/src/Html2OpenXml/Expressions/Numbering/HeadingElementExpression.cs @@ -40,9 +40,9 @@ public override IEnumerable Interpret (ParsingContext context) paragraph.ParagraphProperties ??= new(); paragraph.ParagraphProperties.ParagraphStyleId = context.DocumentStyle.GetParagraphStyle(context.DocumentStyle.DefaultStyles.HeadingStyle + level); - + var runElement = childElements.FirstOrDefault(); - if (runElement != null && IsNumbering(runElement)) + if (runElement != null && 
context.Converter.SupportsHeadingNumbering && IsNumbering(runElement)) { var abstractNumId = GetOrCreateListTemplate(context, HeadingNumberingName); var instanceId = GetListInstance(abstractNumId); diff --git a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs index 811976c..3fa20af 100644 --- a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs +++ b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs @@ -50,9 +50,20 @@ readonly struct ListContext(string listName, int absNumId, int instanceId, int l public override IEnumerable Interpret(ParsingContext context) { - var liNodes = node.Children.Where(n => n.LocalName == "li"); + var liNodes = node.Children.Where(n => n.LocalName.Equals("li", StringComparison.OrdinalIgnoreCase)); if (!liNodes.Any()) yield break; + // W3C requires that nested list stands below a `li` element but some editors + // don't care to respect the standard. Let's reparent those lists + var nestedList = node.Children.Where(n => + n.LocalName.Equals("ol", StringComparison.OrdinalIgnoreCase) || + n.LocalName.Equals("ul", StringComparison.OrdinalIgnoreCase)); + if (nestedList.Any()) + { + foreach (var list in nestedList) + list.PreviousElementSibling?.AppendChild(list); + } + var listContext = context.Properties("listContext"); var parentContext = listContext; var listStyle = GetListName(node, listContext.Name); @@ -79,17 +90,38 @@ public override IEnumerable Interpret(ParsingContext context) var expression = new BlockElementExpression(liNode); var childElements = expression.Interpret(context); if (!childElements.Any()) continue; - Paragraph p = (Paragraph) childElements.First(); - - p.ParagraphProperties ??= new(); - p.ParagraphProperties.ParagraphStyleId = GetStyleIdForListItem(context.DocumentStyle, liNode); - p.ParagraphProperties.NumberingProperties = new NumberingProperties { - NumberingLevelReference = new() { Val = level - 1 }, - NumberingId = new() { Val = 
listContext.InstanceId } - }; - if (listContext.Dir.HasValue) { - p.ParagraphProperties.BiDi = new() { - Val = OnOffValue.FromBoolean(listContext.Dir == DirectionMode.Rtl) + + // ensure to filter out any non-paragraph like any nested table + var paragraphs = childElements.OfType(); + var listItemStyleId = GetStyleIdForListItem(context.DocumentStyle, liNode); + + if (paragraphs.Any()) + { + var p = paragraphs.First(); + p.ParagraphProperties ??= new(); + p.ParagraphProperties.ParagraphStyleId = listItemStyleId; + p.ParagraphProperties!.NumberingProperties ??= new NumberingProperties { + NumberingLevelReference = new() { Val = level - 1 }, + NumberingId = new() { Val = listContext.InstanceId } + }; + if (listContext.Dir.HasValue) { + p.ParagraphProperties.BiDi = new() { + Val = OnOffValue.FromBoolean(listContext.Dir == DirectionMode.Rtl) + }; + } + } + + // any standalone paragraphs must be aligned (indented) along its current level + foreach (var p in paragraphs.Skip(1)) + { + // if this is a list item paragraph, skip it + if (p.ParagraphProperties?.NumberingProperties is not null) + continue; + + p.ParagraphProperties ??= new(); + p.ParagraphProperties.ParagraphStyleId ??= (ParagraphStyleId?) listItemStyleId!.CloneNode(true); + p.ParagraphProperties.Indentation = new() { + Left = (level * Indentation * 2).ToString() }; } diff --git a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs index 23bd2e2..4e816f8 100644 --- a/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs +++ b/src/Html2OpenXml/Expressions/Table/TableCellExpression.cs @@ -1,4 +1,4 @@ -/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved * * This source is subject to the Microsoft Permissive License. * Please see the License.txt file for more information. @@ -10,6 +10,7 @@ * PARTICULAR PURPOSE. 
*/ using System.Collections.Generic; +using System.Globalization; using System.Linq; using AngleSharp.Html.Dom; using DocumentFormat.OpenXml; @@ -59,6 +60,27 @@ protected override void ComposeStyles(ParsingContext context) { base.ComposeStyles(context); + Unit width = styleAttributes!.GetUnit("width"); + if (!width.IsValid) + { + var widthValue = cellNode.GetAttribute("width"); + if (!string.IsNullOrEmpty(widthValue)) + { + width = Unit.Parse(widthValue); + } + } + + if (width.IsValid) + { + cellProperties.TableCellWidth = new TableCellWidth + { + Type = width.Type == UnitMetric.Percent ? TableWidthUnitValues.Pct : TableWidthUnitValues.Dxa, + Width = width.Type == UnitMetric.Percent + ? ((int) (width.Value * 50)).ToString(CultureInfo.InvariantCulture) + : width.ValueInDxa.ToString(CultureInfo.InvariantCulture) + }; + } + // Manage vertical text (only for table cell) string? direction = styleAttributes!["writing-mode"]; if (direction != null) @@ -98,4 +120,4 @@ internal static bool IsValidRowSpan(int rowSpan) // 0 means it extends until the end of the table grouping section return rowSpan == 0 || rowSpan > 1; } -} \ No newline at end of file +} diff --git a/src/Html2OpenXml/Expressions/Table/TableExpression.cs b/src/Html2OpenXml/Expressions/Table/TableExpression.cs index ecb0b9c..84c1d02 100644 --- a/src/Html2OpenXml/Expressions/Table/TableExpression.cs +++ b/src/Html2OpenXml/Expressions/Table/TableExpression.cs @@ -1,4 +1,4 @@ -/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved * * This source is subject to the Microsoft Permissive License. * Please see the License.txt file for more information. 
@@ -175,17 +175,11 @@ protected override void ComposeStyles (ParsingContext context) switch (width.Type) { case UnitMetric.Percent: - if (width.Value == 100) + tableProperties.TableWidth = new TableWidth { - // Use Auto=0 instead of Pct=auto - // bug reported by scarhand (https://html2openxml.codeplex.com/workitem/12494) - tableProperties.TableWidth = new() { Type = TableWidthUnitValues.Auto, Width = "0" }; - } - else - { - tableProperties.TableWidth = new() { Type = TableWidthUnitValues.Pct, - Width = (width.Value * 50).ToString(CultureInfo.InvariantCulture) }; - } + Type = TableWidthUnitValues.Pct, + Width = (width.Value * 50).ToString(CultureInfo.InvariantCulture) + }; break; case UnitMetric.Point: case UnitMetric.Pixel: @@ -287,4 +281,4 @@ protected override void ComposeStyles (ParsingContext context) } } } -} \ No newline at end of file +} diff --git a/src/Html2OpenXml/HtmlConverter.cs b/src/Html2OpenXml/HtmlConverter.cs index 7cb7a38..f834ecf 100755 --- a/src/Html2OpenXml/HtmlConverter.cs +++ b/src/Html2OpenXml/HtmlConverter.cs @@ -339,7 +339,8 @@ private TPart ResolveHeaderFooterPart(HeaderFooterValues? type) public AcronymPosition AcronymPosition { get; set; } /// - /// Gets or sets whether anchor links are included or not in the convertion. + /// Gets or sets whether anchor links are included or not in the conversion + /// (defaults ). /// /// An anchor is a term used to define a hyperlink destination inside a document. /// . @@ -351,7 +352,23 @@ private TPart ResolveHeaderFooterPart(HeaderFooterValues? type) /// elements /// and set the value of href to #name of your bookmark. /// - public bool ExcludeLinkAnchor { get; set; } + public bool SupportsAnchorLinks { get; set; } = true; + + /// + /// Gets or sets whether anchor links are included or not in the conversion. + /// + /// An anchor is a term used to define a hyperlink destination inside a document. + /// . + ///
+ /// There are some predefined anchors used by Word such as _top to refer to the top of the document. + /// The anchor #_top is always accepted regardless of this property value. + /// For other anchors, such as one referring to your own bookmark or a title, add a + /// and + /// elements + /// and set the value of href to #name of your bookmark. + ///
+ [Obsolete("Use SupportsAnchorLink instead, if ExcludeLinkAnchor = true -> SupportsAnchorLink = false")] + public bool ExcludeLinkAnchor { get => !SupportsAnchorLinks; set => SupportsAnchorLinks = !value; } /// /// Gets the Html styles manager mapping to OpenXml style properties. @@ -367,7 +384,7 @@ public WordDocumentStyle HtmlStyles public CaptionPositionValues TableCaptionPosition { get; set; } /// - /// Gets or sets whether the pre tag should be rendered as a table (default ). + /// Gets or sets whether the pre tag should be rendered as a table (defaults ). /// /// The table will contains only one cell. public bool RenderPreAsTable { get; set; } @@ -378,6 +395,16 @@ public WordDocumentStyle HtmlStyles /// public bool ContinueNumbering { get; set; } = true; + /// + /// Defines whether any headings (h1-h6) could be considered as multi-level numbering, such as + /// top-level headings (Heading 1) are numbered 1, 2, 3, for example, and second-level headings (Heading 2) are numbered 1.1, 1.2, 1.3. + /// This feature is enabled by default. + /// + /// The converter is detecting headings starting with a number (ie: 1. or 1 ) + /// are considered as numbering. + /// + public bool SupportsHeadingNumbering { get; set; } = true; + /// /// Gets the mainDocumentPart of the destination OpenXml document. 
/// diff --git a/src/Html2OpenXml/HtmlToOpenXml.csproj b/src/Html2OpenXml/HtmlToOpenXml.csproj index 03daeff..bfffc34 100644 --- a/src/Html2OpenXml/HtmlToOpenXml.csproj +++ b/src/Html2OpenXml/HtmlToOpenXml.csproj @@ -9,13 +9,13 @@ HtmlToOpenXml HtmlToOpenXml HtmlToOpenXml.dll - 3.2.1 + 3.2.2 icon.png Copyright 2009-$([System.DateTime]::Now.Year) Olivier Nizet - See changelog https://github.com/onizet/html2openxml/blob/master/CHANGELOG.md + (Please write the package release notes in CHANGELOG.md) README.md office openxml netcore html - 3.2.1 + 3.2.2 MIT https://github.com/onizet/html2openxml https://github.com/onizet/html2openxml @@ -64,5 +64,15 @@ true + + + + + + + + @(ReleaseNoteLines, '%0a') + + \ No newline at end of file diff --git a/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs b/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs index 3d89624..085f7d4 100644 --- a/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs +++ b/src/Html2OpenXml/Utilities/AngleSharpExtensions.cs @@ -153,4 +153,14 @@ public static string CollapseLineBreaks(this string str) return new string(chars, 0, length); } + + /// + /// Determines whether the layout mode is inline vs block or flex. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsInlineLayout(string? displayMode, string defaultLayout) + { + return (displayMode ?? defaultLayout) + .StartsWith("inline", StringComparison.OrdinalIgnoreCase) == true; + } } \ No newline at end of file diff --git a/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs b/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs index 5242f49..905ba21 100755 --- a/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs +++ b/src/Html2OpenXml/Utilities/OpenXmlExtensions.cs @@ -9,11 +9,9 @@ * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A * PARTICULAR PURPOSE. 
*/ -using System; using System.Runtime.CompilerServices; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Wordprocessing; -using DocumentFormat.OpenXml.Drawing.Wordprocessing; namespace HtmlToOpenXml; diff --git a/test/HtmlToOpenXml.Tests/HeadingTests.cs b/test/HtmlToOpenXml.Tests/HeadingTests.cs index c42ed25..b8feed1 100644 --- a/test/HtmlToOpenXml.Tests/HeadingTests.cs +++ b/test/HtmlToOpenXml.Tests/HeadingTests.cs @@ -51,6 +51,32 @@ public void OrderedPattern_ReturnsNumberingHeading(string html) }); } + [TestCase("

1. Heading 1

1.1 Heading Normal Case

")] + [TestCase("

1. Heading 1

1.1 Heading Double Space

", Description = "Double space after number")] + [TestCase("

1. Heading 1

1.2 Heading Tab

", Description = "Tab after number")] + [TestCase("

1. Heading 1

1.3Heading No Space

", Description = "No space after number")] + public void OrderedPattern_DisableNumberingSupports_ReturnsSimpleHeading(string html) + { + converter.SupportsHeadingNumbering = false; + var elements = converter.Parse(html); + + var absNum = mainPart.NumberingDefinitionsPart?.Numbering + .Elements() + .Where(abs => abs.AbstractNumDefinitionName?.Val == NumberingExpressionBase.HeadingNumberingName) + .SingleOrDefault(); + Assert.That(absNum, Is.Null); + + var paragraphs = elements.Cast(); + Assert.Multiple(() => + { + Assert.That(paragraphs.Count(), Is.EqualTo(2)); + Assert.That(paragraphs.First().InnerText, Is.EqualTo("1. Heading 1")); + Assert.That(paragraphs.First().ParagraphProperties?.NumberingProperties?.NumberingLevelReference?.Val, + Is.Null, + "First paragraph is not a numbering"); + }); + } + [Test] public void MaxLevel_ShouldBeIgnored() { diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index 15e5596..45aa66d 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -219,6 +219,22 @@ public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXml AssertThatOpenXmlDocumentIsValid(); } + [TestCase("display:block", ExpectedResult = true)] + [TestCase("display:flex", ExpectedResult = true)] + [TestCase("display:inline", ExpectedResult = false)] + [TestCase("", ExpectedResult = false)] + public bool CenterImg_ReturnsFramedImg(string displayMode) + { + var elements = converter.Parse($@""); + + Assert.That(elements, Has.Count.EqualTo(1)); + Assert.That(elements[0], Is.TypeOf()); + AssertIsImg(mainPart, elements[0]); + return elements[0].GetFirstChild()?. 
+ Justification?.Val?.Value == JustificationValues.Center; + } + private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) { var run = paragraph.GetFirstChild(); diff --git a/test/HtmlToOpenXml.Tests/LinkTests.cs b/test/HtmlToOpenXml.Tests/LinkTests.cs index 27443ce..f688bef 100644 --- a/test/HtmlToOpenXml.Tests/LinkTests.cs +++ b/test/HtmlToOpenXml.Tests/LinkTests.cs @@ -82,7 +82,7 @@ public void Anchoring_WithUnknownTarget_ReturnsHyperlinkWithBookmark () [Test] public void SetExcludeAnchoring_ReturnsSimpleRun () { - converter.ExcludeLinkAnchor = true; + converter.SupportsAnchorLinks = false; // _top is always present and bypass the previous rule var elements = converter.Parse(@"Anchor2"); diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs index a7b914e..c48b699 100644 --- a/test/HtmlToOpenXml.Tests/NumberingTests.cs +++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs @@ -514,7 +514,7 @@ public void WithRtl_ReturnsBidi(string dir, bool? expectedValue) } [Test] - public void NestedNumberList_ReturnsIncrementalIdentation() + public void NestedNumberList_ReturnsIncrementalIndentation() { const int maxLevel = 8; var sb = new System.Text.StringBuilder(); @@ -541,5 +541,75 @@ public void NestedNumberList_ReturnsIncrementalIdentation() TestContext.Out.WriteLine($"{i}. {ident?.Left?.Value}"); } } + + [Test(Description = "Nested list must be a children of a `li` tag but some editor are not respecting the W3C standard (issue #173)")] + public async Task NestedNumberList_NonCompliant_ReturnsIncrementalIndentation() + { + await converter.ParseBody(@"
    +
  1. Item1
  2. +
  3. Item2
  4. +
    1. Item 2.1
    +
"); + + var absNum = mainPart.NumberingDefinitionsPart?.Numbering + .Elements() + .SingleOrDefault(); + Assert.That(absNum, Is.Not.Null); + + var inst = mainPart.NumberingDefinitionsPart?.Numbering + .Elements().Where(i => i.AbstractNumId?.Val == absNum.AbstractNumberId) + .SingleOrDefault(); + Assert.That(inst, Is.Not.Null); + Assert.That(inst.NumberID?.Value, Is.Not.Null); + + var elements = mainPart.Document.Body!.ChildElements; + Assert.Multiple(() => { + Assert.That(elements, Has.Count.EqualTo(3)); + Assert.That(elements, Is.All.TypeOf()); + Assert.That(mainPart.NumberingDefinitionsPart?.Numbering, Is.Not.Null); + }); + + // assert paragraphs linked to numbering instance + Assert.Multiple(() => + { + Assert.That(elements.Cast().Select(e => + e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value), + Has.All.EqualTo(inst.NumberID.Value), + "All paragraphs are linked to the same list instance"); + Assert.That(elements.Take(2).Select(p => p.GetFirstChild()?.NumberingProperties?.NumberingLevelReference?.Val?.Value), Has.All.EqualTo(0)); + Assert.That(elements.Last().GetFirstChild()?.NumberingProperties?.NumberingLevelReference?.Val?.Value, Is.EqualTo(1)); + }); + AssertThatOpenXmlDocumentIsValid(); + } + + [Test] + public void NestedParagraph_ReturnsIndentedItems() + { + var elements = converter.Parse(@"
    +
  • +

    Paragraph text

    +

    Paragraph text

    +
  • +
"); + + Assert.That(elements, Is.Not.Empty); + + var inst = mainPart.NumberingDefinitionsPart?.Numbering + .Elements() + .SingleOrDefault(); + Assert.That(inst, Is.Not.Null); + Assert.Multiple(() => { + Assert.That(elements.Last().GetFirstChild()?.NumberingProperties?.NumberingId, + Is.Null, + "Last paragraph is standalone and not linked to a list instance"); + Assert.That(elements.Cast().Select(e => + e.ParagraphProperties?.ParagraphStyleId?.Val?.Value), + Has.All.EqualTo("ListParagraph"), + "All paragraphs use the same paragraph style"); + Assert.That(elements.Last().GetFirstChild()?.Indentation?.Left?.Value, + Is.EqualTo("720"), + "Last standalone paragraph is aligned with the level 1"); + }); + } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/StyleTests.cs b/test/HtmlToOpenXml.Tests/StyleTests.cs index 2fb56f5..24afe76 100644 --- a/test/HtmlToOpenXml.Tests/StyleTests.cs +++ b/test/HtmlToOpenXml.Tests/StyleTests.cs @@ -165,5 +165,21 @@ public void DuplicateStyle_ReturnsLatter() var styleAttributes = HtmlAttributeCollection.ParseStyle("color:red;color:blue"); Assert.That(styleAttributes["color"], Is.EqualTo("blue")); } + + [Test(Description = "Encoded ':' and ';' characters are valid")] + public void EncodedStyle_ShouldSucceed() + { + var styleAttributes = HtmlAttributeCollection.ParseStyle("text-decoration:underline;color:red"); + Assert.That(styleAttributes["text-decoration"], Is.EqualTo("underline")); + Assert.That(styleAttributes["color"], Is.EqualTo("red")); + } + + [Test(Description = "Key style with no value should be ignored")] + public void EmptyStyle_ShouldBeIgnoredd() + { + var styleAttributes = HtmlAttributeCollection.ParseStyle("text-decoration;color:red"); + Assert.That(styleAttributes["text-decoration"], Is.Null); + Assert.That(styleAttributes["color"], Is.EqualTo("red")); + } } }