Skip to content

Commit 7211424

Browse files
committed
Config Doc - Implement a first version of AsciiDoc -> Markdown converter
It is far from being perfect and typically doesn't handle tables. But it's a good first step. (cherry picked from commit 4abee41)
1 parent 0025fe4 commit 7211424

File tree

2 files changed

+530
-11
lines changed

2 files changed

+530
-11
lines changed

core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToMarkdownTransformer.java

Lines changed: 330 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
package io.quarkus.annotation.processor.documentation.config.formatter;
22

3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.Map;
6+
import java.util.regex.Matcher;
37
import java.util.regex.Pattern;
8+
import java.util.stream.Collectors;
49

510
import com.github.javaparser.StaticJavaParser;
611
import com.github.javaparser.javadoc.Javadoc;
@@ -9,29 +14,62 @@
914
import com.github.javaparser.javadoc.description.JavadocInlineTag;
1015

1116
import io.quarkus.annotation.processor.documentation.config.model.JavadocFormat;
17+
import io.quarkus.annotation.processor.util.Strings;
1218

1319
public class JavadocToMarkdownTransformer {
1420

1521
// Matches the (empty) start of every line thanks to MULTILINE; toMarkdown uses it to
// re-add the "* " prefix in front of each line before handing the text to the Javadoc parser.
private static final Pattern START_OF_LINE = Pattern.compile("^", Pattern.MULTILINE);
1622

23+
// AsciiDoc admonition label -> emoji used when rendering a single-line ("NOTE: ...") admonition.
// The keys are also joined into the two ADMONITION_* patterns declared further down.
private static final Map<String, String> ADMONITIONS = Map.of(
24+
"CAUTION", "🔥",
25+
"IMPORTANT", "❗",
26+
"NOTE", "📌",
27+
"TIP", "💡",
28+
"WARNING", "⚠️");
29+
30+
// "== Title": group 1 is the run of '=' (its length becomes the number of '#'), group 2 the title text.
private static final Pattern HEADER_PATTERN = Pattern.compile("^(=+) (.+)$");
31+
// "* item" / ".. item": group 1 is the marker ('*' -> "- ", '.' -> "1. "), group 2 the item text.
private static final Pattern LIST_ITEM_PATTERN = Pattern.compile("^(\\*+|\\.+) (.+)$");
32+
// "image::target[alt]" on a line of its own: group 1 is the target, group 2 the alt text.
private static final Pattern IMAGE_BLOCK_PATTERN = Pattern.compile("^image::([^\\s]+)\\[(.*)\\]$");
33+
// "image:target[alt]" anywhere in a line: group 1 is the target, group 2 the alt text.
// NOTE(review): this also matches the "image::" block form (group 1 then starts with ':') - confirm
// callers guard against applying both image patterns to the same line.
private static final Pattern IMAGE_INLINE_PATTERN = Pattern.compile("image:([^\\s]+)\\[(.*)\\]");
34+
// "[NOTE]"-style line announcing an admonition block; group 1 is the admonition label.
private static final Pattern ADMONITION_BLOCK_START_PATTERN = Pattern
35+
.compile("^\\[(" + String.join("|", ADMONITIONS.keySet()) + ")\\]$");
36+
// Exact line that opens and closes an admonition block.
private static final String ADMONITION_BLOCK_DELIMITER = "====";
37+
// "NOTE: text" single-line admonition: group 1 is the label, group 2 the text.
private static final Pattern ADMONITION_INLINE_PATTERN = Pattern
38+
.compile("^(" + String.join("|", ADMONITIONS.keySet()) + "): (.*)$");
39+
// "*bold*" surrounded by whitespace or line boundaries; group 1 is the emphasized text.
private static final Pattern BOLD_PATTERN = Pattern.compile("(?<=^|\\s)\\*(.+?)\\*(?=\\s|$)");
40+
// "__italic__"; group 1 is the emphasized text.
private static final Pattern ITALIC_PATTERN = Pattern.compile("__(.+?)__");
41+
// ".title" line (must start with a lowercase letter or a digit); group 1 is the title.
private static final Pattern BLOCK_TITLE_PATTERN = Pattern.compile("^\\.([a-z0-9].*)$");
42+
// "[source]" / "[source,java]" / "[source, java, ...]": group 1 is the optional (lowercase) language.
private static final Pattern SOURCE_BLOCK_START_PATTERN = Pattern.compile("^\\[source(?:,[ ]*([a-z]+))?.*\\]$");
43+
// Line made of four or more '-' delimiting a source block.
private static final Pattern SOURCE_BLOCK_DELIMITER_PATTERN = Pattern.compile("^(-----*)$");
44+
// "[quote]" / "[quote, author]" / "[quote, author, source]": group 1 is the author, group 2 the source.
private static final Pattern QUOTE_BLOCK_START_PATTERN = Pattern.compile("^\\[quote(?:, (.*?))?(?:, (.*?))?]$");
45+
// Line made of four or more '_' delimiting a quote block.
private static final Pattern QUOTE_BLOCK_DELIMITER_PATTERN = Pattern.compile("^(_____*)$");
46+
// "link:target[text]": group 1 is the target, group 2 the link text.
private static final Pattern LINK_PATTERN = Pattern.compile("(?:link:)([^\\[]+)\\[(.*?)\\]");
47+
// "http...[text]" bare URL followed by a label: group 1 is the URL, group 2 the link text.
private static final Pattern URL_PATTERN = Pattern.compile("\\b(http[^\\[]+)\\[(.*?)\\]");
48+
// "xref:target[text]": group 1 is the target, group 2 the link text.
private static final Pattern XREF_PATTERN = Pattern.compile("xref:([^\\[]+)\\[(.*?)\\]");
49+
// "icon:name[]" or "icon:name[role=...]" followed by whitespace or end of line: group 1 is the icon name.
private static final Pattern ICON_PATTERN = Pattern.compile("\\bicon:([a-z0-9_-]+)\\[(?:role=([a-z0-9_-]+))?\\](?=\\s|$)");
50+
// "term::" at the start of a line (description list entry); group 1 is the term.
private static final Pattern DESCRIPTION_LIST_PATTERN = Pattern.compile("^([a-z0-9][a-z0-9_ -]+)::(?=\\s|$)",
51+
Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
52+
1753
public static String toMarkdown(String javadoc, JavadocFormat format) {
1854
if (javadoc == null || javadoc.isBlank()) {
1955
return null;
2056
}
2157

22-
if (format == JavadocFormat.MARKDOWN) {
23-
return javadoc;
24-
} else if (format == JavadocFormat.JAVADOC) {
25-
// the parser expects all the lines to start with "* "
26-
// we add it as it has been previously removed
27-
Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* "));
58+
switch (format) {
59+
case MARKDOWN:
60+
return javadoc;
61+
case JAVADOC:
62+
// the parser expects all the lines to start with "* "
63+
// we add it as it has been previously removed
64+
Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* "));
2865

29-
// HTML is valid Javadoc but we need to drop the Javadoc tags e.g. {@link ...}
30-
return simplifyJavadoc(parsedJavadoc.getDescription());
66+
// HTML is valid Javadoc but we need to drop the Javadoc tags e.g. {@link ...}
67+
return simplifyJavadoc(parsedJavadoc.getDescription());
68+
case ASCIIDOC:
69+
return asciidocToMarkdown(javadoc);
70+
default:
71+
throw new IllegalArgumentException("Converting from " + format + " to Markdown is not supported");
3172
}
32-
33-
// it's Asciidoc, the fun begins...
34-
return "";
3573
}
3674

3775
/**
@@ -83,4 +121,285 @@ private static String escapeHtml(String s) {
83121
}
84122
return out.toString();
85123
}
124+
125+
/**
126+
* This obviously don't handle the whole complexity of Asciidoc but should handle most cases.
127+
* <p>
128+
* One thing that might be worth adding is support for titles for source blocks and admonitions but we can add it later on.
129+
* <p>
130+
* It doesn't support tables (yet).
131+
*/
132+
private static String asciidocToMarkdown(String asciidoc) {
133+
List<String> lines = asciidoc.lines().toList();
134+
List<String> result = new ArrayList<>();
135+
String currentAdmonition = null;
136+
boolean inAdmonitionPreamble = false;
137+
boolean inAdmonitionBlock = false;
138+
String currentSourceBlockLanguage = null;
139+
boolean inSourcePreamble = false;
140+
boolean inSourceBlock = false;
141+
String currentSourceBlockTitle = null;
142+
String currentSourceBlockDelimiter = null;
143+
boolean inQuoteBlock = false;
144+
boolean quoteStarted = false;
145+
String currentQuoteBlockDelimiter = null;
146+
String quoteAuthor = null;
147+
String quoteSource = null;
148+
149+
String linePrefix = "";
150+
151+
for (String line : lines) {
152+
String markdownLine = line;
153+
154+
if (inAdmonitionPreamble) {
155+
if (ADMONITION_BLOCK_DELIMITER.equals(line)) {
156+
inAdmonitionBlock = true;
157+
inAdmonitionPreamble = false;
158+
result.add("> [!" + currentAdmonition + "]");
159+
continue;
160+
} else {
161+
// we haven't found a proper delimiter so we ignore the admonition altogether
162+
inAdmonitionPreamble = false;
163+
}
164+
}
165+
166+
if (inAdmonitionBlock) {
167+
if (ADMONITION_BLOCK_DELIMITER.equals(line)) {
168+
inAdmonitionBlock = false;
169+
currentAdmonition = null;
170+
linePrefix = "";
171+
continue;
172+
} else {
173+
linePrefix = "> ";
174+
}
175+
}
176+
177+
if (inSourcePreamble) {
178+
Matcher blockTitleMatcher = BLOCK_TITLE_PATTERN.matcher(line);
179+
if (blockTitleMatcher.matches()) {
180+
currentSourceBlockTitle = blockTitleMatcher.group(1);
181+
}
182+
}
183+
184+
if (inSourceBlock) {
185+
if (currentSourceBlockDelimiter.equals(line)) {
186+
// End of source block
187+
result.add(linePrefix + "```");
188+
inSourcePreamble = false;
189+
inSourceBlock = false;
190+
currentSourceBlockLanguage = null;
191+
currentSourceBlockDelimiter = null;
192+
currentSourceBlockTitle = null;
193+
continue;
194+
} else {
195+
// Inside source block
196+
result.add(linePrefix + markdownLine);
197+
continue;
198+
}
199+
}
200+
201+
Matcher sourceBlockStartMatcher = SOURCE_BLOCK_START_PATTERN.matcher(line);
202+
if (sourceBlockStartMatcher.matches()) {
203+
if (!Strings.isBlank(sourceBlockStartMatcher.group(1))) {
204+
currentSourceBlockLanguage = sourceBlockStartMatcher.group(1).trim();
205+
}
206+
inSourcePreamble = true;
207+
// Skip the start marker
208+
continue;
209+
}
210+
211+
Matcher sourceBlockDelimiterMatcher = SOURCE_BLOCK_DELIMITER_PATTERN.matcher(line);
212+
if (sourceBlockDelimiterMatcher.matches()) {
213+
currentSourceBlockDelimiter = sourceBlockDelimiterMatcher.group(0);
214+
// Start of code block
215+
if (!Strings.isBlank(currentSourceBlockTitle)) {
216+
result.add(linePrefix + "**" + currentSourceBlockTitle + "**");
217+
result.add(linePrefix + "");
218+
}
219+
result.add(
220+
linePrefix + "```" + (!Strings.isBlank(currentSourceBlockLanguage) ? currentSourceBlockLanguage : ""));
221+
inSourcePreamble = false;
222+
inSourceBlock = true;
223+
continue;
224+
}
225+
226+
if (inQuoteBlock) {
227+
Matcher quoteBlockDelimiterMatcher = QUOTE_BLOCK_DELIMITER_PATTERN.matcher(line);
228+
if (!quoteStarted && quoteBlockDelimiterMatcher.matches()) {
229+
currentQuoteBlockDelimiter = quoteBlockDelimiterMatcher.group(0);
230+
continue;
231+
} else if (line.equals(currentQuoteBlockDelimiter)) {
232+
// End of quote block
233+
if (quoteAuthor != null || quoteSource != null) {
234+
result.add(linePrefix + ">");
235+
result.add(linePrefix + "> — " + (quoteAuthor != null ? quoteAuthor : "")
236+
+ (quoteSource != null ? ", " + quoteSource : ""));
237+
}
238+
inQuoteBlock = false;
239+
quoteStarted = false;
240+
currentQuoteBlockDelimiter = null;
241+
continue;
242+
} else {
243+
// Inside quote block
244+
result.add(linePrefix + "> " + line);
245+
quoteStarted = true;
246+
continue;
247+
}
248+
}
249+
250+
Matcher quoteBlockStartMatcher = QUOTE_BLOCK_START_PATTERN.matcher(line);
251+
if (quoteBlockStartMatcher.matches()) {
252+
// Start of quote block
253+
quoteAuthor = quoteBlockStartMatcher.group(1);
254+
quoteSource = quoteBlockStartMatcher.group(2);
255+
inQuoteBlock = true;
256+
continue;
257+
}
258+
259+
Matcher admonitionBlockStartMatcher = ADMONITION_BLOCK_START_PATTERN.matcher(line);
260+
if (admonitionBlockStartMatcher.matches()) {
261+
currentAdmonition = admonitionBlockStartMatcher.group(1);
262+
inAdmonitionPreamble = true;
263+
// Skip the start marker
264+
continue;
265+
}
266+
267+
// Convert headings
268+
Matcher headingMatcher = HEADER_PATTERN.matcher(line);
269+
if (headingMatcher.find()) {
270+
int level = headingMatcher.group(1).length();
271+
String text = headingMatcher.group(2);
272+
markdownLine = "#".repeat(level) + " " + text;
273+
}
274+
275+
// Convert list items
276+
Matcher listItemMatcher = LIST_ITEM_PATTERN.matcher(line);
277+
if (listItemMatcher.find()) {
278+
String marker = listItemMatcher.group(1);
279+
String text = listItemMatcher.group(2);
280+
if (marker.startsWith("*")) {
281+
markdownLine = "- " + text;
282+
} else if (marker.startsWith(".")) {
283+
markdownLine = "1. " + text;
284+
}
285+
}
286+
287+
// Convert italic and bold
288+
markdownLine = convertInline(markdownLine, ITALIC_PATTERN, "*");
289+
markdownLine = convertInline(markdownLine, BOLD_PATTERN, "**");
290+
291+
// Inline Admonitions
292+
if (!inAdmonitionBlock) {
293+
Matcher admonitionInlineMatcher = ADMONITION_INLINE_PATTERN.matcher(line);
294+
if (admonitionInlineMatcher.find()) {
295+
String admonition = admonitionInlineMatcher.group(1);
296+
if (ADMONITIONS.containsKey(admonition)) {
297+
markdownLine = "> " + ADMONITIONS.get(admonition) + " " + admonitionInlineMatcher.group(2);
298+
} else {
299+
markdownLine = "> " + markdownLine;
300+
}
301+
}
302+
}
303+
304+
// Convert block images
305+
Matcher blockImageMatcher = IMAGE_BLOCK_PATTERN.matcher(line);
306+
if (blockImageMatcher.find()) {
307+
String target = blockImageMatcher.group(1);
308+
String altText = blockImageMatcher.group(2);
309+
markdownLine = "![" + altText + "](" + target + ")";
310+
}
311+
312+
// Convert inline images
313+
Matcher inlineImageMatcher = IMAGE_INLINE_PATTERN.matcher(line);
314+
if (inlineImageMatcher.find()) {
315+
String target = inlineImageMatcher.group(1);
316+
String altText = inlineImageMatcher.group(2);
317+
markdownLine = line.replace(inlineImageMatcher.group(), "![" + altText + "](" + target + ")");
318+
}
319+
320+
// Convert links
321+
markdownLine = convertLinksAndXrefs(markdownLine, LINK_PATTERN, "link");
322+
// Convert direct URL links
323+
markdownLine = convertLinksAndXrefs(markdownLine, URL_PATTERN, "url");
324+
// Convert xrefs
325+
markdownLine = convertLinksAndXrefs(markdownLine, XREF_PATTERN, "xref");
326+
327+
// Convert icons
328+
markdownLine = convertIcons(markdownLine);
329+
330+
// Convert description lists: we only convert the title
331+
markdownLine = convertDescriptionLists(markdownLine);
332+
333+
result.add(linePrefix + markdownLine);
334+
}
335+
336+
return result.stream().collect(Collectors.joining("\n"));
337+
}
338+
339+
private static String convertInline(String line, Pattern pattern, String markdownDelimiter) {
340+
Matcher matcher = pattern.matcher(line);
341+
StringBuffer sb = new StringBuffer();
342+
while (matcher.find()) {
343+
matcher.appendReplacement(sb, markdownDelimiter + matcher.group(1) + markdownDelimiter);
344+
}
345+
matcher.appendTail(sb);
346+
return sb.toString();
347+
}
348+
349+
private static String convertLinksAndXrefs(String line, Pattern pattern, String type) {
350+
Matcher matcher = pattern.matcher(line);
351+
StringBuffer sb = new StringBuffer();
352+
while (matcher.find()) {
353+
if (type.equals("link") || type.equals("url")) {
354+
matcher.appendReplacement(sb, "[" + matcher.group(2) + "](" + matcher.group(1) + ")");
355+
} else if (type.equals("xref")) {
356+
String xref = matcher.group(1);
357+
if (xref.contains(".adoc")) {
358+
xref = "https://quarkus.io/guides/" + xref.replace(".adoc", "");
359+
} else {
360+
xref = "#" + xref;
361+
}
362+
363+
matcher.appendReplacement(sb, "[" + matcher.group(2) + "](" + xref + ")");
364+
}
365+
}
366+
matcher.appendTail(sb);
367+
return sb.toString();
368+
}
369+
370+
private static String convertIcons(String line) {
371+
Matcher matcher = ICON_PATTERN.matcher(line);
372+
StringBuffer sb = new StringBuffer();
373+
while (matcher.find()) {
374+
String icon = matcher.group(1);
375+
String emoji;
376+
377+
switch (icon) {
378+
case "check":
379+
emoji = "✅";
380+
break;
381+
case "times":
382+
emoji = "❌";
383+
break;
384+
default:
385+
// TODO we probably need to collect the errors and log them instead
386+
throw new IllegalArgumentException("Icon " + matcher.group(1) + " is not mapped.");
387+
}
388+
389+
matcher.appendReplacement(sb, emoji);
390+
}
391+
matcher.appendTail(sb);
392+
return sb.toString();
393+
}
394+
395+
private static String convertDescriptionLists(String line) {
396+
Matcher matcher = DESCRIPTION_LIST_PATTERN.matcher(line);
397+
StringBuffer sb = new StringBuffer();
398+
while (matcher.find()) {
399+
String descriptionTitle = matcher.group(1);
400+
matcher.appendReplacement(sb, "**" + descriptionTitle + "**");
401+
}
402+
matcher.appendTail(sb);
403+
return sb.toString();
404+
}
86405
}

0 commit comments

Comments
 (0)