Skip to content

Commit 0216181

Browse files
committed
Config Doc - Implement a first version of AsciiDoc -> Markdown converter
It is far from being perfect and typically doesn't handle tables. But it's a good first step.
1 parent a91fc3e commit 0216181

File tree

2 files changed

+449
-11
lines changed

2 files changed

+449
-11
lines changed

core/processor/src/main/java/io/quarkus/annotation/processor/documentation/config/formatter/JavadocToMarkdownTransformer.java

Lines changed: 285 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
package io.quarkus.annotation.processor.documentation.config.formatter;
22

3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.Map;
6+
import java.util.regex.Matcher;
37
import java.util.regex.Pattern;
8+
import java.util.stream.Collectors;
49

510
import com.github.javaparser.StaticJavaParser;
611
import com.github.javaparser.javadoc.Javadoc;
@@ -9,29 +14,59 @@
914
import com.github.javaparser.javadoc.description.JavadocInlineTag;
1015

1116
import io.quarkus.annotation.processor.documentation.config.model.JavadocFormat;
17+
import io.quarkus.annotation.processor.util.Strings;
1218

1319
public class JavadocToMarkdownTransformer {
1420

1521
private static final Pattern START_OF_LINE = Pattern.compile("^", Pattern.MULTILINE);
1622

23+
private static final Map<String, String> ADMONITIONS = Map.of(
24+
"CAUTION", "🔥",
25+
"IMPORTANT", "❗",
26+
"NOTE", "📌",
27+
"TIP", "💡",
28+
"WARNING", "⚠️");
29+
30+
private static final Pattern HEADER_PATTERN = Pattern.compile("^(=+) (.+)$");
31+
private static final Pattern LIST_ITEM_PATTERN = Pattern.compile("^(\\*+|\\.+) (.+)$");
32+
private static final Pattern IMAGE_BLOCK_PATTERN = Pattern.compile("^image::([^\\s]+)\\[(.*)\\]$");
33+
private static final Pattern IMAGE_INLINE_PATTERN = Pattern.compile("image:([^\\s]+)\\[(.*)\\]");
34+
private static final Pattern ADMONITION_BLOCK_START_PATTERN = Pattern
35+
.compile("^\\[(" + String.join("|", ADMONITIONS.keySet()) + ")\\]$");
36+
private static final String ADMONITION_BLOCK_DELIMITER = "====";
37+
private static final Pattern ADMONITION_INLINE_PATTERN = Pattern
38+
.compile("^(" + String.join("|", ADMONITIONS.keySet()) + "): (.*)$");
39+
private static final Pattern BOLD_PATTERN = Pattern.compile("(?<=^|\\s)\\*(.+?)\\*(?=\\s|$)");
40+
private static final Pattern ITALIC_PATTERN = Pattern.compile("__(.+?)__");
41+
private static final Pattern BLOCK_TITLE_PATTERN = Pattern.compile("^\\.([a-z0-9].*)$");
42+
private static final Pattern SOURCE_BLOCK_START_PATTERN = Pattern.compile("^\\[source(?:,[ ]*([a-z]+))?.*\\]$");
43+
private static final Pattern SOURCE_BLOCK_DELIMITER_PATTERN = Pattern.compile("^(-----*)$");
44+
private static final Pattern QUOTE_BLOCK_START_PATTERN = Pattern.compile("^\\[quote(?:, (.*?))?(?:, (.*?))?]$");
45+
private static final Pattern QUOTE_BLOCK_DELIMITER_PATTERN = Pattern.compile("^(_____*)$");
46+
private static final Pattern LINK_PATTERN = Pattern.compile("(?:link:)([^\\[]+)\\[(.*?)\\]");
47+
private static final Pattern URL_PATTERN = Pattern.compile("\\b(http[^\\[]+)\\[(.*?)\\]");
48+
private static final Pattern XREF_PATTERN = Pattern.compile("xref:([^\\[]+)\\[(.*?)\\]");
49+
1750
public static String toMarkdown(String javadoc, JavadocFormat format) {
1851
if (javadoc == null || javadoc.isBlank()) {
1952
return null;
2053
}
2154

22-
if (format == JavadocFormat.MARKDOWN) {
23-
return javadoc;
24-
} else if (format == JavadocFormat.JAVADOC) {
25-
// the parser expects all the lines to start with "* "
26-
// we add it as it has been previously removed
27-
Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* "));
55+
switch (format) {
56+
case MARKDOWN:
57+
return javadoc;
58+
case JAVADOC:
59+
// the parser expects all the lines to start with "* "
60+
// we add it as it has been previously removed
61+
Javadoc parsedJavadoc = StaticJavaParser.parseJavadoc(START_OF_LINE.matcher(javadoc).replaceAll("* "));
2862

29-
// HTML is valid Javadoc but we need to drop the Javadoc tags e.g. {@link ...}
30-
return simplifyJavadoc(parsedJavadoc.getDescription());
63+
// HTML is valid Javadoc but we need to drop the Javadoc tags e.g. {@link ...}
64+
return simplifyJavadoc(parsedJavadoc.getDescription());
65+
case ASCIIDOC:
66+
return asciidocToMarkdown(javadoc);
67+
default:
68+
throw new IllegalArgumentException("Converting from " + format + " to Markdown is not supported");
3169
}
32-
33-
// it's Asciidoc, the fun begins...
34-
return "";
3570
}
3671

3772
/**
@@ -84,4 +119,243 @@ private static String escapeHtml(String s) {
84119
}
85120
return out.toString();
86121
}
122+
123+
/**
124+
* This obviously don't handle the whole complexity of Asciidoc but should handle most cases.
125+
* <p>
126+
* One thing that might be worth adding is support for titles for source blocks and admonitions but we can add it later on.
127+
* <p>
128+
* It doesn't support tables (yet).
129+
*/
130+
private static String asciidocToMarkdown(String asciidoc) {
131+
List<String> lines = asciidoc.lines().toList();
132+
List<String> result = new ArrayList<>();
133+
String currentAdmonition = null;
134+
boolean inAdmonitionPreamble = false;
135+
boolean inAdmonitionBlock = false;
136+
String currentSourceBlockLanguage = null;
137+
boolean inSourcePreamble = false;
138+
boolean inSourceBlock = false;
139+
String currentSourceBlockTitle = null;
140+
String currentSourceBlockDelimiter = null;
141+
boolean inQuoteBlock = false;
142+
boolean quoteStarted = false;
143+
String currentQuoteBlockDelimiter = null;
144+
String quoteAuthor = null;
145+
String quoteSource = null;
146+
147+
String linePrefix = "";
148+
149+
for (String line : lines) {
150+
String markdownLine = line;
151+
152+
if (inAdmonitionPreamble) {
153+
if (ADMONITION_BLOCK_DELIMITER.equals(line)) {
154+
inAdmonitionBlock = true;
155+
inAdmonitionPreamble = false;
156+
result.add("> [!" + currentAdmonition + "]");
157+
continue;
158+
} else {
159+
// we haven't found a proper delimiter so we ignore the admonition altogether
160+
inAdmonitionPreamble = false;
161+
}
162+
}
163+
164+
if (inAdmonitionBlock) {
165+
if (ADMONITION_BLOCK_DELIMITER.equals(line)) {
166+
inAdmonitionBlock = false;
167+
currentAdmonition = null;
168+
linePrefix = "";
169+
continue;
170+
} else {
171+
linePrefix = "> ";
172+
}
173+
}
174+
175+
if (inSourcePreamble) {
176+
Matcher blockTitleMatcher = BLOCK_TITLE_PATTERN.matcher(line);
177+
if (blockTitleMatcher.matches()) {
178+
currentSourceBlockTitle = blockTitleMatcher.group(1);
179+
}
180+
}
181+
182+
if (inSourceBlock) {
183+
if (currentSourceBlockDelimiter.equals(line)) {
184+
// End of source block
185+
result.add(linePrefix + "```");
186+
inSourcePreamble = false;
187+
inSourceBlock = false;
188+
currentSourceBlockLanguage = null;
189+
currentSourceBlockDelimiter = null;
190+
currentSourceBlockTitle = null;
191+
continue;
192+
} else {
193+
// Inside source block
194+
result.add(linePrefix + markdownLine);
195+
continue;
196+
}
197+
}
198+
199+
Matcher sourceBlockStartMatcher = SOURCE_BLOCK_START_PATTERN.matcher(line);
200+
if (sourceBlockStartMatcher.matches()) {
201+
if (!Strings.isBlank(sourceBlockStartMatcher.group(1))) {
202+
currentSourceBlockLanguage = sourceBlockStartMatcher.group(1).trim();
203+
}
204+
inSourcePreamble = true;
205+
// Skip the start marker
206+
continue;
207+
}
208+
209+
Matcher sourceBlockDelimiterMatcher = SOURCE_BLOCK_DELIMITER_PATTERN.matcher(line);
210+
if (sourceBlockDelimiterMatcher.matches()) {
211+
currentSourceBlockDelimiter = sourceBlockDelimiterMatcher.group(0);
212+
// Start of code block
213+
if (!Strings.isBlank(currentSourceBlockTitle)) {
214+
result.add(linePrefix + "**" + currentSourceBlockTitle + "**");
215+
result.add(linePrefix + "");
216+
}
217+
result.add(
218+
linePrefix + "```" + (!Strings.isBlank(currentSourceBlockLanguage) ? currentSourceBlockLanguage : ""));
219+
inSourcePreamble = false;
220+
inSourceBlock = true;
221+
continue;
222+
}
223+
224+
if (inQuoteBlock) {
225+
Matcher quoteBlockDelimiterMatcher = QUOTE_BLOCK_DELIMITER_PATTERN.matcher(line);
226+
if (!quoteStarted && quoteBlockDelimiterMatcher.matches()) {
227+
currentQuoteBlockDelimiter = quoteBlockDelimiterMatcher.group(0);
228+
continue;
229+
} else if (line.equals(currentQuoteBlockDelimiter)) {
230+
// End of quote block
231+
if (quoteAuthor != null || quoteSource != null) {
232+
result.add(linePrefix + ">");
233+
result.add(linePrefix + "> — " + (quoteAuthor != null ? quoteAuthor : "")
234+
+ (quoteSource != null ? ", " + quoteSource : ""));
235+
}
236+
inQuoteBlock = false;
237+
quoteStarted = false;
238+
currentQuoteBlockDelimiter = null;
239+
continue;
240+
} else {
241+
// Inside quote block
242+
result.add(linePrefix + "> " + line);
243+
quoteStarted = true;
244+
continue;
245+
}
246+
}
247+
248+
Matcher quoteBlockStartMatcher = QUOTE_BLOCK_START_PATTERN.matcher(line);
249+
if (quoteBlockStartMatcher.matches()) {
250+
// Start of quote block
251+
quoteAuthor = quoteBlockStartMatcher.group(1);
252+
quoteSource = quoteBlockStartMatcher.group(2);
253+
inQuoteBlock = true;
254+
continue;
255+
}
256+
257+
Matcher admonitionBlockStartMatcher = ADMONITION_BLOCK_START_PATTERN.matcher(line);
258+
if (admonitionBlockStartMatcher.matches()) {
259+
currentAdmonition = admonitionBlockStartMatcher.group(1);
260+
inAdmonitionPreamble = true;
261+
// Skip the start marker
262+
continue;
263+
}
264+
265+
// Convert headings
266+
Matcher headingMatcher = HEADER_PATTERN.matcher(line);
267+
if (headingMatcher.find()) {
268+
int level = headingMatcher.group(1).length();
269+
String text = headingMatcher.group(2);
270+
markdownLine = "#".repeat(level) + " " + text;
271+
}
272+
273+
// Convert list items
274+
Matcher listItemMatcher = LIST_ITEM_PATTERN.matcher(line);
275+
if (listItemMatcher.find()) {
276+
String marker = listItemMatcher.group(1);
277+
String text = listItemMatcher.group(2);
278+
if (marker.startsWith("*")) {
279+
markdownLine = "- " + text;
280+
} else if (marker.startsWith(".")) {
281+
markdownLine = "1. " + text;
282+
}
283+
}
284+
285+
// Convert italic and bold
286+
markdownLine = convertInline(markdownLine, ITALIC_PATTERN, "*");
287+
markdownLine = convertInline(markdownLine, BOLD_PATTERN, "**");
288+
289+
// Inline Admonitions
290+
if (!inAdmonitionBlock) {
291+
Matcher admonitionInlineMatcher = ADMONITION_INLINE_PATTERN.matcher(line);
292+
if (admonitionInlineMatcher.find()) {
293+
String admonition = admonitionInlineMatcher.group(1);
294+
if (ADMONITIONS.containsKey(admonition)) {
295+
markdownLine = "> " + ADMONITIONS.get(admonition) + " " + admonitionInlineMatcher.group(2);
296+
} else {
297+
markdownLine = "> " + markdownLine;
298+
}
299+
}
300+
}
301+
302+
// Convert block images
303+
Matcher blockImageMatcher = IMAGE_BLOCK_PATTERN.matcher(line);
304+
if (blockImageMatcher.find()) {
305+
String target = blockImageMatcher.group(1);
306+
String altText = blockImageMatcher.group(2);
307+
markdownLine = "![" + altText + "](" + target + ")";
308+
}
309+
310+
// Convert inline images
311+
Matcher inlineImageMatcher = IMAGE_INLINE_PATTERN.matcher(line);
312+
if (inlineImageMatcher.find()) {
313+
String target = inlineImageMatcher.group(1);
314+
String altText = inlineImageMatcher.group(2);
315+
markdownLine = line.replace(inlineImageMatcher.group(), "![" + altText + "](" + target + ")");
316+
}
317+
318+
// Convert links
319+
markdownLine = convertLinksAndXrefs(markdownLine, LINK_PATTERN, "link");
320+
// Convert direct URL links
321+
markdownLine = convertLinksAndXrefs(markdownLine, URL_PATTERN, "url");
322+
// Convert xrefs
323+
markdownLine = convertLinksAndXrefs(markdownLine, XREF_PATTERN, "xref");
324+
325+
result.add(linePrefix + markdownLine);
326+
}
327+
328+
return result.stream().collect(Collectors.joining("\n"));
329+
}
330+
331+
private static String convertInline(String line, Pattern pattern, String markdownDelimiter) {
332+
Matcher matcher = pattern.matcher(line);
333+
StringBuffer sb = new StringBuffer();
334+
while (matcher.find()) {
335+
matcher.appendReplacement(sb, markdownDelimiter + matcher.group(1) + markdownDelimiter);
336+
}
337+
matcher.appendTail(sb);
338+
return sb.toString();
339+
}
340+
341+
private static String convertLinksAndXrefs(String line, Pattern pattern, String type) {
342+
Matcher matcher = pattern.matcher(line);
343+
StringBuffer sb = new StringBuffer();
344+
while (matcher.find()) {
345+
if (type.equals("link") || type.equals("url")) {
346+
matcher.appendReplacement(sb, "[" + matcher.group(2) + "](" + matcher.group(1) + ")");
347+
} else if (type.equals("xref")) {
348+
String xref = matcher.group(1);
349+
if (xref.contains(".adoc")) {
350+
xref = "https://quarkus.io/guides/" + xref.replace(".adoc", "");
351+
} else {
352+
xref = "#" + xref;
353+
}
354+
355+
matcher.appendReplacement(sb, "[" + matcher.group(2) + "](" + xref + ")");
356+
}
357+
}
358+
matcher.appendTail(sb);
359+
return sb.toString();
360+
}
87361
}

0 commit comments

Comments
 (0)