1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.module.markdown;
20
21 import javax.inject.Inject;
22 import javax.inject.Named;
23 import javax.inject.Singleton;
24
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.util.Arrays;
28 import java.util.Collections;
29 import java.util.LinkedHashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Map.Entry;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36
37 import com.vladsch.flexmark.ast.Heading;
38 import com.vladsch.flexmark.ast.HtmlCommentBlock;
39 import com.vladsch.flexmark.ast.util.TextCollectingVisitor;
40 import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
41 import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
42 import com.vladsch.flexmark.ext.definition.DefinitionExtension;
43 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension;
44 import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
45 import com.vladsch.flexmark.ext.tables.TablesExtension;
46 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
47 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
48 import com.vladsch.flexmark.ext.yaml.front.matter.YamlFrontMatterExtension;
49 import com.vladsch.flexmark.html.HtmlRenderer;
50 import com.vladsch.flexmark.util.ast.Node;
51 import com.vladsch.flexmark.util.data.MutableDataSet;
52 import org.apache.commons.io.IOUtils;
53 import org.apache.maven.doxia.markup.HtmlMarkup;
54 import org.apache.maven.doxia.markup.TextMarkup;
55 import org.apache.maven.doxia.module.xhtml5.Xhtml5Parser;
56 import org.apache.maven.doxia.parser.AbstractTextParser;
57 import org.apache.maven.doxia.parser.ParseException;
58 import org.apache.maven.doxia.sink.Sink;
59 import org.apache.maven.doxia.util.HtmlTools;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @Singleton
77 @Named("markdown")
78 public class MarkdownParser extends AbstractTextParser implements TextMarkup {
79
80
81
82
83
84
85
86
87
/**
 * Regex that recognizes a metadata section located at the very beginning of the input
 * ({@code \A}). The section has to start with one of the listed keys (matched
 * case-insensitively), optionally followed by spaces/tabs and a colon, and it extends
 * (reluctantly) up to the first line that contains nothing but spaces or tabs.
 */
private static final Pattern METADATA_SECTION_PATTERN = Pattern.compile(
        "\\A^"
                + "(?:title|author|date|address|affiliation|copyright|email|keywords|language|phone|subtitle)"
                + "[ \\t]*:[\\S\\s]+?^[ \\t]*$",
        Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);








/**
 * Regex that splits a metadata section into its individual entries. Group 1 is the key
 * (the text at the start of a line up to the colon, without colon or line break), group 2
 * is the raw value, which may span several lines. A value ends right before the next
 * line that looks like a {@code key:} start, or before a line containing only spaces or tabs.
 */
private static final Pattern METADATA_ENTRY_PATTERN = Pattern.compile(
        "^([^:\\r\\n]+?)[ \\t]*:([\\S\\s]+?)(?=(?:^(?:[^:\\r\\n]+?)[ \\t]*:)|^[ \\t]*$)", Pattern.MULTILINE);
103
104
105
106
107
/**
 * Injected parser that receives the intermediate HTML produced by {@link #toHtml(Reader)}
 * and emits it to the sink (see {@link #parse(Reader, Sink, String)}).
 */
@Inject
private MarkdownHtmlParser parser;




/** Flexmark parser used by {@link #toHtml(Reader)} to parse the Markdown content; configured in the static initializer. */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_PARSER;




/**
 * Flexmark parser configured with only the YAML front matter extension; used to extract
 * metadata when the source starts with {@code ---}.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_METADATA_PARSER;




/** Flexmark renderer used by {@link #toHtml(Reader)} to turn the parsed document into HTML. */
private static final HtmlRenderer FLEXMARK_HTML_RENDERER;
125
126
127 static {
128 MutableDataSet flexmarkOptions = new MutableDataSet();
129
130
131 flexmarkOptions.set(
132 com.vladsch.flexmark.parser.Parser.EXTENSIONS,
133 Arrays.asList(
134 EscapedCharacterExtension.create(),
135 AbbreviationExtension.create(),
136 AutolinkExtension.create(),
137 DefinitionExtension.create(),
138 TypographicExtension.create(),
139 TablesExtension.create(),
140 WikiLinkExtension.create(),
141 StrikethroughExtension.create()));
142
143
144 flexmarkOptions.set(TypographicExtension.SINGLE_QUOTE_UNMATCHED, "'");
145
146
147 flexmarkOptions.set(HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false);
148 flexmarkOptions.set(HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false);
149 flexmarkOptions.set(HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1);
150
151
152 FLEXMARK_PARSER =
153 com.vladsch.flexmark.parser.Parser.builder(flexmarkOptions).build();
154
155 MutableDataSet flexmarkMetadataOptions = new MutableDataSet();
156 flexmarkMetadataOptions.set(
157 com.vladsch.flexmark.parser.Parser.EXTENSIONS, Arrays.asList(YamlFrontMatterExtension.create()));
158 FLEXMARK_METADATA_PARSER = com.vladsch.flexmark.parser.Parser.builder(flexmarkMetadataOptions)
159 .build();
160
161
162 FLEXMARK_HTML_RENDERER = HtmlRenderer.builder(flexmarkOptions)
163 .linkResolverFactory(new FlexmarkDoxiaLinkResolver.Factory())
164 .build();
165 }
166
167
168 @Override
169 public void parse(Reader source, Sink sink, String reference) throws ParseException {
170 try {
171
172 String html = toHtml(source);
173
174
175
176
177
178 parser.parse(html, getWrappedSink(sink), "Intermediate HTML from " + reference);
179 } catch (IOException e) {
180 throw new ParseException("Failed reading Markdown source document", e);
181 }
182 }
183
184 private boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
185 final Map<String, List<String>> metadata;
186 final int endOffset;
187
188 if (source.toString().startsWith("---")) {
189
190 Node documentRoot = FLEXMARK_METADATA_PARSER.parse(source.toString());
191 YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor();
192 visitor.visit(documentRoot);
193 metadata = visitor.getData();
194 endOffset = visitor.getEndOffset();
195 } else {
196
197
198 metadata = new LinkedHashMap<>();
199 Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source);
200 if (metadataMatcher.find()) {
201 String entry = metadataMatcher.group(0) + EOL;
202 Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(entry);
203 while (entryMatcher.find()) {
204 String key = entryMatcher.group(1);
205 String value = normalizeMultilineValue(entryMatcher.group(2));
206 metadata.put(key, Collections.singletonList(value));
207 }
208 endOffset = metadataMatcher.end(0);
209 } else {
210 endOffset = 0;
211 }
212 }
213 if (endOffset > 0) {
214
215 source.delete(0, endOffset);
216 }
217 return writeHtmlMetadata(html, metadata);
218 }
219
220 static String normalizeMultilineValue(String value) {
221 return value.trim().replaceAll("[ \\t]*[\\r\\n]+[ \\t]*", " ");
222 }
223
224 private boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
225 boolean containsTitle = false;
226 for (Entry<String, List<String>> entry : data.entrySet()) {
227 if (writeHtmlMetadata(html, entry.getKey(), entry.getValue())) {
228 containsTitle = true;
229 }
230 }
231 return containsTitle;
232 }
233
234 private boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
235 if ("title".equalsIgnoreCase(key)) {
236 html.append("<title>");
237 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", ")), false));
238 html.append("</title>");
239 return true;
240 } else {
241 if (key.equalsIgnoreCase("author") && values.size() > 1) {
242
243 for (String value : values) {
244 writeHtmlMetadata(html, key, Collections.singletonList(value));
245 }
246 } else {
247
248 final String separator;
249 if (key.equalsIgnoreCase("keywords")) {
250 separator = ",";
251 } else {
252 separator = EOL;
253 }
254 html.append("<meta name='");
255 html.append(HtmlTools.escapeHTML(key));
256 html.append("' content='");
257 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(separator))));
258 html.append("' />");
259 }
260 return false;
261 }
262 }
263
264
265
266
267
268
269
270
271 String toHtml(Reader source) throws IOException {
272
273 StringBuilder markdownText = new StringBuilder(IOUtils.toString(source));
274
275
276 StringBuilder html = new StringBuilder(1000);
277 html.append("<html>");
278 html.append("<head>");
279
280 boolean haveTitle = processMetadataForHtml(html, markdownText);
281
282
283
284 Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
285
286
287
288 if (!haveTitle && documentRoot.hasChildren()) {
289
290 Node firstNode = documentRoot.getFirstChild();
291 while (firstNode != null && firstNode instanceof HtmlCommentBlock) {
292 firstNode = firstNode.getNext();
293 }
294
295
296 if (firstNode != null && firstNode instanceof Heading) {
297 html.append("<title>");
298 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
299 String headingText = collectingVisitor.collectAndGetText(firstNode);
300 html.append(HtmlTools.escapeHTML(headingText, false));
301 html.append("</title>");
302 }
303 }
304 html.append("</head>");
305 html.append("<body>");
306
307
308 FLEXMARK_HTML_RENDERER.render(documentRoot, html);
309
310 html.append("</body>");
311 html.append("</html>");
312
313 return html.toString();
314 }
315
316
317
318
319
320
321
322
323
324
325
326
327
328 @Named
329 public static class MarkdownHtmlParser extends Xhtml5Parser {
330 public MarkdownHtmlParser() {
331 super();
332 }
333
334 @Override
335 protected void init() {
336 super.init();
337 }
338
339 @Override
340 protected boolean baseEndTag(XmlPullParser parser, Sink sink) {
341 boolean visited = super.baseEndTag(parser, sink);
342 if (!visited) {
343 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
344 handleUnknown(parser, sink, TAG_TYPE_END);
345 visited = true;
346 }
347 }
348 return visited;
349 }
350
351 @Override
352 protected boolean baseStartTag(XmlPullParser parser, Sink sink) {
353 boolean visited = super.baseStartTag(parser, sink);
354 if (!visited) {
355 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
356 handleUnknown(parser, sink, TAG_TYPE_START);
357 visited = true;
358 }
359 }
360 return visited;
361 }
362 }
363 }