1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.module.markdown;
20
21 import javax.inject.Inject;
22 import javax.inject.Named;
23 import javax.inject.Singleton;
24
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.util.Arrays;
28 import java.util.Collections;
29 import java.util.LinkedHashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Map.Entry;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36
37 import com.vladsch.flexmark.ast.Heading;
38 import com.vladsch.flexmark.ast.HtmlCommentBlock;
39 import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
40 import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
41 import com.vladsch.flexmark.ext.definition.DefinitionExtension;
42 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension;
43 import com.vladsch.flexmark.ext.footnotes.FootnoteExtension;
44 import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
45 import com.vladsch.flexmark.ext.tables.TablesExtension;
46 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
47 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
48 import com.vladsch.flexmark.ext.yaml.front.matter.YamlFrontMatterExtension;
49 import com.vladsch.flexmark.html.HtmlRenderer;
50 import com.vladsch.flexmark.util.ast.Node;
51 import com.vladsch.flexmark.util.ast.TextCollectingVisitor;
52 import com.vladsch.flexmark.util.data.MutableDataSet;
53 import org.apache.commons.io.IOUtils;
54 import org.apache.maven.doxia.markup.HtmlMarkup;
55 import org.apache.maven.doxia.markup.TextMarkup;
56 import org.apache.maven.doxia.module.xhtml5.Xhtml5Parser;
57 import org.apache.maven.doxia.parser.AbstractTextParser;
58 import org.apache.maven.doxia.parser.ParseException;
59 import org.apache.maven.doxia.sink.Sink;
60 import org.apache.maven.doxia.util.HtmlTools;
61 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77 @Singleton
78 @Named("markdown")
79 public class MarkdownParser extends AbstractTextParser implements TextMarkup {
80
81
82
83
84
85
86
87
88
/**
 * Recognizes a {@code key: value} metadata header at the very beginning of the document.
 * <p>
 * The match is anchored to the start of the input and must begin with one of the listed
 * keys (compared case-insensitively), optionally followed by spaces/tabs, and a colon.
 * From there it reluctantly consumes everything (including line breaks) up to the first
 * line that is empty or contains only spaces/tabs.
 */
private static final Pattern METADATA_SECTION_PATTERN = Pattern.compile(
        "\\A^"
                + "(?:title|author|date|address|affiliation|copyright|email|keywords|language|phone|subtitle)"
                + "[ \\t]*:[\\S\\s]+?^[ \\t]*$",
        Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
94
95
96
97
98
99
100
101
/**
 * Splits a metadata header into its individual entries.
 * <p>
 * Group 1 is the key: the text from a line start up to (not including) the colon, without
 * trailing spaces/tabs; it cannot contain a colon or a line break. Group 2 is the value:
 * everything after the colon, possibly spanning several lines, reluctantly extended until
 * the lookahead sees either the next {@code key:} at a line start or a line that is empty
 * or contains only spaces/tabs.
 */
private static final Pattern METADATA_ENTRY_PATTERN = Pattern.compile(
        "^([^:\\r\\n]+?)[ \\t]*:([\\S\\s]+?)(?=(?:^(?:[^:\\r\\n]+?)[ \\t]*:)|^[ \\t]*$)", Pattern.MULTILINE);
104
105
106
107
108
/**
 * Injected HTML parser that {@code parse(Reader, Sink, String)} hands the intermediate HTML
 * (produced from the Markdown source) to.
 */
@Inject
private MarkdownHtmlParser parser;
111
112
113
114
/**
 * flexmark parser used for the Markdown content itself; assigned in the static initializer
 * from the option set that registers the content extensions.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_PARSER;

/**
 * flexmark parser used only for reading metadata; assigned in the static initializer from an
 * option set that registers nothing but the YAML front matter extension.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_METADATA_PARSER;

/**
 * flexmark renderer that turns the parsed document into HTML; assigned in the static
 * initializer from the same option set as {@code FLEXMARK_PARSER}, with a
 * {@code FlexmarkDoxiaLinkResolver.Factory} as link resolver factory.
 */
private static final HtmlRenderer FLEXMARK_HTML_RENDERER;
126
127
// Builds the shared flexmark parser/renderer instances once per class load.
static {
    // The content parser and the HTML renderer are built from the very same option instance.
    MutableDataSet contentOptions = createContentOptions();
    FLEXMARK_PARSER =
            com.vladsch.flexmark.parser.Parser.builder(contentOptions).build();
    FLEXMARK_HTML_RENDERER = HtmlRenderer.builder(contentOptions)
            .linkResolverFactory(new FlexmarkDoxiaLinkResolver.Factory())
            .build();

    // The metadata parser gets its own, much smaller option set.
    FLEXMARK_METADATA_PARSER = com.vladsch.flexmark.parser.Parser.builder(createMetadataOptions())
            .build();
}

/**
 * Creates the options shared by the content parser and the HTML renderer.
 *
 * @return a new option set with the content extensions and renderer settings applied
 */
private static MutableDataSet createContentOptions() {
    MutableDataSet options = new MutableDataSet();

    // Extensions enabled for regular Markdown content.
    options.set(
            com.vladsch.flexmark.parser.Parser.EXTENSIONS,
            Arrays.asList(
                    EscapedCharacterExtension.create(),
                    AbbreviationExtension.create(),
                    AutolinkExtension.create(),
                    DefinitionExtension.create(),
                    TypographicExtension.create(),
                    TablesExtension.create(),
                    WikiLinkExtension.create(),
                    FootnoteExtension.create(),
                    StrikethroughExtension.create()));

    // Typographic extension: use a plain apostrophe for the "unmatched single quote" setting.
    options.set(TypographicExtension.SINGLE_QUOTE_UNMATCHED, "'");

    // Renderer settings: HTML block tag EOL handling, trailing blank lines and the CSS classes
    // for fenced code blocks that declare no language.
    options.set(HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false);
    options.set(HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false);
    options.set(HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1);
    options.set(HtmlRenderer.FENCED_CODE_NO_LANGUAGE_CLASS, "nohighlight nocode");

    return options;
}

/**
 * Creates the options for the metadata parser, which only registers the YAML front matter
 * extension.
 *
 * @return a new option set for parsing YAML front matter
 */
private static MutableDataSet createMetadataOptions() {
    MutableDataSet options = new MutableDataSet();
    options.set(com.vladsch.flexmark.parser.Parser.EXTENSIONS, Arrays.asList(YamlFrontMatterExtension.create()));
    return options;
}
169
170
171 @Override
172 public void parse(Reader source, Sink sink, String reference) throws ParseException {
173 try {
174
175 String html = toHtml(source);
176
177
178
179
180
181 parser.parse(html, getWrappedSink(sink), "Intermediate HTML from " + reference);
182 } catch (IOException e) {
183 throw new ParseException("Failed reading Markdown source document", e);
184 }
185 }
186
187 private boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
188 final Map<String, List<String>> metadata;
189 final int endOffset;
190
191 if (source.toString().startsWith("---")) {
192
193 Node documentRoot = FLEXMARK_METADATA_PARSER.parse(source.toString());
194 YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor();
195 visitor.visit(documentRoot);
196 metadata = visitor.getData();
197 endOffset = visitor.getEndOffset();
198 } else {
199
200
201 metadata = new LinkedHashMap<>();
202 Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source);
203 if (metadataMatcher.find()) {
204 String entry = metadataMatcher.group(0) + EOL;
205 Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(entry);
206 while (entryMatcher.find()) {
207 String key = entryMatcher.group(1);
208 String value = normalizeMultilineValue(entryMatcher.group(2));
209 metadata.put(key, Collections.singletonList(value));
210 }
211 endOffset = metadataMatcher.end(0);
212 } else {
213 endOffset = 0;
214 }
215 }
216 if (endOffset > 0) {
217
218 source.delete(0, endOffset);
219 }
220 return writeHtmlMetadata(html, metadata);
221 }
222
223 static String normalizeMultilineValue(String value) {
224 return value.trim().replaceAll("[ \\t]*[\\r\\n]+[ \\t]*", " ");
225 }
226
227 private boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
228 boolean containsTitle = false;
229 for (Entry<String, List<String>> entry : data.entrySet()) {
230 if (writeHtmlMetadata(html, entry.getKey(), entry.getValue())) {
231 containsTitle = true;
232 }
233 }
234 return containsTitle;
235 }
236
237 private boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
238 if ("title".equalsIgnoreCase(key)) {
239 html.append("<title>");
240 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", ")), false));
241 html.append("</title>");
242 return true;
243 } else {
244 if (key.equalsIgnoreCase("author") && values.size() > 1) {
245
246 for (String value : values) {
247 writeHtmlMetadata(html, key, Collections.singletonList(value));
248 }
249 } else {
250
251 final String separator;
252 if (key.equalsIgnoreCase("keywords")) {
253 separator = ",";
254 } else {
255 separator = EOL;
256 }
257 html.append("<meta name='");
258 html.append(HtmlTools.escapeHTML(key));
259 html.append("' content='");
260 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(separator))));
261 html.append("' />");
262 }
263 return false;
264 }
265 }
266
267
268
269
270
271
272
273
274 String toHtml(Reader source) throws IOException {
275
276 StringBuilder markdownText = new StringBuilder(IOUtils.toString(source));
277
278
279 StringBuilder html = new StringBuilder(1000);
280 html.append("<html>");
281 html.append("<head>");
282
283 boolean haveTitle = processMetadataForHtml(html, markdownText);
284
285
286
287 Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
288
289
290
291 if (!haveTitle && documentRoot.hasChildren()) {
292
293 Node firstNode = documentRoot.getFirstChild();
294 while (firstNode != null && firstNode instanceof HtmlCommentBlock) {
295 firstNode = firstNode.getNext();
296 }
297
298
299 if (firstNode != null && firstNode instanceof Heading) {
300 html.append("<title>");
301 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
302 String headingText = collectingVisitor.collectAndGetText(firstNode);
303 html.append(HtmlTools.escapeHTML(headingText, false));
304 html.append("</title>");
305 }
306 }
307 html.append("</head>");
308 html.append("<body>");
309
310
311 FLEXMARK_HTML_RENDERER.render(documentRoot, html);
312
313 html.append("</body>");
314 html.append("</html>");
315
316 return html.toString();
317 }
318
319
320
321
322
323
324
325
326
327
328
329
330
331 @Named
332 public static class MarkdownHtmlParser extends Xhtml5Parser {
333 public MarkdownHtmlParser() {
334 super();
335 }
336
337 @Override
338 protected void init() {
339 super.init();
340 }
341
342 @Override
343 protected boolean baseEndTag(XmlPullParser parser, Sink sink) {
344 boolean visited = super.baseEndTag(parser, sink);
345 if (!visited) {
346 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
347 handleUnknown(parser, sink, TAG_TYPE_END);
348 visited = true;
349 }
350 }
351 return visited;
352 }
353
354 @Override
355 protected boolean baseStartTag(XmlPullParser parser, Sink sink) {
356 boolean visited = super.baseStartTag(parser, sink);
357 if (!visited) {
358 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
359 handleUnknown(parser, sink, TAG_TYPE_START);
360 visited = true;
361 }
362 }
363 return visited;
364 }
365 }
366 }