1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.doxia.module.markdown;
20
21 import javax.inject.Inject;
22 import javax.inject.Named;
23 import javax.inject.Singleton;
24
25 import java.io.IOException;
26 import java.io.Reader;
27 import java.util.Arrays;
28 import java.util.Collections;
29 import java.util.LinkedHashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.Map.Entry;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 import java.util.stream.Collectors;
36
37 import com.vladsch.flexmark.ast.Heading;
38 import com.vladsch.flexmark.ast.HtmlCommentBlock;
39 import com.vladsch.flexmark.ast.util.TextCollectingVisitor;
40 import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
41 import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
42 import com.vladsch.flexmark.ext.definition.DefinitionExtension;
43 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension;
44 import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
45 import com.vladsch.flexmark.ext.tables.TablesExtension;
46 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
47 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
48 import com.vladsch.flexmark.ext.yaml.front.matter.YamlFrontMatterExtension;
49 import com.vladsch.flexmark.html.HtmlRenderer;
50 import com.vladsch.flexmark.util.ast.Node;
51 import com.vladsch.flexmark.util.data.MutableDataSet;
52 import org.apache.maven.doxia.markup.HtmlMarkup;
53 import org.apache.maven.doxia.markup.TextMarkup;
54 import org.apache.maven.doxia.module.xhtml5.Xhtml5Parser;
55 import org.apache.maven.doxia.parser.AbstractTextParser;
56 import org.apache.maven.doxia.parser.ParseException;
57 import org.apache.maven.doxia.sink.Sink;
58 import org.apache.maven.doxia.util.HtmlTools;
59 import org.codehaus.plexus.util.IOUtil;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76 @Singleton
77 @Named("markdown")
78 public class MarkdownParser extends AbstractTextParser implements TextMarkup {
79
80
81
82
83
84
85
86
87
/**
 * Matches a metadata section at the very beginning of the input.
 * <p>
 * The section has to start with one of the listed keys (matched case-insensitively), optionally
 * followed by spaces or tabs and then a colon. It extends, as short as possible, up to and including
 * the first line that is empty or consists only of spaces and tabs.
 */
private static final Pattern METADATA_SECTION_PATTERN = Pattern.compile(
        "\\A^"
                + "(?:title|author|date|address|affiliation|copyright|email|keywords|language|phone|subtitle)"
                + "[ \\t]*:[\\S\\s]+?^[ \\t]*$",
        Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);

/**
 * Matches a single {@code key: value} entry inside a metadata section.
 * <p>
 * Group 1 is the key: the text from the start of a line up to the first colon, without the spaces
 * or tabs directly in front of that colon. Group 2 is the raw value, which may span several lines;
 * it ends right before the next line that again looks like {@code key:} or before a line that is
 * empty or consists only of spaces and tabs (both are checked with a lookahead and not consumed).
 */
private static final Pattern METADATA_ENTRY_PATTERN = Pattern.compile(
        "^([^:\\r\\n]+?)[ \\t]*:([\\S\\s]+?)(?=(?:^(?:[^:\\r\\n]+?)[ \\t]*:)|^[ \\t]*$)", Pattern.MULTILINE);
103
104
105
106
107
/**
 * Parser for the intermediate HTML that is generated from the Markdown source; the instance is
 * injected.
 */
@Inject
private MarkdownHtmlParser parser;

/**
 * Flexmark parser used for the document content. It is created once in the static initializer and
 * shared by all instances.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_PARSER;

/**
 * Flexmark parser used only for reading YAML front matter. It is created once in the static
 * initializer and shared by all instances.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_METADATA_PARSER;

/**
 * Flexmark renderer that writes the parsed document as HTML. It is created once in the static
 * initializer and shared by all instances.
 */
private static final HtmlRenderer FLEXMARK_HTML_RENDERER;
125
126
127 static {
128 MutableDataSet flexmarkOptions = new MutableDataSet();
129
130
131 flexmarkOptions.set(
132 com.vladsch.flexmark.parser.Parser.EXTENSIONS,
133 Arrays.asList(
134 EscapedCharacterExtension.create(),
135 AbbreviationExtension.create(),
136 AutolinkExtension.create(),
137 DefinitionExtension.create(),
138 TypographicExtension.create(),
139 TablesExtension.create(),
140 WikiLinkExtension.create(),
141 StrikethroughExtension.create()));
142
143
144 flexmarkOptions.set(TypographicExtension.SINGLE_QUOTE_UNMATCHED, "'");
145
146
147 flexmarkOptions.set(HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false);
148 flexmarkOptions.set(HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false);
149 flexmarkOptions.set(HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1);
150
151
152 FLEXMARK_PARSER =
153 com.vladsch.flexmark.parser.Parser.builder(flexmarkOptions).build();
154
155 MutableDataSet flexmarkMetadataOptions = new MutableDataSet();
156 flexmarkMetadataOptions.set(
157 com.vladsch.flexmark.parser.Parser.EXTENSIONS, Arrays.asList(YamlFrontMatterExtension.create()));
158 FLEXMARK_METADATA_PARSER = com.vladsch.flexmark.parser.Parser.builder(flexmarkMetadataOptions)
159 .build();
160
161
162 FLEXMARK_HTML_RENDERER = HtmlRenderer.builder(flexmarkOptions)
163 .linkResolverFactory(new FlexmarkDoxiaLinkResolver.Factory())
164 .build();
165 }
166
167
168 @Override
169 public void parse(Reader source, Sink sink, String reference) throws ParseException {
170 try {
171
172 String html = toHtml(source);
173
174
175 parser.parse(html, sink);
176 } catch (IOException e) {
177 throw new ParseException("Failed reading Markdown source document", e);
178 }
179 }
180
181 private boolean processMetadataForHtml(StringBuilder html, StringBuilder source) {
182 final Map<String, List<String>> metadata;
183 final int endOffset;
184
185 if (source.toString().startsWith("---")) {
186
187 Node documentRoot = FLEXMARK_METADATA_PARSER.parse(source.toString());
188 YamlFrontMatterVisitor visitor = new YamlFrontMatterVisitor();
189 visitor.visit(documentRoot);
190 metadata = visitor.getData();
191 endOffset = visitor.getEndOffset();
192 } else {
193
194
195 metadata = new LinkedHashMap<>();
196 Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher(source);
197 if (metadataMatcher.find()) {
198 String entry = metadataMatcher.group(0) + EOL;
199 Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher(entry);
200 while (entryMatcher.find()) {
201 String key = entryMatcher.group(1);
202 String value = normalizeMultilineValue(entryMatcher.group(2));
203 metadata.put(key, Collections.singletonList(value));
204 }
205 endOffset = metadataMatcher.end(0);
206 } else {
207 endOffset = 0;
208 }
209 }
210 if (endOffset > 0) {
211
212 source.delete(0, endOffset);
213 }
214 return writeHtmlMetadata(html, metadata);
215 }
216
217 static String normalizeMultilineValue(String value) {
218 return value.trim().replaceAll("[ \\t]*[\\r\\n]+[ \\t]*", " ");
219 }
220
221 private boolean writeHtmlMetadata(StringBuilder html, Map<String, List<String>> data) {
222 boolean containsTitle = false;
223 for (Entry<String, List<String>> entry : data.entrySet()) {
224 if (writeHtmlMetadata(html, entry.getKey(), entry.getValue())) {
225 containsTitle = true;
226 }
227 }
228 return containsTitle;
229 }
230
231 private boolean writeHtmlMetadata(StringBuilder html, String key, List<String> values) {
232 if ("title".equalsIgnoreCase(key)) {
233 html.append("<title>");
234 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(", ")), false));
235 html.append("</title>");
236 return true;
237 } else {
238 if (key.equalsIgnoreCase("author") && values.size() > 1) {
239
240 for (String value : values) {
241 writeHtmlMetadata(html, key, Collections.singletonList(value));
242 }
243 } else {
244
245 final String separator;
246 if (key.equalsIgnoreCase("keywords")) {
247 separator = ",";
248 } else {
249 separator = EOL;
250 }
251 html.append("<meta name='");
252 html.append(HtmlTools.escapeHTML(key));
253 html.append("' content='");
254 html.append(HtmlTools.escapeHTML(values.stream().collect(Collectors.joining(separator))));
255 html.append("' />");
256 }
257 return false;
258 }
259 }
260
261
262
263
264
265
266
267
268 String toHtml(Reader source) throws IOException {
269
270 StringBuilder markdownText = new StringBuilder(IOUtil.toString(source));
271
272
273 StringBuilder html = new StringBuilder(1000);
274 html.append("<html>");
275 html.append("<head>");
276
277 boolean haveTitle = processMetadataForHtml(html, markdownText);
278
279
280
281 Node documentRoot = FLEXMARK_PARSER.parse(markdownText.toString());
282
283
284
285 if (!haveTitle && documentRoot.hasChildren()) {
286
287 Node firstNode = documentRoot.getFirstChild();
288 while (firstNode != null && firstNode instanceof HtmlCommentBlock) {
289 firstNode = firstNode.getNext();
290 }
291
292
293 if (firstNode != null && firstNode instanceof Heading) {
294 html.append("<title>");
295 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
296 String headingText = collectingVisitor.collectAndGetText(firstNode);
297 html.append(HtmlTools.escapeHTML(headingText, false));
298 html.append("</title>");
299 }
300 }
301 html.append("</head>");
302 html.append("<body>");
303
304
305 FLEXMARK_HTML_RENDERER.render(documentRoot, html);
306
307 html.append("</body>");
308 html.append("</html>");
309
310 return html.toString();
311 }
312
313
314
315
316
317
318
319
320
321
322
323
324
325 @Named
326 public static class MarkdownHtmlParser extends Xhtml5Parser {
327 public MarkdownHtmlParser() {
328 super();
329 }
330
331 @Override
332 protected void init() {
333 super.init();
334 }
335
336 @Override
337 protected boolean baseEndTag(XmlPullParser parser, Sink sink) {
338 boolean visited = super.baseEndTag(parser, sink);
339 if (!visited) {
340 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
341 handleUnknown(parser, sink, TAG_TYPE_END);
342 visited = true;
343 }
344 }
345 return visited;
346 }
347
348 @Override
349 protected boolean baseStartTag(XmlPullParser parser, Sink sink) {
350 boolean visited = super.baseStartTag(parser, sink);
351 if (!visited) {
352 if (parser.getName().equals(HtmlMarkup.DIV.toString())) {
353 handleUnknown(parser, sink, TAG_TYPE_START);
354 visited = true;
355 }
356 }
357 return visited;
358 }
359 }
360 }