1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import com.vladsch.flexmark.ast.Heading;
23 import com.vladsch.flexmark.ast.HtmlCommentBlock;
24 import com.vladsch.flexmark.util.ast.Node;
25 import com.vladsch.flexmark.ast.util.TextCollectingVisitor;
26 import com.vladsch.flexmark.html.HtmlRenderer;
27 import com.vladsch.flexmark.util.options.MutableDataSet;
28 import com.vladsch.flexmark.ext.escaped.character.EscapedCharacterExtension;
29 import com.vladsch.flexmark.ext.abbreviation.AbbreviationExtension;
30 import com.vladsch.flexmark.ext.autolink.AutolinkExtension;
31 import com.vladsch.flexmark.ext.definition.DefinitionExtension;
32 import com.vladsch.flexmark.ext.typographic.TypographicExtension;
33 import com.vladsch.flexmark.ext.tables.TablesExtension;
34 import com.vladsch.flexmark.ext.wikilink.WikiLinkExtension;
35 import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension;
36
37 import org.apache.maven.doxia.markup.HtmlMarkup;
38 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
39 import org.apache.maven.doxia.parser.AbstractParser;
40 import org.apache.maven.doxia.parser.ParseException;
41 import org.apache.maven.doxia.parser.Parser;
42 import org.apache.maven.doxia.sink.Sink;
43 import org.apache.maven.doxia.util.HtmlTools;
44 import org.codehaus.plexus.component.annotations.Component;
45 import org.codehaus.plexus.component.annotations.Requirement;
46 import org.codehaus.plexus.util.IOUtil;
47 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
48
49 import java.io.IOException;
50 import java.io.Reader;
51 import java.util.Arrays;
52 import java.util.regex.Matcher;
53 import java.util.regex.Pattern;
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69 @Component( role = Parser.class, hint = MarkdownParser.ROLE_HINT )
70 public class MarkdownParser
71 extends AbstractParser
72 {
73
74
75
76
77 public static final String ROLE_HINT = "markdown";
78
79
80
81
82
83
84
85
86 private static final Pattern METADATA_SECTION_PATTERN = Pattern.compile(
87 "\\A^\\s*"
88 + "(?:title|author|date|address|affiliation|copyright|email|keywords|language|phone|subtitle)"
89 + "[ \\t]*:[ \\t]*[^\\r\\n]*[ \\t]*$[\\r\\n]+"
90 + "(?:^[ \\t]*[^:\\r\\n]+[ \\t]*:[ \\t]*[^\\r\\n]*[ \\t]*$[\\r\\n]+)*",
91 Pattern.MULTILINE | Pattern.CASE_INSENSITIVE );
92
93
94
95
96 private static final Pattern METADATA_ENTRY_PATTERN = Pattern.compile(
97 "^[ \\t]*([^:\\r\\n]+?)[ \\t]*:[ \\t]*([^\\r\\n]*)[ \\t]*$",
98 Pattern.MULTILINE );
99
100
101
102
103
104
105 @Override
106 public int getType()
107 {
108 return TXT_TYPE;
109 }
110
111
112
113
114
/**
 * Injected parser that turns the intermediate HTML produced by flexmark into Sink events.
 */
@Requirement
private MarkdownHtmlParser parser;

/**
 * Shared flexmark parser used to turn Markdown text into a document tree.
 */
private static final com.vladsch.flexmark.parser.Parser FLEXMARK_PARSER;

/**
 * Shared flexmark renderer used to turn the document tree into HTML.
 */
private static final HtmlRenderer FLEXMARK_HTML_RENDERER;

// Parser and renderer are built from one and the same options object.
static
{
    MutableDataSet sharedOptions = createFlexmarkOptions();

    FLEXMARK_PARSER = com.vladsch.flexmark.parser.Parser.builder( sharedOptions ).build();

    FLEXMARK_HTML_RENDERER = HtmlRenderer.builder( sharedOptions )
        .linkResolverFactory( new FlexmarkDoxiaLinkResolver.Factory() )
        .build();
}

/**
 * Builds the flexmark configuration: the enabled extensions plus a few typographic and
 * HTML rendering settings.
 *
 * @return a freshly populated options object
 */
private static MutableDataSet createFlexmarkOptions()
{
    MutableDataSet options = new MutableDataSet();

    // Extensions enabled for every document.
    options.set( com.vladsch.flexmark.parser.Parser.EXTENSIONS, Arrays.asList(
        EscapedCharacterExtension.create(),
        AbbreviationExtension.create(),
        AutolinkExtension.create(),
        DefinitionExtension.create(),
        TypographicExtension.create(),
        TablesExtension.create(),
        WikiLinkExtension.create(),
        StrikethroughExtension.create()
    ) );

    // Presumably keeps an unmatched single quote as a plain apostrophe - confirm against
    // the TypographicExtension documentation.
    options.set( TypographicExtension.SINGLE_QUOTE_UNMATCHED, "'" );

    // HTML renderer tuning; NOTE(review): exact effect of these keys should be checked
    // against the flexmark HtmlRenderer documentation.
    options.set( HtmlRenderer.HTML_BLOCK_OPEN_TAG_EOL, false );
    options.set( HtmlRenderer.HTML_BLOCK_CLOSE_TAG_EOL, false );
    options.set( HtmlRenderer.MAX_TRAILING_BLANK_LINES, -1 );

    return options;
}
162
163
164 @Override
165 public void parse( Reader source, Sink sink, String reference )
166 throws ParseException
167 {
168 try
169 {
170
171 String html = toHtml( source );
172
173
174 parser.parse( html, sink );
175 }
176 catch ( IOException e )
177 {
178 throw new ParseException( "Failed reading Markdown source document", e );
179 }
180 }
181
182
183
184
185
186
187
188
189 String toHtml( Reader source )
190 throws IOException
191 {
192
193 String text = IOUtil.toString( source );
194
195
196 StringBuilder html = new StringBuilder( 1000 );
197 html.append( "<html>" );
198 html.append( "<head>" );
199
200
201 Matcher metadataMatcher = METADATA_SECTION_PATTERN.matcher( text );
202 boolean haveTitle = false;
203 if ( metadataMatcher.find() )
204 {
205 Matcher entryMatcher = METADATA_ENTRY_PATTERN.matcher( metadataMatcher.group( 0 ) );
206 while ( entryMatcher.find() )
207 {
208 String key = entryMatcher.group( 1 );
209 String value = entryMatcher.group( 2 );
210 if ( "title".equalsIgnoreCase( key ) )
211 {
212 haveTitle = true;
213 html.append( "<title>" );
214 html.append( HtmlTools.escapeHTML( value, false ) );
215 html.append( "</title>" );
216 }
217 else
218 {
219 html.append( "<meta name='" );
220 html.append( HtmlTools.escapeHTML( key ) );
221 html.append( "' content='" );
222 html.append( HtmlTools.escapeHTML( value ) );
223 html.append( "' />" );
224 }
225 }
226
227
228 text = text.substring( metadataMatcher.end( 0 ) );
229
230 }
231
232
233
234 Node documentRoot = FLEXMARK_PARSER.parse( text );
235
236
237
238 if ( !haveTitle && documentRoot.hasChildren() )
239 {
240
241 Node firstNode = documentRoot.getFirstChild();
242 while ( firstNode != null && firstNode instanceof HtmlCommentBlock )
243 {
244 firstNode = firstNode.getNext();
245 }
246
247
248 if ( firstNode != null && firstNode instanceof Heading )
249 {
250 html.append( "<title>" );
251 TextCollectingVisitor collectingVisitor = new TextCollectingVisitor();
252 String headingText = collectingVisitor.collectAndGetText( firstNode );
253 html.append( HtmlTools.escapeHTML( headingText, false ) );
254 html.append( "</title>" );
255 }
256 }
257 html.append( "</head>" );
258 html.append( "<body>" );
259
260
261 FLEXMARK_HTML_RENDERER.render( documentRoot, html );
262
263 html.append( "</body>" );
264 html.append( "</html>" );
265
266 return html.toString();
267 }
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282 @Component( role = MarkdownHtmlParser.class )
283 public static class MarkdownHtmlParser
284 extends XhtmlParser
285 {
286 public MarkdownHtmlParser()
287 {
288 super();
289 }
290
291 @Override
292 protected void init()
293 {
294 super.init();
295 super.boxed = true;
296 }
297
298 @Override
299 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
300 {
301 boolean visited = super.baseEndTag( parser, sink );
302 if ( !visited )
303 {
304 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
305 {
306 handleUnknown( parser, sink, TAG_TYPE_END );
307 visited = true;
308 }
309 }
310 return visited;
311 }
312
313 @Override
314 protected boolean baseStartTag( XmlPullParser parser, Sink sink )
315 {
316 boolean visited = super.baseStartTag( parser, sink );
317 if ( !visited )
318 {
319 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
320 {
321 handleUnknown( parser, sink, TAG_TYPE_START );
322 super.boxed = true;
323 visited = true;
324 }
325 }
326 return visited;
327 }
328 }
329 }