1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.apache.commons.lang.StringEscapeUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.maven.doxia.markup.HtmlMarkup;
25 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
26 import org.apache.maven.doxia.parser.AbstractParser;
27 import org.apache.maven.doxia.parser.ParseException;
28 import org.apache.maven.doxia.parser.Parser;
29 import org.apache.maven.doxia.sink.Sink;
30 import org.codehaus.plexus.component.annotations.Component;
31 import org.codehaus.plexus.component.annotations.Requirement;
32 import org.codehaus.plexus.util.IOUtil;
33 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
34 import org.pegdown.Extensions;
35 import org.pegdown.PegDownProcessor;
36 import org.pegdown.ast.HeaderNode;
37 import org.pegdown.ast.HtmlBlockNode;
38 import org.pegdown.ast.Node;
39 import org.pegdown.ast.RootNode;
40 import org.pegdown.ast.SuperNode;
41 import org.pegdown.ast.TextNode;
42
43 import java.io.IOException;
44 import java.io.Reader;
45 import java.io.StringReader;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49
50
51
52
53
54
55
56
57
58
59 @Component( role = Parser.class, hint = "markdown" )
60 public class MarkdownParser
61 extends AbstractParser
62 {
63
64
65
66
67 public static final String ROLE_HINT = "markdown";
68
69
70
71
72 protected static final PegDownProcessor PEGDOWN_PROCESSOR =
73 new PegDownProcessor( Extensions.ALL & ~Extensions.HARDWRAPS, Long.MAX_VALUE );
74
75
76
77
78 private static final String MULTI_MARKDOWN_METADATA_SECTION =
79 "^(((?:[^\\s:][^:]*):(?:.*(?:\r?\n\\p{Blank}+[^\\s].*)*\r?\n))+)(?:\\s*\r?\n)";
80
81
82
83
84 private static final String MULTI_MARKDOWN_METADATA_ENTRY =
85 "([^\\s:][^:]*):(.*(?:\r?\n\\p{Blank}+[^\\s].*)*)\r?\n";
86
87
88
89
90
91
92 private static final String[] STANDARD_METADATA_KEYS =
93 { "title", "author", "date", "address", "affiliation", "copyright", "email", "keywords", "language", "phone",
94 "subtitle" };
95
96 public int getType()
97 {
98 return TXT_TYPE;
99 }
100
101 @Requirement
102 private PegDownHtmlParser parser;
103
104 public void parse( Reader source, Sink sink )
105 throws ParseException
106 {
107 try
108 {
109
110 String html = toHtml( source );
111
112 parser.parse( new StringReader( html ), sink );
113 }
114 catch ( IOException e )
115 {
116 throw new ParseException( "Failed reading Markdown source document", e );
117 }
118 }
119
120
121
122
123
124
125
126
127
128 private String toHtml( Reader source )
129 throws IOException
130 {
131 String text = IOUtil.toString( source );
132 StringBuilder html = new StringBuilder( text.length() * 2 );
133 html.append( "<html>" );
134 html.append( "<head>" );
135 Pattern metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_SECTION, Pattern.MULTILINE );
136 Matcher metadataMatcher = metadataPattern.matcher( text );
137 boolean haveTitle = false;
138 if ( metadataMatcher.find() )
139 {
140 metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_ENTRY, Pattern.MULTILINE );
141 Matcher lineMatcher = metadataPattern.matcher( metadataMatcher.group( 1 ) );
142 boolean first = true;
143 while ( lineMatcher.find() )
144 {
145 String key = StringUtils.trimToEmpty( lineMatcher.group( 1 ) );
146 if ( first )
147 {
148 boolean found = false;
149 for ( String k : STANDARD_METADATA_KEYS )
150 {
151 if ( k.equalsIgnoreCase( key ) )
152 {
153 found = true;
154 break;
155 }
156 }
157 if ( !found )
158 {
159 break;
160 }
161 first = false;
162 }
163 String value = StringUtils.trimToEmpty( lineMatcher.group( 2 ) );
164 if ( "title".equalsIgnoreCase( key ) )
165 {
166 haveTitle = true;
167 html.append( "<title>" );
168 html.append( StringEscapeUtils.escapeXml( value ) );
169 html.append( "</title>" );
170 }
171 else if ( "author".equalsIgnoreCase( key ) )
172 {
173 html.append( "<meta name=\'author\' content=\'" );
174 html.append( StringEscapeUtils.escapeXml( value ) );
175 html.append( "\' />" );
176 }
177 else if ( "date".equalsIgnoreCase( key ) )
178 {
179 html.append( "<meta name=\'date\' content=\'" );
180 html.append( StringEscapeUtils.escapeXml( value ) );
181 html.append( "\' />" );
182 }
183 else
184 {
185 html.append( "<meta name=\'" );
186 html.append( StringEscapeUtils.escapeXml( key ) );
187 html.append( "\' content=\'" );
188 html.append( StringEscapeUtils.escapeXml( value ) );
189 html.append( "\' />" );
190 }
191 }
192 if ( !first )
193 {
194 text = text.substring( metadataMatcher.end() );
195 }
196 }
197 RootNode rootNode = PEGDOWN_PROCESSOR.parseMarkdown( text.toCharArray() );
198 if ( !haveTitle && rootNode.getChildren().size() > 0 )
199 {
200
201 int i = 0;
202 Node firstNode = null;
203 while ( i < rootNode.getChildren().size() && isHtmlComment(
204 ( firstNode = rootNode.getChildren().get( i ) ) ) )
205 {
206 i++;
207 }
208 if ( firstNode instanceof HeaderNode )
209 {
210 html.append( "<title>" );
211 html.append( StringEscapeUtils.escapeXml( nodeText( firstNode ) ) );
212 html.append( "</title>" );
213 }
214 }
215 html.append( "</head>" );
216 html.append( "<body>" );
217 html.append( new MarkdownToDoxiaHtmlSerializer().toHtml( rootNode ) );
218 html.append( "</body>" );
219 html.append( "</html>" );
220
221 return html.toString();
222 }
223
224 public static boolean isHtmlComment( Node node )
225 {
226 if ( node instanceof HtmlBlockNode )
227 {
228 HtmlBlockNode blockNode = (HtmlBlockNode) node;
229 return blockNode.getText().startsWith( "<!--" );
230 }
231 return false;
232 }
233
234 public static String nodeText( Node node )
235 {
236 StringBuilder builder = new StringBuilder();
237 if ( node instanceof TextNode )
238 {
239 builder.append( TextNode.class.cast( node ).getText() );
240 }
241 else
242 {
243 for ( Node n : node.getChildren() )
244 {
245 if ( n instanceof TextNode )
246 {
247 builder.append( TextNode.class.cast( n ).getText() );
248 }
249 else if ( n instanceof SuperNode )
250 {
251 builder.append( nodeText( n ) );
252 }
253 }
254 }
255 return builder.toString();
256 }
257
258
259
260
261 @Component( role = PegDownHtmlParser.class )
262 public static class PegDownHtmlParser
263 extends XhtmlParser
264 {
265 public PegDownHtmlParser()
266 {
267 super();
268 }
269
270 @Override
271 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
272 {
273 boolean visited = super.baseEndTag( parser, sink );
274 if ( !visited )
275 {
276 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
277 {
278 handleUnknown( parser, sink, TAG_TYPE_END );
279 visited = true;
280 }
281 }
282 return visited;
283 }
284
285 @Override
286 protected boolean baseStartTag( XmlPullParser parser, Sink sink )
287 {
288 boolean visited = super.baseStartTag( parser, sink );
289 if ( !visited )
290 {
291 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
292 {
293 handleUnknown( parser, sink, TAG_TYPE_START );
294 visited = true;
295 }
296 }
297 return visited;
298 }
299 }
300 }