1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.apache.commons.lang.StringEscapeUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.maven.doxia.macro.MacroExecutionException;
25 import org.apache.maven.doxia.markup.HtmlMarkup;
26 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
27 import org.apache.maven.doxia.parser.ParseException;
28 import org.apache.maven.doxia.parser.Parser;
29 import org.apache.maven.doxia.sink.Sink;
30 import org.codehaus.plexus.component.annotations.Component;
31 import org.codehaus.plexus.util.IOUtil;
32 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
33 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
34 import org.pegdown.Extensions;
35 import org.pegdown.PegDownProcessor;
36 import org.pegdown.ast.HeaderNode;
37 import org.pegdown.ast.HtmlBlockNode;
38 import org.pegdown.ast.Node;
39 import org.pegdown.ast.RootNode;
40 import org.pegdown.ast.SuperNode;
41 import org.pegdown.ast.TextNode;
42
43 import java.io.IOException;
44 import java.io.Reader;
45 import java.io.StringReader;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49
50
51
52
53
54
55
56
57 @Component( role = Parser.class, hint = "markdown" )
58 public class MarkdownParser
59 extends XhtmlParser
60 {
61
62
63
64
65 public static final String ROLE_HINT = "markdown";
66
67
68
69
70 protected static final PegDownProcessor PEGDOWN_PROCESSOR =
71 new PegDownProcessor( Extensions.ALL & ~Extensions.HARDWRAPS, Long.MAX_VALUE );
72
73
74
75
76 private static final String MULTI_MARKDOWN_METADATA_SECTION =
77 "^(((?:[^\\s:][^:]*):(?:.*(?:\r?\n\\p{Blank}+[^\\s].*)*\r?\n))+)(?:\\s*\r?\n)";
78
79
80
81
82 private static final String MULTI_MARKDOWN_METADATA_ENTRY =
83 "([^\\s:][^:]*):(.*(?:\r?\n\\p{Blank}+[^\\s].*)*)\r?\n";
84
85
86
87
88
89
90 private static final String[] STANDARD_METADATA_KEYS =
91 { "title", "author", "date", "address", "affiliation", "copyright", "email", "keywords", "language", "phone",
92 "subtitle" };
93
94
95
96
97
98 @Override
99 public void parse( Reader source, Sink sink )
100 throws ParseException
101 {
102 try
103 {
104 String text = IOUtil.toString( source );
105 StringBuilder html = new StringBuilder( text.length() * 2 );
106 html.append( "<html>" );
107 html.append( "<head>" );
108 Pattern metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_SECTION, Pattern.MULTILINE );
109 Matcher metadataMatcher = metadataPattern.matcher( text );
110 boolean haveTitle = false;
111 if ( metadataMatcher.find() )
112 {
113 metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_ENTRY, Pattern.MULTILINE );
114 Matcher lineMatcher = metadataPattern.matcher( metadataMatcher.group( 1 ) );
115 boolean first = true;
116 while ( lineMatcher.find() )
117 {
118 String key = StringUtils.trimToEmpty( lineMatcher.group( 1 ) );
119 if ( first )
120 {
121 boolean found = false;
122 for ( String k : STANDARD_METADATA_KEYS )
123 {
124 if ( k.equalsIgnoreCase( key ) )
125 {
126 found = true;
127 break;
128 }
129 }
130 if ( !found )
131 {
132 break;
133 }
134 first = false;
135 }
136 String value = StringUtils.trimToEmpty( lineMatcher.group( 2 ) );
137 if ( "title".equalsIgnoreCase( key ) )
138 {
139 haveTitle = true;
140 html.append( "<title>" );
141 html.append( StringEscapeUtils.escapeXml( value ) );
142 html.append( "</title>" );
143 }
144 else if ( "author".equalsIgnoreCase( key ) )
145 {
146 html.append( "<meta name=\'author\' content=\'" );
147 html.append( StringEscapeUtils.escapeXml( value ) );
148 html.append( "\' />" );
149 }
150 else if ( "date".equalsIgnoreCase( key ) )
151 {
152 html.append( "<meta name=\'date\' content=\'" );
153 html.append( StringEscapeUtils.escapeXml( value ) );
154 html.append( "\' />" );
155 }
156 else
157 {
158 html.append( "<meta name=\'" );
159 html.append( StringEscapeUtils.escapeXml( key ) );
160 html.append( "\' content=\'" );
161 html.append( StringEscapeUtils.escapeXml( value ) );
162 html.append( "\' />" );
163 }
164 }
165 if ( !first )
166 {
167 text = text.substring( metadataMatcher.end() );
168 }
169 }
170 RootNode rootNode = PEGDOWN_PROCESSOR.parseMarkdown( text.toCharArray() );
171 if ( !haveTitle && rootNode.getChildren().size() > 0 )
172 {
173
174 int i = 0;
175 Node firstNode = null;
176 while ( i < rootNode.getChildren().size() && isHtmlComment(
177 ( firstNode = rootNode.getChildren().get( i ) ) ) )
178 {
179 i++;
180 }
181 if ( firstNode instanceof HeaderNode )
182 {
183 html.append( "<title>" );
184 html.append( StringEscapeUtils.escapeXml( nodeText( firstNode ) ) );
185 html.append( "</title>" );
186 }
187 }
188 html.append( "</head>" );
189 html.append( "<body>" );
190 html.append( new MarkdownToDoxiaHtmlSerializer().toHtml( rootNode ) );
191 html.append( "</body>" );
192 html.append( "</html>" );
193 super.parse( new StringReader( html.toString() ), sink );
194 }
195 catch ( IOException e )
196 {
197 throw new ParseException( "Failed reading Markdown source document", e );
198 }
199 }
200
201 public static boolean isHtmlComment( Node node ) {
202 if (node instanceof HtmlBlockNode) {
203 HtmlBlockNode blockNode = (HtmlBlockNode) node;
204 return blockNode.getText().startsWith( "<!--" );
205 }
206 return false;
207 }
208
209 public static String nodeText( Node node )
210 {
211 StringBuilder builder = new StringBuilder();
212 if ( node instanceof TextNode )
213 {
214 builder.append( TextNode.class.cast( node ).getText() );
215 }
216 else
217 {
218 for ( Node n : node.getChildren() )
219 {
220 if ( n instanceof TextNode )
221 {
222 builder.append( TextNode.class.cast( n ).getText() );
223 }
224 else if ( n instanceof SuperNode )
225 {
226 builder.append( nodeText( n ) );
227 }
228 }
229 }
230 return builder.toString();
231 }
232
233 @Override
234 protected boolean baseEndTag( XmlPullParser parser, Sink sink )
235 {
236 boolean visited = super.baseEndTag( parser, sink );
237 if ( !visited )
238 {
239 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
240 {
241 handleUnknown( parser, sink, TAG_TYPE_END );
242 visited = true;
243 }
244 }
245 return visited;
246 }
247
248 @Override
249 protected boolean baseStartTag(XmlPullParser parser, Sink sink) {
250 boolean visited = super.baseStartTag( parser, sink );
251 if ( !visited )
252 {
253 if ( parser.getName().equals( HtmlMarkup.DIV.toString() ) )
254 {
255 handleUnknown( parser, sink, TAG_TYPE_START );
256 visited = true;
257 }
258 }
259 return visited;
260 }
261 }