1 package org.apache.maven.doxia.module.markdown;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.apache.commons.lang.StringEscapeUtils;
23 import org.apache.commons.lang.StringUtils;
24 import org.apache.maven.doxia.module.xhtml.XhtmlParser;
25 import org.apache.maven.doxia.parser.ParseException;
26 import org.apache.maven.doxia.parser.Parser;
27 import org.apache.maven.doxia.sink.Sink;
28 import org.codehaus.plexus.component.annotations.Component;
29 import org.codehaus.plexus.util.IOUtil;
30 import org.pegdown.Extensions;
31 import org.pegdown.PegDownProcessor;
32 import org.pegdown.ast.HeaderNode;
33 import org.pegdown.ast.HtmlBlockNode;
34 import org.pegdown.ast.Node;
35 import org.pegdown.ast.RootNode;
36 import org.pegdown.ast.SuperNode;
37 import org.pegdown.ast.TextNode;
38
39 import java.io.IOException;
40 import java.io.Reader;
41 import java.io.StringReader;
42 import java.util.regex.Matcher;
43 import java.util.regex.Pattern;
44
45
46
47
48
49
50
51
52
53 @Component( role = Parser.class, hint = "markdown" )
54 public class MarkdownParser
55 extends XhtmlParser
56 {
57
58
59
60
61 public static final String ROLE_HINT = "markdown";
62
63
64
65
66 protected static final PegDownProcessor PEGDOWN_PROCESSOR =
67 new PegDownProcessor( Extensions.ALL & ~Extensions.HARDWRAPS, Long.MAX_VALUE );
68
69
70
71
72 private static final String MULTI_MARKDOWN_METADATA_SECTION =
73 "^(((?:[^\\s:][^:]*):(?:.*(?:\r?\n\\s[^\\s].*)*\r?\n))+)(?:\\s*\r?\n)";
74
75
76
77
78 private static final String MULTI_MARKDOWN_METADATA_ENTRY = "([^\\s:][^:]*):(.*(?:\r?\n\\s[^\\s].*)*)\r?\n";
79
80
81
82
83
84
85 private static final String[] STANDARD_METADATA_KEYS =
86 { "title", "author", "date", "address", "affiliation", "copyright", "email", "keywords", "language", "phone",
87 "subtitle" };
88
89
90
91
92
93 @Override
94 public void parse( Reader source, Sink sink )
95 throws ParseException
96 {
97 try
98 {
99 String text = IOUtil.toString( source );
100 StringBuilder html = new StringBuilder( text.length() * 2 );
101 html.append( "<html>" );
102 html.append( "<head>" );
103 Pattern metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_SECTION, Pattern.MULTILINE );
104 Matcher metadataMatcher = metadataPattern.matcher( text );
105 boolean haveTitle = false;
106 if ( metadataMatcher.find() )
107 {
108 metadataPattern = Pattern.compile( MULTI_MARKDOWN_METADATA_ENTRY, Pattern.MULTILINE );
109 Matcher lineMatcher = metadataPattern.matcher( metadataMatcher.group( 1 ) );
110 boolean first = true;
111 while ( lineMatcher.find() )
112 {
113 String key = StringUtils.trimToEmpty( lineMatcher.group( 1 ) );
114 if ( first )
115 {
116 boolean found = false;
117 for ( String k : STANDARD_METADATA_KEYS )
118 {
119 if ( k.equalsIgnoreCase( key ) )
120 {
121 found = true;
122 break;
123 }
124 }
125 if ( !found )
126 {
127 break;
128 }
129 first = false;
130 }
131 String value = StringUtils.trimToEmpty( lineMatcher.group( 2 ) );
132 if ( "title".equalsIgnoreCase( key ) )
133 {
134 haveTitle = true;
135 html.append( "<title>" );
136 html.append( StringEscapeUtils.escapeXml( value ) );
137 html.append( "</title>" );
138 }
139 else if ( "author".equalsIgnoreCase( key ) )
140 {
141 html.append( "<meta name=\'author\' content=\'" );
142 html.append( StringEscapeUtils.escapeXml( value ) );
143 html.append( "\' />" );
144 }
145 else if ( "date".equalsIgnoreCase( key ) )
146 {
147 html.append( "<meta name=\'date\' content=\'" );
148 html.append( StringEscapeUtils.escapeXml( value ) );
149 html.append( "\' />" );
150 }
151 else
152 {
153 html.append( "<meta name=\'" );
154 html.append( StringEscapeUtils.escapeXml( key ) );
155 html.append( "\' content=\'" );
156 html.append( StringEscapeUtils.escapeXml( value ) );
157 html.append( "\' />" );
158 }
159 }
160 if ( !first )
161 {
162 text = text.substring( metadataMatcher.end() );
163 }
164 }
165 RootNode rootNode = PEGDOWN_PROCESSOR.parseMarkdown( text.toCharArray() );
166 if ( !haveTitle && rootNode.getChildren().size() > 0 )
167 {
168
169 int i = 0;
170 Node firstNode = null;
171 while ( i < rootNode.getChildren().size() && isHtmlComment(
172 ( firstNode = rootNode.getChildren().get( i ) ) ) )
173 {
174 i++;
175 }
176 if ( firstNode instanceof HeaderNode )
177 {
178 html.append( "<title>" );
179 html.append( StringEscapeUtils.escapeXml( nodeText( firstNode ) ) );
180 html.append( "</title>" );
181 }
182 }
183 html.append( "</head>" );
184 html.append( "<body>" );
185 html.append( new MarkdownToDoxiaHtmlSerializer().toHtml( rootNode ) );
186 html.append( "</body>" );
187 html.append( "</html>" );
188 super.parse( new StringReader( html.toString() ), sink );
189 }
190 catch ( IOException e )
191 {
192 throw new ParseException( "Failed reading Markdown source document", e );
193 }
194 }
195
196 public static boolean isHtmlComment( Node node ) {
197 if (node instanceof HtmlBlockNode) {
198 HtmlBlockNode blockNode = (HtmlBlockNode) node;
199 return blockNode.getText().startsWith( "<!--" );
200 }
201 return false;
202 }
203
204 public static String nodeText( Node node )
205 {
206 StringBuilder builder = new StringBuilder();
207 if ( node instanceof TextNode )
208 {
209 builder.append( TextNode.class.cast( node ).getText() );
210 }
211 else
212 {
213 for ( Node n : node.getChildren() )
214 {
215 if ( n instanceof TextNode )
216 {
217 builder.append( TextNode.class.cast( n ).getText() );
218 }
219 else if ( n instanceof SuperNode )
220 {
221 builder.append( nodeText( n ) );
222 }
223 }
224 }
225 return builder.toString();
226 }
227
228 }