View Javadoc
1   package org.apache.maven.doxia.module.xhtml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.io.StringReader;
25  import java.io.StringWriter;
26  import java.util.HashMap;
27  import java.util.Map;
28  
29  import javax.inject.Named;
30  import javax.swing.text.html.HTML.Attribute;
31  
32  import org.apache.maven.doxia.macro.MacroExecutionException;
33  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
34  import org.apache.maven.doxia.macro.MacroRequest;
35  import org.apache.maven.doxia.parser.ParseException;
36  import org.apache.maven.doxia.parser.XhtmlBaseParser;
37  import org.apache.maven.doxia.sink.Sink;
38  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
39  import org.codehaus.plexus.util.IOUtil;
40  import org.codehaus.plexus.util.StringUtils;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  import org.slf4j.Logger;
44  import org.slf4j.LoggerFactory;
45  
46  /**
47   * Parse an xhtml model and emit events into a Doxia Sink.
48   *
49   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
50   * @since 1.0
51   */
52  @Named( "xhtml" )
53  public class XhtmlParser
54      extends XhtmlBaseParser
55      implements XhtmlMarkup
56  {
    private static final Logger LOGGER = LoggerFactory.getLogger( XhtmlParser.class );

    /**
     * For boxed verbatim. Set when a <code>div</code> whose class attribute is "source" is opened,
     * cleared whenever a <code>div</code> is closed, and consulted when a <code>pre</code> element starts.
     */
    protected boolean boxed;

    /**
     * Empty elements don't write a closing tag. Holds the result of
     * <code>parser.isEmptyElementTag()</code> for the most recently handled start tag;
     * it is set back to <code>false</code> by <code>handleEndTag</code> and <code>init</code>.
     */
    private boolean isEmptyElement;

    /**
     * The source content of the input reader. Used to pass into macros.
     * It is filled in by <code>parse</code> and set back to <code>null</code> once parsing has ended.
     */
    private String sourceContent;
69  
70      /** {@inheritDoc} */
71      protected void handleStartTag( XmlPullParser parser, Sink sink )
72          throws XmlPullParserException, MacroExecutionException
73      {
74          isEmptyElement = parser.isEmptyElementTag();
75  
76          SinkEventAttributeSet attribs = getAttributesFromParser( parser );
77  
78          if ( parser.getName().equals( HTML.toString() ) )
79          {
80              //Do nothing
81              return;
82          }
83          else if ( parser.getName().equals( HEAD.toString() ) )
84          {
85              sink.head( attribs );
86          }
87          else if ( parser.getName().equals( TITLE.toString() ) )
88          {
89              sink.title( attribs );
90          }
91          else if ( parser.getName().equals( META.toString() ) )
92          {
93              String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
94              String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() );
95  
96              if ( "author".equals( name ) )
97              {
98                  sink.author( null );
99  
100                 sink.text( content );
101 
102                 sink.author_();
103             }
104             else if ( "date".equals( name ) )
105             {
106                 sink.date( null );
107 
108                 sink.text( content );
109 
110                 sink.date_();
111             }
112             else
113             {
114                 sink.unknown( "meta", new Object[] { TAG_TYPE_SIMPLE }, attribs );
115             }
116         }
117         /*
118          * The ADDRESS element may be used by authors to supply contact information
119          * for a model or a major part of a model such as a form. This element
120          *  often appears at the beginning or end of a model.
121          */
122         else if ( parser.getName().equals( ADDRESS.toString() ) )
123         {
124             sink.address( attribs );
125         }
126         else if ( parser.getName().equals( BODY.toString() ) )
127         {
128             sink.body( attribs );
129         }
130         else if ( parser.getName().equals( DIV.toString() ) )
131         {
132             String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
133 
134             if ( "source".equals( divclass ) )
135             {
136                 this.boxed = true;
137             }
138 
139             baseStartTag( parser, sink ); // pick up other divs
140         }
141         /*
142          * The PRE element tells visual user agents that the enclosed text is
143          * "preformatted". When handling preformatted text, visual user agents:
144          * - May leave white space intact.
145          * - May render text with a fixed-pitch font.
146          * - May disable automatic word wrap.
147          * - Must not disable bidirectional processing.
148          * Non-visual user agents are not required to respect extra white space
149          * in the content of a PRE element.
150          */
151         else if ( parser.getName().equals( PRE.toString() ) )
152         {
153             if ( boxed )
154             {
155                 attribs.addAttributes( SinkEventAttributeSet.BOXED );
156             }
157 
158             verbatim();
159 
160             sink.verbatim( attribs );
161         }
162         else if ( !baseStartTag( parser, sink ) )
163         {
164             if ( isEmptyElement )
165             {
166                 handleUnknown( parser, sink, TAG_TYPE_SIMPLE );
167             }
168             else
169             {
170                 handleUnknown( parser, sink, TAG_TYPE_START );
171             }
172 
173             LOGGER.warn( "Unrecognized xhtml tag <{}> at [{}:{}]", parser.getName(),
174                     parser.getLineNumber(), parser.getColumnNumber() );
175         }
176     }
177 
178     /** {@inheritDoc} */
179     protected void handleEndTag( XmlPullParser parser, Sink sink )
180         throws XmlPullParserException, MacroExecutionException
181     {
182         if ( parser.getName().equals( HTML.toString() ) )
183         {
184             //Do nothing
185             return;
186         }
187         else if ( parser.getName().equals( HEAD.toString() ) )
188         {
189             sink.head_();
190         }
191         else if ( parser.getName().equals( TITLE.toString() ) )
192         {
193             sink.title_();
194         }
195         else if ( parser.getName().equals( BODY.toString() ) )
196         {
197             consecutiveSections( 0, sink );
198 
199             sink.body_();
200         }
201         else if ( parser.getName().equals( ADDRESS.toString() ) )
202         {
203             sink.address_();
204         }
205         else if ( parser.getName().equals( DIV.toString() ) )
206         {
207             this.boxed = false;
208             baseEndTag( parser, sink );
209         }
210         else if ( !baseEndTag( parser, sink ) )
211         {
212             if ( !isEmptyElement )
213             {
214                 handleUnknown( parser, sink, TAG_TYPE_END );
215             }
216         }
217 
218         isEmptyElement = false;
219     }
220 
221     /** {@inheritDoc} */
222     @Override
223     protected void handleComment( XmlPullParser parser, Sink sink )
224         throws XmlPullParserException
225     {
226         String text = getText( parser ).trim();
227 
228         if ( text.startsWith( "MACRO" ) && !isSecondParsing() )
229         {
230             processMacro( parser, text, sink );
231         }
232         else
233         {
234             super.handleComment( parser, sink );
235         }
236     }
237 
238     /** process macro embedded in XHTML commment */
239     private void processMacro( XmlPullParser parser, String text, Sink sink )
240         throws XmlPullParserException
241     {
242         String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) );
243         s = escapeForMacro( s );
244         String[] params = StringUtils.split( s, "|" );
245         String macroName = params[0];
246 
247         Map<String, Object> parameters = new HashMap<>();
248         for ( int i = 1; i < params.length; i++ )
249         {
250             String[] param = StringUtils.split( params[i], "=" );
251             if ( param.length == 1 )
252             {
253                 throw new XmlPullParserException( "Invalid 'key=value' pair for macro " + macroName + " parameter: "
254                     + params[i], parser, null );
255             }
256 
257             String key = unescapeForMacro( param[0] );
258             String value = unescapeForMacro( param[1] );
259             parameters.put( key, value );
260         }
261 
262         MacroRequest request = new MacroRequest( sourceContent, new XhtmlParser(), parameters, getBasedir() );
263 
264         try
265         {
266             executeMacro( macroName, request, sink );
267         }
268         catch ( MacroExecutionException e )
269         {
270             throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName, parser, e );
271         }
272         catch ( MacroNotFoundException me )
273         {
274             throw new XmlPullParserException( "Macro not found: " + macroName, parser, null );
275         }
276     }
277 
278     /**
279      * escapeForMacro
280      *
281      * @param s String
282      * @return String
283      */
284     private String escapeForMacro( String s )
285     {
286         if ( s == null || s.length() < 1 )
287         {
288             return s;
289         }
290 
291         String result = s;
292 
293         // use some outrageously out-of-place chars for text
294         // (these are device control one/two in unicode)
295         result = StringUtils.replace( result, "\\=", "\u0011" );
296         result = StringUtils.replace( result, "\\|", "\u0012" );
297 
298         return result;
299     }
300 
301     /**
302      * unescapeForMacro
303      *
304      * @param s String
305      * @return String
306      */
307     private String unescapeForMacro( String s )
308     {
309         if ( s == null || s.length() < 1 )
310         {
311             return s;
312         }
313 
314         String result = s;
315 
316         result = StringUtils.replace( result, "\u0011", "=" );
317         result = StringUtils.replace( result, "\u0012", "|" );
318 
319         return result;
320     }
321 
322     /**
323      * {@inheritDoc}
324      */
325     protected void init()
326     {
327         super.init();
328 
329         this.boxed = false;
330         this.isEmptyElement = false;
331     }
332 
333     /** {@inheritDoc} */
334     public void parse( Reader source, Sink sink, String reference )
335         throws ParseException
336     {
337         this.sourceContent = null;
338 
339         try
340         {
341             StringWriter contentWriter = new StringWriter();
342             IOUtil.copy( source, contentWriter );
343             sourceContent = contentWriter.toString();
344         }
345         catch ( IOException ex )
346         {
347             throw new ParseException( "Error reading the input source", ex );
348         }
349         finally
350         {
351             IOUtil.close( source );
352         }
353 
354         try
355         {
356             super.parse( new StringReader( sourceContent ), sink, reference );
357         }
358         finally
359         {
360             this.sourceContent = null;
361         }
362     }
363 }