View Javadoc
1   package org.apache.maven.doxia.module.xhtml;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.Reader;
24  import java.io.StringReader;
25  import java.io.StringWriter;
26  import java.util.HashMap;
27  import java.util.Map;
28  import javax.swing.text.html.HTML.Attribute;
29  
30  import org.apache.maven.doxia.macro.MacroExecutionException;
31  import org.apache.maven.doxia.macro.manager.MacroNotFoundException;
32  import org.apache.maven.doxia.macro.MacroRequest;
33  import org.apache.maven.doxia.parser.ParseException;
34  import org.apache.maven.doxia.parser.Parser;
35  import org.apache.maven.doxia.parser.XhtmlBaseParser;
36  import org.apache.maven.doxia.sink.Sink;
37  import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
38  import org.codehaus.plexus.component.annotations.Component;
39  import org.codehaus.plexus.util.IOUtil;
40  import org.codehaus.plexus.util.StringUtils;
41  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
42  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
43  
44  /**
45   * Parse an xhtml model and emit events into a Doxia Sink.
46   *
47   * @author <a href="mailto:jason@maven.org">Jason van Zyl</a>
48   * @version $Id: XhtmlParser.java 1726411 2016-01-23 16:34:09Z hboutemy $
49   * @since 1.0
50   */
51  @Component( role = Parser.class, hint = "xhtml" )
52  public class XhtmlParser
53      extends XhtmlBaseParser
54      implements XhtmlMarkup
55  {
56      /** For boxed verbatim. */
57      private boolean boxed;
58  
59      /** Empty elements don't write a closing tag. */
60      private boolean isEmptyElement;
61  
62      /**
63       * The source content of the input reader. Used to pass into macros.
64       */
65      private String sourceContent;
66  
67      /** {@inheritDoc} */
68      protected void handleStartTag( XmlPullParser parser, Sink sink )
69          throws XmlPullParserException, MacroExecutionException
70      {
71          isEmptyElement = parser.isEmptyElementTag();
72  
73          SinkEventAttributeSet attribs = getAttributesFromParser( parser );
74  
75          if ( parser.getName().equals( HTML.toString() ) )
76          {
77              //Do nothing
78              return;
79          }
80          else if ( parser.getName().equals( HEAD.toString() ) )
81          {
82              sink.head( attribs );
83          }
84          else if ( parser.getName().equals( TITLE.toString() ) )
85          {
86              sink.title( attribs );
87          }
88          else if ( parser.getName().equals( META.toString() ) )
89          {
90              String name = parser.getAttributeValue( null, Attribute.NAME.toString() );
91              String content = parser.getAttributeValue( null, Attribute.CONTENT.toString() );
92  
93              if ( "author".equals( name ) )
94              {
95                  sink.author( null );
96  
97                  sink.text( content );
98  
99                  sink.author_();
100             }
101             else if ( "date".equals( name ) )
102             {
103                 sink.date( null );
104 
105                 sink.text( content );
106 
107                 sink.date_();
108             }
109             else
110             {
111                 sink.unknown( "meta", new Object[] { Integer.valueOf( TAG_TYPE_SIMPLE ) }, attribs );
112             }
113         }
114         /*
115          * The ADDRESS element may be used by authors to supply contact information
116          * for a model or a major part of a model such as a form. This element
117          *  often appears at the beginning or end of a model.
118          */
119         else if ( parser.getName().equals( ADDRESS.toString() ) )
120         {
121             sink.author( attribs );
122         }
123         else if ( parser.getName().equals( BODY.toString() ) )
124         {
125             sink.body( attribs );
126         }
127         else if ( parser.getName().equals( DIV.toString() ) )
128         {
129             String divclass = parser.getAttributeValue( null, Attribute.CLASS.toString() );
130 
131             if ( "source".equals( divclass ) )
132             {
133                 this.boxed = true;
134             }
135 
136             baseStartTag( parser, sink ); // pick up other divs
137         }
138         /*
139          * The PRE element tells visual user agents that the enclosed text is
140          * "preformatted". When handling preformatted text, visual user agents:
141          * - May leave white space intact.
142          * - May render text with a fixed-pitch font.
143          * - May disable automatic word wrap.
144          * - Must not disable bidirectional processing.
145          * Non-visual user agents are not required to respect extra white space
146          * in the content of a PRE element.
147          */
148         else if ( parser.getName().equals( PRE.toString() ) )
149         {
150             if ( boxed )
151             {
152                 attribs.addAttributes( SinkEventAttributeSet.BOXED );
153             }
154 
155             verbatim();
156 
157             sink.verbatim( attribs );
158         }
159         else if ( !baseStartTag( parser, sink ) )
160         {
161             if ( isEmptyElement )
162             {
163                 handleUnknown( parser, sink, TAG_TYPE_SIMPLE );
164             }
165             else
166             {
167                 handleUnknown( parser, sink, TAG_TYPE_START );
168             }
169 
170             if ( getLog().isDebugEnabled() )
171             {
172                 String position = "[" + parser.getLineNumber() + ":"
173                     + parser.getColumnNumber() + "]";
174                 String tag = "<" + parser.getName() + ">";
175 
176                 getLog().debug( "Unrecognized xhtml tag: " + tag + " at " + position );
177             }
178         }
179     }
180 
181     /** {@inheritDoc} */
182     protected void handleEndTag( XmlPullParser parser, Sink sink )
183         throws XmlPullParserException, MacroExecutionException
184     {
185         if ( parser.getName().equals( HTML.toString() ) )
186         {
187             //Do nothing
188             return;
189         }
190         else if ( parser.getName().equals( HEAD.toString() ) )
191         {
192             sink.head_();
193         }
194         else if ( parser.getName().equals( TITLE.toString() ) )
195         {
196             sink.title_();
197         }
198         else if ( parser.getName().equals( BODY.toString() ) )
199         {
200             consecutiveSections( 0, sink );
201 
202             sink.body_();
203         }
204         else if ( parser.getName().equals( ADDRESS.toString() ) )
205         {
206             sink.author_();
207         }
208         else if ( parser.getName().equals( DIV.toString() ) )
209         {
210             this.boxed = false;
211             baseEndTag( parser, sink );
212         }
213         else if ( !baseEndTag( parser, sink ) )
214         {
215             if ( !isEmptyElement )
216             {
217                 handleUnknown( parser, sink, TAG_TYPE_END );
218             }
219         }
220 
221         isEmptyElement = false;
222     }
223 
224     /** {@inheritDoc} */
225     @Override
226     protected void handleComment( XmlPullParser parser, Sink sink )
227         throws XmlPullParserException
228     {
229         String text = getText( parser ).trim();
230 
231         if ( text.startsWith( "MACRO" ) && !isSecondParsing() )
232         {
233             processMacro( text, sink );
234         }
235         else
236         {
237             super.handleComment( parser, sink );
238         }
239     }
240 
241     /** process macro embedded in XHTML commment */
242     private void processMacro( String text, Sink sink )
243         throws XmlPullParserException
244     {
245         String s = text.substring( text.indexOf( '{' ) + 1, text.indexOf( '}' ) );
246         s = escapeForMacro( s );
247         String[] params = StringUtils.split( s, "|" );
248         String macroName = params[0];
249 
250         Map<String, Object> parameters = new HashMap<String, Object>();
251         for ( int i = 1; i < params.length; i++ )
252         {
253             String[] param = StringUtils.split( params[i], "=" );
254             if ( param.length == 1 )
255             {
256                 throw new XmlPullParserException( "Missing 'key=value' pair for macro parameter: " + params[i] );
257             }
258 
259             String key = unescapeForMacro( param[0] );
260             String value = unescapeForMacro( param[1] );
261             parameters.put( key, value );
262         }
263 
264         MacroRequest request = new MacroRequest( sourceContent, new XhtmlParser(), parameters, getBasedir() );
265 
266         try
267         {
268             executeMacro( macroName, request, sink );
269         }
270         catch ( MacroExecutionException e )
271         {
272             throw new XmlPullParserException( "Unable to execute macro in the document: " + macroName );
273         }
274         catch ( MacroNotFoundException me )
275         {
276             throw new XmlPullParserException( "Macro not found: " + macroName );
277         }
278     }
279 
280     /**
281      * escapeForMacro
282      *
283      * @param s String
284      * @return String
285      */
286     private String escapeForMacro( String s )
287     {
288         if ( s == null || s.length() < 1 )
289         {
290             return s;
291         }
292 
293         String result = s;
294 
295         // use some outrageously out-of-place chars for text
296         // (these are device control one/two in unicode)
297         result = StringUtils.replace( result, "\\=", "\u0011" );
298         result = StringUtils.replace( result, "\\|", "\u0012" );
299 
300         return result;
301     }
302 
303     /**
304      * unescapeForMacro
305      *
306      * @param s String
307      * @return String
308      */
309     private String unescapeForMacro( String s )
310     {
311         if ( s == null || s.length() < 1 )
312         {
313             return s;
314         }
315 
316         String result = s;
317 
318         result = StringUtils.replace( result, "\u0011", "=" );
319         result = StringUtils.replace( result, "\u0012", "|" );
320 
321         return result;
322     }
323 
324     /** {@inheritDoc} */
325     protected void init()
326     {
327         super.init();
328 
329         this.boxed = false;
330         this.isEmptyElement = false;
331     }
332 
333     /** {@inheritDoc} */
334     public void parse( Reader source, Sink sink )
335         throws ParseException
336     {
337         this.sourceContent = null;
338 
339         try
340         {
341             StringWriter contentWriter = new StringWriter();
342             IOUtil.copy( source, contentWriter );
343             sourceContent = contentWriter.toString();
344         }
345         catch ( IOException ex )
346         {
347             throw new ParseException( "Error reading the input source: " + ex.getMessage(), ex );
348         }
349         finally
350         {
351             IOUtil.close( source );
352         }
353 
354         try
355         {
356             super.parse( new StringReader( sourceContent ), sink );
357         }
358         finally
359         {
360             this.sourceContent = null;
361         }
362     }
363 }