1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedReader;
23 import java.io.ByteArrayInputStream;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.OutputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.Hashtable;
33 import java.util.LinkedHashMap;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.apache.http.HttpEntity;
40 import org.apache.http.HttpResponse;
41 import org.apache.http.HttpStatus;
42 import org.apache.http.client.ClientProtocolException;
43 import org.apache.http.client.HttpRequestRetryHandler;
44 import org.apache.http.client.methods.HttpGet;
45 import org.apache.http.impl.client.DefaultHttpClient;
46 import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
47 import org.apache.http.util.EntityUtils;
48
49 import org.apache.maven.doxia.macro.MacroExecutionException;
50 import org.apache.maven.doxia.markup.XmlMarkup;
51 import org.apache.maven.doxia.sink.Sink;
52 import org.apache.maven.doxia.sink.SinkEventAttributeSet;
53 import org.apache.maven.doxia.util.HtmlTools;
54 import org.apache.maven.doxia.util.XmlValidator;
55
56 import org.codehaus.plexus.util.FileUtils;
57 import org.codehaus.plexus.util.IOUtil;
58 import org.codehaus.plexus.util.StringUtils;
59 import org.codehaus.plexus.util.xml.pull.MXParser;
60 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
61 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
62
63 import org.xml.sax.EntityResolver;
64 import org.xml.sax.InputSource;
65 import org.xml.sax.SAXException;
66
67
68
69
70
71
72
73
74 public abstract class AbstractXmlParser
75 extends AbstractParser
76 implements XmlMarkup
77 {
78
79
80
81
82
83
84 private static final Pattern PATTERN_ENTITY_1 =
85 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
86
87
88
89
90
91
92
93 private static final Pattern PATTERN_ENTITY_2 =
94 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
95
96 private boolean ignorableWhitespace;
97
98 private boolean collapsibleWhitespace;
99
100 private boolean trimmableWhitespace;
101
102 private Map<String, String> entities;
103
104 private boolean validate = false;
105
106
107 public void parse( Reader source, Sink sink )
108 throws ParseException
109 {
110 init();
111
112
113 if ( isValidate() )
114 {
115 String content;
116 try
117 {
118 content = IOUtil.toString( new BufferedReader( source ) );
119 }
120 catch ( IOException e )
121 {
122 throw new ParseException( "Error reading the model: " + e.getMessage(), e );
123 }
124
125 new XmlValidator( getLog() ).validate( content );
126
127 source = new StringReader( content );
128 }
129
130
131 try
132 {
133 XmlPullParser parser = new MXParser();
134
135 parser.setInput( source );
136
137 sink.enableLogging( getLog() );
138
139 parseXml( parser, sink );
140 }
141 catch ( XmlPullParserException ex )
142 {
143 throw new ParseException( "Error parsing the model: " + ex.getMessage(), ex, ex.getLineNumber(),
144 ex.getColumnNumber() );
145 }
146 catch ( MacroExecutionException ex )
147 {
148 throw new ParseException( "Macro execution failed: " + ex.getMessage(), ex );
149 }
150
151 setSecondParsing( false );
152 init();
153 }
154
155
156
157
158
159
160 public void parse( String string, Sink sink )
161 throws ParseException
162 {
163 super.parse( string, sink );
164 }
165
166
167 public final int getType()
168 {
169 return XML_TYPE;
170 }
171
172
173
174
175
176
177
178
179 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
180 {
181 int count = parser.getAttributeCount();
182
183 if ( count < 0 )
184 {
185 return null;
186 }
187
188 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
189
190 for ( int i = 0; i < count; i++ )
191 {
192 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
193 }
194
195 return atts;
196 }
197
198
199
200
201
202
203
204
205
206 private void parseXml( XmlPullParser parser, Sink sink )
207 throws XmlPullParserException, MacroExecutionException
208 {
209 int eventType = parser.getEventType();
210
211 while ( eventType != XmlPullParser.END_DOCUMENT )
212 {
213 if ( eventType == XmlPullParser.START_TAG )
214 {
215 handleStartTag( parser, sink );
216 }
217 else if ( eventType == XmlPullParser.END_TAG )
218 {
219 handleEndTag( parser, sink );
220 }
221 else if ( eventType == XmlPullParser.TEXT )
222 {
223 String text = getText( parser );
224
225 if ( isIgnorableWhitespace() )
226 {
227 if ( !text.trim().equals( "" ) )
228 {
229 handleText( parser, sink );
230 }
231 }
232 else
233 {
234 handleText( parser, sink );
235 }
236 }
237 else if ( eventType == XmlPullParser.CDSECT )
238 {
239 handleCdsect( parser, sink );
240 }
241 else if ( eventType == XmlPullParser.COMMENT )
242 {
243 handleComment( parser, sink );
244 }
245 else if ( eventType == XmlPullParser.ENTITY_REF )
246 {
247 handleEntity( parser, sink );
248 }
249 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
250 {
251
252 }
253 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
254 {
255
256 }
257 else if ( eventType == XmlPullParser.DOCDECL )
258 {
259 addLocalEntities( parser, parser.getText() );
260
261 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
262 {
263 addDTDEntities( parser, new String( res ) );
264 }
265 }
266
267 try
268 {
269 eventType = parser.nextToken();
270 }
271 catch ( IOException io )
272 {
273 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
274 }
275 }
276 }
277
278
279
280
281
282
283
284
285
286 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
287 throws XmlPullParserException, MacroExecutionException;
288
289
290
291
292
293
294
295
296
297 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
298 throws XmlPullParserException, MacroExecutionException;
299
300
301
302
303
304
305
306
307
308
309
310 protected void handleText( XmlPullParser parser, Sink sink )
311 throws XmlPullParserException
312 {
313 String text = getText( parser );
314
315
316
317
318
319 if ( StringUtils.isNotEmpty( text ) )
320 {
321 sink.text( text );
322 }
323 }
324
325
326
327
328
329
330
331
332
333
334
335 protected void handleCdsect( XmlPullParser parser, Sink sink )
336 throws XmlPullParserException
337 {
338 sink.text( getText( parser ) );
339 }
340
341
342
343
344
345
346
347
348
349
350
351 protected void handleComment( XmlPullParser parser, Sink sink )
352 throws XmlPullParserException
353 {
354 sink.comment( getText( parser ).trim() );
355 }
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371 protected void handleEntity( XmlPullParser parser, Sink sink )
372 throws XmlPullParserException
373 {
374 String text = getText( parser );
375
376 String name = parser.getName();
377
378 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
379 {
380 sink.nonBreakingSpace();
381 }
382 else
383 {
384 String unescaped = HtmlTools.unescapeHTML( text );
385
386 sink.text( unescaped );
387 }
388 }
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
405 {
406 Object[] required = new Object[] { new Integer( type ) };
407
408 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
409
410 sink.unknown( parser.getName(), required, attribs );
411 }
412
413
414
415
416
417
418
419
420 protected boolean isIgnorableWhitespace()
421 {
422 return ignorableWhitespace;
423 }
424
425
426
427
428
429
430
431
432
433
434 protected void setIgnorableWhitespace( boolean ignorable )
435 {
436 this.ignorableWhitespace = ignorable;
437 }
438
439
440
441
442
443
444
445
446 protected boolean isCollapsibleWhitespace()
447 {
448 return collapsibleWhitespace;
449 }
450
451
452
453
454
455
456
457
458
459
460 protected void setCollapsibleWhitespace( boolean collapsible )
461 {
462 this.collapsibleWhitespace = collapsible;
463 }
464
465
466
467
468
469
470
471
472 protected boolean isTrimmableWhitespace()
473 {
474 return trimmableWhitespace;
475 }
476
477
478
479
480
481
482
483
484
485
486 protected void setTrimmableWhitespace( boolean trimmable )
487 {
488 this.trimmableWhitespace = trimmable;
489 }
490
491
492
493
494
495
496
497
498
499
500
501 protected String getText( XmlPullParser parser )
502 {
503 String text = parser.getText();
504
505 if ( isTrimmableWhitespace() )
506 {
507 text = text.trim();
508 }
509
510 if ( isCollapsibleWhitespace() )
511 {
512 StringBuffer newText = new StringBuffer();
513 String[] elts = StringUtils.split( text, " \r\n" );
514 for ( int i = 0; i < elts.length; i++ )
515 {
516 newText.append( elts[i] );
517 if ( ( i + 1 ) < elts.length )
518 {
519 newText.append( " " );
520 }
521 }
522 text = newText.toString();
523 }
524
525 return text;
526 }
527
528
529
530
531
532
533
534
535
536
537
538
539
540 protected Map<String, String> getLocalEntities()
541 {
542 if ( entities == null )
543 {
544 entities = new LinkedHashMap<String, String>();
545 }
546
547 return entities;
548 }
549
550
551
552
553
554
555
556 public boolean isValidate()
557 {
558 return validate;
559 }
560
561
562
563
564
565
566
567
568 public void setValidate( boolean validate )
569 {
570 this.validate = validate;
571 }
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
589 throws XmlPullParserException
590 {
591 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
592 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
593 {
594 return;
595 }
596
597 parser.defineEntityReplacementText( entityName, entityValue );
598 getLocalEntities().put( entityName, entityValue );
599 }
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614 private void addLocalEntities( XmlPullParser parser, String text )
615 throws XmlPullParserException
616 {
617 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
618 if ( entitiesCount > 0 )
619 {
620
621 int start = text.indexOf( "[" );
622 int end = text.lastIndexOf( "]" );
623 if ( start != -1 && end != -1 )
624 {
625 text = text.substring( start + 1, end );
626 addDTDEntities( parser, text );
627 }
628 }
629 }
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646 private void addDTDEntities( XmlPullParser parser, String text )
647 throws XmlPullParserException
648 {
649 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
650 if ( entitiesCount > 0 )
651 {
652 text = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
653 BufferedReader reader = new BufferedReader( new StringReader( text ) );
654 String line;
655 String tmpLine = "";
656 try
657 {
658 Matcher matcher;
659 while ( ( line = reader.readLine() ) != null )
660 {
661 tmpLine += "\n" + line;
662 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
663 if ( matcher.find() && matcher.groupCount() == 7 )
664 {
665 String entityName = matcher.group( 2 );
666 String entityValue = matcher.group( 5 );
667
668 addEntity( parser, entityName, entityValue );
669 tmpLine = "";
670 }
671 else
672 {
673 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
674 if ( matcher.find() && matcher.groupCount() == 8 )
675 {
676 String entityName = matcher.group( 2 );
677 String entityValue = matcher.group( 5 );
678
679 addEntity( parser, entityName, entityValue );
680 tmpLine = "";
681 }
682 }
683 }
684 }
685 catch ( IOException e )
686 {
687
688 }
689 finally
690 {
691 IOUtil.close( reader );
692 }
693 }
694 }
695
696
697
698
699
700 public static class CachedFileEntityResolver
701 implements EntityResolver
702 {
703
704 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<String, byte[]>();
705
706
707 public InputSource resolveEntity( String publicId, String systemId )
708 throws SAXException, IOException
709 {
710 byte[] res = ENTITY_CACHE.get( systemId );
711
712 if ( res == null )
713 {
714 String systemName = FileUtils.getFile( systemId ).getName();
715 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
716
717 if ( !temp.exists() )
718 {
719
720 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
721 {
722
723
724 String resource = "/" + systemName;
725 URL url = getClass().getResource( resource );
726 if ( url != null )
727 {
728 res = toByteArray( url );
729 }
730 else
731 {
732 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
733 + " because '" + resource + "' is not available of the classpath." );
734 }
735 }
736 else
737 {
738 res = toByteArray( new URL( systemId ) );
739 }
740
741
742 copy( res, temp );
743 }
744 else
745 {
746
747 res = toByteArray( temp.toURI().toURL() );
748 }
749
750 ENTITY_CACHE.put( systemId, res );
751 }
752
753 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
754 is.setPublicId( publicId );
755 is.setSystemId( systemId );
756
757 return is;
758 }
759
760
761
762
763
764
765
766
767
768
769
770 private static byte[] toByteArray( URL url )
771 throws SAXException
772 {
773 if ( !( url.getProtocol().equalsIgnoreCase( "http" ) || url.getProtocol().equalsIgnoreCase( "https" ) ) )
774 {
775 InputStream is = null;
776 try
777 {
778 is = url.openStream();
779 if ( is == null )
780 {
781 throw new SAXException( "Cannot open stream from the url: " + url.toString() );
782 }
783 return IOUtil.toByteArray( is );
784 }
785 catch ( IOException e )
786 {
787 throw new SAXException( "IOException: " + e.getMessage(), e );
788 }
789 finally
790 {
791 IOUtil.close( is );
792 }
793 }
794
795
796 DefaultHttpClient client = new DefaultHttpClient();
797 HttpGet method = new HttpGet( url.toString() );
798
799
800 method.setHeader( "user-agent", "Apache-Doxia/" + doxiaVersion() );
801
802 HttpRequestRetryHandler retryHandler = new DefaultHttpRequestRetryHandler( 3, false );
803 client.setHttpRequestRetryHandler( retryHandler );
804
805 HttpEntity entity = null;
806 try
807 {
808 HttpResponse response = client.execute( method );
809 int statusCode = response.getStatusLine().getStatusCode();
810 if ( statusCode != HttpStatus.SC_OK )
811 {
812 throw new IOException( "The status code when accessing the URL '" + url.toString() + "' was "
813 + statusCode + ", which is not allowed. The server gave this reason for the failure '"
814 + response.getStatusLine().getReasonPhrase() + "'." );
815 }
816
817 entity = response.getEntity();
818 return EntityUtils.toByteArray( entity );
819 }
820 catch ( ClientProtocolException e )
821 {
822 throw new SAXException( "ClientProtocolException: Fatal protocol violation: " + e.getMessage(), e );
823 }
824 catch ( IOException e )
825 {
826 throw new SAXException( "IOException: Fatal transport error: " + e.getMessage(), e );
827 }
828 finally
829 {
830 if ( entity != null )
831 {
832 try
833 {
834 entity.consumeContent();
835 }
836 catch ( IOException e )
837 {
838
839 }
840 }
841 }
842 }
843
844
845
846
847
848
849
850
851
852 private void copy( byte[] res, File f )
853 throws SAXException
854 {
855 if ( f.isDirectory() )
856 {
857 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
858 }
859
860 OutputStream os = null;
861 try
862 {
863 os = new FileOutputStream( f );
864 IOUtil.copy( res, os );
865 }
866 catch ( IOException e )
867 {
868 throw new SAXException( "IOException: " + e.getMessage(), e );
869 }
870 finally
871 {
872 IOUtil.close( os );
873 }
874 }
875 }
876 }