1 package org.apache.maven.doxia.parser;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedReader;
23 import java.io.ByteArrayInputStream;
24 import java.io.File;
25 import java.io.FileOutputStream;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.OutputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31 import java.net.URL;
32 import java.util.Hashtable;
33 import java.util.LinkedHashMap;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
38
39 import org.apache.maven.doxia.macro.MacroExecutionException;
40 import org.apache.maven.doxia.markup.XmlMarkup;
41 import org.apache.maven.doxia.sink.Sink;
42 import org.apache.maven.doxia.sink.impl.SinkEventAttributeSet;
43 import org.apache.maven.doxia.util.HtmlTools;
44 import org.apache.maven.doxia.util.XmlValidator;
45
46 import org.codehaus.plexus.util.FileUtils;
47 import org.codehaus.plexus.util.IOUtil;
48 import org.codehaus.plexus.util.StringUtils;
49 import org.codehaus.plexus.util.xml.pull.MXParser;
50 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
51 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
52
53 import org.xml.sax.EntityResolver;
54 import org.xml.sax.InputSource;
55 import org.xml.sax.SAXException;
56
57
58
59
60
61
62
63 public abstract class AbstractXmlParser
64 extends AbstractParser
65 implements XmlMarkup
66 {
67
68
69
70
71
72
73 private static final Pattern PATTERN_ENTITY_1 =
74 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&[a-zA-Z]{2,6};)(\\s)*\"(\\s)*>" );
75
76
77
78
79
80
81
82 private static final Pattern PATTERN_ENTITY_2 =
83 Pattern.compile( ENTITY_START + "(\\s)+([^>|^\\s]+)(\\s)+\"(\\s)*(&(#x?[0-9a-fA-F]{1,5};)*)(\\s)*\"(\\s)*>" );
84
85 private boolean ignorableWhitespace;
86
87 private boolean collapsibleWhitespace;
88
89 private boolean trimmableWhitespace;
90
91 private Map<String, String> entities;
92
93 private boolean validate = false;
94
95
96 public void parse( Reader source, Sink sink, String reference )
97 throws ParseException
98 {
99 init();
100
101 Reader src = source;
102
103
104 if ( isValidate() )
105 {
106 String content;
107 try
108 {
109 content = IOUtil.toString( new BufferedReader( src ) );
110 }
111 catch ( IOException e )
112 {
113 throw new ParseException( "Error reading the model", e );
114 }
115
116 new XmlValidator( ).validate( content );
117
118 src = new StringReader( content );
119 }
120
121
122 try
123 {
124 XmlPullParser parser = new MXParser();
125
126 parser.setInput( src );
127
128
129
130 initXmlParser( parser );
131
132 parseXml( parser, sink );
133 }
134 catch ( XmlPullParserException ex )
135 {
136 throw new ParseException( "Error parsing the model", ex, ex.getLineNumber(),
137 ex.getColumnNumber() );
138 }
139 catch ( MacroExecutionException ex )
140 {
141 throw new ParseException( "Macro execution failed", ex );
142 }
143
144 setSecondParsing( false );
145 init();
146 }
147
148
149
150
151
152
153
154 protected void initXmlParser( XmlPullParser parser )
155 throws XmlPullParserException
156 {
157
158 }
159
160
161 @Override
162 public final int getType()
163 {
164 return XML_TYPE;
165 }
166
167
168
169
170
171
172
173
174 protected SinkEventAttributeSet getAttributesFromParser( XmlPullParser parser )
175 {
176 int count = parser.getAttributeCount();
177
178 if ( count < 0 )
179 {
180 return null;
181 }
182
183 SinkEventAttributeSet atts = new SinkEventAttributeSet( count );
184
185 for ( int i = 0; i < count; i++ )
186 {
187 atts.addAttribute( parser.getAttributeName( i ), parser.getAttributeValue( i ) );
188 }
189
190 return atts;
191 }
192
193
194
195
196
197
198
199
200
201 private void parseXml( XmlPullParser parser, Sink sink )
202 throws XmlPullParserException, MacroExecutionException
203 {
204 int eventType = parser.getEventType();
205
206 while ( eventType != XmlPullParser.END_DOCUMENT )
207 {
208 if ( eventType == XmlPullParser.START_TAG )
209 {
210 handleStartTag( parser, sink );
211 }
212 else if ( eventType == XmlPullParser.END_TAG )
213 {
214 handleEndTag( parser, sink );
215 }
216 else if ( eventType == XmlPullParser.TEXT )
217 {
218 String text = getText( parser );
219
220 if ( isIgnorableWhitespace() )
221 {
222 if ( text.trim().length() != 0 )
223 {
224 handleText( parser, sink );
225 }
226 }
227 else
228 {
229 handleText( parser, sink );
230 }
231 }
232 else if ( eventType == XmlPullParser.CDSECT )
233 {
234 handleCdsect( parser, sink );
235 }
236 else if ( eventType == XmlPullParser.COMMENT )
237 {
238 handleComment( parser, sink );
239 }
240 else if ( eventType == XmlPullParser.ENTITY_REF )
241 {
242 handleEntity( parser, sink );
243 }
244 else if ( eventType == XmlPullParser.IGNORABLE_WHITESPACE )
245 {
246
247 }
248 else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
249 {
250
251 }
252 else if ( eventType == XmlPullParser.DOCDECL )
253 {
254 addLocalEntities( parser, parser.getText() );
255
256 for ( byte[] res : CachedFileEntityResolver.ENTITY_CACHE.values() )
257 {
258 addDTDEntities( parser, new String( res ) );
259 }
260 }
261
262 try
263 {
264 eventType = parser.nextToken();
265 }
266 catch ( IOException io )
267 {
268
269 throw new XmlPullParserException( "IOException: " + io.getMessage(), parser, io );
270 }
271 }
272 }
273
274
275
276
277
278
279
280
281
282 protected abstract void handleStartTag( XmlPullParser parser, Sink sink )
283 throws XmlPullParserException, MacroExecutionException;
284
285
286
287
288
289
290
291
292
293 protected abstract void handleEndTag( XmlPullParser parser, Sink sink )
294 throws XmlPullParserException, MacroExecutionException;
295
296
297
298
299
300
301
302
303
304
305
306 protected void handleText( XmlPullParser parser, Sink sink )
307 throws XmlPullParserException
308 {
309 String text = getText( parser );
310
311
312
313
314
315 if ( StringUtils.isNotEmpty( text ) )
316 {
317 sink.text( text );
318 }
319 }
320
321
322
323
324
325
326
327
328
329
330
331 protected void handleCdsect( XmlPullParser parser, Sink sink )
332 throws XmlPullParserException
333 {
334 sink.text( getText( parser ) );
335 }
336
337
338
339
340
341
342
343
344
345
346
347 protected void handleComment( XmlPullParser parser, Sink sink )
348 throws XmlPullParserException
349 {
350 if ( isEmitComments() )
351 {
352 sink.comment( getText( parser ) );
353 }
354 }
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370 protected void handleEntity( XmlPullParser parser, Sink sink )
371 throws XmlPullParserException
372 {
373 String text = getText( parser );
374
375 String name = parser.getName();
376
377 if ( "#160".equals( name ) || "nbsp".equals( name ) || "#x00A0".equals( name ) )
378 {
379 sink.nonBreakingSpace();
380 }
381 else
382 {
383 String unescaped = HtmlTools.unescapeHTML( text );
384
385 sink.text( unescaped );
386 }
387 }
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404 protected void handleUnknown( XmlPullParser parser, Sink sink, int type )
405 {
406 Object[] required = new Object[] { type };
407
408 SinkEventAttributeSet attribs = getAttributesFromParser( parser );
409
410 sink.unknown( parser.getName(), required, attribs );
411 }
412
413
414
415
416
417
418
419
420 protected boolean isIgnorableWhitespace()
421 {
422 return ignorableWhitespace;
423 }
424
425
426
427
428
429
430
431
432
433
434 protected void setIgnorableWhitespace( boolean ignorable )
435 {
436 this.ignorableWhitespace = ignorable;
437 }
438
439
440
441
442
443
444
445
446 protected boolean isCollapsibleWhitespace()
447 {
448 return collapsibleWhitespace;
449 }
450
451
452
453
454
455
456
457
458
459
460 protected void setCollapsibleWhitespace( boolean collapsible )
461 {
462 this.collapsibleWhitespace = collapsible;
463 }
464
465
466
467
468
469
470
471
472 protected boolean isTrimmableWhitespace()
473 {
474 return trimmableWhitespace;
475 }
476
477
478
479
480
481
482
483
484
485
486 protected void setTrimmableWhitespace( boolean trimmable )
487 {
488 this.trimmableWhitespace = trimmable;
489 }
490
491
492
493
494
495
496
497
498
499
500
501 protected String getText( XmlPullParser parser )
502 {
503 String text = parser.getText();
504
505 if ( isTrimmableWhitespace() )
506 {
507 text = text.trim();
508 }
509
510 if ( isCollapsibleWhitespace() )
511 {
512 StringBuilder newText = new StringBuilder();
513 String[] elts = StringUtils.split( text, " \r\n" );
514 for ( int i = 0; i < elts.length; i++ )
515 {
516 newText.append( elts[i] );
517 if ( ( i + 1 ) < elts.length )
518 {
519 newText.append( " " );
520 }
521 }
522 text = newText.toString();
523 }
524
525 return text;
526 }
527
528
529
530
531
532
533
534
535
536
537
538
539
540 protected Map<String, String> getLocalEntities()
541 {
542 if ( entities == null )
543 {
544 entities = new LinkedHashMap<>();
545 }
546
547 return entities;
548 }
549
550
551
552
553
554
555
556 public boolean isValidate()
557 {
558 return validate;
559 }
560
561
562
563
564
565
566
567
568 public void setValidate( boolean validate )
569 {
570 this.validate = validate;
571 }
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588 private void addEntity( XmlPullParser parser, String entityName, String entityValue )
589 throws XmlPullParserException
590 {
591 if ( entityName.endsWith( "amp" ) || entityName.endsWith( "lt" ) || entityName.endsWith( "gt" )
592 || entityName.endsWith( "quot" ) || entityName.endsWith( "apos" ) )
593 {
594 return;
595 }
596
597 parser.defineEntityReplacementText( entityName, entityValue );
598 getLocalEntities().put( entityName, entityValue );
599 }
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614 private void addLocalEntities( XmlPullParser parser, String text )
615 throws XmlPullParserException
616 {
617 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
618 if ( entitiesCount > 0 )
619 {
620
621 int start = text.indexOf( '[' );
622 int end = text.lastIndexOf( ']' );
623 if ( start != -1 && end != -1 )
624 {
625 addDTDEntities( parser, text.substring( start + 1, end ) );
626 }
627 }
628 }
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645 private void addDTDEntities( XmlPullParser parser, String text )
646 throws XmlPullParserException
647 {
648 int entitiesCount = StringUtils.countMatches( text, ENTITY_START );
649 if ( entitiesCount > 0 )
650 {
651 final String txt = StringUtils.replace( text, ENTITY_START, "\n" + ENTITY_START );
652 try ( BufferedReader reader = new BufferedReader( new StringReader( txt ) ) )
653 {
654 String line;
655 String tmpLine = "";
656 Matcher matcher;
657 while ( ( line = reader.readLine() ) != null )
658 {
659 tmpLine += "\n" + line;
660 matcher = PATTERN_ENTITY_1.matcher( tmpLine );
661 if ( matcher.find() && matcher.groupCount() == 7 )
662 {
663 String entityName = matcher.group( 2 );
664 String entityValue = matcher.group( 5 );
665
666 addEntity( parser, entityName, entityValue );
667 tmpLine = "";
668 }
669 else
670 {
671 matcher = PATTERN_ENTITY_2.matcher( tmpLine );
672 if ( matcher.find() && matcher.groupCount() == 8 )
673 {
674 String entityName = matcher.group( 2 );
675 String entityValue = matcher.group( 5 );
676
677 addEntity( parser, entityName, entityValue );
678 tmpLine = "";
679 }
680 }
681 }
682 }
683 catch ( IOException e )
684 {
685
686 }
687 }
688 }
689
690
691
692
693
694 public static class CachedFileEntityResolver
695 implements EntityResolver
696 {
697
698 protected static final Map<String, byte[]> ENTITY_CACHE = new Hashtable<>();
699
700
701 public InputSource resolveEntity( String publicId, String systemId )
702 throws SAXException, IOException
703 {
704 byte[] res = ENTITY_CACHE.get( systemId );
705
706 if ( res == null )
707 {
708 String systemName = FileUtils.getFile( systemId ).getName();
709 File temp = new File( System.getProperty( "java.io.tmpdir" ), systemName );
710
711 if ( !temp.exists() )
712 {
713
714 if ( systemId.toLowerCase( Locale.ENGLISH ).startsWith( "file" ) )
715 {
716
717
718 String resource = "/" + systemName;
719 URL url = getClass().getResource( resource );
720 if ( url != null )
721 {
722 res = toByteArray( url );
723 }
724 else
725 {
726 throw new SAXException( "Could not find the SYSTEM entity: " + systemId
727 + " because '" + resource + "' is not available of the classpath." );
728 }
729 }
730 else
731 {
732 res = toByteArray( new URL( systemId ) );
733 }
734
735
736 copy( res, temp );
737 }
738 else
739 {
740
741 res = toByteArray( temp.toURI().toURL() );
742 }
743
744 ENTITY_CACHE.put( systemId, res );
745 }
746
747 InputSource is = new InputSource( new ByteArrayInputStream( res ) );
748 is.setPublicId( publicId );
749 is.setSystemId( systemId );
750
751 return is;
752 }
753
754
755
756
757
758
759 private static byte[] toByteArray( URL url )
760 throws SAXException
761 {
762 InputStream is = null;
763 try
764 {
765 is = url.openStream();
766 if ( is == null )
767 {
768 throw new SAXException( "Cannot open stream from the url: " + url );
769 }
770 return IOUtil.toByteArray( is );
771 }
772 catch ( IOException e )
773 {
774 throw new SAXException( e );
775 }
776 finally
777 {
778 IOUtil.close( is );
779 }
780 }
781
782
783
784
785
786
787
788
789
790 private void copy( byte[] res, File f )
791 throws SAXException
792 {
793 if ( f.isDirectory() )
794 {
795 throw new SAXException( "'" + f.getAbsolutePath() + "' is a directory, can not write it." );
796 }
797
798 OutputStream os = null;
799 try
800 {
801 os = new FileOutputStream( f );
802 IOUtil.copy( res, os );
803 }
804 catch ( IOException e )
805 {
806 throw new SAXException( e );
807 }
808 finally
809 {
810 IOUtil.close( os );
811 }
812 }
813 }
814 }