1 package org.apache.maven.doxia;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.CharArrayWriter;
24 import java.io.File;
25 import java.io.FileInputStream;
26 import java.io.FileOutputStream;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.OutputStream;
30 import java.io.Reader;
31 import java.io.Writer;
32 import java.util.HashMap;
33 import java.util.List;
34 import java.util.Locale;
35 import java.util.Map;
36 import java.util.Objects;
37 import java.util.regex.Matcher;
38 import java.util.regex.Pattern;
39
40 import org.apache.maven.doxia.logging.Log;
41 import org.apache.maven.doxia.logging.SystemStreamLog;
42 import org.apache.maven.doxia.parser.ParseException;
43 import org.apache.maven.doxia.parser.Parser;
44 import org.apache.maven.doxia.sink.Sink;
45 import org.apache.maven.doxia.sink.SinkFactory;
46 import org.apache.maven.doxia.util.ConverterUtil;
47 import org.apache.maven.doxia.wrapper.InputFileWrapper;
48 import org.apache.maven.doxia.wrapper.InputReaderWrapper;
49 import org.apache.maven.doxia.wrapper.OutputFileWrapper;
50 import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
51 import org.codehaus.plexus.ContainerConfiguration;
52 import org.codehaus.plexus.DefaultContainerConfiguration;
53 import org.codehaus.plexus.DefaultPlexusContainer;
54 import org.codehaus.plexus.PlexusContainer;
55 import org.codehaus.plexus.PlexusContainerException;
56 import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
57 import org.codehaus.plexus.util.FileUtils;
58 import org.codehaus.plexus.util.ReaderFactory;
59 import org.codehaus.plexus.util.SelectorUtils;
60 import org.codehaus.plexus.util.StringUtils;
61 import org.codehaus.plexus.util.WriterFactory;
62 import org.apache.commons.io.input.XmlStreamReader;
63 import org.codehaus.plexus.util.xml.XmlUtil;
64 import org.codehaus.plexus.util.xml.pull.MXParser;
65 import org.codehaus.plexus.util.xml.pull.XmlPullParser;
66 import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
67
68 import com.ibm.icu.text.CharsetDetector;
69 import com.ibm.icu.text.CharsetMatch;
70
71 import static java.lang.String.format;
72
73
74
75
76
77
78 public class DefaultConverter
79 implements Converter
80 {
/** Identifier of the APT input format. */
private static final String APT_PARSER = "apt";

/** Identifier of the Confluence input format. */
private static final String CONFLUENCE_PARSER = "confluence";

/** Identifier of the DocBook input format. */
private static final String DOCBOOK_PARSER = "docbook";

/** Identifier of the FML input format. */
private static final String FML_PARSER = "fml";

/** Identifier of the TWiki input format. */
private static final String TWIKI_PARSER = "twiki";

/** Identifier of the Xdoc input format. */
private static final String XDOC_PARSER = "xdoc";

/** Identifier of the XHTML input format. */
private static final String XHTML_PARSER = "xhtml";

/** Identifier of the XHTML5 input format. */
private static final String XHTML5_PARSER = "xhtml5";

/** Identifier of the Markdown input format. */
private static final String MARKDOWN_PARSER = "markdown";

/**
 * Input format identifiers accepted by this converter. This array is passed to the parser lookup and is
 * iterated, in this order, by the input format auto-detection.
 */
public static final String[] SUPPORTED_FROM_FORMAT =
    { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER, FML_PARSER, MARKDOWN_PARSER, TWIKI_PARSER,
        XDOC_PARSER, XHTML_PARSER, XHTML5_PARSER };
103
/** Identifier of the APT output format. */
private static final String APT_SINK = "apt";

/** Identifier of the Confluence output format. */
private static final String CONFLUENCE_SINK = "confluence";

/** Identifier of the DocBook output format. */
private static final String DOCBOOK_SINK = "docbook";

/** Identifier of the FO output format. */
private static final String FO_SINK = "fo";

/** Identifier of the iText output format. */
private static final String ITEXT_SINK = "itext";

/** Identifier of the LaTeX output format. */
private static final String LATEX_SINK = "latex";

/** Identifier of the RTF output format. */
private static final String RTF_SINK = "rtf";

/** Identifier of the TWiki output format. */
private static final String TWIKI_SINK = "twiki";

/** Identifier of the Xdoc output format. */
private static final String XDOC_SINK = "xdoc";

/** Identifier of the XHTML output format. */
private static final String XHTML_SINK = "xhtml";

/** Identifier of the XHTML5 output format. */
private static final String XHTML5_SINK = "xhtml5";

/** Identifier of the Markdown output format. */
private static final String MARKDOWN_SINK = "markdown";

/** Output format identifiers accepted by this converter; passed to the sink factory lookup. */
public static final String[] SUPPORTED_TO_FORMAT =
    { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK, ITEXT_SINK, LATEX_SINK, MARKDOWN_SINK, RTF_SINK, TWIKI_SINK,
        XDOC_SINK, XHTML_SINK, XHTML5_SINK };
132
133
/** Whether file-based conversions pretty-print the generated file for the XML-like output formats. */
private boolean formatOutput;

/** Plexus container used to look up parsers and sink factories; created per conversion and disposed afterwards. */
private PlexusContainer plexus;

/** Logger set through {@code enableLogging}; may be {@code null} until {@code getLog()} is first called. */
private Log log;
141
142
143 @Override
144 public void enableLogging( Log log )
145 {
146 this.log = log;
147 }
148
149
150
151
152
153
154
155 protected Log getLog()
156 {
157 if ( log == null )
158 {
159 log = new SystemStreamLog();
160 }
161
162 return log;
163 }
164
165
166 @Override
167 public String[] getInputFormats()
168 {
169 return SUPPORTED_FROM_FORMAT;
170 }
171
172
173 @Override
174 public String[] getOutputFormats()
175 {
176 return SUPPORTED_TO_FORMAT;
177 }
178
179
180 @Override
181 public void convert( InputFileWrapper input, OutputFileWrapper output )
182 throws UnsupportedFormatException, ConverterException
183 {
184 Objects.requireNonNull( input, "input is required" );
185 Objects.requireNonNull( output, "output is required" );
186
187 try
188 {
189 startPlexusContainer();
190 }
191 catch ( PlexusContainerException e )
192 {
193 throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
194 }
195
196 try
197 {
198 if ( input.getFile().isFile() )
199 {
200 parse( input.getFile(), input.getEncoding(), input.getFormat(), output );
201 }
202 else
203 {
204 List<File> files;
205 try
206 {
207 files = FileUtils.getFiles( input.getFile(), "**/*." + input.getFormat(),
208 StringUtils.join( FileUtils.getDefaultExcludes(), ", " ) );
209 }
210 catch ( IOException e )
211 {
212 throw new ConverterException( "IOException: " + e.getMessage(), e );
213 }
214 catch ( IllegalStateException e )
215 {
216 throw new ConverterException( "IllegalStateException: " + e.getMessage(), e );
217 }
218
219 for ( File f : files )
220 {
221 parse( f, input.getEncoding(), input.getFormat(), output );
222 }
223 }
224 }
225 finally
226 {
227 stopPlexusContainer();
228 }
229 }
230
231
232 @Override
233 public void convert( InputReaderWrapper input, OutputStreamWrapper output )
234 throws UnsupportedFormatException, ConverterException
235 {
236 Objects.requireNonNull( input, "input is required" );
237 Objects.requireNonNull( output, "output is required" );
238
239 try
240 {
241 startPlexusContainer();
242 }
243 catch ( PlexusContainerException e )
244 {
245 throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
246 }
247
248 try
249 {
250 Parser parser;
251 try
252 {
253 parser = ConverterUtil.getParser( plexus, input.getFormat(), SUPPORTED_FROM_FORMAT );
254 parser.enableLogging( log );
255 }
256 catch ( ComponentLookupException e )
257 {
258 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
259 }
260
261 if ( getLog().isDebugEnabled() )
262 {
263 getLog().debug( "Parser used: " + parser.getClass().getName() );
264 }
265
266 SinkFactory sinkFactory;
267 try
268 {
269 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
270 }
271 catch ( ComponentLookupException e )
272 {
273 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
274 }
275
276 Sink sink;
277 try
278 {
279 sink = sinkFactory.createSink( output.getOutputStream(), output.getEncoding() );
280 }
281 catch ( IOException e )
282 {
283 throw new ConverterException( "IOException: " + e.getMessage(), e );
284 }
285 sink.enableLogging( log );
286
287 if ( getLog().isDebugEnabled() )
288 {
289 getLog().debug( "Sink used: " + sink.getClass().getName() );
290 }
291
292 parse( parser, input.getReader(), sink );
293 }
294 finally
295 {
296 stopPlexusContainer();
297 }
298 }
299
300
301 @Override
302 public void setFormatOutput( boolean formatOutput )
303 {
304 this.formatOutput = formatOutput;
305 }
306
307
308
309
310
311
312
313
314
315
316
317
318
319 private void parse( File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output )
320 throws ConverterException, UnsupportedFormatException
321 {
322 if ( getLog().isDebugEnabled() )
323 {
324 getLog().debug(
325 "Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
326 + inputEncoding + "' to '" + output.getFile().getAbsolutePath()
327 + "' with the encoding '" + output.getEncoding() + "'" );
328 }
329
330 if ( InputFileWrapper.AUTO_ENCODING.equals( inputEncoding ) )
331 {
332 inputEncoding = autoDetectEncoding( inputFile );
333 if ( getLog().isDebugEnabled() )
334 {
335 getLog().debug( "Auto detect encoding: " + inputEncoding );
336 }
337 }
338
339 if ( InputFileWrapper.AUTO_FORMAT.equals( inputFormat ) )
340 {
341 inputFormat = autoDetectFormat( inputFile, inputEncoding );
342 if ( getLog().isDebugEnabled() )
343 {
344 getLog().debug( "Auto detect input format: " + inputFormat );
345 }
346 }
347
348 Parser parser;
349 try
350 {
351 parser = ConverterUtil.getParser( plexus, inputFormat, SUPPORTED_FROM_FORMAT );
352 parser.enableLogging( log );
353 }
354 catch ( ComponentLookupException e )
355 {
356 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
357 }
358
359 File outputFile;
360 if ( output.getFile().isDirectory() )
361 {
362 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
363 }
364 else
365 {
366 if ( !SelectorUtils.match( "**.*", output.getFile().getName() ) )
367 {
368
369 output.getFile().mkdirs();
370 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
371 }
372 else
373 {
374 output.getFile().getParentFile().mkdirs();
375 outputFile = output.getFile();
376 }
377 }
378
379 Reader reader;
380 try
381 {
382 if ( inputEncoding != null )
383 {
384 if ( parser.getType() == Parser.XML_TYPE )
385 {
386 reader = ReaderFactory.newXmlReader( inputFile );
387 }
388 else
389 {
390 reader = ReaderFactory.newReader( inputFile, inputEncoding );
391 }
392 }
393 else
394 {
395 reader = ReaderFactory.newPlatformReader( inputFile );
396 }
397 }
398 catch ( IOException e )
399 {
400 throw new ConverterException( "IOException: " + e.getMessage(), e );
401 }
402
403 SinkFactory sinkFactory;
404 try
405 {
406 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
407 }
408 catch ( ComponentLookupException e )
409 {
410 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
411 }
412
413 Sink sink;
414 try
415 {
416 String outputEncoding;
417 if ( StringUtils.isEmpty( output.getEncoding() )
418 || output.getEncoding().equals( OutputFileWrapper.AUTO_ENCODING ) )
419 {
420 outputEncoding = inputEncoding;
421 }
422 else
423 {
424 outputEncoding = output.getEncoding();
425 }
426
427 OutputStream out = new FileOutputStream( outputFile );
428 sink = sinkFactory.createSink( out, outputEncoding );
429 }
430 catch ( IOException e )
431 {
432 throw new ConverterException( "IOException: " + e.getMessage(), e );
433 }
434
435 sink.enableLogging( log );
436
437 if ( getLog().isDebugEnabled() )
438 {
439 getLog().debug( "Sink used: " + sink.getClass().getName() );
440 }
441
442 parse( parser, reader, sink );
443
444 if ( formatOutput && ( DOCBOOK_SINK.equals( output.getFormat() ) || FO_SINK.equals( output.getFormat() )
445 || ITEXT_SINK.equals( output.getFormat() ) || XDOC_SINK.equals( output.getFormat() )
446 || XHTML_SINK.equals( output.getFormat() ) || XHTML5_SINK.equals( output.getFormat() ) ) )
447 {
448
449
450 if ( DOCBOOK_SINK.equals( output.getFormat() ) || DOCBOOK_PARSER.equals( inputFormat ) )
451 {
452 return;
453 }
454
455 try ( Reader r = ReaderFactory.newXmlReader( outputFile );
456 Writer w = WriterFactory.newXmlWriter( outputFile ) )
457 {
458 CharArrayWriter caw = new CharArrayWriter();
459 XmlUtil.prettyFormat( r, caw );
460 w.write( caw.toString() );
461 }
462 catch ( IOException e )
463 {
464 throw new ConverterException( "IOException: " + e.getMessage(), e );
465 }
466 }
467 }
468
469
470
471
472
473
474
475 private void parse( Parser parser, Reader reader, Sink sink )
476 throws ConverterException
477 {
478 try ( Reader r = reader )
479 {
480 parser.parse( r, sink );
481 }
482 catch ( ParseException | IOException e )
483 {
484 throw new ConverterException( "ParseException: " + e.getMessage(), e );
485 }
486 finally
487 {
488 sink.flush();
489 sink.close();
490 }
491 }
492
493
494
495
496
497
498 private void startPlexusContainer()
499 throws PlexusContainerException
500 {
501 if ( plexus != null )
502 {
503 return;
504 }
505
506 Map<Object, Object> context = new HashMap<>();
507 context.put( "basedir", new File( "" ).getAbsolutePath() );
508
509 ContainerConfiguration containerConfiguration = new DefaultContainerConfiguration();
510 containerConfiguration.setName( "Doxia" );
511 containerConfiguration.setContext( context );
512
513 plexus = new DefaultPlexusContainer( containerConfiguration );
514 }
515
516
517
518
519 private void stopPlexusContainer()
520 {
521 if ( plexus == null )
522 {
523 return;
524 }
525
526 plexus.dispose();
527 plexus = null;
528 }
529
530
531
532
533
534
535
536
537
538 static String autoDetectEncoding( File f )
539 {
540 if ( !f.isFile() )
541 {
542 throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
543 + "' is not a file, could not detect encoding." );
544 }
545 try
546 {
547 if ( XmlUtil.isXml( f ) )
548 {
549 try ( XmlStreamReader reader = new XmlStreamReader( f ) )
550 {
551 return reader.getEncoding();
552 }
553 }
554
555 try ( InputStream is = new BufferedInputStream( new FileInputStream( f ) ) )
556 {
557 CharsetDetector detector = new CharsetDetector();
558 detector.setText( is );
559 CharsetMatch match = detector.detect();
560
561 return match.getName().toUpperCase( Locale.ENGLISH );
562 }
563 }
564 catch ( IOException e )
565 {
566
567 }
568 throw new UnsupportedOperationException( format( "Could not detect the encoding for file: %s\n"
569 + "Specify explicitly the encoding.", f.getAbsolutePath() ) );
570 }
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585 static String autoDetectFormat( File f, String encoding )
586 {
587 if ( !f.isFile() )
588 {
589 throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
590 + "' is not a file, could not detect format." );
591 }
592
593 for ( String supportedFromFormat : SUPPORTED_FROM_FORMAT )
594 {
595
596 if ( APT_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f, supportedFromFormat ) )
597 {
598 return supportedFromFormat;
599 }
600 else if ( CONFLUENCE_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f,
601 supportedFromFormat ) )
602 {
603 return supportedFromFormat;
604 }
605 else if ( TWIKI_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f,
606 supportedFromFormat ) )
607 {
608 return supportedFromFormat;
609 }
610
611
612 String firstTag = getFirstTag( f );
613 if ( firstTag == null )
614 {
615
616 continue;
617 }
618 else if ( "article".equals( firstTag ) && DOCBOOK_PARSER.equalsIgnoreCase( supportedFromFormat ) )
619 {
620 return supportedFromFormat;
621 }
622 else if ( "faqs".equals( firstTag ) && FML_PARSER.equalsIgnoreCase( supportedFromFormat ) )
623 {
624 return supportedFromFormat;
625 }
626 else if ( "document".equals( firstTag ) && XDOC_PARSER.equalsIgnoreCase( supportedFromFormat ) )
627 {
628 return supportedFromFormat;
629 }
630 else if ( "html".equals( firstTag ) && XHTML_PARSER.equalsIgnoreCase( supportedFromFormat ) )
631 {
632 return supportedFromFormat;
633 }
634 }
635
636 throw new UnsupportedOperationException(
637 format( "Could not detect the Doxia format for file: %s\n Specify explicitly the Doxia format.",
638 f.getAbsolutePath() ) );
639 }
640
641
642
643
644
645
646 private static boolean isDoxiaFileName( File f, String format )
647 {
648 Objects.requireNonNull( f, "f is required." );
649
650 Pattern pattern = Pattern.compile( "(.*?)\\." + format.toLowerCase( Locale.ENGLISH ) + "$" );
651 Matcher matcher = pattern.matcher( f.getName().toLowerCase( Locale.ENGLISH ) );
652
653 return matcher.matches();
654 }
655
656
657
658
659
660 private static String getFirstTag( File xmlFile )
661 {
662 if ( xmlFile == null )
663 {
664 throw new IllegalArgumentException( "xmlFile is required." );
665 }
666 if ( !xmlFile.isFile() )
667 {
668 throw new IllegalArgumentException( "The file '" + xmlFile.getAbsolutePath() + "' is not a file." );
669 }
670
671
672 try ( Reader reader = ReaderFactory.newXmlReader( xmlFile ) )
673 {
674 XmlPullParser parser = new MXParser();
675 parser.setInput( reader );
676 int eventType = parser.getEventType();
677 while ( eventType != XmlPullParser.END_DOCUMENT )
678 {
679 if ( eventType == XmlPullParser.START_TAG )
680 {
681 return parser.getName();
682 }
683 eventType = parser.nextToken();
684 }
685 }
686 catch ( IOException | XmlPullParserException e )
687 {
688 return null;
689 }
690
691 return null;
692 }
693 }