View Javadoc
1   package org.apache.maven.doxia;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.BufferedInputStream;
23  import java.io.CharArrayWriter;
24  import java.io.File;
25  import java.io.FileInputStream;
26  import java.io.FileOutputStream;
27  import java.io.IOException;
28  import java.io.InputStream;
29  import java.io.OutputStream;
30  import java.io.Reader;
31  import java.io.Writer;
32  import java.util.HashMap;
33  import java.util.List;
34  import java.util.Locale;
35  import java.util.Map;
36  import java.util.Objects;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  import org.apache.maven.doxia.logging.Log;
41  import org.apache.maven.doxia.logging.SystemStreamLog;
42  import org.apache.maven.doxia.parser.ParseException;
43  import org.apache.maven.doxia.parser.Parser;
44  import org.apache.maven.doxia.sink.Sink;
45  import org.apache.maven.doxia.sink.SinkFactory;
46  import org.apache.maven.doxia.util.ConverterUtil;
47  import org.apache.maven.doxia.wrapper.InputFileWrapper;
48  import org.apache.maven.doxia.wrapper.InputReaderWrapper;
49  import org.apache.maven.doxia.wrapper.OutputFileWrapper;
50  import org.apache.maven.doxia.wrapper.OutputStreamWrapper;
51  import org.codehaus.plexus.ContainerConfiguration;
52  import org.codehaus.plexus.DefaultContainerConfiguration;
53  import org.codehaus.plexus.DefaultPlexusContainer;
54  import org.codehaus.plexus.PlexusContainer;
55  import org.codehaus.plexus.PlexusContainerException;
56  import org.codehaus.plexus.component.repository.exception.ComponentLookupException;
57  import org.codehaus.plexus.util.FileUtils;
58  import org.codehaus.plexus.util.ReaderFactory;
59  import org.codehaus.plexus.util.SelectorUtils;
60  import org.codehaus.plexus.util.StringUtils;
61  import org.codehaus.plexus.util.WriterFactory;
62  import org.apache.commons.io.input.XmlStreamReader;
63  import org.codehaus.plexus.util.xml.XmlUtil;
64  import org.codehaus.plexus.util.xml.pull.MXParser;
65  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
66  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
67  
68  import com.ibm.icu.text.CharsetDetector;
69  import com.ibm.icu.text.CharsetMatch;
70  
71  import static java.lang.String.format;
72  
73  /**
74   * Default implementation of <code>Converter</code>
75   *
76   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
77   */
78  public class DefaultConverter
79      implements Converter
80  {
81      private static final String APT_PARSER = "apt";
82  
83      private static final String CONFLUENCE_PARSER = "confluence";
84  
85      private static final String DOCBOOK_PARSER = "docbook";
86  
87      private static final String FML_PARSER = "fml";
88  
89      private static final String TWIKI_PARSER = "twiki";
90  
91      private static final String XDOC_PARSER = "xdoc";
92  
93      private static final String XHTML_PARSER = "xhtml";
94  
95      private static final String XHTML5_PARSER = "xhtml5";
96  
97      private static final String MARKDOWN_PARSER = "markdown";
98  
99      /** Supported input format, i.e. supported Doxia parser */
100     public static final String[] SUPPORTED_FROM_FORMAT =
101         { APT_PARSER, CONFLUENCE_PARSER, DOCBOOK_PARSER, FML_PARSER, MARKDOWN_PARSER, TWIKI_PARSER,
102             XDOC_PARSER, XHTML_PARSER, XHTML5_PARSER };
103 
104     private static final String APT_SINK = "apt";
105 
106     private static final String CONFLUENCE_SINK = "confluence";
107 
108     private static final String DOCBOOK_SINK = "docbook";
109 
110     private static final String FO_SINK = "fo";
111 
112     private static final String ITEXT_SINK = "itext";
113 
114     private static final String LATEX_SINK = "latex";
115 
116     private static final String RTF_SINK = "rtf";
117 
118     private static final String TWIKI_SINK = "twiki";
119 
120     private static final String XDOC_SINK = "xdoc";
121 
122     private static final String XHTML_SINK = "xhtml";
123 
124     private static final String XHTML5_SINK = "xhtml5";
125 
126     private static final String MARKDOWN_SINK = "markdown";
127 
128     /** Supported output format, i.e. supported Doxia Sink */
129     public static final String[] SUPPORTED_TO_FORMAT =
130         { APT_SINK, CONFLUENCE_SINK, DOCBOOK_SINK, FO_SINK, ITEXT_SINK, LATEX_SINK, MARKDOWN_SINK, RTF_SINK, TWIKI_SINK,
131             XDOC_SINK, XHTML_SINK, XHTML5_SINK };
132 
133     /** Flag to format the generated files, actually only for XML based sinks. */
134     private boolean formatOutput;
135 
136     /** Plexus container */
137     private PlexusContainer plexus;
138 
139     /** Doxia logger */
140     private Log log;
141 
142     /** {@inheritDoc} */
143     @Override
144     public void enableLogging( Log log )
145     {
146         this.log = log;
147     }
148 
149     /**
150      * Returns a logger for this sink.
151      * If no logger has been configured, a new SystemStreamLog is returned.
152      *
153      * @return Log
154      */
155     protected Log getLog()
156     {
157         if ( log == null )
158         {
159             log = new SystemStreamLog();
160         }
161 
162         return log;
163     }
164 
165     /** {@inheritDoc} */
166     @Override
167     public String[] getInputFormats()
168     {
169         return SUPPORTED_FROM_FORMAT;
170     }
171 
172     /** {@inheritDoc} */
173     @Override
174     public String[] getOutputFormats()
175     {
176         return SUPPORTED_TO_FORMAT;
177     }
178 
179     /** {@inheritDoc} */
180     @Override
181     public void convert( InputFileWrapper input, OutputFileWrapper output )
182         throws UnsupportedFormatException, ConverterException
183     {
184         Objects.requireNonNull( input, "input is required" );
185         Objects.requireNonNull( output, "output is required" );
186 
187         try
188         {
189             startPlexusContainer();
190         }
191         catch ( PlexusContainerException e )
192         {
193             throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
194         }
195 
196         try
197         {
198             if ( input.getFile().isFile() )
199             {
200                 parse( input.getFile(), input.getEncoding(), input.getFormat(), output );
201             }
202             else
203             {
204                 List<File> files;
205                 try
206                 {
207                     files = FileUtils.getFiles( input.getFile(), "**/*." + input.getFormat(),
208                                             StringUtils.join( FileUtils.getDefaultExcludes(), ", " ) );
209                 }
210                 catch ( IOException e )
211                 {
212                     throw new ConverterException( "IOException: " + e.getMessage(), e );
213                 }
214                 catch ( IllegalStateException e )
215                 {
216                     throw new ConverterException( "IllegalStateException: " + e.getMessage(), e );
217                 }
218 
219                 for ( File f : files )
220                 {
221                     parse( f, input.getEncoding(), input.getFormat(), output );
222                 }
223             }
224         }
225         finally
226         {
227             stopPlexusContainer();
228         }
229     }
230 
231     /** {@inheritDoc} */
232     @Override
233     public void convert( InputReaderWrapper input, OutputStreamWrapper output )
234         throws UnsupportedFormatException, ConverterException
235     {
236         Objects.requireNonNull( input, "input is required" );
237         Objects.requireNonNull( output, "output is required" );
238 
239         try
240         {
241             startPlexusContainer();
242         }
243         catch ( PlexusContainerException e )
244         {
245             throw new ConverterException( "PlexusContainerException: " + e.getMessage(), e );
246         }
247 
248         try
249         {
250             Parser parser;
251             try
252             {
253                 parser = ConverterUtil.getParser( plexus, input.getFormat(), SUPPORTED_FROM_FORMAT );
254                 parser.enableLogging( log );
255             }
256             catch ( ComponentLookupException e )
257             {
258                 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
259             }
260 
261             if ( getLog().isDebugEnabled() )
262             {
263                 getLog().debug( "Parser used: " + parser.getClass().getName() );
264             }
265 
266             SinkFactory sinkFactory;
267             try
268             {
269                 sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
270             }
271             catch ( ComponentLookupException e )
272             {
273                 throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
274             }
275 
276             Sink sink;
277             try
278             {
279                 sink = sinkFactory.createSink( output.getOutputStream(), output.getEncoding() );
280             }
281             catch ( IOException e )
282             {
283                 throw new ConverterException( "IOException: " + e.getMessage(), e );
284             }
285             sink.enableLogging( log );
286 
287             if ( getLog().isDebugEnabled() )
288             {
289                 getLog().debug( "Sink used: " + sink.getClass().getName() );
290             }
291 
292             parse( parser, input.getReader(), sink );
293         }
294         finally
295         {
296             stopPlexusContainer();
297         }
298     }
299 
300     /** {@inheritDoc} */
301     @Override
302     public void setFormatOutput( boolean formatOutput )
303     {
304         this.formatOutput = formatOutput;
305     }
306 
307     // ----------------------------------------------------------------------
308     // Private methods
309     // ----------------------------------------------------------------------
310 
311     /**
312      * @param inputFile a not null existing file.
313      * @param inputEncoding a not null supported encoding or {@link InputFileWrapper#AUTO_ENCODING}
314      * @param inputFormat  a not null supported format or {@link InputFileWrapper#AUTO_FORMAT}
315      * @param output not null OutputFileWrapper object
316      * @throws ConverterException if any
317      * @throws UnsupportedFormatException if any
318      */
319     private void parse( File inputFile, String inputEncoding, String inputFormat, OutputFileWrapper output )
320         throws ConverterException, UnsupportedFormatException
321     {
322         if ( getLog().isDebugEnabled() )
323         {
324             getLog().debug(
325                             "Parsing file from '" + inputFile.getAbsolutePath() + "' with the encoding '"
326                                 + inputEncoding + "' to '" + output.getFile().getAbsolutePath()
327                                 + "' with the encoding '" + output.getEncoding() + "'" );
328         }
329 
330         if ( InputFileWrapper.AUTO_ENCODING.equals( inputEncoding ) )
331         {
332             inputEncoding = autoDetectEncoding( inputFile );
333             if ( getLog().isDebugEnabled() )
334             {
335                 getLog().debug( "Auto detect encoding: " + inputEncoding );
336             }
337         }
338 
339         if ( InputFileWrapper.AUTO_FORMAT.equals( inputFormat ) )
340         {
341             inputFormat = autoDetectFormat( inputFile, inputEncoding );
342             if ( getLog().isDebugEnabled() )
343             {
344                 getLog().debug( "Auto detect input format: " + inputFormat );
345             }
346         }
347 
348         Parser parser;
349         try
350         {
351             parser = ConverterUtil.getParser( plexus, inputFormat, SUPPORTED_FROM_FORMAT );
352             parser.enableLogging( log );
353         }
354         catch ( ComponentLookupException e )
355         {
356             throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
357         }
358 
359         File outputFile;
360         if ( output.getFile().isDirectory() )
361         {
362             outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
363         }
364         else
365         {
366             if ( !SelectorUtils.match( "**.*", output.getFile().getName() ) )
367             {
368                 // assume it is a directory
369                 output.getFile().mkdirs();
370                 outputFile = new File( output.getFile(), inputFile.getName() + "." + output.getFormat() );
371             }
372             else
373             {
374                 output.getFile().getParentFile().mkdirs();
375                 outputFile = output.getFile();
376             }
377         }
378 
379         Reader reader;
380         try
381         {
382             if ( inputEncoding != null )
383             {
384                 if ( parser.getType() == Parser.XML_TYPE )
385                 {
386                     reader = ReaderFactory.newXmlReader( inputFile );
387                 }
388                 else
389                 {
390                     reader = ReaderFactory.newReader( inputFile, inputEncoding );
391                 }
392             }
393             else
394             {
395                 reader = ReaderFactory.newPlatformReader( inputFile );
396             }
397         }
398         catch ( IOException e )
399         {
400             throw new ConverterException( "IOException: " + e.getMessage(), e );
401         }
402 
403         SinkFactory sinkFactory;
404         try
405         {
406             sinkFactory = ConverterUtil.getSinkFactory( plexus, output.getFormat(), SUPPORTED_TO_FORMAT );
407         }
408         catch ( ComponentLookupException e )
409         {
410             throw new ConverterException( "ComponentLookupException: " + e.getMessage(), e );
411         }
412 
413         Sink sink;
414         try
415         {
416             String outputEncoding;
417             if ( StringUtils.isEmpty( output.getEncoding() )
418                 || output.getEncoding().equals( OutputFileWrapper.AUTO_ENCODING ) )
419             {
420                 outputEncoding = inputEncoding;
421             }
422             else
423             {
424                 outputEncoding = output.getEncoding();
425             }
426 
427             OutputStream out = new FileOutputStream( outputFile );
428             sink = sinkFactory.createSink( out, outputEncoding );
429         }
430         catch ( IOException e )
431         {
432             throw new ConverterException( "IOException: " + e.getMessage(), e );
433         }
434 
435         sink.enableLogging( log );
436 
437         if ( getLog().isDebugEnabled() )
438         {
439             getLog().debug( "Sink used: " + sink.getClass().getName() );
440         }
441 
442         parse( parser, reader, sink );
443 
444         if ( formatOutput && ( DOCBOOK_SINK.equals( output.getFormat() ) || FO_SINK.equals( output.getFormat() )
445             || ITEXT_SINK.equals( output.getFormat() ) || XDOC_SINK.equals( output.getFormat() )
446             || XHTML_SINK.equals( output.getFormat() ) || XHTML5_SINK.equals( output.getFormat() ) ) )
447         {
448             // format all xml files excluding docbook which is buggy
449             // TODO Add doc book format
450             if ( DOCBOOK_SINK.equals( output.getFormat() ) || DOCBOOK_PARSER.equals( inputFormat ) )
451             {
452                 return;
453             }
454             
455             try ( Reader r = ReaderFactory.newXmlReader( outputFile );
456                   Writer w = WriterFactory.newXmlWriter( outputFile ) )
457             {
458                 CharArrayWriter caw = new CharArrayWriter();
459                 XmlUtil.prettyFormat( r, caw );
460                 w.write( caw.toString() );
461             }
462             catch ( IOException e )
463             {
464                 throw new ConverterException( "IOException: " + e.getMessage(), e );
465             }
466         }
467     }
468 
469     /**
470      * @param parser not null
471      * @param reader not null
472      * @param sink not null
473      * @throws ConverterException if any
474      */
475     private void parse( Parser parser, Reader reader, Sink sink )
476         throws ConverterException
477     {
478         try ( Reader r = reader )
479         {
480             parser.parse( r, sink );
481         }
482         catch ( ParseException | IOException e )
483         {
484             throw new ConverterException( "ParseException: " + e.getMessage(), e );
485         }
486         finally
487         {
488             sink.flush();
489             sink.close();
490         }
491     }
492 
493     /**
494      * Start the Plexus container.
495      *
496      * @throws PlexusContainerException if any
497      */
498     private void startPlexusContainer()
499         throws PlexusContainerException
500     {
501         if ( plexus != null )
502         {
503             return;
504         }
505 
506         Map<Object, Object> context = new HashMap<>();
507         context.put( "basedir", new File( "" ).getAbsolutePath() );
508 
509         ContainerConfiguration containerConfiguration = new DefaultContainerConfiguration();
510         containerConfiguration.setName( "Doxia" );
511         containerConfiguration.setContext( context );
512 
513         plexus = new DefaultPlexusContainer( containerConfiguration );
514     }
515 
516     /**
517      * Stop the Plexus container.
518      */
519     private void stopPlexusContainer()
520     {
521         if ( plexus == null )
522         {
523             return;
524         }
525 
526         plexus.dispose();
527         plexus = null;
528     }
529 
530     /**
531      * @param f not null file
532      * @return the detected encoding for f or <code>null</code> if not able to detect it.
533      * @throws IllegalArgumentException if f is not a file.
534      * @throws UnsupportedOperationException if could not detect the file encoding.
535      * @see XmlStreamReader#getEncoding() for xml files
536      * @see CharsetDetector#detect() for text files
537      */
538     static String autoDetectEncoding( File f )
539     {
540         if ( !f.isFile() )
541         {
542             throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
543                 + "' is not a file, could not detect encoding." );
544         }
545         try
546         {
547             if ( XmlUtil.isXml( f ) )
548             {
549                 try ( XmlStreamReader reader = new XmlStreamReader( f ) )
550                 {
551                     return reader.getEncoding();
552                 }
553             }
554 
555             try ( InputStream is = new BufferedInputStream( new FileInputStream( f ) ) )
556             {
557                 CharsetDetector detector = new CharsetDetector();
558                 detector.setText( is );
559                 CharsetMatch match = detector.detect();
560 
561                 return match.getName().toUpperCase( Locale.ENGLISH );
562             }
563         }
564         catch ( IOException e )
565         {
566             // nop
567         }
568         throw new UnsupportedOperationException( format( "Could not detect the encoding for file: %s\n"
569                 + "Specify explicitly the encoding.", f.getAbsolutePath() ) );
570     }
571 
572     /**
573      * Auto detect Doxia format for the given file depending:
574      * <ul>
575      * <li>the file name for TextMarkup based Doxia files</li>
576      * <li>the file content for XMLMarkup based Doxia files</li>
577      * </ul>
578      *
579      * @param f not null file
580      * @param encoding a not null encoding.
581      * @return the detected encoding from f.
582      * @throws IllegalArgumentException if f is not a file.
583      * @throws UnsupportedOperationException if could not detect the Doxia format.
584      */
585     static String autoDetectFormat( File f, String encoding )
586     {
587         if ( !f.isFile() )
588         {
589             throw new IllegalArgumentException( "The file '" + f.getAbsolutePath()
590                 + "' is not a file, could not detect format." );
591         }
592 
593         for ( String supportedFromFormat : SUPPORTED_FROM_FORMAT )
594         {
595             // Handle Doxia text files
596             if ( APT_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f, supportedFromFormat ) )
597             {
598                 return supportedFromFormat;
599             }
600             else if ( CONFLUENCE_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f,
601                     supportedFromFormat ) )
602             {
603                 return supportedFromFormat;
604             }
605             else if ( TWIKI_PARSER.equalsIgnoreCase( supportedFromFormat ) && isDoxiaFileName( f,
606                     supportedFromFormat ) )
607             {
608                 return supportedFromFormat;
609             }
610 
611             // Handle Doxia xml files
612             String firstTag = getFirstTag( f );
613             if ( firstTag == null )
614             {
615                 //noinspection UnnecessaryContinue
616                 continue;
617             }
618             else if ( "article".equals( firstTag ) && DOCBOOK_PARSER.equalsIgnoreCase( supportedFromFormat ) )
619             {
620                 return supportedFromFormat;
621             }
622             else if ( "faqs".equals( firstTag ) && FML_PARSER.equalsIgnoreCase( supportedFromFormat ) )
623             {
624                 return supportedFromFormat;
625             }
626             else if ( "document".equals( firstTag ) && XDOC_PARSER.equalsIgnoreCase( supportedFromFormat ) )
627             {
628                 return supportedFromFormat;
629             }
630             else if ( "html".equals( firstTag ) && XHTML_PARSER.equalsIgnoreCase( supportedFromFormat ) )
631             {
632                 return supportedFromFormat;
633             }
634         }
635 
636         throw new UnsupportedOperationException(
637                 format( "Could not detect the Doxia format for file: %s\n Specify explicitly the Doxia format.",
638                         f.getAbsolutePath() ) );
639     }
640 
641     /**
642      * @param f not null
643      * @param format could be null
644      * @return <code>true</code> if the file name computes the format.
645      */
646     private static boolean isDoxiaFileName( File f, String format )
647     {
648         Objects.requireNonNull( f, "f is required." );
649 
650         Pattern pattern = Pattern.compile( "(.*?)\\." + format.toLowerCase( Locale.ENGLISH ) + "$" );
651         Matcher matcher = pattern.matcher( f.getName().toLowerCase( Locale.ENGLISH ) );
652 
653         return matcher.matches();
654     }
655 
656     /**
657      * @param xmlFile not null and should be a file.
658      * @return the first tag name if found, <code>null</code> in other case.
659      */
660     private static String getFirstTag( File xmlFile )
661     {
662         if ( xmlFile == null )
663         {
664             throw new IllegalArgumentException( "xmlFile is required." );
665         }
666         if ( !xmlFile.isFile() )
667         {
668             throw new IllegalArgumentException( "The file '" + xmlFile.getAbsolutePath() + "' is not a file." );
669         }
670 
671 
672         try ( Reader reader = ReaderFactory.newXmlReader( xmlFile ) )
673         {
674             XmlPullParser parser = new MXParser();
675             parser.setInput( reader );
676             int eventType = parser.getEventType();
677             while ( eventType != XmlPullParser.END_DOCUMENT )
678             {
679                 if ( eventType == XmlPullParser.START_TAG )
680                 {
681                     return parser.getName();
682                 }
683                 eventType = parser.nextToken();
684             }
685         }
686         catch ( IOException | XmlPullParserException e )
687         {
688             return null;
689         }
690 
691         return null;
692     }
693 }