View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.PrintWriter;
20  import java.io.Writer;
21  import java.util.LinkedList;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.codehaus.plexus.util.StringUtils;
26  
27  /**
28   * Implementation of XMLWriter which emits nicely formatted documents.
29   *
30   *
31   */
32  public class PrettyPrintXMLWriter
33      implements XMLWriter
34  {
35      /** Line separator ("\n" on UNIX) */
36      protected static final String LS = System.getProperty( "line.separator" );
37  
38      private PrintWriter writer;
39  
40      private LinkedList<String> elementStack = new LinkedList<String>();
41  
42      private boolean tagInProgress;
43  
44      private int depth;
45  
46      private String lineIndenter;
47  
48      private String lineSeparator;
49  
50      private String encoding;
51  
52      private String docType;
53  
54      private boolean readyForNewLine;
55  
56      private boolean tagIsEmpty;
57  
58      /**
59       * @param writer not null
60       * @param lineIndenter could be null, but the normal way is some spaces.
61       */
62      public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter )
63      {
64          this( writer, lineIndenter, null, null );
65      }
66  
67      /**
68       * @param writer not null
69       * @param lineIndenter could be null, but the normal way is some spaces.
70       */
71      public PrettyPrintXMLWriter( Writer writer, String lineIndenter )
72      {
73          this( new PrintWriter( writer ), lineIndenter );
74      }
75  
76      /**
77       * @param writer not null
78       */
79      public PrettyPrintXMLWriter( PrintWriter writer )
80      {
81          this( writer, null, null );
82      }
83  
84      /**
85       * @param writer not null
86       */
87      public PrettyPrintXMLWriter( Writer writer )
88      {
89          this( new PrintWriter( writer ) );
90      }
91  
92      /**
93       * @param writer not null
94       * @param lineIndenter could be null, but the normal way is some spaces.
95       * @param encoding could be null or invalid.
96       * @param doctype could be null.
97       */
98      public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter, String encoding, String doctype )
99      {
100         this( writer, lineIndenter, LS, encoding, doctype );
101     }
102 
103     /**
104      * @param writer not null
105      * @param lineIndenter could be null, but the normal way is some spaces.
106      * @param encoding could be null or invalid.
107      * @param doctype could be null.
108      */
109     public PrettyPrintXMLWriter( Writer writer, String lineIndenter, String encoding, String doctype )
110     {
111         this( new PrintWriter( writer ), lineIndenter, encoding, doctype );
112     }
113 
114     /**
115      * @param writer not null
116      * @param encoding could be null or invalid.
117      * @param doctype could be null.
118      */
119     public PrettyPrintXMLWriter( PrintWriter writer, String encoding, String doctype )
120     {
121         this( writer, "  ", encoding, doctype );
122     }
123 
124     /**
125      * @param writer not null
126      * @param encoding could be null or invalid.
127      * @param doctype could be null.
128      */
129     public PrettyPrintXMLWriter( Writer writer, String encoding, String doctype )
130     {
131         this( new PrintWriter( writer ), encoding, doctype );
132     }
133 
134     /**
135      * @param writer not null
136      * @param lineIndenter could be null, but the normal way is some spaces.
137      * @param lineSeparator could be null, but the normal way is valid line separator ("\n" on UNIX).
138      * @param encoding could be null or invalid.
139      * @param doctype could be null.
140      */
141     public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter, String lineSeparator, String encoding,
142                                  String doctype )
143     {
144         setWriter( writer );
145 
146         setLineIndenter( lineIndenter );
147 
148         setLineSeparator( lineSeparator );
149 
150         setEncoding( encoding );
151 
152         setDocType( doctype );
153 
154         if ( doctype != null || encoding != null )
155         {
156             writeDocumentHeaders();
157         }
158     }
159 
160     /** {@inheritDoc} */
161     @Override
162     public void startElement( String name )
163     {
164         tagIsEmpty = false;
165 
166         finishTag();
167 
168         write( "<" );
169 
170         write( name );
171 
172         elementStack.addLast( name );
173 
174         tagInProgress = true;
175 
176         setDepth( getDepth() + 1 );
177 
178         readyForNewLine = true;
179 
180         tagIsEmpty = true;
181     }
182 
183     /** {@inheritDoc} */
184     @Override
185     public void writeText( String text )
186     {
187         writeText( text, true );
188     }
189 
190     /** {@inheritDoc} */
191     @Override
192     public void writeMarkup( String text )
193     {
194         writeText( text, false );
195     }
196 
197     private void writeText( String text, boolean escapeXml )
198     {
199         readyForNewLine = false;
200 
201         tagIsEmpty = false;
202 
203         finishTag();
204 
205         if ( escapeXml )
206         {
207             text = escapeXml( text );
208         }
209 
210         write( StringUtils.unifyLineSeparators( text, lineSeparator ) );
211     }
212 
213     private static final Pattern amp = Pattern.compile( "&" );
214 
215     private static final Pattern lt = Pattern.compile( "<" );
216 
217     private static final Pattern gt = Pattern.compile( ">" );
218 
219     private static final Pattern dqoute = Pattern.compile( "\"" );
220 
221     private static final Pattern sqoute = Pattern.compile( "\'" );
222 
223     private static String escapeXml( String text )
224     {
225         if ( text.indexOf( '&' ) >= 0 )
226         {
227             text = amp.matcher( text ).replaceAll( "&amp;" );
228         }
229         if ( text.indexOf( '<' ) >= 0 )
230         {
231             text = lt.matcher( text ).replaceAll( "&lt;" );
232         }
233         if ( text.indexOf( '>' ) >= 0 )
234         {
235             text = gt.matcher( text ).replaceAll( "&gt;" );
236         }
237         if ( text.indexOf( '"' ) >= 0 )
238         {
239             text = dqoute.matcher( text ).replaceAll( "&quot;" );
240         }
241         if ( text.indexOf( '\'' ) >= 0 )
242         {
243             text = sqoute.matcher( text ).replaceAll( "&apos;" );
244         }
245 
246         return text;
247     }
248 
249     private static final String crlf_str = "\r\n";
250 
251     private static final Pattern crlf = Pattern.compile( crlf_str );
252 
253     private static final Pattern lowers = Pattern.compile( "([\000-\037])" );
254 
255     private static String escapeXmlAttribute( String text )
256     {
257         text = escapeXml( text );
258 
259         // Windows
260         Matcher crlfmatcher = crlf.matcher( text );
261         if ( text.contains( crlf_str ) )
262         {
263             text = crlfmatcher.replaceAll( "&#10;" );
264         }
265 
266         Matcher m = lowers.matcher( text );
267         StringBuffer b = new StringBuffer();
268         while ( m.find() )
269         {
270             m = m.appendReplacement( b, "&#" + Integer.toString( m.group( 1 ).charAt( 0 ) ) + ";" );
271         }
272         m.appendTail( b );
273 
274         return b.toString();
275     }
276 
277     /** {@inheritDoc} */
278     @Override
279     public void addAttribute( String key, String value )
280     {
281         write( " " );
282 
283         write( key );
284 
285         write( "=\"" );
286 
287         write( escapeXmlAttribute( value ) );
288 
289         write( "\"" );
290     }
291 
292     /** {@inheritDoc} */
293     @Override
294     public void endElement()
295     {
296         setDepth( getDepth() - 1 );
297 
298         if ( tagIsEmpty )
299         {
300             write( "/" );
301 
302             readyForNewLine = false;
303 
304             finishTag();
305 
306             elementStack.removeLast();
307         }
308         else
309         {
310             finishTag();
311 
312             // see issue #51: https://github.com/codehaus-plexus/plexus-utils/issues/51
313             // Rationale: replaced 1 write() with string concatenations with 3 write()
314             // (this avoids the string concatenation optimization bug detected in Java 7)
315             // TODO: change the below code to a more efficient expression when the library
316             // be ready to target Java 8.
317             write( "</" );
318             write( elementStack.removeLast() );
319             write( ">" );
320         }
321 
322         readyForNewLine = true;
323     }
324 
325     /**
326      * Write a string to the underlying writer
327      * 
328      * @param str
329      */
330     private void write( String str )
331     {
332         getWriter().write( str );
333     }
334 
335     private void finishTag()
336     {
337         if ( tagInProgress )
338         {
339             write( ">" );
340         }
341 
342         tagInProgress = false;
343 
344         if ( readyForNewLine )
345         {
346             endOfLine();
347         }
348         readyForNewLine = false;
349 
350         tagIsEmpty = false;
351     }
352 
353     /**
354      * Get the string used as line indenter
355      *
356      * @return the line indenter
357      */
358     protected String getLineIndenter()
359     {
360         return lineIndenter;
361     }
362 
363     /**
364      * Set the string used as line indenter
365      *
366      * @param lineIndenter new line indenter, could be null, but the normal way is some spaces.
367      */
368     protected void setLineIndenter( String lineIndenter )
369     {
370         this.lineIndenter = lineIndenter;
371     }
372 
373     /**
374      * Get the string used as line separator or LS if not set.
375      *
376      * @return the line separator
377      * @see #LS
378      */
379     protected String getLineSeparator()
380     {
381         return lineSeparator;
382     }
383 
384     /**
385      * Set the string used as line separator
386      *
387      * @param lineSeparator new line separator, could be null but the normal way is valid line separator ("\n" on UNIX).
388      */
389     protected void setLineSeparator( String lineSeparator )
390     {
391         this.lineSeparator = lineSeparator;
392     }
393 
394     /**
395      * Write the end of line character (using specified line separator) and start new line with indentation
396      *
397      * @see #getLineIndenter()
398      * @see #getLineSeparator()
399      */
400     protected void endOfLine()
401     {
402         write( getLineSeparator() );
403 
404         for ( int i = 0; i < getDepth(); i++ )
405         {
406             write( getLineIndenter() );
407         }
408     }
409 
410     private void writeDocumentHeaders()
411     {
412         write( "<?xml version=\"1.0\"" );
413 
414         if ( getEncoding() != null )
415         {
416             write( " encoding=\"" + getEncoding() + "\"" );
417         }
418 
419         write( "?>" );
420 
421         endOfLine();
422 
423         if ( getDocType() != null )
424         {
425             write( "<!DOCTYPE " );
426 
427             write( getDocType() );
428 
429             write( ">" );
430 
431             endOfLine();
432         }
433     }
434 
435     /**
436      * Set the underlying writer
437      *
438      * @param writer not null writer
439      */
440     protected void setWriter( PrintWriter writer )
441     {
442         if ( writer == null )
443         {
444             throw new IllegalArgumentException( "writer could not be null" );
445         }
446 
447         this.writer = writer;
448     }
449 
450     /**
451      * Get the underlying writer
452      *
453      * @return the underlying writer
454      */
455     protected PrintWriter getWriter()
456     {
457         return writer;
458     }
459 
460     /**
461      * Set the depth in the xml indentation
462      *
463      * @param depth new depth
464      */
465     protected void setDepth( int depth )
466     {
467         this.depth = depth;
468     }
469 
470     /**
471      * Get the current depth in the xml indentation
472      *
473      * @return the current depth
474      */
475     protected int getDepth()
476     {
477         return depth;
478     }
479 
480     /**
481      * Set the encoding in the xml
482      *
483      * @param encoding new encoding
484      */
485     protected void setEncoding( String encoding )
486     {
487         this.encoding = encoding;
488     }
489 
490     /**
491      * Get the current encoding in the xml
492      *
493      * @return the current encoding
494      */
495     protected String getEncoding()
496     {
497         return encoding;
498     }
499 
500     /**
501      * Set the docType in the xml
502      *
503      * @param docType new docType
504      */
505     protected void setDocType( String docType )
506     {
507         this.docType = docType;
508     }
509 
510     /**
511      * Get the docType in the xml
512      *
513      * @return the current docType
514      */
515     protected String getDocType()
516     {
517         return docType;
518     }
519 
520     /**
521      * @return the current elementStack;
522      */
523     protected LinkedList<String> getElementStack()
524     {
525         return elementStack;
526     }
527 }