View Javadoc
1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
5    * All rights reserved.
6    *
7    * Redistribution and use in source and binary forms, with or without
8    * modification, are permitted provided that the following conditions
9    * are met:
10   *
11   * 1. Redistributions of source code must retain the above copyright
12   *    notice, this list of conditions, and the following disclaimer.
13   *
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions, and the disclaimer that follows 
16   *    these conditions in the documentation and/or other materials 
17   *    provided with the distribution.
18   *
19   * 3. The name "JDOM" must not be used to endorse or promote products
20   *    derived from this software without prior written permission.  For
21   *    written permission, please contact <request_AT_jdom_DOT_org>.
22   *
23   * 4. Products derived from this software may not be called "JDOM", nor
24   *    may "JDOM" appear in their name, without prior written permission
25   *    from the JDOM Project Management <request_AT_jdom_DOT_org>.
26   *
27   * In addition, we request (but do not require) that you include in the 
28   * end-user documentation provided with the redistribution and/or in the 
29   * software itself an acknowledgement equivalent to the following:
30   *     "This product includes software developed by the
31   *      JDOM Project (http://www.jdom.org/)."
32   * Alternatively, the acknowledgment may be graphical using the logos 
33   * available at http://www.jdom.org/images/logos.
34   *
35   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38   * DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
39   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46   * SUCH DAMAGE.
47   *
48   * This software consists of voluntary contributions made by many 
49   * individuals on behalf of the JDOM Project and was originally 
50   * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
51   * Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
52   * on the JDOM Project, please see <http://www.jdom.org/>.
53   */
54  
import org.jdom.output.EscapeStrategy;

import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
58  
59  /**
60   * Class to encapsulate XMLOutputter format options.
61   * Typical users can use the standard format configurations obtained by
62   * {@link #getRawFormat} (no whitespace changes),
63   * {@link #getPrettyFormat} (whitespace beautification), and
64   * {@link #getCompactFormat} (whitespace normalization).
65   * <p/>
66   * Several modes are available to effect the way textual content is printed.
67   * See the documentation for {@link TextMode} for details.
68   *
69   * @author Jason Hunter
70   * @version $Revision: 942523 $, $Date: 2015-03-06 01:20:33 +0100 (Fri, 06 Mar 2015) $
71   */
72  public class Format
73      implements Cloneable
74  {
75  
76      /**
77       * Returns a new Format object that performs no whitespace changes, uses
78       * the UTF-8 encoding, doesn't expand empty elements, includes the
79       * declaration and encoding, and uses the default entity escape strategy.
80       * Tweaks can be made to the returned Format instance without affecting
81       * other instances.
82       *
83       * @return a Format with no whitespace changes
84       */
85      public static Format getRawFormat()
86      {
87          return new Format();
88      }
89  
90      /**
91       * Returns a new Format object that performs whitespace beautification with
92       * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
93       * includes the declaration and encoding, and uses the default entity
94       * escape strategy.
95       * Tweaks can be made to the returned Format instance without affecting
96       * other instances.
97       *
98       * @return a Format with whitespace beautification
99       */
100     public static Format getPrettyFormat()
101     {
102         Format f = new Format();
103         f.setIndent( STANDARD_INDENT );
104         f.setTextMode( TextMode.TRIM );
105         return f;
106     }
107 
108     /**
109      * Returns a new Format object that performs whitespace normalization, uses
110      * the UTF-8 encoding, doesn't expand empty elements, includes the
111      * declaration and encoding, and uses the default entity escape strategy.
112      * Tweaks can be made to the returned Format instance without affecting
113      * other instances.
114      *
115      * @return a Format with whitespace normalization
116      */
117     public static Format getCompactFormat()
118     {
119         Format f = new Format();
120         f.setTextMode( TextMode.NORMALIZE );
121         return f;
122     }
123 
124     /** standard value to indent by, if we are indenting */
125     private static final String STANDARD_INDENT = "  ";
126 
127     /** standard string with which to end a line */
128     private static final String STANDARD_LINE_SEPARATOR = "\r\n";
129 
130     /** standard encoding */
131     private static final String STANDARD_ENCODING = "UTF-8";
132 
133 
134     /** The default indent is no spaces (as original document) */
135     String indent = null;
136 
137     /** New line separator */
138     String lineSeparator = STANDARD_LINE_SEPARATOR;
139 
140     /** The encoding format */
141     String encoding = STANDARD_ENCODING;
142 
143     /**
144      * Whether or not to output the XML declaration
145      * - default is <code>false</code>
146      */
147     boolean omitDeclaration = false;
148 
149     /**
150      * Whether or not to output the encoding in the XML declaration
151      * - default is <code>false</code>
152      */
153     boolean omitEncoding = false;
154 
155     /**
156      * Whether or not to expand empty elements to
157      * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>
158      */
159     boolean expandEmptyElements = false;
160 
161     /**
162      * Whether TrAX output escaping disabling/enabling PIs are ignored
163      * or processed - default is <code>false</code>
164      */
165     boolean ignoreTrAXEscapingPIs = false;
166 
167     /** text handling mode */
168     TextMode mode = TextMode.PRESERVE;
169 
170     /** entity escape logic */
171     EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding );
172 
173     /** Creates a new Format instance with default (raw) behavior. */
174     private Format()
175     {
176     }
177 
178     /**
179      * Sets the {@link EscapeStrategy} to use for character escaping.
180      *
181      * @param strategy the EscapeStrategy to use
182      * @return a pointer to this Format for chaining
183      */
184     public Format setEscapeStrategy( EscapeStrategy strategy )
185     {
186         escapeStrategy = strategy;
187         return this;
188     }
189 
190     /**
191      * Returns the current escape strategy
192      *
193      * @return the current escape strategy
194      */
195     public EscapeStrategy getEscapeStrategy()
196     {
197         return escapeStrategy;
198     }
199 
200     /**
201      * This will set the newline separator (<code>lineSeparator</code>).
202      * The default is <code>\r\n</code>. Note that if the "newlines"
203      * property is false, this value is irrelevant.  To make it output
204      * the system default line ending string, call
205      * <code>setLineSeparator(System.getProperty("line.separator"))</code>
206      * <p/>
207      * <p/>
208      * To output "UNIX-style" documents, call
209      * <code>setLineSeparator("\n")</code>.  To output "Mac-style"
210      * documents, call <code>setLineSeparator("\r")</code>.  DOS-style
211      * documents use CR-LF ("\r\n"), which is the default.
212      * </p>
213      * <p/>
214      * <p/>
215      * Note that this only applies to newlines generated by the
216      * outputter.  If you parse an XML document that contains newlines
217      * embedded inside a text node, and you do not set TextMode.NORMALIZE,
218      * then the newlines will be output
219      * verbatim, as "\n" which is how parsers normalize them.
220      * </p>
221      *
222      * @param separator <code>String</code> line separator to use.
223      * @return a pointer to this Format for chaining
224      * @see #setTextMode
225      */
226     public Format setLineSeparator( String separator )
227     {
228         this.lineSeparator = separator;
229         return this;
230     }
231 
232     /**
233      * Returns the current line separator.
234      *
235      * @return the current line separator
236      */
237     public String getLineSeparator()
238     {
239         return lineSeparator;
240     }
241 
242     /**
243      * This will set whether the XML declaration
244      * (<code>&lt;&#063;xml version="1&#046;0"
245      * encoding="UTF-8"&#063;&gt;</code>)
246      * includes the encoding of the document. It is common to omit
247      * this in uses such as WML and other wireless device protocols.
248      *
249      * @param omitEncoding <code>boolean</code> indicating whether or not
250      *                     the XML declaration should indicate the document encoding.
251      * @return a pointer to this Format for chaining
252      */
253     public Format setOmitEncoding( boolean omitEncoding )
254     {
255         this.omitEncoding = omitEncoding;
256         return this;
257     }
258 
259     /**
260      * Returns whether the XML declaration encoding will be omitted.
261      *
262      * @return whether the XML declaration encoding will be omitted
263      */
264     public boolean getOmitEncoding()
265     {
266         return omitEncoding;
267     }
268 
269     /**
270      * This will set whether the XML declaration
271      * (<code>&lt;&#063;xml version="1&#046;0"&#063;gt;</code>)
272      * will be omitted or not. It is common to omit this in uses such
273      * as SOAP and XML-RPC calls.
274      *
275      * @param omitDeclaration <code>boolean</code> indicating whether or not
276      *                        the XML declaration should be omitted.
277      * @return a pointer to this Format for chaining
278      */
279     public Format setOmitDeclaration( boolean omitDeclaration )
280     {
281         this.omitDeclaration = omitDeclaration;
282         return this;
283     }
284 
285     /**
286      * Returns whether the XML declaration will be omitted.
287      *
288      * @return whether the XML declaration will be omitted
289      */
290     public boolean getOmitDeclaration()
291     {
292         return omitDeclaration;
293     }
294 
295     /**
296      * This will set whether empty elements are expanded from
297      * <code>&lt;tagName/&gt;</code> to
298      * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
299      *
300      * @param expandEmptyElements <code>boolean</code> indicating whether or not
301      *                            empty elements should be expanded.
302      * @return a pointer to this Format for chaining
303      */
304     public Format setExpandEmptyElements( boolean expandEmptyElements )
305     {
306         this.expandEmptyElements = expandEmptyElements;
307         return this;
308     }
309 
310     /**
311      * Returns whether empty elements are expanded.
312      *
313      * @return whether empty elements are expanded
314      */
315     public boolean getExpandEmptyElements()
316     {
317         return expandEmptyElements;
318     }
319 
320     /**
321      * This will set whether JAXP TrAX processing instructions for
322      * disabling/enabling output escaping are ignored.  Disabling
323      * output escaping allows using XML text as element content and
324      * outputing it verbatim, i&#46;e&#46; as element children would be.
325      * <p/>
326      * When processed, these processing instructions are removed from
327      * the generated XML text and control whether the element text
328      * content is output verbatim or with escaping of the pre-defined
329      * entities in XML 1.0.  The text to be output verbatim shall be
330      * surrounded by the
331      * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
332      * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
333      * PIs.</p>
334      * <p/>
335      * When ignored, the processing instructions are present in the
336      * generated XML text and the pre-defined entities in XML 1.0 are
337      * escaped.
338      * <p/>
339      * Default: <code>false</code>.</p>
340      *
341      * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
342      *                              whether or not TrAX ouput escaping PIs are ignored.
343      * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
344      * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
345      */
346     public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs )
347     {
348         this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
349     }
350 
351     /**
352      * Returns whether JAXP TrAX processing instructions for
353      * disabling/enabling output escaping are ignored.
354      *
355      * @return whether or not TrAX ouput escaping PIs are ignored.
356      */
357     public boolean getIgnoreTrAXEscapingPIs()
358     {
359         return ignoreTrAXEscapingPIs;
360     }
361 
362     /**
363      * This sets the text output style.  Options are available as static
364      * {@link TextMode} instances.  The default is {@link TextMode#PRESERVE}.
365      *
366      * @return a pointer to this Format for chaining
367      */
368     public Format setTextMode( Format.TextMode mode )
369     {
370         this.mode = mode;
371         return this;
372     }
373 
374     /**
375      * Returns the current text output style.
376      *
377      * @return the current text output style
378      */
379     public Format.TextMode getTextMode()
380     {
381         return mode;
382     }
383 
384     /**
385      * This will set the indent <code>String</code> to use; this
386      * is usually a <code>String</code> of empty spaces. If you pass
387      * null, or the empty string (""), then no indentation will
388      * happen.  Default: none (null)
389      *
390      * @param indent <code>String</code> to use for indentation.
391      * @return a pointer to this Format for chaining
392      */
393     public Format setIndent( String indent )
394     {
395         // if passed the empty string, change it to null, for marginal
396         // performance gains later (can compare to null first instead
397         // of calling equals())
398         if ( "".equals( indent ) )
399         {
400             indent = null;
401         }
402         this.indent = indent;
403         return this;
404     }
405 
406     /**
407      * Returns the indent string in use.
408      *
409      * @return the indent string in use
410      */
411     public String getIndent()
412     {
413         return indent;
414     }
415 
416     /**
417      * Sets the output encoding.  The name should be an accepted XML
418      * encoding.
419      *
420      * @param encoding the encoding format.  Use XML-style names like
421      *                 "UTF-8" or "ISO-8859-1" or "US-ASCII"
422      * @return a pointer to this Format for chaining
423      */
424     public Format setEncoding( String encoding )
425     {
426         this.encoding = encoding;
427         escapeStrategy = new DefaultEscapeStrategy( encoding );
428         return this;
429     }
430 
431     /**
432      * Returns the configured output encoding.
433      *
434      * @return the output encoding
435      */
436     public String getEncoding()
437     {
438         return encoding;
439     }
440 
441     protected Object clone()
442     {
443         Format format = null;
444 
445         try
446         {
447             format = (Format) super.clone();
448         }
449         catch ( CloneNotSupportedException ce )
450         {
451         }
452 
453         return format;
454     }
455 
456 
457     /**
458      * Handle common charsets quickly and easily.  Use reflection
459      * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
460      * If JDK 1.4 isn't around, default to no special encoding.
461      */
462     class DefaultEscapeStrategy
463         implements EscapeStrategy
464     {
465         private int bits;
466         Object encoder;
467         Method canEncode;
468 
469         public DefaultEscapeStrategy( String encoding )
470         {
471             if ( "UTF-8".equalsIgnoreCase( encoding )
472                 || "UTF-16".equalsIgnoreCase( encoding ) )
473             {
474                 bits = 16;
475             }
476             else if ( "ISO-8859-1".equalsIgnoreCase( encoding )
477                 || "Latin1".equalsIgnoreCase( encoding ) )
478             {
479                 bits = 8;
480             }
481             else if ( "US-ASCII".equalsIgnoreCase( encoding )
482                 || "ASCII".equalsIgnoreCase( encoding ) )
483             {
484                 bits = 7;
485             }
486             else
487             {
488                 bits = 0;
489                 //encoder = Charset.forName(encoding).newEncoder();
490                 try
491                 {
492                     Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" );
493                     Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" );
494                     Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } );
495                     Object charsetObj = forName.invoke( null, new Object[] { encoding } );
496                     Method newEncoder = charsetClass.getMethod( "newEncoder" );
497                     encoder = newEncoder.invoke( charsetObj );
498                     canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } );
499                 }
500                 catch ( Exception ignored )
501                 {
502                 }
503             }
504         }
505 
506         public boolean shouldEscape( char ch )
507         {
508             if ( bits == 16 )
509             {
510                 return false;
511             }
512             if ( bits == 8 )
513             {
514                 return ( (int) ch > 255 );
515             }
516             if ( bits == 7 )
517             {
518                 return ( (int) ch > 127 );
519             }
520             else
521             {
522                 if ( canEncode != null && encoder != null )
523                 {
524                     try
525                     {
526                         Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } );
527                         return !val.booleanValue();
528                     }
529                     catch ( Exception ignored )
530                     {
531                     }
532                 }
533                 // Return false if we don't know.  This risks not escaping
534                 // things which should be escaped, but also means people won't
535                 // start getting loads of unnecessary escapes.
536                 return false;
537             }
538         }
539     }
540 
541 
542     /**
543      * Class to signify how text should be handled on output.  The following
544      * table provides details.
545      * <p/>
546      * <table>
547      * <tr>
548      * <th align="left">
549      * Text Mode
550      * </th>
551      * <th>
552      * Resulting behavior.
553      * </th>
554      * </tr>
555      * <p/>
556      * <tr valign="top">
557      * <td>
558      * <i>PRESERVE (Default)</i>
559      * </td>
560      * <td>
561      * All content is printed in the format it was created, no whitespace
562      * or line separators are are added or removed.
563      * </td>
564      * </tr>
565      * <p/>
566      * <tr valign="top">
567      * <td>
568      * TRIM_FULL_WHITE
569      * </td>
570      * <td>
571      * Content between tags consisting of all whitespace is not printed.
572      * If the content contains even one non-whitespace character, it is
573      * printed verbatim, whitespace and all.
574      * </td>
575      * </tr>
576      * <p/>
577      * <tr valign="top">
578      * <td>
579      * TRIM
580      * </td>
581      * <td>
582      * Same as TrimAllWhite, plus leading/trailing whitespace are
583      * trimmed.
584      * </td>
585      * </tr>
586      * <p/>
587      * <tr valign="top">
588      * <td>
589      * NORMALIZE
590      * </td>
591      * <td>
592      * Same as TextTrim, plus addition interior whitespace is compressed
593      * to a single space.
594      * </td>
595      * </tr>
596      * </table>
597      * <p/>
598      * In most cases textual content is aligned with the surrounding tags
599      * (after the appropriate text mode is applied). In the case where the only
600      * content between the start and end tags is textual, the start tag, text,
601      * and end tag are all printed on the same line. If the document being
602      * output already has whitespace, it's wise to turn on TRIM mode so the
603      * pre-existing whitespace can be trimmed before adding new whitespace.
604      * <p/>
605      * When a element has a xml:space attribute with the value of "preserve",
606      * all formating is turned off and reverts back to the default until the
607      * element and its contents have been printed. If a nested element contains
608      * another xml:space with the value "default" formatting is turned back on
609      * for the child element and then off for the remainder of the parent
610      * element.
611      */
612     public static class TextMode
613     {
614         /** Mode for literal text preservation. */
615         public static final TextMode PRESERVE = new TextMode( "PRESERVE" );
616 
617         /** Mode for text trimming (left and right trim). */
618         public static final TextMode TRIM = new TextMode( "TRIM" );
619 
620         /**
621          * Mode for text normalization (left and right trim plus internal
622          * whitespace is normalized to a single space.
623          *
624          * @see org.jdom.Element#getTextNormalize
625          */
626         public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" );
627 
628         /**
629          * Mode for text trimming of content consisting of nothing but
630          * whitespace but otherwise not changing output.
631          */
632         public static final TextMode TRIM_FULL_WHITE =
633             new TextMode( "TRIM_FULL_WHITE" );
634 
635         private final String name;
636 
637         private TextMode( String name )
638         {
639             this.name = name;
640         }
641 
642         public String toString()
643         {
644             return name;
645         }
646     }
647 }
648