View Javadoc
1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
5    * All rights reserved.
6    *
7    * Redistribution and use in source and binary forms, with or without
8    * modification, are permitted provided that the following conditions
9    * are met:
10   *
11   * 1. Redistributions of source code must retain the above copyright
12   *    notice, this list of conditions, and the following disclaimer.
13   *
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions, and the disclaimer that follows 
16   *    these conditions in the documentation and/or other materials 
17   *    provided with the distribution.
18   *
19   * 3. The name "JDOM" must not be used to endorse or promote products
20   *    derived from this software without prior written permission.  For
21   *    written permission, please contact <request_AT_jdom_DOT_org>.
22   *
23   * 4. Products derived from this software may not be called "JDOM", nor
24   *    may "JDOM" appear in their name, without prior written permission
25   *    from the JDOM Project Management <request_AT_jdom_DOT_org>.
26   *
27   * In addition, we request (but do not require) that you include in the 
28   * end-user documentation provided with the redistribution and/or in the 
29   * software itself an acknowledgement equivalent to the following:
30   *     "This product includes software developed by the
31   *      JDOM Project (http://www.jdom.org/)."
32   * Alternatively, the acknowledgment may be graphical using the logos 
33   * available at http://www.jdom.org/images/logos.
34   *
35   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38   * DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
39   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46   * SUCH DAMAGE.
47   *
48   * This software consists of voluntary contributions made by many 
49   * individuals on behalf of the JDOM Project and was originally 
50   * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
51   * Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
52   * on the JDOM Project, please see <http://www.jdom.org/>.
53   */
54  
import org.jdom2.output.EscapeStrategy;

import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
58  
59  /**
60   * <p>Class to encapsulate XMLOutputter format options.
61   * Typical users can use the standard format configurations obtained by
62   * {@link #getRawFormat} (no whitespace changes),
63   * {@link #getPrettyFormat} (whitespace beautification), and
64   * {@link #getCompactFormat} (whitespace normalization).</p>
65   * 
66   * <p>Several modes are available to effect the way textual content is printed.
67   * See the documentation for {@link TextMode} for details.</p>
68   *
69   * @author Jason Hunter
70   */
71  public class Format
72      implements Cloneable
73  {
74  
75      /**
76       * Returns a new Format object that performs no whitespace changes, uses
77       * the UTF-8 encoding, doesn't expand empty elements, includes the
78       * declaration and encoding, and uses the default entity escape strategy.
79       * Tweaks can be made to the returned Format instance without affecting
80       * other instances.
81       *
82       * @return a Format with no whitespace changes
83       */
84      public static Format getRawFormat()
85      {
86          return new Format();
87      }
88  
89      /**
90       * Returns a new Format object that performs whitespace beautification with
91       * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
92       * includes the declaration and encoding, and uses the default entity
93       * escape strategy.
94       * Tweaks can be made to the returned Format instance without affecting
95       * other instances.
96       *
97       * @return a Format with whitespace beautification
98       */
99      public static Format getPrettyFormat()
100     {
101         Format f = new Format();
102         f.setIndent( STANDARD_INDENT );
103         f.setTextMode( TextMode.TRIM );
104         return f;
105     }
106 
107     /**
108      * Returns a new Format object that performs whitespace normalization, uses
109      * the UTF-8 encoding, doesn't expand empty elements, includes the
110      * declaration and encoding, and uses the default entity escape strategy.
111      * Tweaks can be made to the returned Format instance without affecting
112      * other instances.
113      *
114      * @return a Format with whitespace normalization
115      */
116     public static Format getCompactFormat()
117     {
118         Format f = new Format();
119         f.setTextMode( TextMode.NORMALIZE );
120         return f;
121     }
122 
123     /** standard value to indent by, if we are indenting */
124     private static final String STANDARD_INDENT = "  ";
125 
126     /** standard string with which to end a line */
127     private static final String STANDARD_LINE_SEPARATOR = "\r\n";
128 
129     /** standard encoding */
130     private static final String STANDARD_ENCODING = "UTF-8";
131 
132 
133     /** The default indent is no spaces (as original document) */
134     String indent = null;
135 
136     /** New line separator */
137     String lineSeparator = STANDARD_LINE_SEPARATOR;
138 
139     /** The encoding format */
140     String encoding = STANDARD_ENCODING;
141 
142     /**
143      * Whether or not to output the XML declaration
144      * - default is <code>false</code>
145      */
146     boolean omitDeclaration = false;
147 
148     /**
149      * Whether or not to output the encoding in the XML declaration
150      * - default is <code>false</code>
151      */
152     boolean omitEncoding = false;
153 
154     /**
155      * Whether or not to expand empty elements to
156      * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>
157      */
158     boolean expandEmptyElements = false;
159 
160     /**
161      * Whether TrAX output escaping disabling/enabling PIs are ignored
162      * or processed - default is <code>false</code>
163      */
164     boolean ignoreTrAXEscapingPIs = false;
165 
166     /** text handling mode */
167     TextMode mode = TextMode.PRESERVE;
168 
169     /** entity escape logic */
170     EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding );
171 
172     /** Creates a new Format instance with default (raw) behavior. */
173     private Format()
174     {
175     }
176 
177     /**
178      * Sets the {@link EscapeStrategy} to use for character escaping.
179      *
180      * @param strategy the EscapeStrategy to use
181      * @return a pointer to this Format for chaining
182      */
183     public Format setEscapeStrategy( EscapeStrategy strategy )
184     {
185         escapeStrategy = strategy;
186         return this;
187     }
188 
189     /**
190      * Returns the current escape strategy
191      *
192      * @return the current escape strategy
193      */
194     public EscapeStrategy getEscapeStrategy()
195     {
196         return escapeStrategy;
197     }
198 
199     /**
200      * <p>This will set the newline separator (<code>lineSeparator</code>).
201      * The default is <code>\r\n</code>. Note that if the "newlines"
202      * property is false, this value is irrelevant.  To make it output
203      * the system default line ending string, call
204      * <code>setLineSeparator(System.getProperty("line.separator"))</code></p>
205      * 
206      * <p>To output "UNIX-style" documents, call
207      * <code>setLineSeparator("\n")</code>.  To output "Mac-style"
208      * documents, call <code>setLineSeparator("\r")</code>.  DOS-style
209      * documents use CR-LF ("\r\n"), which is the default.</p>
210      * 
211      * <p>Note that this only applies to newlines generated by the
212      * outputter.  If you parse an XML document that contains newlines
213      * embedded inside a text node, and you do not set TextMode.NORMALIZE,
214      * then the newlines will be output
215      * verbatim, as "\n" which is how parsers normalize them.
216      * </p>
217      *
218      * @param separator <code>String</code> line separator to use.
219      * @return a pointer to this Format for chaining
220      * @see #setTextMode
221      */
222     public Format setLineSeparator( String separator )
223     {
224         this.lineSeparator = separator;
225         return this;
226     }
227 
228     /**
229      * Returns the current line separator.
230      *
231      * @return the current line separator
232      */
233     public String getLineSeparator()
234     {
235         return lineSeparator;
236     }
237 
238     /**
239      * This will set whether the XML declaration
240      * (<code>&lt;&#063;xml version="1&#046;0"
241      * encoding="UTF-8"&#063;&gt;</code>)
242      * includes the encoding of the document. It is common to omit
243      * this in uses such as WML and other wireless device protocols.
244      *
245      * @param omitEncoding <code>boolean</code> indicating whether or not
246      *                     the XML declaration should indicate the document encoding.
247      * @return a pointer to this Format for chaining
248      */
249     public Format setOmitEncoding( boolean omitEncoding )
250     {
251         this.omitEncoding = omitEncoding;
252         return this;
253     }
254 
255     /**
256      * Returns whether the XML declaration encoding will be omitted.
257      *
258      * @return whether the XML declaration encoding will be omitted
259      */
260     public boolean getOmitEncoding()
261     {
262         return omitEncoding;
263     }
264 
265     /**
266      * This will set whether the XML declaration
267      * (<code>&lt;&#063;xml version="1&#046;0"&#063;gt;</code>)
268      * will be omitted or not. It is common to omit this in uses such
269      * as SOAP and XML-RPC calls.
270      *
271      * @param omitDeclaration <code>boolean</code> indicating whether or not
272      *                        the XML declaration should be omitted.
273      * @return a pointer to this Format for chaining
274      */
275     public Format setOmitDeclaration( boolean omitDeclaration )
276     {
277         this.omitDeclaration = omitDeclaration;
278         return this;
279     }
280 
281     /**
282      * Returns whether the XML declaration will be omitted.
283      *
284      * @return whether the XML declaration will be omitted
285      */
286     public boolean getOmitDeclaration()
287     {
288         return omitDeclaration;
289     }
290 
291     /**
292      * This will set whether empty elements are expanded from
293      * <code>&lt;tagName/&gt;</code> to
294      * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
295      *
296      * @param expandEmptyElements <code>boolean</code> indicating whether or not
297      *                            empty elements should be expanded.
298      * @return a pointer to this Format for chaining
299      */
300     public Format setExpandEmptyElements( boolean expandEmptyElements )
301     {
302         this.expandEmptyElements = expandEmptyElements;
303         return this;
304     }
305 
306     /**
307      * Returns whether empty elements are expanded.
308      *
309      * @return whether empty elements are expanded
310      */
311     public boolean getExpandEmptyElements()
312     {
313         return expandEmptyElements;
314     }
315 
316     /**
317      * <p>This will set whether JAXP TrAX processing instructions for
318      * disabling/enabling output escaping are ignored.  Disabling
319      * output escaping allows using XML text as element content and
320      * outputing it verbatim, i&#46;e&#46; as element children would be.</p>
321      * 
322      * <p>When processed, these processing instructions are removed from
323      * the generated XML text and control whether the element text
324      * content is output verbatim or with escaping of the pre-defined
325      * entities in XML 1.0.  The text to be output verbatim shall be
326      * surrounded by the
327      * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
328      * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
329      * PIs.</p>
330      * 
331      * <p>When ignored, the processing instructions are present in the
332      * generated XML text and the pre-defined entities in XML 1.0 are
333      * escaped.</p>
334      * 
335      * Default: <code>false</code>.
336      *
337      * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
338      *                              whether or not TrAX ouput escaping PIs are ignored.
339      * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
340      * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
341      */
342     public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs )
343     {
344         this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
345     }
346 
347     /**
348      * Returns whether JAXP TrAX processing instructions for
349      * disabling/enabling output escaping are ignored.
350      *
351      * @return whether or not TrAX ouput escaping PIs are ignored.
352      */
353     public boolean getIgnoreTrAXEscapingPIs()
354     {
355         return ignoreTrAXEscapingPIs;
356     }
357 
358     /**
359      * This sets the text output style.  Options are available as static
360      * {@link TextMode} instances.  The default is {@link TextMode#PRESERVE}.
361      *
362      * @return a pointer to this Format for chaining
363      */
364     public Format setTextMode( Format.TextMode mode )
365     {
366         this.mode = mode;
367         return this;
368     }
369 
370     /**
371      * Returns the current text output style.
372      *
373      * @return the current text output style
374      */
375     public Format.TextMode getTextMode()
376     {
377         return mode;
378     }
379 
380     /**
381      * This will set the indent <code>String</code> to use; this
382      * is usually a <code>String</code> of empty spaces. If you pass
383      * null, or the empty string (""), then no indentation will
384      * happen.  Default: none (null)
385      *
386      * @param indent <code>String</code> to use for indentation.
387      * @return a pointer to this Format for chaining
388      */
389     public Format setIndent( String indent )
390     {
391         // if passed the empty string, change it to null, for marginal
392         // performance gains later (can compare to null first instead
393         // of calling equals())
394         if ( "".equals( indent ) )
395         {
396             indent = null;
397         }
398         this.indent = indent;
399         return this;
400     }
401 
402     /**
403      * Returns the indent string in use.
404      *
405      * @return the indent string in use
406      */
407     public String getIndent()
408     {
409         return indent;
410     }
411 
412     /**
413      * Sets the output encoding.  The name should be an accepted XML
414      * encoding.
415      *
416      * @param encoding the encoding format.  Use XML-style names like
417      *                 "UTF-8" or "ISO-8859-1" or "US-ASCII"
418      * @return a pointer to this Format for chaining
419      */
420     public Format setEncoding( String encoding )
421     {
422         this.encoding = encoding;
423         escapeStrategy = new DefaultEscapeStrategy( encoding );
424         return this;
425     }
426 
427     /**
428      * Returns the configured output encoding.
429      *
430      * @return the output encoding
431      */
432     public String getEncoding()
433     {
434         return encoding;
435     }
436 
437     @Override
438     protected Object clone()
439     {
440         Format format = null;
441 
442         try
443         {
444             format = (Format) super.clone();
445         }
446         catch ( CloneNotSupportedException ce )
447         {
448         }
449 
450         return format;
451     }
452 
453 
454     /**
455      * Handle common charsets quickly and easily.  Use reflection
456      * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
457      * If JDK 1.4 isn't around, default to no special encoding.
458      */
459     class DefaultEscapeStrategy
460         implements EscapeStrategy
461     {
462         private int bits;
463         Object encoder;
464         Method canEncode;
465 
466         public DefaultEscapeStrategy( String encoding )
467         {
468             if ( "UTF-8".equalsIgnoreCase( encoding )
469                 || "UTF-16".equalsIgnoreCase( encoding ) )
470             {
471                 bits = 16;
472             }
473             else if ( "ISO-8859-1".equalsIgnoreCase( encoding )
474                 || "Latin1".equalsIgnoreCase( encoding ) )
475             {
476                 bits = 8;
477             }
478             else if ( "US-ASCII".equalsIgnoreCase( encoding )
479                 || "ASCII".equalsIgnoreCase( encoding ) )
480             {
481                 bits = 7;
482             }
483             else
484             {
485                 bits = 0;
486                 //encoder = Charset.forName(encoding).newEncoder();
487                 try
488                 {
489                     Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" );
490                     Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" );
491                     Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } );
492                     Object charsetObj = forName.invoke( null, new Object[] { encoding } );
493                     Method newEncoder = charsetClass.getMethod( "newEncoder" );
494                     encoder = newEncoder.invoke( charsetObj );
495                     canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } );
496                 }
497                 catch ( Exception ignored )
498                 {
499                 }
500             }
501         }
502 
503         @Override
504         public boolean shouldEscape( char ch )
505         {
506             if ( bits == 16 )
507             {
508                 return false;
509             }
510             if ( bits == 8 )
511             {
512                 return ( ch > 255 );
513             }
514             if ( bits == 7 )
515             {
516                 return ( ch > 127 );
517             }
518             else
519             {
520                 if ( canEncode != null && encoder != null )
521                 {
522                     try
523                     {
524                         Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } );
525                         return !val.booleanValue();
526                     }
527                     catch ( Exception ignored )
528                     {
529                     }
530                 }
531                 // Return false if we don't know.  This risks not escaping
532                 // things which should be escaped, but also means people won't
533                 // start getting loads of unnecessary escapes.
534                 return false;
535             }
536         }
537     }
538 
539 
540     /**
541      * <p>
542      * Class to signify how text should be handled on output.  The following
543      * table provides details.</p>
544      * <table>
545      * <caption>TextMode details</caption>
546      * <tr>
547      * <th>
548      * Text Mode
549      * </th>
550      * <th>
551      * Resulting behavior.
552      * </th>
553      * </tr>
554      * <tr>
555      * <td>
556      * <i>PRESERVE (Default)</i>
557      * </td>
558      * <td>
559      * All content is printed in the format it was created, no whitespace
560      * or line separators are are added or removed.
561      * </td>
562      * </tr>
563      * <tr>
564      * <td>
565      * TRIM_FULL_WHITE
566      * </td>
567      * <td>
568      * Content between tags consisting of all whitespace is not printed.
569      * If the content contains even one non-whitespace character, it is
570      * printed verbatim, whitespace and all.
571      * </td>
572      * </tr>
573      * <tr>
574      * <td>
575      * TRIM
576      * </td>
577      * <td>
578      * Same as TrimAllWhite, plus leading/trailing whitespace are
579      * trimmed.
580      * </td>
581      * </tr>
582      * <tr>
583      * <td>
584      * NORMALIZE
585      * </td>
586      * <td>
587      * Same as TextTrim, plus addition interior whitespace is compressed
588      * to a single space.
589      * </td>
590      * </tr>
591      * </table>
592      * 
593      * <p>In most cases textual content is aligned with the surrounding tags
594      * (after the appropriate text mode is applied). In the case where the only
595      * content between the start and end tags is textual, the start tag, text,
596      * and end tag are all printed on the same line. If the document being
597      * output already has whitespace, it's wise to turn on TRIM mode so the
598      * pre-existing whitespace can be trimmed before adding new whitespace.</p>
599      * 
600      * <p>When a element has a xml:space attribute with the value of "preserve",
601      * all formating is turned off and reverts back to the default until the
602      * element and its contents have been printed. If a nested element contains
603      * another xml:space with the value "default" formatting is turned back on
604      * for the child element and then off for the remainder of the parent
605      * element.</p>
606      */
607     public static class TextMode
608     {
609         /** Mode for literal text preservation. */
610         public static final TextMode PRESERVE = new TextMode( "PRESERVE" );
611 
612         /** Mode for text trimming (left and right trim). */
613         public static final TextMode TRIM = new TextMode( "TRIM" );
614 
615         /**
616          * Mode for text normalization (left and right trim plus internal
617          * whitespace is normalized to a single space.
618          *
619          * @see org.jdom2.Element#getTextNormalize
620          */
621         public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" );
622 
623         /**
624          * Mode for text trimming of content consisting of nothing but
625          * whitespace but otherwise not changing output.
626          */
627         public static final TextMode TRIM_FULL_WHITE =
628             new TextMode( "TRIM_FULL_WHITE" );
629 
630         private final String name;
631 
632         private TextMode( String name )
633         {
634             this.name = name;
635         }
636 
637         @Override
638         public String toString()
639         {
640             return name;
641         }
642     }
643 }
644