View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.archetype.common.util;
20  
21  /*
22   * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
23   * All rights reserved.
24   *
25   * Redistribution and use in source and binary forms, with or without
26   * modification, are permitted provided that the following conditions
27   * are met:
28   *
29   * 1. Redistributions of source code must retain the above copyright
30   *    notice, this list of conditions, and the following disclaimer.
31   *
32   * 2. Redistributions in binary form must reproduce the above copyright
33   *    notice, this list of conditions, and the disclaimer that follows
34   *    these conditions in the documentation and/or other materials
35   *    provided with the distribution.
36   *
37   * 3. The name "JDOM" must not be used to endorse or promote products
38   *    derived from this software without prior written permission.  For
39   *    written permission, please contact <request_AT_jdom_DOT_org>.
40   *
41   * 4. Products derived from this software may not be called "JDOM", nor
42   *    may "JDOM" appear in their name, without prior written permission
43   *    from the JDOM Project Management <request_AT_jdom_DOT_org>.
44   *
45   * In addition, we request (but do not require) that you include in the
46   * end-user documentation provided with the redistribution and/or in the
47   * software itself an acknowledgement equivalent to the following:
48   *     "This product includes software developed by the
49   *      JDOM Project (http://www.jdom.org/)."
50   * Alternatively, the acknowledgment may be graphical using the logos
51   * available at http://www.jdom.org/images/logos.
52   *
53   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
54   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
56   * DISCLAIMED.  IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
57   * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
58   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
59   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
60   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
61   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
63   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64   * SUCH DAMAGE.
65   *
66   * This software consists of voluntary contributions made by many
67   * individuals on behalf of the JDOM Project and was originally
68   * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
69   * Brett McLaughlin <brett_AT_jdom_DOT_org>.  For more information
70   * on the JDOM Project, please see <http://www.jdom.org/>.
71   */
72  
73  import java.nio.charset.Charset;
74  import java.nio.charset.CharsetEncoder;
75  
76  import org.jdom2.output.EscapeStrategy;
77  
78  /**
79   * <p>Class to encapsulate XMLOutputter format options.
80   * Typical users can use the standard format configurations obtained by
81   * {@link #getRawFormat} (no whitespace changes),
82   * {@link #getPrettyFormat} (whitespace beautification), and
83   * {@link #getCompactFormat} (whitespace normalization).</p>
84   *
85   * <p>Several modes are available to effect the way textual content is printed.
86   * See the documentation for {@link TextMode} for details.</p>
87   *
88   * @author Jason Hunter
89   */
90  public class Format implements Cloneable {
91  
92      /**
93       * Returns a new Format object that performs no whitespace changes, uses
94       * the UTF-8 encoding, doesn't expand empty elements, includes the
95       * declaration and encoding, and uses the default entity escape strategy.
96       * Tweaks can be made to the returned Format instance without affecting
97       * other instances.
98       *
99       * @return a Format with no whitespace changes
100      */
101     public static Format getRawFormat() {
102         return new Format();
103     }
104 
105     /**
106      * Returns a new Format object that performs whitespace beautification with
107      * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
108      * includes the declaration and encoding, and uses the default entity
109      * escape strategy.
110      * Tweaks can be made to the returned Format instance without affecting
111      * other instances.
112      *
113      * @return a Format with whitespace beautification
114      */
115     public static Format getPrettyFormat() {
116         Format f = new Format();
117         f.setIndent(STANDARD_INDENT);
118         f.setTextMode(TextMode.TRIM);
119         return f;
120     }
121 
122     /**
123      * Returns a new Format object that performs whitespace normalization, uses
124      * the UTF-8 encoding, doesn't expand empty elements, includes the
125      * declaration and encoding, and uses the default entity escape strategy.
126      * Tweaks can be made to the returned Format instance without affecting
127      * other instances.
128      *
129      * @return a Format with whitespace normalization
130      */
131     public static Format getCompactFormat() {
132         Format f = new Format();
133         f.setTextMode(TextMode.NORMALIZE);
134         return f;
135     }
136 
137     /** standard value to indent by, if we are indenting */
138     private static final String STANDARD_INDENT = "  ";
139 
140     /** standard string with which to end a line */
141     private static final String STANDARD_LINE_SEPARATOR = "\r\n";
142 
143     /** standard encoding */
144     private static final String STANDARD_ENCODING = "UTF-8";
145 
146     /** The default indent is no spaces (as original document) */
147     String indent = null;
148 
149     /** New line separator */
150     String lineSeparator = STANDARD_LINE_SEPARATOR;
151 
152     /** The encoding format */
153     String encoding = STANDARD_ENCODING;
154 
155     /**
156      * Whether or not to output the XML declaration
157      * - default is <code>false</code>
158      */
159     boolean omitDeclaration = false;
160 
161     /**
162      * Whether or not to output the encoding in the XML declaration
163      * - default is <code>false</code>
164      */
165     boolean omitEncoding = false;
166 
167     /**
168      * Whether or not to expand empty elements to
169      * &lt;tagName&gt;&lt;/tagName&gt; - default is <code>false</code>
170      */
171     boolean expandEmptyElements = false;
172 
173     /**
174      * Whether TrAX output escaping disabling/enabling PIs are ignored
175      * or processed - default is <code>false</code>
176      */
177     boolean ignoreTrAXEscapingPIs = false;
178 
179     /** text handling mode */
180     TextMode mode = TextMode.PRESERVE;
181 
182     /** entity escape logic */
183     EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
184 
185     /** Creates a new Format instance with default (raw) behavior. */
186     private Format() {}
187 
188     /**
189      * Sets the {@link EscapeStrategy} to use for character escaping.
190      *
191      * @param strategy the EscapeStrategy to use
192      * @return a pointer to this Format for chaining
193      */
194     public Format setEscapeStrategy(EscapeStrategy strategy) {
195         escapeStrategy = strategy;
196         return this;
197     }
198 
199     /**
200      * Returns the current escape strategy
201      *
202      * @return the current escape strategy
203      */
204     public EscapeStrategy getEscapeStrategy() {
205         return escapeStrategy;
206     }
207 
208     /**
209      * <p>This will set the newline separator (<code>lineSeparator</code>).
210      * The default is <code>\r\n</code>. Note that if the "newlines"
211      * property is false, this value is irrelevant.  To make it output
212      * the system default line ending string, call
213      * <code>setLineSeparator(System.getProperty("line.separator"))</code></p>
214      *
215      * <p>To output "UNIX-style" documents, call
216      * <code>setLineSeparator("\n")</code>.  To output "Mac-style"
217      * documents, call <code>setLineSeparator("\r")</code>.  DOS-style
218      * documents use CR-LF ("\r\n"), which is the default.</p>
219      *
220      * <p>Note that this only applies to newlines generated by the
221      * outputter.  If you parse an XML document that contains newlines
222      * embedded inside a text node, and you do not set TextMode.NORMALIZE,
223      * then the newlines will be output
224      * verbatim, as "\n" which is how parsers normalize them.
225      * </p>
226      *
227      * @param separator <code>String</code> line separator to use.
228      * @return a pointer to this Format for chaining
229      * @see #setTextMode
230      */
231     public Format setLineSeparator(String separator) {
232         this.lineSeparator = separator;
233         return this;
234     }
235 
236     /**
237      * Returns the current line separator.
238      *
239      * @return the current line separator
240      */
241     public String getLineSeparator() {
242         return lineSeparator;
243     }
244 
245     /**
246      * This will set whether the XML declaration
247      * (<code>&lt;&#063;xml version="1&#046;0"
248      * encoding="UTF-8"&#063;&gt;</code>)
249      * includes the encoding of the document. It is common to omit
250      * this in uses such as WML and other wireless device protocols.
251      *
252      * @param omitEncoding <code>boolean</code> indicating whether or not
253      *                     the XML declaration should indicate the document encoding.
254      * @return a pointer to this Format for chaining
255      */
256     public Format setOmitEncoding(boolean omitEncoding) {
257         this.omitEncoding = omitEncoding;
258         return this;
259     }
260 
261     /**
262      * Returns whether the XML declaration encoding will be omitted.
263      *
264      * @return whether the XML declaration encoding will be omitted
265      */
266     public boolean getOmitEncoding() {
267         return omitEncoding;
268     }
269 
270     /**
271      * This will set whether the XML declaration
272      * (<code>&lt;&#063;xml version="1&#046;0"&#063;gt;</code>)
273      * will be omitted or not. It is common to omit this in uses such
274      * as SOAP and XML-RPC calls.
275      *
276      * @param omitDeclaration <code>boolean</code> indicating whether or not
277      *                        the XML declaration should be omitted.
278      * @return a pointer to this Format for chaining
279      */
280     public Format setOmitDeclaration(boolean omitDeclaration) {
281         this.omitDeclaration = omitDeclaration;
282         return this;
283     }
284 
285     /**
286      * Returns whether the XML declaration will be omitted.
287      *
288      * @return whether the XML declaration will be omitted
289      */
290     public boolean getOmitDeclaration() {
291         return omitDeclaration;
292     }
293 
294     /**
295      * This will set whether empty elements are expanded from
296      * <code>&lt;tagName/&gt;</code> to
297      * <code>&lt;tagName&gt;&lt;/tagName&gt;</code>.
298      *
299      * @param expandEmptyElements <code>boolean</code> indicating whether or not
300      *                            empty elements should be expanded.
301      * @return a pointer to this Format for chaining
302      */
303     public Format setExpandEmptyElements(boolean expandEmptyElements) {
304         this.expandEmptyElements = expandEmptyElements;
305         return this;
306     }
307 
308     /**
309      * Returns whether empty elements are expanded.
310      *
311      * @return whether empty elements are expanded
312      */
313     public boolean getExpandEmptyElements() {
314         return expandEmptyElements;
315     }
316 
317     /**
318      * <p>This will set whether JAXP TrAX processing instructions for
319      * disabling/enabling output escaping are ignored.  Disabling
320      * output escaping allows using XML text as element content and
321      * outputing it verbatim, i&#46;e&#46; as element children would be.</p>
322      *
323      * <p>When processed, these processing instructions are removed from
324      * the generated XML text and control whether the element text
325      * content is output verbatim or with escaping of the pre-defined
326      * entities in XML 1.0.  The text to be output verbatim shall be
327      * surrounded by the
328      * <code>&lt;?javax.xml.transform.disable-output-escaping ?&gt;</code>
329      * and <code>&lt;?javax.xml.transform.enable-output-escaping ?&gt;</code>
330      * PIs.</p>
331      *
332      * <p>When ignored, the processing instructions are present in the
333      * generated XML text and the pre-defined entities in XML 1.0 are
334      * escaped.</p>
335      *
336      * Default: <code>false</code>.
337      *
338      * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
339      *                              whether or not TrAX ouput escaping PIs are ignored.
340      * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
341      * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
342      */
343     public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
344         this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
345     }
346 
347     /**
348      * Returns whether JAXP TrAX processing instructions for
349      * disabling/enabling output escaping are ignored.
350      *
351      * @return whether or not TrAX ouput escaping PIs are ignored.
352      */
353     public boolean getIgnoreTrAXEscapingPIs() {
354         return ignoreTrAXEscapingPIs;
355     }
356 
357     /**
358      * This sets the text output style.  Options are available as static
359      * {@link TextMode} instances.  The default is {@link TextMode#PRESERVE}.
360      *
361      * @return a pointer to this Format for chaining
362      */
363     public Format setTextMode(Format.TextMode mode) {
364         this.mode = mode;
365         return this;
366     }
367 
368     /**
369      * Returns the current text output style.
370      *
371      * @return the current text output style
372      */
373     public Format.TextMode getTextMode() {
374         return mode;
375     }
376 
377     /**
378      * This will set the indent <code>String</code> to use; this
379      * is usually a <code>String</code> of empty spaces. If you pass
380      * null, or the empty string (""), then no indentation will
381      * happen.  Default: none (null)
382      *
383      * @param indent <code>String</code> to use for indentation.
384      * @return a pointer to this Format for chaining
385      */
386     public Format setIndent(String indent) {
387         // if passed the empty string, change it to null, for marginal
388         // performance gains later (can compare to null first instead
389         // of calling equals())
390         if ("".equals(indent)) {
391             indent = null;
392         }
393         this.indent = indent;
394         return this;
395     }
396 
397     /**
398      * Returns the indent string in use.
399      *
400      * @return the indent string in use
401      */
402     public String getIndent() {
403         return indent;
404     }
405 
406     /**
407      * Sets the output encoding.  The name should be an accepted XML
408      * encoding.
409      *
410      * @param encoding the encoding format.  Use XML-style names like
411      *                 "UTF-8" or "ISO-8859-1" or "US-ASCII"
412      * @return a pointer to this Format for chaining
413      */
414     public Format setEncoding(String encoding) {
415         this.encoding = encoding;
416         escapeStrategy = new DefaultEscapeStrategy(encoding);
417         return this;
418     }
419 
420     /**
421      * Returns the configured output encoding.
422      *
423      * @return the output encoding
424      */
425     public String getEncoding() {
426         return encoding;
427     }
428 
429     @Override
430     protected Object clone() {
431         Format format = null;
432 
433         try {
434             format = (Format) super.clone();
435         } catch (CloneNotSupportedException ce) {
436         }
437 
438         return format;
439     }
440 
441     /**
442      * Handle common charsets quickly and easily.
443      * If JDK 1.4 isn't around, default to no special encoding.
444      */
445     static class DefaultEscapeStrategy implements EscapeStrategy {
446         private int bits;
447         CharsetEncoder encoder;
448 
449         DefaultEscapeStrategy(String encoding) {
450             if ("UTF-8".equalsIgnoreCase(encoding) || "UTF-16".equalsIgnoreCase(encoding)) {
451                 bits = 16;
452             } else if ("ISO-8859-1".equalsIgnoreCase(encoding) || "Latin1".equalsIgnoreCase(encoding)) {
453                 bits = 8;
454             } else if ("US-ASCII".equalsIgnoreCase(encoding) || "ASCII".equalsIgnoreCase(encoding)) {
455                 bits = 7;
456             } else {
457                 bits = 0;
458                 encoder = Charset.forName(encoding).newEncoder();
459             }
460         }
461 
462         @Override
463         public boolean shouldEscape(char ch) {
464             if (bits == 16) {
465                 return false;
466             }
467             if (bits == 8) {
468                 return (ch > 255);
469             }
470             if (bits == 7) {
471                 return (ch > 127);
472             } else {
473                 if (encoder != null) {
474                     return !encoder.canEncode(ch);
475                 }
476                 // Return false if we don't know.  This risks not escaping
477                 // things which should be escaped, but also means people won't
478                 // start getting loads of unnecessary escapes.
479                 return false;
480             }
481         }
482     }
483 
484     /**
485      * <p>
486      * Class to signify how text should be handled on output.  The following
487      * table provides details.</p>
488      * <table>
489      * <caption>TextMode details</caption>
490      * <tr>
491      * <th>
492      * Text Mode
493      * </th>
494      * <th>
495      * Resulting behavior.
496      * </th>
497      * </tr>
498      * <tr>
499      * <td>
500      * <i>PRESERVE (Default)</i>
501      * </td>
502      * <td>
503      * All content is printed in the format it was created, no whitespace
504      * or line separators are are added or removed.
505      * </td>
506      * </tr>
507      * <tr>
508      * <td>
509      * TRIM_FULL_WHITE
510      * </td>
511      * <td>
512      * Content between tags consisting of all whitespace is not printed.
513      * If the content contains even one non-whitespace character, it is
514      * printed verbatim, whitespace and all.
515      * </td>
516      * </tr>
517      * <tr>
518      * <td>
519      * TRIM
520      * </td>
521      * <td>
522      * Same as TrimAllWhite, plus leading/trailing whitespace are
523      * trimmed.
524      * </td>
525      * </tr>
526      * <tr>
527      * <td>
528      * NORMALIZE
529      * </td>
530      * <td>
531      * Same as TextTrim, plus addition interior whitespace is compressed
532      * to a single space.
533      * </td>
534      * </tr>
535      * </table>
536      *
537      * <p>In most cases textual content is aligned with the surrounding tags
538      * (after the appropriate text mode is applied). In the case where the only
539      * content between the start and end tags is textual, the start tag, text,
540      * and end tag are all printed on the same line. If the document being
541      * output already has whitespace, it's wise to turn on TRIM mode so the
542      * pre-existing whitespace can be trimmed before adding new whitespace.</p>
543      *
544      * <p>When a element has a xml:space attribute with the value of "preserve",
545      * all formating is turned off and reverts back to the default until the
546      * element and its contents have been printed. If a nested element contains
547      * another xml:space with the value "default" formatting is turned back on
548      * for the child element and then off for the remainder of the parent
549      * element.</p>
550      */
551     public static class TextMode {
552         /** Mode for literal text preservation. */
553         public static final TextMode PRESERVE = new TextMode("PRESERVE");
554 
555         /** Mode for text trimming (left and right trim). */
556         public static final TextMode TRIM = new TextMode("TRIM");
557 
558         /**
559          * Mode for text normalization (left and right trim plus internal
560          * whitespace is normalized to a single space.
561          *
562          * @see org.jdom2.Element#getTextNormalize
563          */
564         public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
565 
566         /**
567          * Mode for text trimming of content consisting of nothing but
568          * whitespace but otherwise not changing output.
569          */
570         public static final TextMode TRIM_FULL_WHITE = new TextMode("TRIM_FULL_WHITE");
571 
572         private final String name;
573 
574         private TextMode(String name) {
575             this.name = name;
576         }
577 
578         @Override
579         public String toString() {
580             return name;
581         }
582     }
583 }