1 package org.apache.maven.archetype.common.util;
2
3 /*
4 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions, and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions, and the disclaimer that follows
16 * these conditions in the documentation and/or other materials
17 * provided with the distribution.
18 *
19 * 3. The name "JDOM" must not be used to endorse or promote products
20 * derived from this software without prior written permission. For
21 * written permission, please contact <request_AT_jdom_DOT_org>.
22 *
23 * 4. Products derived from this software may not be called "JDOM", nor
24 * may "JDOM" appear in their name, without prior written permission
25 * from the JDOM Project Management <request_AT_jdom_DOT_org>.
26 *
27 * In addition, we request (but do not require) that you include in the
28 * end-user documentation provided with the redistribution and/or in the
29 * software itself an acknowledgement equivalent to the following:
30 * "This product includes software developed by the
31 * JDOM Project (http://www.jdom.org/)."
32 * Alternatively, the acknowledgment may be graphical using the logos
33 * available at http://www.jdom.org/images/logos.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
39 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 *
48 * This software consists of voluntary contributions made by many
49 * individuals on behalf of the JDOM Project and was originally
50 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
51 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
52 * on the JDOM Project, please see <http://www.jdom.org/>.
53 */
54
55 import org.jdom.output.EscapeStrategy;
56
57 import java.lang.reflect.Method;
58
59 /**
60 * Class to encapsulate XMLOutputter format options.
61 * Typical users can use the standard format configurations obtained by
62 * {@link #getRawFormat} (no whitespace changes),
63 * {@link #getPrettyFormat} (whitespace beautification), and
64 * {@link #getCompactFormat} (whitespace normalization).
65 * <p/>
66 * Several modes are available to effect the way textual content is printed.
67 * See the documentation for {@link TextMode} for details.
68 *
69 * @author Jason Hunter
70 * @version $Revision: 942523 $, $Date: 2015-03-06 00:20:33 +0000 (Fri, 06 Mar 2015) $
71 */
72 public class Format
73 implements Cloneable
74 {
75
76 /**
77 * Returns a new Format object that performs no whitespace changes, uses
78 * the UTF-8 encoding, doesn't expand empty elements, includes the
79 * declaration and encoding, and uses the default entity escape strategy.
80 * Tweaks can be made to the returned Format instance without affecting
81 * other instances.
82 *
83 * @return a Format with no whitespace changes
84 */
85 public static Format getRawFormat()
86 {
87 return new Format();
88 }
89
90 /**
91 * Returns a new Format object that performs whitespace beautification with
92 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
93 * includes the declaration and encoding, and uses the default entity
94 * escape strategy.
95 * Tweaks can be made to the returned Format instance without affecting
96 * other instances.
97 *
98 * @return a Format with whitespace beautification
99 */
100 public static Format getPrettyFormat()
101 {
102 Format f = new Format();
103 f.setIndent( STANDARD_INDENT );
104 f.setTextMode( TextMode.TRIM );
105 return f;
106 }
107
108 /**
109 * Returns a new Format object that performs whitespace normalization, uses
110 * the UTF-8 encoding, doesn't expand empty elements, includes the
111 * declaration and encoding, and uses the default entity escape strategy.
112 * Tweaks can be made to the returned Format instance without affecting
113 * other instances.
114 *
115 * @return a Format with whitespace normalization
116 */
117 public static Format getCompactFormat()
118 {
119 Format f = new Format();
120 f.setTextMode( TextMode.NORMALIZE );
121 return f;
122 }
123
124 /** standard value to indent by, if we are indenting */
125 private static final String STANDARD_INDENT = " ";
126
127 /** standard string with which to end a line */
128 private static final String STANDARD_LINE_SEPARATOR = "\r\n";
129
130 /** standard encoding */
131 private static final String STANDARD_ENCODING = "UTF-8";
132
133
134 /** The default indent is no spaces (as original document) */
135 String indent = null;
136
137 /** New line separator */
138 String lineSeparator = STANDARD_LINE_SEPARATOR;
139
140 /** The encoding format */
141 String encoding = STANDARD_ENCODING;
142
143 /**
144 * Whether or not to output the XML declaration
145 * - default is <code>false</code>
146 */
147 boolean omitDeclaration = false;
148
149 /**
150 * Whether or not to output the encoding in the XML declaration
151 * - default is <code>false</code>
152 */
153 boolean omitEncoding = false;
154
155 /**
156 * Whether or not to expand empty elements to
157 * <tagName></tagName> - default is <code>false</code>
158 */
159 boolean expandEmptyElements = false;
160
161 /**
162 * Whether TrAX output escaping disabling/enabling PIs are ignored
163 * or processed - default is <code>false</code>
164 */
165 boolean ignoreTrAXEscapingPIs = false;
166
167 /** text handling mode */
168 TextMode mode = TextMode.PRESERVE;
169
170 /** entity escape logic */
171 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding );
172
173 /** Creates a new Format instance with default (raw) behavior. */
174 private Format()
175 {
176 }
177
178 /**
179 * Sets the {@link EscapeStrategy} to use for character escaping.
180 *
181 * @param strategy the EscapeStrategy to use
182 * @return a pointer to this Format for chaining
183 */
184 public Format setEscapeStrategy( EscapeStrategy strategy )
185 {
186 escapeStrategy = strategy;
187 return this;
188 }
189
190 /**
191 * Returns the current escape strategy
192 *
193 * @return the current escape strategy
194 */
195 public EscapeStrategy getEscapeStrategy()
196 {
197 return escapeStrategy;
198 }
199
200 /**
201 * This will set the newline separator (<code>lineSeparator</code>).
202 * The default is <code>\r\n</code>. Note that if the "newlines"
203 * property is false, this value is irrelevant. To make it output
204 * the system default line ending string, call
205 * <code>setLineSeparator(System.getProperty("line.separator"))</code>
206 * <p/>
207 * <p/>
208 * To output "UNIX-style" documents, call
209 * <code>setLineSeparator("\n")</code>. To output "Mac-style"
210 * documents, call <code>setLineSeparator("\r")</code>. DOS-style
211 * documents use CR-LF ("\r\n"), which is the default.
212 * </p>
213 * <p/>
214 * <p/>
215 * Note that this only applies to newlines generated by the
216 * outputter. If you parse an XML document that contains newlines
217 * embedded inside a text node, and you do not set TextMode.NORMALIZE,
218 * then the newlines will be output
219 * verbatim, as "\n" which is how parsers normalize them.
220 * </p>
221 *
222 * @param separator <code>String</code> line separator to use.
223 * @return a pointer to this Format for chaining
224 * @see #setTextMode
225 */
226 public Format setLineSeparator( String separator )
227 {
228 this.lineSeparator = separator;
229 return this;
230 }
231
232 /**
233 * Returns the current line separator.
234 *
235 * @return the current line separator
236 */
237 public String getLineSeparator()
238 {
239 return lineSeparator;
240 }
241
242 /**
243 * This will set whether the XML declaration
244 * (<code><?xml version="1.0"
245 * encoding="UTF-8"?></code>)
246 * includes the encoding of the document. It is common to omit
247 * this in uses such as WML and other wireless device protocols.
248 *
249 * @param omitEncoding <code>boolean</code> indicating whether or not
250 * the XML declaration should indicate the document encoding.
251 * @return a pointer to this Format for chaining
252 */
253 public Format setOmitEncoding( boolean omitEncoding )
254 {
255 this.omitEncoding = omitEncoding;
256 return this;
257 }
258
259 /**
260 * Returns whether the XML declaration encoding will be omitted.
261 *
262 * @return whether the XML declaration encoding will be omitted
263 */
264 public boolean getOmitEncoding()
265 {
266 return omitEncoding;
267 }
268
269 /**
270 * This will set whether the XML declaration
271 * (<code><?xml version="1.0"?gt;</code>)
272 * will be omitted or not. It is common to omit this in uses such
273 * as SOAP and XML-RPC calls.
274 *
275 * @param omitDeclaration <code>boolean</code> indicating whether or not
276 * the XML declaration should be omitted.
277 * @return a pointer to this Format for chaining
278 */
279 public Format setOmitDeclaration( boolean omitDeclaration )
280 {
281 this.omitDeclaration = omitDeclaration;
282 return this;
283 }
284
285 /**
286 * Returns whether the XML declaration will be omitted.
287 *
288 * @return whether the XML declaration will be omitted
289 */
290 public boolean getOmitDeclaration()
291 {
292 return omitDeclaration;
293 }
294
295 /**
296 * This will set whether empty elements are expanded from
297 * <code><tagName/></code> to
298 * <code><tagName></tagName></code>.
299 *
300 * @param expandEmptyElements <code>boolean</code> indicating whether or not
301 * empty elements should be expanded.
302 * @return a pointer to this Format for chaining
303 */
304 public Format setExpandEmptyElements( boolean expandEmptyElements )
305 {
306 this.expandEmptyElements = expandEmptyElements;
307 return this;
308 }
309
310 /**
311 * Returns whether empty elements are expanded.
312 *
313 * @return whether empty elements are expanded
314 */
315 public boolean getExpandEmptyElements()
316 {
317 return expandEmptyElements;
318 }
319
320 /**
321 * This will set whether JAXP TrAX processing instructions for
322 * disabling/enabling output escaping are ignored. Disabling
323 * output escaping allows using XML text as element content and
324 * outputing it verbatim, i.e. as element children would be.
325 * <p/>
326 * When processed, these processing instructions are removed from
327 * the generated XML text and control whether the element text
328 * content is output verbatim or with escaping of the pre-defined
329 * entities in XML 1.0. The text to be output verbatim shall be
330 * surrounded by the
331 * <code><?javax.xml.transform.disable-output-escaping ?></code>
332 * and <code><?javax.xml.transform.enable-output-escaping ?></code>
333 * PIs.</p>
334 * <p/>
335 * When ignored, the processing instructions are present in the
336 * generated XML text and the pre-defined entities in XML 1.0 are
337 * escaped.
338 * <p/>
339 * Default: <code>false</code>.</p>
340 *
341 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
342 * whether or not TrAX ouput escaping PIs are ignored.
343 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
344 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
345 */
346 public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs )
347 {
348 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
349 }
350
351 /**
352 * Returns whether JAXP TrAX processing instructions for
353 * disabling/enabling output escaping are ignored.
354 *
355 * @return whether or not TrAX ouput escaping PIs are ignored.
356 */
357 public boolean getIgnoreTrAXEscapingPIs()
358 {
359 return ignoreTrAXEscapingPIs;
360 }
361
362 /**
363 * This sets the text output style. Options are available as static
364 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
365 *
366 * @return a pointer to this Format for chaining
367 */
368 public Format setTextMode( Format.TextMode mode )
369 {
370 this.mode = mode;
371 return this;
372 }
373
374 /**
375 * Returns the current text output style.
376 *
377 * @return the current text output style
378 */
379 public Format.TextMode getTextMode()
380 {
381 return mode;
382 }
383
384 /**
385 * This will set the indent <code>String</code> to use; this
386 * is usually a <code>String</code> of empty spaces. If you pass
387 * null, or the empty string (""), then no indentation will
388 * happen. Default: none (null)
389 *
390 * @param indent <code>String</code> to use for indentation.
391 * @return a pointer to this Format for chaining
392 */
393 public Format setIndent( String indent )
394 {
395 // if passed the empty string, change it to null, for marginal
396 // performance gains later (can compare to null first instead
397 // of calling equals())
398 if ( "".equals( indent ) )
399 {
400 indent = null;
401 }
402 this.indent = indent;
403 return this;
404 }
405
406 /**
407 * Returns the indent string in use.
408 *
409 * @return the indent string in use
410 */
411 public String getIndent()
412 {
413 return indent;
414 }
415
416 /**
417 * Sets the output encoding. The name should be an accepted XML
418 * encoding.
419 *
420 * @param encoding the encoding format. Use XML-style names like
421 * "UTF-8" or "ISO-8859-1" or "US-ASCII"
422 * @return a pointer to this Format for chaining
423 */
424 public Format setEncoding( String encoding )
425 {
426 this.encoding = encoding;
427 escapeStrategy = new DefaultEscapeStrategy( encoding );
428 return this;
429 }
430
431 /**
432 * Returns the configured output encoding.
433 *
434 * @return the output encoding
435 */
436 public String getEncoding()
437 {
438 return encoding;
439 }
440
441 protected Object clone()
442 {
443 Format format = null;
444
445 try
446 {
447 format = (Format) super.clone();
448 }
449 catch ( CloneNotSupportedException ce )
450 {
451 }
452
453 return format;
454 }
455
456
457 /**
458 * Handle common charsets quickly and easily. Use reflection
459 * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
460 * If JDK 1.4 isn't around, default to no special encoding.
461 */
462 class DefaultEscapeStrategy
463 implements EscapeStrategy
464 {
465 private int bits;
466 Object encoder;
467 Method canEncode;
468
469 public DefaultEscapeStrategy( String encoding )
470 {
471 if ( "UTF-8".equalsIgnoreCase( encoding )
472 || "UTF-16".equalsIgnoreCase( encoding ) )
473 {
474 bits = 16;
475 }
476 else if ( "ISO-8859-1".equalsIgnoreCase( encoding )
477 || "Latin1".equalsIgnoreCase( encoding ) )
478 {
479 bits = 8;
480 }
481 else if ( "US-ASCII".equalsIgnoreCase( encoding )
482 || "ASCII".equalsIgnoreCase( encoding ) )
483 {
484 bits = 7;
485 }
486 else
487 {
488 bits = 0;
489 //encoder = Charset.forName(encoding).newEncoder();
490 try
491 {
492 Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" );
493 Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" );
494 Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } );
495 Object charsetObj = forName.invoke( null, new Object[] { encoding } );
496 Method newEncoder = charsetClass.getMethod( "newEncoder" );
497 encoder = newEncoder.invoke( charsetObj );
498 canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } );
499 }
500 catch ( Exception ignored )
501 {
502 }
503 }
504 }
505
506 public boolean shouldEscape( char ch )
507 {
508 if ( bits == 16 )
509 {
510 return false;
511 }
512 if ( bits == 8 )
513 {
514 return ( (int) ch > 255 );
515 }
516 if ( bits == 7 )
517 {
518 return ( (int) ch > 127 );
519 }
520 else
521 {
522 if ( canEncode != null && encoder != null )
523 {
524 try
525 {
526 Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } );
527 return !val.booleanValue();
528 }
529 catch ( Exception ignored )
530 {
531 }
532 }
533 // Return false if we don't know. This risks not escaping
534 // things which should be escaped, but also means people won't
535 // start getting loads of unnecessary escapes.
536 return false;
537 }
538 }
539 }
540
541
542 /**
543 * Class to signify how text should be handled on output. The following
544 * table provides details.
545 * <p/>
546 * <table>
547 * <tr>
548 * <th align="left">
549 * Text Mode
550 * </th>
551 * <th>
552 * Resulting behavior.
553 * </th>
554 * </tr>
555 * <p/>
556 * <tr valign="top">
557 * <td>
558 * <i>PRESERVE (Default)</i>
559 * </td>
560 * <td>
561 * All content is printed in the format it was created, no whitespace
562 * or line separators are are added or removed.
563 * </td>
564 * </tr>
565 * <p/>
566 * <tr valign="top">
567 * <td>
568 * TRIM_FULL_WHITE
569 * </td>
570 * <td>
571 * Content between tags consisting of all whitespace is not printed.
572 * If the content contains even one non-whitespace character, it is
573 * printed verbatim, whitespace and all.
574 * </td>
575 * </tr>
576 * <p/>
577 * <tr valign="top">
578 * <td>
579 * TRIM
580 * </td>
581 * <td>
582 * Same as TrimAllWhite, plus leading/trailing whitespace are
583 * trimmed.
584 * </td>
585 * </tr>
586 * <p/>
587 * <tr valign="top">
588 * <td>
589 * NORMALIZE
590 * </td>
591 * <td>
592 * Same as TextTrim, plus addition interior whitespace is compressed
593 * to a single space.
594 * </td>
595 * </tr>
596 * </table>
597 * <p/>
598 * In most cases textual content is aligned with the surrounding tags
599 * (after the appropriate text mode is applied). In the case where the only
600 * content between the start and end tags is textual, the start tag, text,
601 * and end tag are all printed on the same line. If the document being
602 * output already has whitespace, it's wise to turn on TRIM mode so the
603 * pre-existing whitespace can be trimmed before adding new whitespace.
604 * <p/>
605 * When a element has a xml:space attribute with the value of "preserve",
606 * all formating is turned off and reverts back to the default until the
607 * element and its contents have been printed. If a nested element contains
608 * another xml:space with the value "default" formatting is turned back on
609 * for the child element and then off for the remainder of the parent
610 * element.
611 */
612 public static class TextMode
613 {
614 /** Mode for literal text preservation. */
615 public static final TextMode PRESERVE = new TextMode( "PRESERVE" );
616
617 /** Mode for text trimming (left and right trim). */
618 public static final TextMode TRIM = new TextMode( "TRIM" );
619
620 /**
621 * Mode for text normalization (left and right trim plus internal
622 * whitespace is normalized to a single space.
623 *
624 * @see org.jdom.Element#getTextNormalize
625 */
626 public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" );
627
628 /**
629 * Mode for text trimming of content consisting of nothing but
630 * whitespace but otherwise not changing output.
631 */
632 public static final TextMode TRIM_FULL_WHITE =
633 new TextMode( "TRIM_FULL_WHITE" );
634
635 private final String name;
636
637 private TextMode( String name )
638 {
639 this.name = name;
640 }
641
642 public String toString()
643 {
644 return name;
645 }
646 }
647 }
648