1 package org.apache.maven.archetype.common.util;
2
3 /*
4 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions, and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions, and the disclaimer that follows
16 * these conditions in the documentation and/or other materials
17 * provided with the distribution.
18 *
19 * 3. The name "JDOM" must not be used to endorse or promote products
20 * derived from this software without prior written permission. For
21 * written permission, please contact <request_AT_jdom_DOT_org>.
22 *
23 * 4. Products derived from this software may not be called "JDOM", nor
24 * may "JDOM" appear in their name, without prior written permission
25 * from the JDOM Project Management <request_AT_jdom_DOT_org>.
26 *
27 * In addition, we request (but do not require) that you include in the
28 * end-user documentation provided with the redistribution and/or in the
29 * software itself an acknowledgement equivalent to the following:
30 * "This product includes software developed by the
31 * JDOM Project (http://www.jdom.org/)."
32 * Alternatively, the acknowledgment may be graphical using the logos
33 * available at http://www.jdom.org/images/logos.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
39 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 *
48 * This software consists of voluntary contributions made by many
49 * individuals on behalf of the JDOM Project and was originally
50 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
51 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
52 * on the JDOM Project, please see <http://www.jdom.org/>.
53 */
54
55 import org.jdom.output.EscapeStrategy;
56
57 import java.lang.reflect.Method;
58
59 /**
60 * <p>Class to encapsulate XMLOutputter format options.
61 * Typical users can use the standard format configurations obtained by
62 * {@link #getRawFormat} (no whitespace changes),
63 * {@link #getPrettyFormat} (whitespace beautification), and
64 * {@link #getCompactFormat} (whitespace normalization).</p>
65 *
66 * <p>Several modes are available to effect the way textual content is printed.
67 * See the documentation for {@link TextMode} for details.</p>
68 *
69 * @author Jason Hunter
70 */
71 public class Format
72 implements Cloneable
73 {
74
75 /**
76 * Returns a new Format object that performs no whitespace changes, uses
77 * the UTF-8 encoding, doesn't expand empty elements, includes the
78 * declaration and encoding, and uses the default entity escape strategy.
79 * Tweaks can be made to the returned Format instance without affecting
80 * other instances.
81 *
82 * @return a Format with no whitespace changes
83 */
84 public static Format getRawFormat()
85 {
86 return new Format();
87 }
88
89 /**
90 * Returns a new Format object that performs whitespace beautification with
91 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
92 * includes the declaration and encoding, and uses the default entity
93 * escape strategy.
94 * Tweaks can be made to the returned Format instance without affecting
95 * other instances.
96 *
97 * @return a Format with whitespace beautification
98 */
99 public static Format getPrettyFormat()
100 {
101 Format f = new Format();
102 f.setIndent( STANDARD_INDENT );
103 f.setTextMode( TextMode.TRIM );
104 return f;
105 }
106
107 /**
108 * Returns a new Format object that performs whitespace normalization, uses
109 * the UTF-8 encoding, doesn't expand empty elements, includes the
110 * declaration and encoding, and uses the default entity escape strategy.
111 * Tweaks can be made to the returned Format instance without affecting
112 * other instances.
113 *
114 * @return a Format with whitespace normalization
115 */
116 public static Format getCompactFormat()
117 {
118 Format f = new Format();
119 f.setTextMode( TextMode.NORMALIZE );
120 return f;
121 }
122
123 /** standard value to indent by, if we are indenting */
124 private static final String STANDARD_INDENT = " ";
125
126 /** standard string with which to end a line */
127 private static final String STANDARD_LINE_SEPARATOR = "\r\n";
128
129 /** standard encoding */
130 private static final String STANDARD_ENCODING = "UTF-8";
131
132
133 /** The default indent is no spaces (as original document) */
134 String indent = null;
135
136 /** New line separator */
137 String lineSeparator = STANDARD_LINE_SEPARATOR;
138
139 /** The encoding format */
140 String encoding = STANDARD_ENCODING;
141
142 /**
143 * Whether or not to output the XML declaration
144 * - default is <code>false</code>
145 */
146 boolean omitDeclaration = false;
147
148 /**
149 * Whether or not to output the encoding in the XML declaration
150 * - default is <code>false</code>
151 */
152 boolean omitEncoding = false;
153
154 /**
155 * Whether or not to expand empty elements to
156 * <tagName></tagName> - default is <code>false</code>
157 */
158 boolean expandEmptyElements = false;
159
160 /**
161 * Whether TrAX output escaping disabling/enabling PIs are ignored
162 * or processed - default is <code>false</code>
163 */
164 boolean ignoreTrAXEscapingPIs = false;
165
166 /** text handling mode */
167 TextMode mode = TextMode.PRESERVE;
168
169 /** entity escape logic */
170 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding );
171
172 /** Creates a new Format instance with default (raw) behavior. */
173 private Format()
174 {
175 }
176
177 /**
178 * Sets the {@link EscapeStrategy} to use for character escaping.
179 *
180 * @param strategy the EscapeStrategy to use
181 * @return a pointer to this Format for chaining
182 */
183 public Format setEscapeStrategy( EscapeStrategy strategy )
184 {
185 escapeStrategy = strategy;
186 return this;
187 }
188
189 /**
190 * Returns the current escape strategy
191 *
192 * @return the current escape strategy
193 */
194 public EscapeStrategy getEscapeStrategy()
195 {
196 return escapeStrategy;
197 }
198
199 /**
200 * <p>This will set the newline separator (<code>lineSeparator</code>).
201 * The default is <code>\r\n</code>. Note that if the "newlines"
202 * property is false, this value is irrelevant. To make it output
203 * the system default line ending string, call
204 * <code>setLineSeparator(System.getProperty("line.separator"))</code></p>
205 *
206 * <p>To output "UNIX-style" documents, call
207 * <code>setLineSeparator("\n")</code>. To output "Mac-style"
208 * documents, call <code>setLineSeparator("\r")</code>. DOS-style
209 * documents use CR-LF ("\r\n"), which is the default.</p>
210 *
211 * <p>Note that this only applies to newlines generated by the
212 * outputter. If you parse an XML document that contains newlines
213 * embedded inside a text node, and you do not set TextMode.NORMALIZE,
214 * then the newlines will be output
215 * verbatim, as "\n" which is how parsers normalize them.
216 * </p>
217 *
218 * @param separator <code>String</code> line separator to use.
219 * @return a pointer to this Format for chaining
220 * @see #setTextMode
221 */
222 public Format setLineSeparator( String separator )
223 {
224 this.lineSeparator = separator;
225 return this;
226 }
227
228 /**
229 * Returns the current line separator.
230 *
231 * @return the current line separator
232 */
233 public String getLineSeparator()
234 {
235 return lineSeparator;
236 }
237
238 /**
239 * This will set whether the XML declaration
240 * (<code><?xml version="1.0"
241 * encoding="UTF-8"?></code>)
242 * includes the encoding of the document. It is common to omit
243 * this in uses such as WML and other wireless device protocols.
244 *
245 * @param omitEncoding <code>boolean</code> indicating whether or not
246 * the XML declaration should indicate the document encoding.
247 * @return a pointer to this Format for chaining
248 */
249 public Format setOmitEncoding( boolean omitEncoding )
250 {
251 this.omitEncoding = omitEncoding;
252 return this;
253 }
254
255 /**
256 * Returns whether the XML declaration encoding will be omitted.
257 *
258 * @return whether the XML declaration encoding will be omitted
259 */
260 public boolean getOmitEncoding()
261 {
262 return omitEncoding;
263 }
264
265 /**
266 * This will set whether the XML declaration
267 * (<code><?xml version="1.0"?gt;</code>)
268 * will be omitted or not. It is common to omit this in uses such
269 * as SOAP and XML-RPC calls.
270 *
271 * @param omitDeclaration <code>boolean</code> indicating whether or not
272 * the XML declaration should be omitted.
273 * @return a pointer to this Format for chaining
274 */
275 public Format setOmitDeclaration( boolean omitDeclaration )
276 {
277 this.omitDeclaration = omitDeclaration;
278 return this;
279 }
280
281 /**
282 * Returns whether the XML declaration will be omitted.
283 *
284 * @return whether the XML declaration will be omitted
285 */
286 public boolean getOmitDeclaration()
287 {
288 return omitDeclaration;
289 }
290
291 /**
292 * This will set whether empty elements are expanded from
293 * <code><tagName/></code> to
294 * <code><tagName></tagName></code>.
295 *
296 * @param expandEmptyElements <code>boolean</code> indicating whether or not
297 * empty elements should be expanded.
298 * @return a pointer to this Format for chaining
299 */
300 public Format setExpandEmptyElements( boolean expandEmptyElements )
301 {
302 this.expandEmptyElements = expandEmptyElements;
303 return this;
304 }
305
306 /**
307 * Returns whether empty elements are expanded.
308 *
309 * @return whether empty elements are expanded
310 */
311 public boolean getExpandEmptyElements()
312 {
313 return expandEmptyElements;
314 }
315
316 /**
317 * <p>This will set whether JAXP TrAX processing instructions for
318 * disabling/enabling output escaping are ignored. Disabling
319 * output escaping allows using XML text as element content and
320 * outputing it verbatim, i.e. as element children would be.</p>
321 *
322 * <p>When processed, these processing instructions are removed from
323 * the generated XML text and control whether the element text
324 * content is output verbatim or with escaping of the pre-defined
325 * entities in XML 1.0. The text to be output verbatim shall be
326 * surrounded by the
327 * <code><?javax.xml.transform.disable-output-escaping ?></code>
328 * and <code><?javax.xml.transform.enable-output-escaping ?></code>
329 * PIs.</p>
330 *
331 * <p>When ignored, the processing instructions are present in the
332 * generated XML text and the pre-defined entities in XML 1.0 are
333 * escaped.</p>
334 *
335 * Default: <code>false</code>.
336 *
337 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
338 * whether or not TrAX ouput escaping PIs are ignored.
339 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
340 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
341 */
342 public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs )
343 {
344 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
345 }
346
347 /**
348 * Returns whether JAXP TrAX processing instructions for
349 * disabling/enabling output escaping are ignored.
350 *
351 * @return whether or not TrAX ouput escaping PIs are ignored.
352 */
353 public boolean getIgnoreTrAXEscapingPIs()
354 {
355 return ignoreTrAXEscapingPIs;
356 }
357
358 /**
359 * This sets the text output style. Options are available as static
360 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
361 *
362 * @return a pointer to this Format for chaining
363 */
364 public Format setTextMode( Format.TextMode mode )
365 {
366 this.mode = mode;
367 return this;
368 }
369
370 /**
371 * Returns the current text output style.
372 *
373 * @return the current text output style
374 */
375 public Format.TextMode getTextMode()
376 {
377 return mode;
378 }
379
380 /**
381 * This will set the indent <code>String</code> to use; this
382 * is usually a <code>String</code> of empty spaces. If you pass
383 * null, or the empty string (""), then no indentation will
384 * happen. Default: none (null)
385 *
386 * @param indent <code>String</code> to use for indentation.
387 * @return a pointer to this Format for chaining
388 */
389 public Format setIndent( String indent )
390 {
391 // if passed the empty string, change it to null, for marginal
392 // performance gains later (can compare to null first instead
393 // of calling equals())
394 if ( "".equals( indent ) )
395 {
396 indent = null;
397 }
398 this.indent = indent;
399 return this;
400 }
401
402 /**
403 * Returns the indent string in use.
404 *
405 * @return the indent string in use
406 */
407 public String getIndent()
408 {
409 return indent;
410 }
411
412 /**
413 * Sets the output encoding. The name should be an accepted XML
414 * encoding.
415 *
416 * @param encoding the encoding format. Use XML-style names like
417 * "UTF-8" or "ISO-8859-1" or "US-ASCII"
418 * @return a pointer to this Format for chaining
419 */
420 public Format setEncoding( String encoding )
421 {
422 this.encoding = encoding;
423 escapeStrategy = new DefaultEscapeStrategy( encoding );
424 return this;
425 }
426
427 /**
428 * Returns the configured output encoding.
429 *
430 * @return the output encoding
431 */
432 public String getEncoding()
433 {
434 return encoding;
435 }
436
437 @Override
438 protected Object clone()
439 {
440 Format format = null;
441
442 try
443 {
444 format = (Format) super.clone();
445 }
446 catch ( CloneNotSupportedException ce )
447 {
448 }
449
450 return format;
451 }
452
453
454 /**
455 * Handle common charsets quickly and easily. Use reflection
456 * to query the JDK 1.4 CharsetEncoder class for unknown charsets.
457 * If JDK 1.4 isn't around, default to no special encoding.
458 */
459 class DefaultEscapeStrategy
460 implements EscapeStrategy
461 {
462 private int bits;
463 Object encoder;
464 Method canEncode;
465
466 public DefaultEscapeStrategy( String encoding )
467 {
468 if ( "UTF-8".equalsIgnoreCase( encoding )
469 || "UTF-16".equalsIgnoreCase( encoding ) )
470 {
471 bits = 16;
472 }
473 else if ( "ISO-8859-1".equalsIgnoreCase( encoding )
474 || "Latin1".equalsIgnoreCase( encoding ) )
475 {
476 bits = 8;
477 }
478 else if ( "US-ASCII".equalsIgnoreCase( encoding )
479 || "ASCII".equalsIgnoreCase( encoding ) )
480 {
481 bits = 7;
482 }
483 else
484 {
485 bits = 0;
486 //encoder = Charset.forName(encoding).newEncoder();
487 try
488 {
489 Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" );
490 Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" );
491 Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } );
492 Object charsetObj = forName.invoke( null, new Object[] { encoding } );
493 Method newEncoder = charsetClass.getMethod( "newEncoder" );
494 encoder = newEncoder.invoke( charsetObj );
495 canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } );
496 }
497 catch ( Exception ignored )
498 {
499 }
500 }
501 }
502
503 @Override
504 public boolean shouldEscape( char ch )
505 {
506 if ( bits == 16 )
507 {
508 return false;
509 }
510 if ( bits == 8 )
511 {
512 return ( ch > 255 );
513 }
514 if ( bits == 7 )
515 {
516 return ( ch > 127 );
517 }
518 else
519 {
520 if ( canEncode != null && encoder != null )
521 {
522 try
523 {
524 Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } );
525 return !val.booleanValue();
526 }
527 catch ( Exception ignored )
528 {
529 }
530 }
531 // Return false if we don't know. This risks not escaping
532 // things which should be escaped, but also means people won't
533 // start getting loads of unnecessary escapes.
534 return false;
535 }
536 }
537 }
538
539
540 /**
541 * <p>
542 * Class to signify how text should be handled on output. The following
543 * table provides details.</p>
544 * <table>
545 * <caption>TextMode details</caption>
546 * <tr>
547 * <th align="left">
548 * Text Mode
549 * </th>
550 * <th>
551 * Resulting behavior.
552 * </th>
553 * </tr>
554 * <tr valign="top">
555 * <td>
556 * <i>PRESERVE (Default)</i>
557 * </td>
558 * <td>
559 * All content is printed in the format it was created, no whitespace
560 * or line separators are are added or removed.
561 * </td>
562 * </tr>
563 * <tr valign="top">
564 * <td>
565 * TRIM_FULL_WHITE
566 * </td>
567 * <td>
568 * Content between tags consisting of all whitespace is not printed.
569 * If the content contains even one non-whitespace character, it is
570 * printed verbatim, whitespace and all.
571 * </td>
572 * </tr>
573 * <tr valign="top">
574 * <td>
575 * TRIM
576 * </td>
577 * <td>
578 * Same as TrimAllWhite, plus leading/trailing whitespace are
579 * trimmed.
580 * </td>
581 * </tr>
582 * <tr valign="top">
583 * <td>
584 * NORMALIZE
585 * </td>
586 * <td>
587 * Same as TextTrim, plus addition interior whitespace is compressed
588 * to a single space.
589 * </td>
590 * </tr>
591 * </table>
592 *
593 * <p>In most cases textual content is aligned with the surrounding tags
594 * (after the appropriate text mode is applied). In the case where the only
595 * content between the start and end tags is textual, the start tag, text,
596 * and end tag are all printed on the same line. If the document being
597 * output already has whitespace, it's wise to turn on TRIM mode so the
598 * pre-existing whitespace can be trimmed before adding new whitespace.</p>
599 *
600 * <p>When a element has a xml:space attribute with the value of "preserve",
601 * all formating is turned off and reverts back to the default until the
602 * element and its contents have been printed. If a nested element contains
603 * another xml:space with the value "default" formatting is turned back on
604 * for the child element and then off for the remainder of the parent
605 * element.</p>
606 */
607 public static class TextMode
608 {
609 /** Mode for literal text preservation. */
610 public static final TextMode PRESERVE = new TextMode( "PRESERVE" );
611
612 /** Mode for text trimming (left and right trim). */
613 public static final TextMode TRIM = new TextMode( "TRIM" );
614
615 /**
616 * Mode for text normalization (left and right trim plus internal
617 * whitespace is normalized to a single space.
618 *
619 * @see org.jdom.Element#getTextNormalize
620 */
621 public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" );
622
623 /**
624 * Mode for text trimming of content consisting of nothing but
625 * whitespace but otherwise not changing output.
626 */
627 public static final TextMode TRIM_FULL_WHITE =
628 new TextMode( "TRIM_FULL_WHITE" );
629
630 private final String name;
631
632 private TextMode( String name )
633 {
634 this.name = name;
635 }
636
637 @Override
638 public String toString()
639 {
640 return name;
641 }
642 }
643 }
644