1 package org.apache.maven.archetype.common.util; 2 3 /* 4 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions, and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions, and the disclaimer that follows 16 * these conditions in the documentation and/or other materials 17 * provided with the distribution. 18 * 19 * 3. The name "JDOM" must not be used to endorse or promote products 20 * derived from this software without prior written permission. For 21 * written permission, please contact <request_AT_jdom_DOT_org>. 22 * 23 * 4. Products derived from this software may not be called "JDOM", nor 24 * may "JDOM" appear in their name, without prior written permission 25 * from the JDOM Project Management <request_AT_jdom_DOT_org>. 26 * 27 * In addition, we request (but do not require) that you include in the 28 * end-user documentation provided with the redistribution and/or in the 29 * software itself an acknowledgement equivalent to the following: 30 * "This product includes software developed by the 31 * JDOM Project (http://www.jdom.org/)." 32 * Alternatively, the acknowledgment may be graphical using the logos 33 * available at http://www.jdom.org/images/logos. 34 * 35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 38 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT 39 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 * 48 * This software consists of voluntary contributions made by many 49 * individuals on behalf of the JDOM Project and was originally 50 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and 51 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information 52 * on the JDOM Project, please see <http://www.jdom.org/>. 53 */ 54 55 import org.jdom.output.EscapeStrategy; 56 57 import java.lang.reflect.Method; 58 59 /** 60 * <p>Class to encapsulate XMLOutputter format options. 61 * Typical users can use the standard format configurations obtained by 62 * {@link #getRawFormat} (no whitespace changes), 63 * {@link #getPrettyFormat} (whitespace beautification), and 64 * {@link #getCompactFormat} (whitespace normalization).</p> 65 * 66 * <p>Several modes are available to effect the way textual content is printed. 67 * See the documentation for {@link TextMode} for details.</p> 68 * 69 * @author Jason Hunter 70 */ 71 public class Format 72 implements Cloneable 73 { 74 75 /** 76 * Returns a new Format object that performs no whitespace changes, uses 77 * the UTF-8 encoding, doesn't expand empty elements, includes the 78 * declaration and encoding, and uses the default entity escape strategy. 79 * Tweaks can be made to the returned Format instance without affecting 80 * other instances. 81 * 82 * @return a Format with no whitespace changes 83 */ 84 public static Format getRawFormat() 85 { 86 return new Format(); 87 } 88 89 /** 90 * Returns a new Format object that performs whitespace beautification with 91 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements, 92 * includes the declaration and encoding, and uses the default entity 93 * escape strategy. 94 * Tweaks can be made to the returned Format instance without affecting 95 * other instances. 96 * 97 * @return a Format with whitespace beautification 98 */ 99 public static Format getPrettyFormat() 100 { 101 Format f = new Format(); 102 f.setIndent( STANDARD_INDENT ); 103 f.setTextMode( TextMode.TRIM ); 104 return f; 105 } 106 107 /** 108 * Returns a new Format object that performs whitespace normalization, uses 109 * the UTF-8 encoding, doesn't expand empty elements, includes the 110 * declaration and encoding, and uses the default entity escape strategy. 111 * Tweaks can be made to the returned Format instance without affecting 112 * other instances. 113 * 114 * @return a Format with whitespace normalization 115 */ 116 public static Format getCompactFormat() 117 { 118 Format f = new Format(); 119 f.setTextMode( TextMode.NORMALIZE ); 120 return f; 121 } 122 123 /** standard value to indent by, if we are indenting */ 124 private static final String STANDARD_INDENT = " "; 125 126 /** standard string with which to end a line */ 127 private static final String STANDARD_LINE_SEPARATOR = "\r\n"; 128 129 /** standard encoding */ 130 private static final String STANDARD_ENCODING = "UTF-8"; 131 132 133 /** The default indent is no spaces (as original document) */ 134 String indent = null; 135 136 /** New line separator */ 137 String lineSeparator = STANDARD_LINE_SEPARATOR; 138 139 /** The encoding format */ 140 String encoding = STANDARD_ENCODING; 141 142 /** 143 * Whether or not to output the XML declaration 144 * - default is <code>false</code> 145 */ 146 boolean omitDeclaration = false; 147 148 /** 149 * Whether or not to output the encoding in the XML declaration 150 * - default is <code>false</code> 151 */ 152 boolean omitEncoding = false; 153 154 /** 155 * Whether or not to expand empty elements to 156 * <tagName></tagName> - default is <code>false</code> 157 */ 158 boolean expandEmptyElements = false; 159 160 /** 161 * Whether TrAX output escaping disabling/enabling PIs are ignored 162 * or processed - default is <code>false</code> 163 */ 164 boolean ignoreTrAXEscapingPIs = false; 165 166 /** text handling mode */ 167 TextMode mode = TextMode.PRESERVE; 168 169 /** entity escape logic */ 170 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding ); 171 172 /** Creates a new Format instance with default (raw) behavior. */ 173 private Format() 174 { 175 } 176 177 /** 178 * Sets the {@link EscapeStrategy} to use for character escaping. 179 * 180 * @param strategy the EscapeStrategy to use 181 * @return a pointer to this Format for chaining 182 */ 183 public Format setEscapeStrategy( EscapeStrategy strategy ) 184 { 185 escapeStrategy = strategy; 186 return this; 187 } 188 189 /** 190 * Returns the current escape strategy 191 * 192 * @return the current escape strategy 193 */ 194 public EscapeStrategy getEscapeStrategy() 195 { 196 return escapeStrategy; 197 } 198 199 /** 200 * <p>This will set the newline separator (<code>lineSeparator</code>). 201 * The default is <code>\r\n</code>. Note that if the "newlines" 202 * property is false, this value is irrelevant. To make it output 203 * the system default line ending string, call 204 * <code>setLineSeparator(System.getProperty("line.separator"))</code></p> 205 * 206 * <p>To output "UNIX-style" documents, call 207 * <code>setLineSeparator("\n")</code>. To output "Mac-style" 208 * documents, call <code>setLineSeparator("\r")</code>. DOS-style 209 * documents use CR-LF ("\r\n"), which is the default.</p> 210 * 211 * <p>Note that this only applies to newlines generated by the 212 * outputter. If you parse an XML document that contains newlines 213 * embedded inside a text node, and you do not set TextMode.NORMALIZE, 214 * then the newlines will be output 215 * verbatim, as "\n" which is how parsers normalize them. 216 * </p> 217 * 218 * @param separator <code>String</code> line separator to use. 219 * @return a pointer to this Format for chaining 220 * @see #setTextMode 221 */ 222 public Format setLineSeparator( String separator ) 223 { 224 this.lineSeparator = separator; 225 return this; 226 } 227 228 /** 229 * Returns the current line separator. 230 * 231 * @return the current line separator 232 */ 233 public String getLineSeparator() 234 { 235 return lineSeparator; 236 } 237 238 /** 239 * This will set whether the XML declaration 240 * (<code><?xml version="1.0" 241 * encoding="UTF-8"?></code>) 242 * includes the encoding of the document. It is common to omit 243 * this in uses such as WML and other wireless device protocols. 244 * 245 * @param omitEncoding <code>boolean</code> indicating whether or not 246 * the XML declaration should indicate the document encoding. 247 * @return a pointer to this Format for chaining 248 */ 249 public Format setOmitEncoding( boolean omitEncoding ) 250 { 251 this.omitEncoding = omitEncoding; 252 return this; 253 } 254 255 /** 256 * Returns whether the XML declaration encoding will be omitted. 257 * 258 * @return whether the XML declaration encoding will be omitted 259 */ 260 public boolean getOmitEncoding() 261 { 262 return omitEncoding; 263 } 264 265 /** 266 * This will set whether the XML declaration 267 * (<code><?xml version="1.0"?gt;</code>) 268 * will be omitted or not. It is common to omit this in uses such 269 * as SOAP and XML-RPC calls. 270 * 271 * @param omitDeclaration <code>boolean</code> indicating whether or not 272 * the XML declaration should be omitted. 273 * @return a pointer to this Format for chaining 274 */ 275 public Format setOmitDeclaration( boolean omitDeclaration ) 276 { 277 this.omitDeclaration = omitDeclaration; 278 return this; 279 } 280 281 /** 282 * Returns whether the XML declaration will be omitted. 283 * 284 * @return whether the XML declaration will be omitted 285 */ 286 public boolean getOmitDeclaration() 287 { 288 return omitDeclaration; 289 } 290 291 /** 292 * This will set whether empty elements are expanded from 293 * <code><tagName/></code> to 294 * <code><tagName></tagName></code>. 295 * 296 * @param expandEmptyElements <code>boolean</code> indicating whether or not 297 * empty elements should be expanded. 298 * @return a pointer to this Format for chaining 299 */ 300 public Format setExpandEmptyElements( boolean expandEmptyElements ) 301 { 302 this.expandEmptyElements = expandEmptyElements; 303 return this; 304 } 305 306 /** 307 * Returns whether empty elements are expanded. 308 * 309 * @return whether empty elements are expanded 310 */ 311 public boolean getExpandEmptyElements() 312 { 313 return expandEmptyElements; 314 } 315 316 /** 317 * <p>This will set whether JAXP TrAX processing instructions for 318 * disabling/enabling output escaping are ignored. Disabling 319 * output escaping allows using XML text as element content and 320 * outputing it verbatim, i.e. as element children would be.</p> 321 * 322 * <p>When processed, these processing instructions are removed from 323 * the generated XML text and control whether the element text 324 * content is output verbatim or with escaping of the pre-defined 325 * entities in XML 1.0. The text to be output verbatim shall be 326 * surrounded by the 327 * <code><?javax.xml.transform.disable-output-escaping ?></code> 328 * and <code><?javax.xml.transform.enable-output-escaping ?></code> 329 * PIs.</p> 330 * 331 * <p>When ignored, the processing instructions are present in the 332 * generated XML text and the pre-defined entities in XML 1.0 are 333 * escaped.</p> 334 * 335 * Default: <code>false</code>. 336 * 337 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating 338 * whether or not TrAX ouput escaping PIs are ignored. 339 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING 340 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING 341 */ 342 public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs ) 343 { 344 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs; 345 } 346 347 /** 348 * Returns whether JAXP TrAX processing instructions for 349 * disabling/enabling output escaping are ignored. 350 * 351 * @return whether or not TrAX ouput escaping PIs are ignored. 352 */ 353 public boolean getIgnoreTrAXEscapingPIs() 354 { 355 return ignoreTrAXEscapingPIs; 356 } 357 358 /** 359 * This sets the text output style. Options are available as static 360 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}. 361 * 362 * @return a pointer to this Format for chaining 363 */ 364 public Format setTextMode( Format.TextMode mode ) 365 { 366 this.mode = mode; 367 return this; 368 } 369 370 /** 371 * Returns the current text output style. 372 * 373 * @return the current text output style 374 */ 375 public Format.TextMode getTextMode() 376 { 377 return mode; 378 } 379 380 /** 381 * This will set the indent <code>String</code> to use; this 382 * is usually a <code>String</code> of empty spaces. If you pass 383 * null, or the empty string (""), then no indentation will 384 * happen. Default: none (null) 385 * 386 * @param indent <code>String</code> to use for indentation. 387 * @return a pointer to this Format for chaining 388 */ 389 public Format setIndent( String indent ) 390 { 391 // if passed the empty string, change it to null, for marginal 392 // performance gains later (can compare to null first instead 393 // of calling equals()) 394 if ( "".equals( indent ) ) 395 { 396 indent = null; 397 } 398 this.indent = indent; 399 return this; 400 } 401 402 /** 403 * Returns the indent string in use. 404 * 405 * @return the indent string in use 406 */ 407 public String getIndent() 408 { 409 return indent; 410 } 411 412 /** 413 * Sets the output encoding. The name should be an accepted XML 414 * encoding. 415 * 416 * @param encoding the encoding format. Use XML-style names like 417 * "UTF-8" or "ISO-8859-1" or "US-ASCII" 418 * @return a pointer to this Format for chaining 419 */ 420 public Format setEncoding( String encoding ) 421 { 422 this.encoding = encoding; 423 escapeStrategy = new DefaultEscapeStrategy( encoding ); 424 return this; 425 } 426 427 /** 428 * Returns the configured output encoding. 429 * 430 * @return the output encoding 431 */ 432 public String getEncoding() 433 { 434 return encoding; 435 } 436 437 @Override 438 protected Object clone() 439 { 440 Format format = null; 441 442 try 443 { 444 format = (Format) super.clone(); 445 } 446 catch ( CloneNotSupportedException ce ) 447 { 448 } 449 450 return format; 451 } 452 453 454 /** 455 * Handle common charsets quickly and easily. Use reflection 456 * to query the JDK 1.4 CharsetEncoder class for unknown charsets. 457 * If JDK 1.4 isn't around, default to no special encoding. 458 */ 459 class DefaultEscapeStrategy 460 implements EscapeStrategy 461 { 462 private int bits; 463 Object encoder; 464 Method canEncode; 465 466 public DefaultEscapeStrategy( String encoding ) 467 { 468 if ( "UTF-8".equalsIgnoreCase( encoding ) 469 || "UTF-16".equalsIgnoreCase( encoding ) ) 470 { 471 bits = 16; 472 } 473 else if ( "ISO-8859-1".equalsIgnoreCase( encoding ) 474 || "Latin1".equalsIgnoreCase( encoding ) ) 475 { 476 bits = 8; 477 } 478 else if ( "US-ASCII".equalsIgnoreCase( encoding ) 479 || "ASCII".equalsIgnoreCase( encoding ) ) 480 { 481 bits = 7; 482 } 483 else 484 { 485 bits = 0; 486 //encoder = Charset.forName(encoding).newEncoder(); 487 try 488 { 489 Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" ); 490 Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" ); 491 Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } ); 492 Object charsetObj = forName.invoke( null, new Object[] { encoding } ); 493 Method newEncoder = charsetClass.getMethod( "newEncoder" ); 494 encoder = newEncoder.invoke( charsetObj ); 495 canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } ); 496 } 497 catch ( Exception ignored ) 498 { 499 } 500 } 501 } 502 503 @Override 504 public boolean shouldEscape( char ch ) 505 { 506 if ( bits == 16 ) 507 { 508 return false; 509 } 510 if ( bits == 8 ) 511 { 512 return ( ch > 255 ); 513 } 514 if ( bits == 7 ) 515 { 516 return ( ch > 127 ); 517 } 518 else 519 { 520 if ( canEncode != null && encoder != null ) 521 { 522 try 523 { 524 Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } ); 525 return !val.booleanValue(); 526 } 527 catch ( Exception ignored ) 528 { 529 } 530 } 531 // Return false if we don't know. This risks not escaping 532 // things which should be escaped, but also means people won't 533 // start getting loads of unnecessary escapes. 534 return false; 535 } 536 } 537 } 538 539 540 /** 541 * <p> 542 * Class to signify how text should be handled on output. The following 543 * table provides details.</p> 544 * <table> 545 * <caption>TextMode details</caption> 546 * <tr> 547 * <th align="left"> 548 * Text Mode 549 * </th> 550 * <th> 551 * Resulting behavior. 552 * </th> 553 * </tr> 554 * <tr valign="top"> 555 * <td> 556 * <i>PRESERVE (Default)</i> 557 * </td> 558 * <td> 559 * All content is printed in the format it was created, no whitespace 560 * or line separators are are added or removed. 561 * </td> 562 * </tr> 563 * <tr valign="top"> 564 * <td> 565 * TRIM_FULL_WHITE 566 * </td> 567 * <td> 568 * Content between tags consisting of all whitespace is not printed. 569 * If the content contains even one non-whitespace character, it is 570 * printed verbatim, whitespace and all. 571 * </td> 572 * </tr> 573 * <tr valign="top"> 574 * <td> 575 * TRIM 576 * </td> 577 * <td> 578 * Same as TrimAllWhite, plus leading/trailing whitespace are 579 * trimmed. 580 * </td> 581 * </tr> 582 * <tr valign="top"> 583 * <td> 584 * NORMALIZE 585 * </td> 586 * <td> 587 * Same as TextTrim, plus addition interior whitespace is compressed 588 * to a single space. 589 * </td> 590 * </tr> 591 * </table> 592 * 593 * <p>In most cases textual content is aligned with the surrounding tags 594 * (after the appropriate text mode is applied). In the case where the only 595 * content between the start and end tags is textual, the start tag, text, 596 * and end tag are all printed on the same line. If the document being 597 * output already has whitespace, it's wise to turn on TRIM mode so the 598 * pre-existing whitespace can be trimmed before adding new whitespace.</p> 599 * 600 * <p>When a element has a xml:space attribute with the value of "preserve", 601 * all formating is turned off and reverts back to the default until the 602 * element and its contents have been printed. If a nested element contains 603 * another xml:space with the value "default" formatting is turned back on 604 * for the child element and then off for the remainder of the parent 605 * element.</p> 606 */ 607 public static class TextMode 608 { 609 /** Mode for literal text preservation. */ 610 public static final TextMode PRESERVE = new TextMode( "PRESERVE" ); 611 612 /** Mode for text trimming (left and right trim). */ 613 public static final TextMode TRIM = new TextMode( "TRIM" ); 614 615 /** 616 * Mode for text normalization (left and right trim plus internal 617 * whitespace is normalized to a single space. 618 * 619 * @see org.jdom.Element#getTextNormalize 620 */ 621 public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" ); 622 623 /** 624 * Mode for text trimming of content consisting of nothing but 625 * whitespace but otherwise not changing output. 626 */ 627 public static final TextMode TRIM_FULL_WHITE = 628 new TextMode( "TRIM_FULL_WHITE" ); 629 630 private final String name; 631 632 private TextMode( String name ) 633 { 634 this.name = name; 635 } 636 637 @Override 638 public String toString() 639 { 640 return name; 641 } 642 } 643 } 644