1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.archetype.common.util; 20 21 /* 22 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin. 23 * All rights reserved. 24 * 25 * Redistribution and use in source and binary forms, with or without 26 * modification, are permitted provided that the following conditions 27 * are met: 28 * 29 * 1. Redistributions of source code must retain the above copyright 30 * notice, this list of conditions, and the following disclaimer. 31 * 32 * 2. Redistributions in binary form must reproduce the above copyright 33 * notice, this list of conditions, and the disclaimer that follows 34 * these conditions in the documentation and/or other materials 35 * provided with the distribution. 36 * 37 * 3. The name "JDOM" must not be used to endorse or promote products 38 * derived from this software without prior written permission. For 39 * written permission, please contact <request_AT_jdom_DOT_org>. 40 * 41 * 4. Products derived from this software may not be called "JDOM", nor 42 * may "JDOM" appear in their name, without prior written permission 43 * from the JDOM Project Management <request_AT_jdom_DOT_org>. 44 * 45 * In addition, we request (but do not require) that you include in the 46 * end-user documentation provided with the redistribution and/or in the 47 * software itself an acknowledgement equivalent to the following: 48 * "This product includes software developed by the 49 * JDOM Project (http://www.jdom.org/)." 50 * Alternatively, the acknowledgment may be graphical using the logos 51 * available at http://www.jdom.org/images/logos. 52 * 53 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 54 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 55 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 56 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT 57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 58 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 59 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 60 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 61 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 63 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * This software consists of voluntary contributions made by many 67 * individuals on behalf of the JDOM Project and was originally 68 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and 69 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information 70 * on the JDOM Project, please see <http://www.jdom.org/>. 71 */ 72 73 import java.nio.charset.Charset; 74 import java.nio.charset.CharsetEncoder; 75 76 import org.jdom2.output.EscapeStrategy; 77 78 /** 79 * <p>Class to encapsulate XMLOutputter format options. 80 * Typical users can use the standard format configurations obtained by 81 * {@link #getRawFormat} (no whitespace changes), 82 * {@link #getPrettyFormat} (whitespace beautification), and 83 * {@link #getCompactFormat} (whitespace normalization).</p> 84 * 85 * <p>Several modes are available to effect the way textual content is printed. 86 * See the documentation for {@link TextMode} for details.</p> 87 * 88 * @author Jason Hunter 89 */ 90 public class Format implements Cloneable { 91 92 /** 93 * Returns a new Format object that performs no whitespace changes, uses 94 * the UTF-8 encoding, doesn't expand empty elements, includes the 95 * declaration and encoding, and uses the default entity escape strategy. 96 * Tweaks can be made to the returned Format instance without affecting 97 * other instances. 98 * 99 * @return a Format with no whitespace changes 100 */ 101 public static Format getRawFormat() { 102 return new Format(); 103 } 104 105 /** 106 * Returns a new Format object that performs whitespace beautification with 107 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements, 108 * includes the declaration and encoding, and uses the default entity 109 * escape strategy. 110 * Tweaks can be made to the returned Format instance without affecting 111 * other instances. 112 * 113 * @return a Format with whitespace beautification 114 */ 115 public static Format getPrettyFormat() { 116 Format f = new Format(); 117 f.setIndent(STANDARD_INDENT); 118 f.setTextMode(TextMode.TRIM); 119 return f; 120 } 121 122 /** 123 * Returns a new Format object that performs whitespace normalization, uses 124 * the UTF-8 encoding, doesn't expand empty elements, includes the 125 * declaration and encoding, and uses the default entity escape strategy. 126 * Tweaks can be made to the returned Format instance without affecting 127 * other instances. 128 * 129 * @return a Format with whitespace normalization 130 */ 131 public static Format getCompactFormat() { 132 Format f = new Format(); 133 f.setTextMode(TextMode.NORMALIZE); 134 return f; 135 } 136 137 /** standard value to indent by, if we are indenting */ 138 private static final String STANDARD_INDENT = " "; 139 140 /** standard string with which to end a line */ 141 private static final String STANDARD_LINE_SEPARATOR = "\r\n"; 142 143 /** standard encoding */ 144 private static final String STANDARD_ENCODING = "UTF-8"; 145 146 /** The default indent is no spaces (as original document) */ 147 String indent = null; 148 149 /** New line separator */ 150 String lineSeparator = STANDARD_LINE_SEPARATOR; 151 152 /** The encoding format */ 153 String encoding = STANDARD_ENCODING; 154 155 /** 156 * Whether or not to output the XML declaration 157 * - default is <code>false</code> 158 */ 159 boolean omitDeclaration = false; 160 161 /** 162 * Whether or not to output the encoding in the XML declaration 163 * - default is <code>false</code> 164 */ 165 boolean omitEncoding = false; 166 167 /** 168 * Whether or not to expand empty elements to 169 * <tagName></tagName> - default is <code>false</code> 170 */ 171 boolean expandEmptyElements = false; 172 173 /** 174 * Whether TrAX output escaping disabling/enabling PIs are ignored 175 * or processed - default is <code>false</code> 176 */ 177 boolean ignoreTrAXEscapingPIs = false; 178 179 /** text handling mode */ 180 TextMode mode = TextMode.PRESERVE; 181 182 /** entity escape logic */ 183 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding); 184 185 /** Creates a new Format instance with default (raw) behavior. */ 186 private Format() {} 187 188 /** 189 * Sets the {@link EscapeStrategy} to use for character escaping. 190 * 191 * @param strategy the EscapeStrategy to use 192 * @return a pointer to this Format for chaining 193 */ 194 public Format setEscapeStrategy(EscapeStrategy strategy) { 195 escapeStrategy = strategy; 196 return this; 197 } 198 199 /** 200 * Returns the current escape strategy 201 * 202 * @return the current escape strategy 203 */ 204 public EscapeStrategy getEscapeStrategy() { 205 return escapeStrategy; 206 } 207 208 /** 209 * <p>This will set the newline separator (<code>lineSeparator</code>). 210 * The default is <code>\r\n</code>. Note that if the "newlines" 211 * property is false, this value is irrelevant. To make it output 212 * the system default line ending string, call 213 * <code>setLineSeparator(System.getProperty("line.separator"))</code></p> 214 * 215 * <p>To output "UNIX-style" documents, call 216 * <code>setLineSeparator("\n")</code>. To output "Mac-style" 217 * documents, call <code>setLineSeparator("\r")</code>. DOS-style 218 * documents use CR-LF ("\r\n"), which is the default.</p> 219 * 220 * <p>Note that this only applies to newlines generated by the 221 * outputter. If you parse an XML document that contains newlines 222 * embedded inside a text node, and you do not set TextMode.NORMALIZE, 223 * then the newlines will be output 224 * verbatim, as "\n" which is how parsers normalize them. 225 * </p> 226 * 227 * @param separator <code>String</code> line separator to use. 228 * @return a pointer to this Format for chaining 229 * @see #setTextMode 230 */ 231 public Format setLineSeparator(String separator) { 232 this.lineSeparator = separator; 233 return this; 234 } 235 236 /** 237 * Returns the current line separator. 238 * 239 * @return the current line separator 240 */ 241 public String getLineSeparator() { 242 return lineSeparator; 243 } 244 245 /** 246 * This will set whether the XML declaration 247 * (<code><?xml version="1.0" 248 * encoding="UTF-8"?></code>) 249 * includes the encoding of the document. It is common to omit 250 * this in uses such as WML and other wireless device protocols. 251 * 252 * @param omitEncoding <code>boolean</code> indicating whether or not 253 * the XML declaration should indicate the document encoding. 254 * @return a pointer to this Format for chaining 255 */ 256 public Format setOmitEncoding(boolean omitEncoding) { 257 this.omitEncoding = omitEncoding; 258 return this; 259 } 260 261 /** 262 * Returns whether the XML declaration encoding will be omitted. 263 * 264 * @return whether the XML declaration encoding will be omitted 265 */ 266 public boolean getOmitEncoding() { 267 return omitEncoding; 268 } 269 270 /** 271 * This will set whether the XML declaration 272 * (<code><?xml version="1.0"?gt;</code>) 273 * will be omitted or not. It is common to omit this in uses such 274 * as SOAP and XML-RPC calls. 275 * 276 * @param omitDeclaration <code>boolean</code> indicating whether or not 277 * the XML declaration should be omitted. 278 * @return a pointer to this Format for chaining 279 */ 280 public Format setOmitDeclaration(boolean omitDeclaration) { 281 this.omitDeclaration = omitDeclaration; 282 return this; 283 } 284 285 /** 286 * Returns whether the XML declaration will be omitted. 287 * 288 * @return whether the XML declaration will be omitted 289 */ 290 public boolean getOmitDeclaration() { 291 return omitDeclaration; 292 } 293 294 /** 295 * This will set whether empty elements are expanded from 296 * <code><tagName/></code> to 297 * <code><tagName></tagName></code>. 298 * 299 * @param expandEmptyElements <code>boolean</code> indicating whether or not 300 * empty elements should be expanded. 301 * @return a pointer to this Format for chaining 302 */ 303 public Format setExpandEmptyElements(boolean expandEmptyElements) { 304 this.expandEmptyElements = expandEmptyElements; 305 return this; 306 } 307 308 /** 309 * Returns whether empty elements are expanded. 310 * 311 * @return whether empty elements are expanded 312 */ 313 public boolean getExpandEmptyElements() { 314 return expandEmptyElements; 315 } 316 317 /** 318 * <p>This will set whether JAXP TrAX processing instructions for 319 * disabling/enabling output escaping are ignored. Disabling 320 * output escaping allows using XML text as element content and 321 * outputing it verbatim, i.e. as element children would be.</p> 322 * 323 * <p>When processed, these processing instructions are removed from 324 * the generated XML text and control whether the element text 325 * content is output verbatim or with escaping of the pre-defined 326 * entities in XML 1.0. The text to be output verbatim shall be 327 * surrounded by the 328 * <code><?javax.xml.transform.disable-output-escaping ?></code> 329 * and <code><?javax.xml.transform.enable-output-escaping ?></code> 330 * PIs.</p> 331 * 332 * <p>When ignored, the processing instructions are present in the 333 * generated XML text and the pre-defined entities in XML 1.0 are 334 * escaped.</p> 335 * 336 * Default: <code>false</code>. 337 * 338 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating 339 * whether or not TrAX ouput escaping PIs are ignored. 340 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING 341 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING 342 */ 343 public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) { 344 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs; 345 } 346 347 /** 348 * Returns whether JAXP TrAX processing instructions for 349 * disabling/enabling output escaping are ignored. 350 * 351 * @return whether or not TrAX ouput escaping PIs are ignored. 352 */ 353 public boolean getIgnoreTrAXEscapingPIs() { 354 return ignoreTrAXEscapingPIs; 355 } 356 357 /** 358 * This sets the text output style. Options are available as static 359 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}. 360 * 361 * @return a pointer to this Format for chaining 362 */ 363 public Format setTextMode(Format.TextMode mode) { 364 this.mode = mode; 365 return this; 366 } 367 368 /** 369 * Returns the current text output style. 370 * 371 * @return the current text output style 372 */ 373 public Format.TextMode getTextMode() { 374 return mode; 375 } 376 377 /** 378 * This will set the indent <code>String</code> to use; this 379 * is usually a <code>String</code> of empty spaces. If you pass 380 * null, or the empty string (""), then no indentation will 381 * happen. Default: none (null) 382 * 383 * @param indent <code>String</code> to use for indentation. 384 * @return a pointer to this Format for chaining 385 */ 386 public Format setIndent(String indent) { 387 // if passed the empty string, change it to null, for marginal 388 // performance gains later (can compare to null first instead 389 // of calling equals()) 390 if ("".equals(indent)) { 391 indent = null; 392 } 393 this.indent = indent; 394 return this; 395 } 396 397 /** 398 * Returns the indent string in use. 399 * 400 * @return the indent string in use 401 */ 402 public String getIndent() { 403 return indent; 404 } 405 406 /** 407 * Sets the output encoding. The name should be an accepted XML 408 * encoding. 409 * 410 * @param encoding the encoding format. Use XML-style names like 411 * "UTF-8" or "ISO-8859-1" or "US-ASCII" 412 * @return a pointer to this Format for chaining 413 */ 414 public Format setEncoding(String encoding) { 415 this.encoding = encoding; 416 escapeStrategy = new DefaultEscapeStrategy(encoding); 417 return this; 418 } 419 420 /** 421 * Returns the configured output encoding. 422 * 423 * @return the output encoding 424 */ 425 public String getEncoding() { 426 return encoding; 427 } 428 429 @Override 430 protected Object clone() { 431 Format format = null; 432 433 try { 434 format = (Format) super.clone(); 435 } catch (CloneNotSupportedException ce) { 436 } 437 438 return format; 439 } 440 441 /** 442 * Handle common charsets quickly and easily. 443 * If JDK 1.4 isn't around, default to no special encoding. 444 */ 445 static class DefaultEscapeStrategy implements EscapeStrategy { 446 private int bits; 447 CharsetEncoder encoder; 448 449 DefaultEscapeStrategy(String encoding) { 450 if ("UTF-8".equalsIgnoreCase(encoding) || "UTF-16".equalsIgnoreCase(encoding)) { 451 bits = 16; 452 } else if ("ISO-8859-1".equalsIgnoreCase(encoding) || "Latin1".equalsIgnoreCase(encoding)) { 453 bits = 8; 454 } else if ("US-ASCII".equalsIgnoreCase(encoding) || "ASCII".equalsIgnoreCase(encoding)) { 455 bits = 7; 456 } else { 457 bits = 0; 458 encoder = Charset.forName(encoding).newEncoder(); 459 } 460 } 461 462 @Override 463 public boolean shouldEscape(char ch) { 464 if (bits == 16) { 465 return false; 466 } 467 if (bits == 8) { 468 return (ch > 255); 469 } 470 if (bits == 7) { 471 return (ch > 127); 472 } else { 473 if (encoder != null) { 474 return !encoder.canEncode(ch); 475 } 476 // Return false if we don't know. This risks not escaping 477 // things which should be escaped, but also means people won't 478 // start getting loads of unnecessary escapes. 479 return false; 480 } 481 } 482 } 483 484 /** 485 * <p> 486 * Class to signify how text should be handled on output. The following 487 * table provides details.</p> 488 * <table> 489 * <caption>TextMode details</caption> 490 * <tr> 491 * <th> 492 * Text Mode 493 * </th> 494 * <th> 495 * Resulting behavior. 496 * </th> 497 * </tr> 498 * <tr> 499 * <td> 500 * <i>PRESERVE (Default)</i> 501 * </td> 502 * <td> 503 * All content is printed in the format it was created, no whitespace 504 * or line separators are are added or removed. 505 * </td> 506 * </tr> 507 * <tr> 508 * <td> 509 * TRIM_FULL_WHITE 510 * </td> 511 * <td> 512 * Content between tags consisting of all whitespace is not printed. 513 * If the content contains even one non-whitespace character, it is 514 * printed verbatim, whitespace and all. 515 * </td> 516 * </tr> 517 * <tr> 518 * <td> 519 * TRIM 520 * </td> 521 * <td> 522 * Same as TrimAllWhite, plus leading/trailing whitespace are 523 * trimmed. 524 * </td> 525 * </tr> 526 * <tr> 527 * <td> 528 * NORMALIZE 529 * </td> 530 * <td> 531 * Same as TextTrim, plus addition interior whitespace is compressed 532 * to a single space. 533 * </td> 534 * </tr> 535 * </table> 536 * 537 * <p>In most cases textual content is aligned with the surrounding tags 538 * (after the appropriate text mode is applied). In the case where the only 539 * content between the start and end tags is textual, the start tag, text, 540 * and end tag are all printed on the same line. If the document being 541 * output already has whitespace, it's wise to turn on TRIM mode so the 542 * pre-existing whitespace can be trimmed before adding new whitespace.</p> 543 * 544 * <p>When a element has a xml:space attribute with the value of "preserve", 545 * all formating is turned off and reverts back to the default until the 546 * element and its contents have been printed. If a nested element contains 547 * another xml:space with the value "default" formatting is turned back on 548 * for the child element and then off for the remainder of the parent 549 * element.</p> 550 */ 551 public static class TextMode { 552 /** Mode for literal text preservation. */ 553 public static final TextMode PRESERVE = new TextMode("PRESERVE"); 554 555 /** Mode for text trimming (left and right trim). */ 556 public static final TextMode TRIM = new TextMode("TRIM"); 557 558 /** 559 * Mode for text normalization (left and right trim plus internal 560 * whitespace is normalized to a single space. 561 * 562 * @see org.jdom2.Element#getTextNormalize 563 */ 564 public static final TextMode NORMALIZE = new TextMode("NORMALIZE"); 565 566 /** 567 * Mode for text trimming of content consisting of nothing but 568 * whitespace but otherwise not changing output. 569 */ 570 public static final TextMode TRIM_FULL_WHITE = new TextMode("TRIM_FULL_WHITE"); 571 572 private final String name; 573 574 private TextMode(String name) { 575 this.name = name; 576 } 577 578 @Override 579 public String toString() { 580 return name; 581 } 582 } 583 }