1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.archetype.common.util;
20
21 /*
22 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
23 * All rights reserved.
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
27 * are met:
28 *
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions, and the following disclaimer.
31 *
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions, and the disclaimer that follows
34 * these conditions in the documentation and/or other materials
35 * provided with the distribution.
36 *
37 * 3. The name "JDOM" must not be used to endorse or promote products
38 * derived from this software without prior written permission. For
39 * written permission, please contact <request_AT_jdom_DOT_org>.
40 *
41 * 4. Products derived from this software may not be called "JDOM", nor
42 * may "JDOM" appear in their name, without prior written permission
43 * from the JDOM Project Management <request_AT_jdom_DOT_org>.
44 *
45 * In addition, we request (but do not require) that you include in the
46 * end-user documentation provided with the redistribution and/or in the
47 * software itself an acknowledgement equivalent to the following:
48 * "This product includes software developed by the
49 * JDOM Project (http://www.jdom.org/)."
50 * Alternatively, the acknowledgment may be graphical using the logos
51 * available at http://www.jdom.org/images/logos.
52 *
53 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
54 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
56 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
57 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
58 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
59 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
60 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
61 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
63 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * This software consists of voluntary contributions made by many
67 * individuals on behalf of the JDOM Project and was originally
68 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
69 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
70 * on the JDOM Project, please see <http://www.jdom.org/>.
71 */
72
73 import java.nio.charset.Charset;
74 import java.nio.charset.CharsetEncoder;
75
76 import org.jdom2.output.EscapeStrategy;
77
78 /**
79 * <p>Class to encapsulate XMLOutputter format options.
80 * Typical users can use the standard format configurations obtained by
81 * {@link #getRawFormat} (no whitespace changes),
82 * {@link #getPrettyFormat} (whitespace beautification), and
83 * {@link #getCompactFormat} (whitespace normalization).</p>
84 *
85 * <p>Several modes are available to effect the way textual content is printed.
86 * See the documentation for {@link TextMode} for details.</p>
87 *
88 * @author Jason Hunter
89 */
90 public class Format implements Cloneable {
91
92 /**
93 * Returns a new Format object that performs no whitespace changes, uses
94 * the UTF-8 encoding, doesn't expand empty elements, includes the
95 * declaration and encoding, and uses the default entity escape strategy.
96 * Tweaks can be made to the returned Format instance without affecting
97 * other instances.
98 *
99 * @return a Format with no whitespace changes
100 */
101 public static Format getRawFormat() {
102 return new Format();
103 }
104
105 /**
106 * Returns a new Format object that performs whitespace beautification with
107 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements,
108 * includes the declaration and encoding, and uses the default entity
109 * escape strategy.
110 * Tweaks can be made to the returned Format instance without affecting
111 * other instances.
112 *
113 * @return a Format with whitespace beautification
114 */
115 public static Format getPrettyFormat() {
116 Format f = new Format();
117 f.setIndent(STANDARD_INDENT);
118 f.setTextMode(TextMode.TRIM);
119 return f;
120 }
121
122 /**
123 * Returns a new Format object that performs whitespace normalization, uses
124 * the UTF-8 encoding, doesn't expand empty elements, includes the
125 * declaration and encoding, and uses the default entity escape strategy.
126 * Tweaks can be made to the returned Format instance without affecting
127 * other instances.
128 *
129 * @return a Format with whitespace normalization
130 */
131 public static Format getCompactFormat() {
132 Format f = new Format();
133 f.setTextMode(TextMode.NORMALIZE);
134 return f;
135 }
136
137 /** standard value to indent by, if we are indenting */
138 private static final String STANDARD_INDENT = " ";
139
140 /** standard string with which to end a line */
141 private static final String STANDARD_LINE_SEPARATOR = "\r\n";
142
143 /** standard encoding */
144 private static final String STANDARD_ENCODING = "UTF-8";
145
146 /** The default indent is no spaces (as original document) */
147 String indent = null;
148
149 /** New line separator */
150 String lineSeparator = STANDARD_LINE_SEPARATOR;
151
152 /** The encoding format */
153 String encoding = STANDARD_ENCODING;
154
155 /**
156 * Whether or not to output the XML declaration
157 * - default is <code>false</code>
158 */
159 boolean omitDeclaration = false;
160
161 /**
162 * Whether or not to output the encoding in the XML declaration
163 * - default is <code>false</code>
164 */
165 boolean omitEncoding = false;
166
167 /**
168 * Whether or not to expand empty elements to
169 * <tagName></tagName> - default is <code>false</code>
170 */
171 boolean expandEmptyElements = false;
172
173 /**
174 * Whether TrAX output escaping disabling/enabling PIs are ignored
175 * or processed - default is <code>false</code>
176 */
177 boolean ignoreTrAXEscapingPIs = false;
178
179 /** text handling mode */
180 TextMode mode = TextMode.PRESERVE;
181
182 /** entity escape logic */
183 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy(encoding);
184
185 /** Creates a new Format instance with default (raw) behavior. */
186 private Format() {}
187
188 /**
189 * Sets the {@link EscapeStrategy} to use for character escaping.
190 *
191 * @param strategy the EscapeStrategy to use
192 * @return a pointer to this Format for chaining
193 */
194 public Format setEscapeStrategy(EscapeStrategy strategy) {
195 escapeStrategy = strategy;
196 return this;
197 }
198
199 /**
200 * Returns the current escape strategy
201 *
202 * @return the current escape strategy
203 */
204 public EscapeStrategy getEscapeStrategy() {
205 return escapeStrategy;
206 }
207
208 /**
209 * <p>This will set the newline separator (<code>lineSeparator</code>).
210 * The default is <code>\r\n</code>. Note that if the "newlines"
211 * property is false, this value is irrelevant. To make it output
212 * the system default line ending string, call
213 * <code>setLineSeparator(System.getProperty("line.separator"))</code></p>
214 *
215 * <p>To output "UNIX-style" documents, call
216 * <code>setLineSeparator("\n")</code>. To output "Mac-style"
217 * documents, call <code>setLineSeparator("\r")</code>. DOS-style
218 * documents use CR-LF ("\r\n"), which is the default.</p>
219 *
220 * <p>Note that this only applies to newlines generated by the
221 * outputter. If you parse an XML document that contains newlines
222 * embedded inside a text node, and you do not set TextMode.NORMALIZE,
223 * then the newlines will be output
224 * verbatim, as "\n" which is how parsers normalize them.
225 * </p>
226 *
227 * @param separator <code>String</code> line separator to use.
228 * @return a pointer to this Format for chaining
229 * @see #setTextMode
230 */
231 public Format setLineSeparator(String separator) {
232 this.lineSeparator = separator;
233 return this;
234 }
235
236 /**
237 * Returns the current line separator.
238 *
239 * @return the current line separator
240 */
241 public String getLineSeparator() {
242 return lineSeparator;
243 }
244
245 /**
246 * This will set whether the XML declaration
247 * (<code><?xml version="1.0"
248 * encoding="UTF-8"?></code>)
249 * includes the encoding of the document. It is common to omit
250 * this in uses such as WML and other wireless device protocols.
251 *
252 * @param omitEncoding <code>boolean</code> indicating whether or not
253 * the XML declaration should indicate the document encoding.
254 * @return a pointer to this Format for chaining
255 */
256 public Format setOmitEncoding(boolean omitEncoding) {
257 this.omitEncoding = omitEncoding;
258 return this;
259 }
260
261 /**
262 * Returns whether the XML declaration encoding will be omitted.
263 *
264 * @return whether the XML declaration encoding will be omitted
265 */
266 public boolean getOmitEncoding() {
267 return omitEncoding;
268 }
269
270 /**
271 * This will set whether the XML declaration
272 * (<code><?xml version="1.0"?gt;</code>)
273 * will be omitted or not. It is common to omit this in uses such
274 * as SOAP and XML-RPC calls.
275 *
276 * @param omitDeclaration <code>boolean</code> indicating whether or not
277 * the XML declaration should be omitted.
278 * @return a pointer to this Format for chaining
279 */
280 public Format setOmitDeclaration(boolean omitDeclaration) {
281 this.omitDeclaration = omitDeclaration;
282 return this;
283 }
284
285 /**
286 * Returns whether the XML declaration will be omitted.
287 *
288 * @return whether the XML declaration will be omitted
289 */
290 public boolean getOmitDeclaration() {
291 return omitDeclaration;
292 }
293
294 /**
295 * This will set whether empty elements are expanded from
296 * <code><tagName/></code> to
297 * <code><tagName></tagName></code>.
298 *
299 * @param expandEmptyElements <code>boolean</code> indicating whether or not
300 * empty elements should be expanded.
301 * @return a pointer to this Format for chaining
302 */
303 public Format setExpandEmptyElements(boolean expandEmptyElements) {
304 this.expandEmptyElements = expandEmptyElements;
305 return this;
306 }
307
308 /**
309 * Returns whether empty elements are expanded.
310 *
311 * @return whether empty elements are expanded
312 */
313 public boolean getExpandEmptyElements() {
314 return expandEmptyElements;
315 }
316
317 /**
318 * <p>This will set whether JAXP TrAX processing instructions for
319 * disabling/enabling output escaping are ignored. Disabling
320 * output escaping allows using XML text as element content and
321 * outputing it verbatim, i.e. as element children would be.</p>
322 *
323 * <p>When processed, these processing instructions are removed from
324 * the generated XML text and control whether the element text
325 * content is output verbatim or with escaping of the pre-defined
326 * entities in XML 1.0. The text to be output verbatim shall be
327 * surrounded by the
328 * <code><?javax.xml.transform.disable-output-escaping ?></code>
329 * and <code><?javax.xml.transform.enable-output-escaping ?></code>
330 * PIs.</p>
331 *
332 * <p>When ignored, the processing instructions are present in the
333 * generated XML text and the pre-defined entities in XML 1.0 are
334 * escaped.</p>
335 *
336 * Default: <code>false</code>.
337 *
338 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating
339 * whether or not TrAX ouput escaping PIs are ignored.
340 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING
341 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING
342 */
343 public void setIgnoreTrAXEscapingPIs(boolean ignoreTrAXEscapingPIs) {
344 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs;
345 }
346
347 /**
348 * Returns whether JAXP TrAX processing instructions for
349 * disabling/enabling output escaping are ignored.
350 *
351 * @return whether or not TrAX ouput escaping PIs are ignored.
352 */
353 public boolean getIgnoreTrAXEscapingPIs() {
354 return ignoreTrAXEscapingPIs;
355 }
356
357 /**
358 * This sets the text output style. Options are available as static
359 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}.
360 *
361 * @return a pointer to this Format for chaining
362 */
363 public Format setTextMode(Format.TextMode mode) {
364 this.mode = mode;
365 return this;
366 }
367
368 /**
369 * Returns the current text output style.
370 *
371 * @return the current text output style
372 */
373 public Format.TextMode getTextMode() {
374 return mode;
375 }
376
377 /**
378 * This will set the indent <code>String</code> to use; this
379 * is usually a <code>String</code> of empty spaces. If you pass
380 * null, or the empty string (""), then no indentation will
381 * happen. Default: none (null)
382 *
383 * @param indent <code>String</code> to use for indentation.
384 * @return a pointer to this Format for chaining
385 */
386 public Format setIndent(String indent) {
387 // if passed the empty string, change it to null, for marginal
388 // performance gains later (can compare to null first instead
389 // of calling equals())
390 if ("".equals(indent)) {
391 indent = null;
392 }
393 this.indent = indent;
394 return this;
395 }
396
397 /**
398 * Returns the indent string in use.
399 *
400 * @return the indent string in use
401 */
402 public String getIndent() {
403 return indent;
404 }
405
406 /**
407 * Sets the output encoding. The name should be an accepted XML
408 * encoding.
409 *
410 * @param encoding the encoding format. Use XML-style names like
411 * "UTF-8" or "ISO-8859-1" or "US-ASCII"
412 * @return a pointer to this Format for chaining
413 */
414 public Format setEncoding(String encoding) {
415 this.encoding = encoding;
416 escapeStrategy = new DefaultEscapeStrategy(encoding);
417 return this;
418 }
419
420 /**
421 * Returns the configured output encoding.
422 *
423 * @return the output encoding
424 */
425 public String getEncoding() {
426 return encoding;
427 }
428
429 @Override
430 protected Object clone() {
431 Format format = null;
432
433 try {
434 format = (Format) super.clone();
435 } catch (CloneNotSupportedException ce) {
436 }
437
438 return format;
439 }
440
441 /**
442 * Handle common charsets quickly and easily.
443 * If JDK 1.4 isn't around, default to no special encoding.
444 */
445 static class DefaultEscapeStrategy implements EscapeStrategy {
446 private int bits;
447 CharsetEncoder encoder;
448
449 DefaultEscapeStrategy(String encoding) {
450 if ("UTF-8".equalsIgnoreCase(encoding) || "UTF-16".equalsIgnoreCase(encoding)) {
451 bits = 16;
452 } else if ("ISO-8859-1".equalsIgnoreCase(encoding) || "Latin1".equalsIgnoreCase(encoding)) {
453 bits = 8;
454 } else if ("US-ASCII".equalsIgnoreCase(encoding) || "ASCII".equalsIgnoreCase(encoding)) {
455 bits = 7;
456 } else {
457 bits = 0;
458 encoder = Charset.forName(encoding).newEncoder();
459 }
460 }
461
462 @Override
463 public boolean shouldEscape(char ch) {
464 if (bits == 16) {
465 return false;
466 }
467 if (bits == 8) {
468 return (ch > 255);
469 }
470 if (bits == 7) {
471 return (ch > 127);
472 } else {
473 if (encoder != null) {
474 return !encoder.canEncode(ch);
475 }
476 // Return false if we don't know. This risks not escaping
477 // things which should be escaped, but also means people won't
478 // start getting loads of unnecessary escapes.
479 return false;
480 }
481 }
482 }
483
484 /**
485 * <p>
486 * Class to signify how text should be handled on output. The following
487 * table provides details.</p>
488 * <table>
489 * <caption>TextMode details</caption>
490 * <tr>
491 * <th>
492 * Text Mode
493 * </th>
494 * <th>
495 * Resulting behavior.
496 * </th>
497 * </tr>
498 * <tr>
499 * <td>
500 * <i>PRESERVE (Default)</i>
501 * </td>
502 * <td>
503 * All content is printed in the format it was created, no whitespace
504 * or line separators are are added or removed.
505 * </td>
506 * </tr>
507 * <tr>
508 * <td>
509 * TRIM_FULL_WHITE
510 * </td>
511 * <td>
512 * Content between tags consisting of all whitespace is not printed.
513 * If the content contains even one non-whitespace character, it is
514 * printed verbatim, whitespace and all.
515 * </td>
516 * </tr>
517 * <tr>
518 * <td>
519 * TRIM
520 * </td>
521 * <td>
522 * Same as TrimAllWhite, plus leading/trailing whitespace are
523 * trimmed.
524 * </td>
525 * </tr>
526 * <tr>
527 * <td>
528 * NORMALIZE
529 * </td>
530 * <td>
531 * Same as TextTrim, plus addition interior whitespace is compressed
532 * to a single space.
533 * </td>
534 * </tr>
535 * </table>
536 *
537 * <p>In most cases textual content is aligned with the surrounding tags
538 * (after the appropriate text mode is applied). In the case where the only
539 * content between the start and end tags is textual, the start tag, text,
540 * and end tag are all printed on the same line. If the document being
541 * output already has whitespace, it's wise to turn on TRIM mode so the
542 * pre-existing whitespace can be trimmed before adding new whitespace.</p>
543 *
544 * <p>When a element has a xml:space attribute with the value of "preserve",
545 * all formating is turned off and reverts back to the default until the
546 * element and its contents have been printed. If a nested element contains
547 * another xml:space with the value "default" formatting is turned back on
548 * for the child element and then off for the remainder of the parent
549 * element.</p>
550 */
551 public static class TextMode {
552 /** Mode for literal text preservation. */
553 public static final TextMode PRESERVE = new TextMode("PRESERVE");
554
555 /** Mode for text trimming (left and right trim). */
556 public static final TextMode TRIM = new TextMode("TRIM");
557
558 /**
559 * Mode for text normalization (left and right trim plus internal
560 * whitespace is normalized to a single space.
561 *
562 * @see org.jdom2.Element#getTextNormalize
563 */
564 public static final TextMode NORMALIZE = new TextMode("NORMALIZE");
565
566 /**
567 * Mode for text trimming of content consisting of nothing but
568 * whitespace but otherwise not changing output.
569 */
570 public static final TextMode TRIM_FULL_WHITE = new TextMode("TRIM_FULL_WHITE");
571
572 private final String name;
573
574 private TextMode(String name) {
575 this.name = name;
576 }
577
578 @Override
579 public String toString() {
580 return name;
581 }
582 }
583 }