1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.shared.utils;
20
21 import java.io.ByteArrayInputStream;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.io.SequenceInputStream;
25
26 import junit.framework.ComparisonFailure;
27 import junit.framework.TestCase;
28 import org.apache.commons.io.IOUtils;
29 import org.apache.maven.shared.utils.xml.XmlStreamReader;
30
31 /**
32 *
33 * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
34 */
35 public class XmlStreamReaderTest extends TestCase {
36 /** french */
37 private static final String TEXT_LATIN1 = "eacute: \u00E9";
38 /** greek */
39 private static final String TEXT_LATIN7 = "alpha: \u03B1";
40 /** euro support */
41 private static final String TEXT_LATIN15 = "euro: \u20AC";
42 /** japanese */
43 private static final String TEXT_EUC_JP = "hiragana A: \u3042";
44 /** Unicode: support everything */
45 private static final String TEXT_UNICODE =
46 TEXT_LATIN1 + ", " + TEXT_LATIN7 + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
47 /** see http://unicode.org/faq/utf_bom.html#BOM */
48 private static final byte[] BOM_UTF8 = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
49
50 private static final byte[] BOM_UTF16BE = {(byte) 0xFE, (byte) 0xFF};
51 private static final byte[] BOM_UTF16LE = {(byte) 0xFF, (byte) 0xFE};
52
53 private static String createXmlContent(String text, String encoding) {
54 String xmlDecl = "<?xml version=\"1.0\"?>";
55 if (encoding != null) {
56 xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
57 }
58 return xmlDecl + "\n<text>" + text + "</text>";
59 }
60
61 private static void checkXmlContent(String xml, String encoding) throws IOException {
62 checkXmlContent(xml, encoding, null);
63 }
64
65 private static void checkXmlContent(String xml, String encoding, byte[] bom) throws IOException {
66 byte[] xmlContent = xml.getBytes(encoding);
67 InputStream in = new ByteArrayInputStream(xmlContent);
68
69 if (bom != null) {
70 in = new SequenceInputStream(new ByteArrayInputStream(bom), in);
71 }
72
73 XmlStreamReader reader = new XmlStreamReader(in);
74 assertEquals(encoding, reader.getEncoding());
75 String result = IOUtils.toString(reader);
76 assertEquals(xml, result);
77 }
78
79 private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding)
80 throws IOException {
81 checkXmlStreamReader(text, encoding, effectiveEncoding, null);
82 }
83
84 private static void checkXmlStreamReader(String text, String encoding) throws IOException {
85 checkXmlStreamReader(text, encoding, encoding, null);
86 }
87
88 private static void checkXmlStreamReader(String text, String encoding, byte[] bom) throws IOException {
89 checkXmlStreamReader(text, encoding, encoding, bom);
90 }
91
92 private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding, byte[] bom)
93 throws IOException {
94 String xml = createXmlContent(text, encoding);
95 checkXmlContent(xml, effectiveEncoding, bom);
96 }
97
98 public void testNoXmlHeader() throws IOException {
99 String xml = "<text>text with no XML header</text>";
100 checkXmlContent(xml, "UTF-8");
101 checkXmlContent(xml, "UTF-8", BOM_UTF8);
102 }
103
104 public void testDefaultEncoding() throws IOException {
105 checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8");
106 checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8", BOM_UTF8);
107 }
108
109 public void testUTF8Encoding() throws IOException {
110 checkXmlStreamReader(TEXT_UNICODE, "UTF-8");
111 checkXmlStreamReader(TEXT_UNICODE, "UTF-8", BOM_UTF8);
112 }
113
114 public void testUTF16Encoding() throws IOException {
115 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", null);
116 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE);
117 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE);
118 }
119
120 public void testUTF16BEEncoding() throws IOException {
121 checkXmlStreamReader(TEXT_UNICODE, "UTF-16BE");
122 }
123
124 public void testUTF16LEEncoding() throws IOException {
125 checkXmlStreamReader(TEXT_UNICODE, "UTF-16LE");
126 }
127
128 public void testLatin1Encoding() throws IOException {
129 checkXmlStreamReader(TEXT_LATIN1, "ISO-8859-1");
130 }
131
132 public void testLatin7Encoding() throws IOException {
133 checkXmlStreamReader(TEXT_LATIN7, "ISO-8859-7");
134 }
135
136 public void testLatin15Encoding() throws IOException {
137 checkXmlStreamReader(TEXT_LATIN15, "ISO-8859-15");
138 }
139
140 public void testEUC_JPEncoding() throws IOException {
141 checkXmlStreamReader(TEXT_EUC_JP, "EUC-JP");
142 }
143
144 public void testEBCDICEncoding() throws IOException {
145 checkXmlStreamReader("simple text in EBCDIC", "CP1047");
146 }
147
148 public void testInappropriateEncoding() throws IOException {
149 try {
150 checkXmlStreamReader(TEXT_UNICODE, "ISO-8859-2");
151 fail("Check should have failed, since some characters are not available in the specified encoding");
152 } catch (ComparisonFailure cf) {
153 // expected failure, since the encoding does not contain some characters
154 }
155 }
156
157 public void testEncodingAttribute() throws IOException {
158 String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>";
159 checkXmlContent(xml, "US-ASCII");
160
161 xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><element encoding='attribute value'/>";
162 checkXmlContent(xml, "US-ASCII");
163
164 xml = "<?xml version='1.0'?><element encoding='attribute value'/>";
165 checkXmlContent(xml, "UTF-8");
166
167 xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>";
168 checkXmlContent(xml, "US-ASCII");
169
170 xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>";
171 checkXmlContent(xml, "UTF-8");
172
173 xml = "<element encoding='attribute value'/>";
174 checkXmlContent(xml, "UTF-8");
175 }
176 }