View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.shared.utils;
20  
21  import java.io.ByteArrayInputStream;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.SequenceInputStream;
25  
26  import junit.framework.ComparisonFailure;
27  import junit.framework.TestCase;
28  import org.apache.commons.io.IOUtils;
29  import org.apache.maven.shared.utils.xml.XmlStreamReader;
30  
31  /**
32   *
33   * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
34   */
35  public class XmlStreamReaderTest extends TestCase {
36      /** french */
37      private static final String TEXT_LATIN1 = "eacute: \u00E9";
38      /** greek */
39      private static final String TEXT_LATIN7 = "alpha: \u03B1";
40      /** euro support */
41      private static final String TEXT_LATIN15 = "euro: \u20AC";
42      /** japanese */
43      private static final String TEXT_EUC_JP = "hiragana A: \u3042";
44      /** Unicode: support everything */
45      private static final String TEXT_UNICODE =
46              TEXT_LATIN1 + ", " + TEXT_LATIN7 + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
47      /** see http://unicode.org/faq/utf_bom.html#BOM */
48      private static final byte[] BOM_UTF8 = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
49  
50      private static final byte[] BOM_UTF16BE = {(byte) 0xFE, (byte) 0xFF};
51      private static final byte[] BOM_UTF16LE = {(byte) 0xFF, (byte) 0xFE};
52  
53      private static String createXmlContent(String text, String encoding) {
54          String xmlDecl = "<?xml version=\"1.0\"?>";
55          if (encoding != null) {
56              xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
57          }
58          return xmlDecl + "\n<text>" + text + "</text>";
59      }
60  
61      private static void checkXmlContent(String xml, String encoding) throws IOException {
62          checkXmlContent(xml, encoding, null);
63      }
64  
65      private static void checkXmlContent(String xml, String encoding, byte[] bom) throws IOException {
66          byte[] xmlContent = xml.getBytes(encoding);
67          InputStream in = new ByteArrayInputStream(xmlContent);
68  
69          if (bom != null) {
70              in = new SequenceInputStream(new ByteArrayInputStream(bom), in);
71          }
72  
73          XmlStreamReader reader = new XmlStreamReader(in);
74          assertEquals(encoding, reader.getEncoding());
75          String result = IOUtils.toString(reader);
76          assertEquals(xml, result);
77      }
78  
79      private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding)
80              throws IOException {
81          checkXmlStreamReader(text, encoding, effectiveEncoding, null);
82      }
83  
84      private static void checkXmlStreamReader(String text, String encoding) throws IOException {
85          checkXmlStreamReader(text, encoding, encoding, null);
86      }
87  
88      private static void checkXmlStreamReader(String text, String encoding, byte[] bom) throws IOException {
89          checkXmlStreamReader(text, encoding, encoding, bom);
90      }
91  
92      private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding, byte[] bom)
93              throws IOException {
94          String xml = createXmlContent(text, encoding);
95          checkXmlContent(xml, effectiveEncoding, bom);
96      }
97  
98      public void testNoXmlHeader() throws IOException {
99          String xml = "<text>text with no XML header</text>";
100         checkXmlContent(xml, "UTF-8");
101         checkXmlContent(xml, "UTF-8", BOM_UTF8);
102     }
103 
104     public void testDefaultEncoding() throws IOException {
105         checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8");
106         checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8", BOM_UTF8);
107     }
108 
109     public void testUTF8Encoding() throws IOException {
110         checkXmlStreamReader(TEXT_UNICODE, "UTF-8");
111         checkXmlStreamReader(TEXT_UNICODE, "UTF-8", BOM_UTF8);
112     }
113 
114     public void testUTF16Encoding() throws IOException {
115         checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", null);
116         checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE);
117         checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE);
118     }
119 
120     public void testUTF16BEEncoding() throws IOException {
121         checkXmlStreamReader(TEXT_UNICODE, "UTF-16BE");
122     }
123 
124     public void testUTF16LEEncoding() throws IOException {
125         checkXmlStreamReader(TEXT_UNICODE, "UTF-16LE");
126     }
127 
128     public void testLatin1Encoding() throws IOException {
129         checkXmlStreamReader(TEXT_LATIN1, "ISO-8859-1");
130     }
131 
132     public void testLatin7Encoding() throws IOException {
133         checkXmlStreamReader(TEXT_LATIN7, "ISO-8859-7");
134     }
135 
136     public void testLatin15Encoding() throws IOException {
137         checkXmlStreamReader(TEXT_LATIN15, "ISO-8859-15");
138     }
139 
140     public void testEUC_JPEncoding() throws IOException {
141         checkXmlStreamReader(TEXT_EUC_JP, "EUC-JP");
142     }
143 
144     public void testEBCDICEncoding() throws IOException {
145         checkXmlStreamReader("simple text in EBCDIC", "CP1047");
146     }
147 
148     public void testInappropriateEncoding() throws IOException {
149         try {
150             checkXmlStreamReader(TEXT_UNICODE, "ISO-8859-2");
151             fail("Check should have failed, since some characters are not available in the specified encoding");
152         } catch (ComparisonFailure cf) {
153             // expected failure, since the encoding does not contain some characters
154         }
155     }
156 
157     public void testEncodingAttribute() throws IOException {
158         String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>";
159         checkXmlContent(xml, "US-ASCII");
160 
161         xml = "<?xml version='1.0' encoding  =  'US-ASCII'  ?><element encoding='attribute value'/>";
162         checkXmlContent(xml, "US-ASCII");
163 
164         xml = "<?xml version='1.0'?><element encoding='attribute value'/>";
165         checkXmlContent(xml, "UTF-8");
166 
167         xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>";
168         checkXmlContent(xml, "US-ASCII");
169 
170         xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>";
171         checkXmlContent(xml, "UTF-8");
172 
173         xml = "<element encoding='attribute value'/>";
174         checkXmlContent(xml, "UTF-8");
175     }
176 }