1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.shared.utils; 20 21 import java.io.ByteArrayInputStream; 22 import java.io.IOException; 23 import java.io.InputStream; 24 import java.io.SequenceInputStream; 25 26 import junit.framework.ComparisonFailure; 27 import junit.framework.TestCase; 28 import org.apache.commons.io.IOUtils; 29 import org.apache.maven.shared.utils.xml.XmlStreamReader; 30 31 /** 32 * 33 * @author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a> 34 */ 35 public class XmlStreamReaderTest extends TestCase { 36 /** french */ 37 private static final String TEXT_LATIN1 = "eacute: \u00E9"; 38 /** greek */ 39 private static final String TEXT_LATIN7 = "alpha: \u03B1"; 40 /** euro support */ 41 private static final String TEXT_LATIN15 = "euro: \u20AC"; 42 /** japanese */ 43 private static final String TEXT_EUC_JP = "hiragana A: \u3042"; 44 /** Unicode: support everything */ 45 private static final String TEXT_UNICODE = 46 TEXT_LATIN1 + ", " + TEXT_LATIN7 + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP; 47 /** see http://unicode.org/faq/utf_bom.html#BOM */ 48 private static final byte[] BOM_UTF8 = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; 49 50 private static final byte[] BOM_UTF16BE = {(byte) 0xFE, (byte) 0xFF}; 51 private static final byte[] BOM_UTF16LE = {(byte) 0xFF, (byte) 0xFE}; 52 53 private static String createXmlContent(String text, String encoding) { 54 String xmlDecl = "<?xml version=\"1.0\"?>"; 55 if (encoding != null) { 56 xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>"; 57 } 58 return xmlDecl + "\n<text>" + text + "</text>"; 59 } 60 61 private static void checkXmlContent(String xml, String encoding) throws IOException { 62 checkXmlContent(xml, encoding, null); 63 } 64 65 private static void checkXmlContent(String xml, String encoding, byte[] bom) throws IOException { 66 byte[] xmlContent = xml.getBytes(encoding); 67 InputStream in = new ByteArrayInputStream(xmlContent); 68 69 if (bom != null) { 70 in = new SequenceInputStream(new ByteArrayInputStream(bom), in); 71 } 72 73 XmlStreamReader reader = new XmlStreamReader(in); 74 assertEquals(encoding, reader.getEncoding()); 75 String result = IOUtils.toString(reader); 76 assertEquals(xml, result); 77 } 78 79 private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding) 80 throws IOException { 81 checkXmlStreamReader(text, encoding, effectiveEncoding, null); 82 } 83 84 private static void checkXmlStreamReader(String text, String encoding) throws IOException { 85 checkXmlStreamReader(text, encoding, encoding, null); 86 } 87 88 private static void checkXmlStreamReader(String text, String encoding, byte[] bom) throws IOException { 89 checkXmlStreamReader(text, encoding, encoding, bom); 90 } 91 92 private static void checkXmlStreamReader(String text, String encoding, String effectiveEncoding, byte[] bom) 93 throws IOException { 94 String xml = createXmlContent(text, encoding); 95 checkXmlContent(xml, effectiveEncoding, bom); 96 } 97 98 public void testNoXmlHeader() throws IOException { 99 String xml = "<text>text with no XML header</text>"; 100 checkXmlContent(xml, "UTF-8"); 101 checkXmlContent(xml, "UTF-8", BOM_UTF8); 102 } 103 104 public void testDefaultEncoding() throws IOException { 105 checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8"); 106 checkXmlStreamReader(TEXT_UNICODE, null, "UTF-8", BOM_UTF8); 107 } 108 109 public void testUTF8Encoding() throws IOException { 110 checkXmlStreamReader(TEXT_UNICODE, "UTF-8"); 111 checkXmlStreamReader(TEXT_UNICODE, "UTF-8", BOM_UTF8); 112 } 113 114 public void testUTF16Encoding() throws IOException { 115 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", null); 116 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE); 117 checkXmlStreamReader(TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE); 118 } 119 120 public void testUTF16BEEncoding() throws IOException { 121 checkXmlStreamReader(TEXT_UNICODE, "UTF-16BE"); 122 } 123 124 public void testUTF16LEEncoding() throws IOException { 125 checkXmlStreamReader(TEXT_UNICODE, "UTF-16LE"); 126 } 127 128 public void testLatin1Encoding() throws IOException { 129 checkXmlStreamReader(TEXT_LATIN1, "ISO-8859-1"); 130 } 131 132 public void testLatin7Encoding() throws IOException { 133 checkXmlStreamReader(TEXT_LATIN7, "ISO-8859-7"); 134 } 135 136 public void testLatin15Encoding() throws IOException { 137 checkXmlStreamReader(TEXT_LATIN15, "ISO-8859-15"); 138 } 139 140 public void testEUC_JPEncoding() throws IOException { 141 checkXmlStreamReader(TEXT_EUC_JP, "EUC-JP"); 142 } 143 144 public void testEBCDICEncoding() throws IOException { 145 checkXmlStreamReader("simple text in EBCDIC", "CP1047"); 146 } 147 148 public void testInappropriateEncoding() throws IOException { 149 try { 150 checkXmlStreamReader(TEXT_UNICODE, "ISO-8859-2"); 151 fail("Check should have failed, since some characters are not available in the specified encoding"); 152 } catch (ComparisonFailure cf) { 153 // expected failure, since the encoding does not contain some characters 154 } 155 } 156 157 public void testEncodingAttribute() throws IOException { 158 String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>"; 159 checkXmlContent(xml, "US-ASCII"); 160 161 xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><element encoding='attribute value'/>"; 162 checkXmlContent(xml, "US-ASCII"); 163 164 xml = "<?xml version='1.0'?><element encoding='attribute value'/>"; 165 checkXmlContent(xml, "UTF-8"); 166 167 xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>"; 168 checkXmlContent(xml, "US-ASCII"); 169 170 xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>"; 171 checkXmlContent(xml, "UTF-8"); 172 173 xml = "<element encoding='attribute value'/>"; 174 checkXmlContent(xml, "UTF-8"); 175 } 176 }