View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.shared.utils;
20  
21  import java.util.Locale;
22  
23  import org.apache.commons.text.StringEscapeUtils;
24  import org.junit.AfterClass;
25  import org.junit.Assert;
26  import org.junit.ComparisonFailure;
27  import org.junit.Test;
28  
29  /**
30   * Test case for character case changes, to precisely point the situations when character case comparison doesn't
31   * give intuitive result, or why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()}
32   * (platform locale dependent, with sometimes unexpected results)
33   * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
34   *
35   * @author Hervé Boutemy
36   * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
37   */
38  public class CaseTest extends Assert {
39      private static final Locale LOCALE_TURKISH = new Locale("tr");
40  
41      /** common ASCII 'i' */
42      private static final char DOTTED_i = '\u0069';
43  
44      /** common ASCII 'I' */
45      private static final char DOTLESS_I = '\u0049';
46  
47      /** turkish dotless i = ı */
48      private static final char DOTLESS_i = '\u0131';
49  
50      /** turkish dotted I = İ */
51      private static final char DOTTED_I = '\u0130';
52  
53      /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
54      private static final char COMBINING_DOT_ABOVE = '\u0307';
55  
56      private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();
57  
58      @AfterClass
59      public static void restoreDefaultLocale() {
60          Locale.setDefault(SAVED_DEFAULT_LOCALE);
61      }
62  
63      /**
64       * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
65       * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
66       */
67      @Test
68      public void testTurkishI() {
69          // check common i and I
70          assertEquals("common lowercase i should have a dot", 'i', DOTTED_i);
71          assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I);
72  
73          final String iIıİ = "iIıİ";
74  
75          // check source encoding doesn't wreck havoc */
76          assertUnicodeEquals(
77                  "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I);
78  
79          // check toUpperCase and toLowerCase difference with turkish and english locales
80          assertUnicodeEquals(
81                  "'iIıİ'.toUpperCase('tr')=='İIIİ'",
82                  "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
83                  iIıİ.toUpperCase(LOCALE_TURKISH));
84          assertUnicodeEquals(
85                  "'iIıİ'.toLowerCase('tr')=='iııi'",
86                  "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
87                  iIıİ.toLowerCase(LOCALE_TURKISH));
88          assertUnicodeEquals(
89                  "'iIıİ'.toUpperCase('en')=='IIIİ'",
90                  "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
91                  iIıİ.toUpperCase(Locale.ENGLISH));
92          String lower = iIıİ.toLowerCase(Locale.ENGLISH); // on some platforms, ends with extra COMBINED DOT ABOVE
93          assertUnicodeEquals(
94                  "'iIıİ'.toLowerCase('en')=='iiıi'",
95                  "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i + (lower.length() > 4 ? COMBINING_DOT_ABOVE : ""),
96                  lower);
97  
98          // check equalsIgnoreCase() , which has no locale
99          for (int i = 0; i < iIıİ.length(); i++) {
100             char currentI = iIıİ.charAt(i);
101 
102             StringBuilder sb = new StringBuilder(iIıİ.length());
103             for (int j = 0; j < iIıİ.length(); j++) {
104                 sb.append(currentI);
105             }
106             String current = sb.toString();
107 
108             assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ));
109         }
110     }
111 
112     /**
113      * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
114      * @param message
115      * @param expected
116      * @param actual
117      */
118     private void assertUnicodeEquals(String message, String expected, String actual) {
119         if (expected.equals(actual)) {
120             return;
121         }
122 
123         throw new ComparisonFailure(
124                 message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual));
125     }
126 
127     /**
128      * Test case change on all ascii characters with every available locale, to check that turkish i is the only
129      * exception on these characters.
130      */
131     @Test
132     public void testAsciiAvailableLocales() {
133         final String lower = "abcdefghijklmnopqrstuvwxyz";
134         final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
135 
136         for (Locale locale : Locale.getAvailableLocales()) {
137             // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
138             Locale.setDefault(locale);
139             assertEquals(lower.toUpperCase(), lower.toUpperCase(locale));
140             assertEquals(upper.toLowerCase(), upper.toLowerCase(locale));
141 
142             // check result
143             String expectedToUpperCase = upper;
144             String expectedToLowerCase = lower;
145             if (LOCALE_TURKISH.getLanguage().equals(locale.getLanguage())
146                     || new Locale("az").getLanguage().equals(locale.getLanguage())) {
147                 expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I);
148                 expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i);
149             }
150 
151             assertEquals(
152                     "'" + lower + "'.toUpperCase('" + locale.toString() + "')",
153                     expectedToUpperCase,
154                     lower.toUpperCase(locale));
155             assertEquals(
156                     "'" + upper + "'.toLowerCase('" + locale.toString() + "')",
157                     expectedToLowerCase,
158                     upper.toLowerCase(locale));
159 
160             // check that toLowerCase on lower and toUpperCase on upper don't cause harm
161             assertEquals("'" + lower + "'.toLowerCase('" + locale.toString() + "')", lower, lower.toLowerCase(locale));
162             assertEquals("'" + upper + "'.toUpperCase('" + locale.toString() + "')", upper, upper.toUpperCase(locale));
163 
164             // check equalsIgnoreCase
165             assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower));
166             assertTrue(
167                     "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
168                     upper.equalsIgnoreCase(expectedToLowerCase));
169             assertTrue(
170                     "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
171                     expectedToUpperCase.equalsIgnoreCase(lower));
172         }
173     }
174 }