CaseTest

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.maven.shared.utils;

import java.util.Locale;

import org.apache.commons.text.StringEscapeUtils;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.ComparisonFailure;
import org.junit.Test;

/**
 * Test case for character case changes, to precisely point the situations when character case comparison doesn't
 * give intuitive result, or why one should avoid {@link String#toUpperCase()} and {@link String#toLowerCase()}
 * (platform locale dependent, with sometimes unexpected results)
 * but <b>prefer {@link String#equalsIgnoreCase(String)} when possible</b>.
 *
 * @author Hervé Boutemy
 * @see <a href="http://sim.ivi.co/2011/07/trap-of-case-insensitive-string.html">Simple Smiles - Xuelei Fan's Blog</a>
 */
public class CaseTest extends Assert {
    private static final Locale LOCALE_TURKISH = new Locale("tr");

    /** Azerbaijani shares the Turkish dotted/dotless i casing rules. */
    private static final Locale LOCALE_AZERBAIJANI = new Locale("az");

    /** common ASCII 'i' */
    private static final char DOTTED_i = '\u0069';

    /** common ASCII 'I' */
    private static final char DOTLESS_I = '\u0049';

    /** turkish dotless i = ı */
    private static final char DOTLESS_i = '\u0131';

    /** turkish dotted I = İ */
    private static final char DOTTED_I = '\u0130';

    /** http://en.wikipedia.org/wiki/Dot_(diacritic) */
    private static final char COMBINING_DOT_ABOVE = '\u0307';

    /** Default locale observed when this class was initialized; reinstated once all tests have run. */
    private static final Locale SAVED_DEFAULT_LOCALE = Locale.getDefault();

    /**
     * Puts back the JVM default locale captured at class initialization, as a class-level safety net for
     * tests that change it.
     */
    @AfterClass
    public static void restoreDefaultLocale() {
        Locale.setDefault(SAVED_DEFAULT_LOCALE);
    }

    /**
     * test the known case of upper I which doesn't give commonly expected i in Turkish locale, but ı (dotless i).
     * @see <a href="http://mattryall.net/blog/2009/02/the-infamous-turkish-locale-bug">The infamous Turkish locale bug</a>
     */
    @Test
    public void testTurkishI() {
        // check common i and I
        assertEquals("common lowercase i should have a dot", 'i', DOTTED_i);
        assertEquals("common uppercase I should not have a dot", 'I', DOTLESS_I);

        final String iIıİ = "iIıİ";

        // check source encoding doesn't wreak havoc
        assertUnicodeEquals(
                "misc i directly in (UTF-8) source", iIıİ, "" + DOTTED_i + DOTLESS_I + DOTLESS_i + DOTTED_I);

        // check toUpperCase and toLowerCase difference with turkish and english locales
        assertUnicodeEquals(
                "'iIıİ'.toUpperCase('tr')=='İIIİ'",
                "" + DOTTED_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                iIıİ.toUpperCase(LOCALE_TURKISH));
        assertUnicodeEquals(
                "'iIıİ'.toLowerCase('tr')=='iııi'",
                "" + DOTTED_i + DOTLESS_i + DOTLESS_i + DOTTED_i,
                iIıİ.toLowerCase(LOCALE_TURKISH));
        assertUnicodeEquals(
                "'iIıİ'.toUpperCase('en')=='IIIİ'",
                "" + DOTLESS_I + DOTLESS_I + DOTLESS_I + DOTTED_I,
                iIıİ.toUpperCase(Locale.ENGLISH));

        // on some platforms, the lowercased result ends with an extra COMBINING DOT ABOVE; the expected value
        // tolerates both forms by appending that character only when the actual result is longer than 4 chars
        String lower = iIıİ.toLowerCase(Locale.ENGLISH);
        assertUnicodeEquals(
                "'iIıİ'.toLowerCase('en')=='iiıi'",
                "" + DOTTED_i + DOTTED_i + DOTLESS_i + DOTTED_i + (lower.length() > 4 ? COMBINING_DOT_ABOVE : ""),
                lower);

        // check equalsIgnoreCase(), which has no locale: a string made of any single one of the four
        // characters, repeated, must match the mixed string
        for (int i = 0; i < iIıİ.length(); i++) {
            char currentI = iIıİ.charAt(i);

            StringBuilder sb = new StringBuilder(iIıİ.length());
            for (int j = 0; j < iIıİ.length(); j++) {
                sb.append(currentI);
            }
            String current = sb.toString();

            assertTrue("'" + current + "'.equalsIgnoreCase('" + iIıİ + "')", current.equalsIgnoreCase(iIıİ));
        }
    }

    /**
     * Assert equals, and in case the result isn't as expected, display content unicode-escaped.
     *
     * @param message description reported with the failure
     * @param expected the expected string
     * @param actual the string actually obtained
     * @throws ComparisonFailure if both strings differ, carrying their Java-escaped forms
     */
    private static void assertUnicodeEquals(String message, String expected, String actual) {
        if (expected.equals(actual)) {
            return;
        }

        throw new ComparisonFailure(
                message, StringEscapeUtils.escapeJava(expected), StringEscapeUtils.escapeJava(actual));
    }

    /**
     * Test case change on all ascii characters with every available locale, to check that turkish i is the only
     * exception on these characters.
     */
    @Test
    public void testAsciiAvailableLocales() {
        final String lower = "abcdefghijklmnopqrstuvwxyz";
        final String upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        // The loop below changes the JVM-wide default locale; remember the current one so it can be put back
        // as soon as this test ends, even on assertion failure, instead of waiting for class teardown.
        final Locale defaultBeforeTest = Locale.getDefault();
        try {
            for (Locale locale : Locale.getAvailableLocales()) {
                // check that toUpper() == toUpper(default locale) and toLower() = toLower(default locale)
                Locale.setDefault(locale);
                assertEquals(lower.toUpperCase(), lower.toUpperCase(locale));
                assertEquals(upper.toLowerCase(), upper.toLowerCase(locale));

                // check result: only Turkish and Azerbaijani are expected to map i/I differently
                String expectedToUpperCase = upper;
                String expectedToLowerCase = lower;
                String language = locale.getLanguage();
                if (LOCALE_TURKISH.getLanguage().equals(language)
                        || LOCALE_AZERBAIJANI.getLanguage().equals(language)) {
                    expectedToUpperCase = upper.replace(DOTLESS_I, DOTTED_I);
                    expectedToLowerCase = lower.replace(DOTTED_i, DOTLESS_i);
                }

                assertEquals(
                        "'" + lower + "'.toUpperCase('" + locale + "')",
                        expectedToUpperCase,
                        lower.toUpperCase(locale));
                assertEquals(
                        "'" + upper + "'.toLowerCase('" + locale + "')",
                        expectedToLowerCase,
                        upper.toLowerCase(locale));

                // check that toLowerCase on lower and toUpperCase on upper don't cause harm
                assertEquals("'" + lower + "'.toLowerCase('" + locale + "')", lower, lower.toLowerCase(locale));
                assertEquals("'" + upper + "'.toUpperCase('" + locale + "')", upper, upper.toUpperCase(locale));

                // check equalsIgnoreCase
                assertTrue("'" + upper + "'.equalsIgnoreCase('" + lower + "')", upper.equalsIgnoreCase(lower));
                assertTrue(
                        "'" + upper + "'.equalsIgnoreCase('" + expectedToLowerCase + "')",
                        upper.equalsIgnoreCase(expectedToLowerCase));
                assertTrue(
                        "'" + expectedToUpperCase + "'.equalsIgnoreCase('" + lower + "')",
                        expectedToUpperCase.equalsIgnoreCase(lower));
            }
        } finally {
            Locale.setDefault(defaultBeforeTest);
        }
    }
}