function PhpTransliterationTest::providerTestPhpTransliteration

Same name in other branches
  1. 9 core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php \Drupal\Tests\Component\Transliteration\PhpTransliterationTest::providerTestPhpTransliteration()
  2. 8.9.x core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php \Drupal\Tests\Component\Transliteration\PhpTransliterationTest::providerTestPhpTransliteration()
  3. 11.x core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php \Drupal\Tests\Component\Transliteration\PhpTransliterationTest::providerTestPhpTransliteration()

Provides data for self::testPhpTransliteration().

Return value

array An array of arrays, each containing the parameters for self::testPhpTransliteration().

File

core/tests/Drupal/Tests/Component/Transliteration/PhpTransliterationTest.php, line 107

Class

PhpTransliterationTest
Tests Transliteration component functionality.

Namespace

Drupal\Tests\Component\Transliteration

Code

/**
 * Supplies the datasets consumed by self::testPhpTransliteration().
 *
 * @return array
 *   Datasets keyed by a human-readable label. Each dataset lists, in order:
 *   language code, input string, expected output, and optionally the
 *   replacement for unknown characters followed by a maximum length.
 */
public static function providerTestPhpTransliteration() : array {
    // Random string used by the "Test ASCII ..." datasets, which expect it to
    // come back unchanged.
    $ascii = (new Random())->string(10);

    // Accented Latin letters; the German and Danish datasets expect
    // language-specific replacements for some of them.
    // cSpell:disable-next-line
    $accented_latin = 'Ä Ö Ü Å Ø äöüåøhello';

    // Cyrillic small letter tse (U+0446). The 'kg' dataset expects a
    // language-specific override for it. See
    // http://www.unicode.org/charts/PDF/U0400.pdf
    $cyrillic_tse = "\u{0446}";

    // Canadian Aboriginal syllabic (U+1411). See
    // http://www.unicode.org/charts/PDF/U1400.pdf
    $syllabics_wii = "\u{1411}";

    // Two Gothic alphabet letters (U+10330 and U+10338). See
    // http://wikipedia.org/wiki/Gothic_alphabet
    // The dataset using them expects one unknown-character marker per letter.
    $gothic_letters = "\u{10330}\u{10338}";

    // Raw bytes that are not valid UTF-8 on their own. The "Illegal/unknown
    // unicode" datasets expect each byte to become the replacement character.
    $stray_byte = "\x80";
    $bad_lead_byte = "\xf8";
    $invalid_sequence = $bad_lead_byte . str_repeat($stray_byte, 4);

    // Every dataset is: language code, input, expected output, and then
    // optionally the unknown character and the maximum length.
    // cSpell:disable
    $datasets = [
        // Characters resolved without language-specific overrides. The labels
        // keep the provider's historical "N-byte" naming.
        'Test ASCII in English' => ['en', $ascii, $ascii],
        'Test ASCII in some other language with no overrides' => ['fr', $ascii, $ascii],
        'Test 3-byte characters from data table in a language without overrides' => ['fr', $cyrillic_tse, 'c'],
        'Test 4-byte characters from data table in a language without overrides' => ['fr', $syllabics_wii, 'wii'],
        'Test 5-byte characters not existing in the data table' => ['en', $gothic_letters, '??'],
        'Test a language with no overrides' => ['en', $accented_latin, 'A O U A O aouaohello'],

        // Language-specific overrides.
        'Test language overrides in German' => ['de', $accented_latin, 'Ae Oe Ue A O aeoeueaohello'],
        'Test ASCII in German language with overrides' => ['de', $ascii, $ascii],
        'Test language overrides in Danish' => ['da', $accented_latin, 'A O U Aa Oe aouaaoehello'],
        'Test ASCII in Danish language with overrides' => ['da', $ascii, $ascii],
        'Test language overrides in Kyrgyz' => ['kg', $cyrillic_tse, 'ts'],
        'Test language overrides in Turkish' => [
            'tr',
            'Abayı serdiler bize. Söyleyeceğim yüzlerine. Sanırım hepimiz aynı şeyi düşünüyoruz.',
            'Abayi serdiler bize. Soyleyecegim yuzlerine. Sanirim hepimiz ayni seyi dusunuyoruz.',
        ],
        'Test language overrides in Ukrainian' => [
            'uk',
            'На подушечці форми любої є й ґудзик щоб пірʼя геть жовте сховати.',
            'Na podushechtsi formy lyuboyi ye y gudzyk shchob pirya het zhovte skhovaty.',
        ],

        // Maximum length. A limit of 18 yields the same 17-character result
        // as a limit of 17, because the next two-letter replacement would not
        // fit and must not be cut in half.
        'Max length' => ['de', $accented_latin, 'Ae Oe Ue A O aeoe', '?', 17],
        'Do not split up the transliteration of a single character' => ['de', $accented_latin, 'Ae Oe Ue A O aeoe', '?', 18],

        // Invalid byte sequences and the unknown-character replacement.
        'Illegal/unknown unicode' => ['en', $invalid_sequence, '?????'],
        'Illegal/unknown unicode with non default replacement' => ['en', $invalid_sequence, '-----', '-'],
        'Contains Illegal/unknown unicode' => ['en', 'Hel' . $stray_byte . 'o World', 'Hel?o World'],
        'Illegal/unknown unicode at end' => ['en', 'Hell' . $stray_byte . ' World', 'Hell? World'],
        'Non default replacement' => ['en', $stray_byte . 'ello World', '_ello World', '_'],

        // Question marks already present in the input must be preserved.
        'Keep the original question marks' => ['en', $bad_lead_byte . '?' . $stray_byte, '???'],
        'Keep the original question marks when non default replacement' => [
            'en',
            $stray_byte . 'ello ? World?',
            '_ello ? World?',
            '_',
        ],
        'Keep the original question marks in some other language' => [
            'pl',
            'aąeę' . $stray_byte . 'oółżźz ?',
            'aaee?oolzzz ?',
        ],

        // A non-ASCII replacement character is itself expected to be
        // transliterated in the output.
        'Non-US-ASCII replacement in English' => ['en', $stray_byte . 'ello World?', 'Oello World?', 'Ö'],
        'Non-US-ASCII replacement in some other language' => ['pl', $stray_byte . 'óóść', 'ooosc', 'ó'],
        'Ensure question marks are replaced when max length used' => [
            'en',
            $stray_byte . 'ello ? World?',
            '_ello ?',
            '_',
            7,
        ],
        'Empty replacement' => ['en', $stray_byte . 'ello World' . $bad_lead_byte, 'ello World', ''],

        // Whitespace must be preserved as-is.
        'Not affecting spacing from the beginning and end of a string' => [
            'en',
            ' Hello Abventor! ',
            ' Hello Abventor! ',
        ],
        'Not affecting spacing from the beginning and end of a string when max length used' => [
            'pl',
            ' Drupal Kraków Community',
            ' Drupal Krakow ',
            '?',
            15,
        ],
        'Keep many spaces between words' => [
            'en',
            'Too    many    spaces between words !',
            'Too    many    spaces between words !',
        ],
    ];
    // cSpell:enable
    return $datasets;
}

Buggy or inaccurate documentation? Please file an issue. Need support? Need help programming? Connect with the Drupal community.