diff --git a/src/modules/locale/CMakeLists.txt b/src/modules/locale/CMakeLists.txt index 6543d7d947bf7f53267366d826b0bcdd68569987..b631f77f770642a5d0f284e66806e461ae02909b 100644 --- a/src/modules/locale/CMakeLists.txt +++ b/src/modules/locale/CMakeLists.txt @@ -22,6 +22,7 @@ calamares_add_plugin(locale Config.cpp LCLocaleDialog.cpp LocaleConfiguration.cpp + LocaleNames.cpp LocalePage.cpp LocaleViewStep.cpp SetTimezoneJob.cpp @@ -39,15 +40,7 @@ calamares_add_plugin(locale calamares_add_test( localetest - SOURCES - Tests.cpp - Config.cpp - LocaleConfiguration.cpp - SetTimezoneJob.cpp - timezonewidget/TimeZoneImage.cpp - DEFINITIONS - SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}/images" - DEBUG_TIMEZONES=1 - LIBRARIES - Qt5::Gui + SOURCES Tests.cpp Config.cpp LocaleConfiguration.cpp LocaleNames.cpp SetTimezoneJob.cpp timezonewidget/TimeZoneImage.cpp + DEFINITIONS SOURCE_DIR="${CMAKE_CURRENT_LIST_DIR}/images" DEBUG_TIMEZONES=1 + LIBRARIES Qt5::Gui ) diff --git a/src/modules/locale/LocaleConfiguration.cpp b/src/modules/locale/LocaleConfiguration.cpp index 17953f079aa3d1fe871a107a27405f382276184f..c62b1ab0816f000112cc6844e2283105412f0d0f 100644 --- a/src/modules/locale/LocaleConfiguration.cpp +++ b/src/modules/locale/LocaleConfiguration.cpp @@ -9,11 +9,13 @@ */ #include "LocaleConfiguration.h" +#include "LocaleNames.h" #include "utils/Logger.h" #include <QLocale> #include <QRegularExpression> +#include <QVector> LocaleConfiguration::LocaleConfiguration() : explicit_lang( false ) @@ -40,107 +42,114 @@ LocaleConfiguration::setLanguage( const QString& localeName ) m_lang = localeName; } - -LocaleConfiguration -LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, - const QStringList& availableLocales, - const QString& countryCode ) +static LocaleNameParts +updateCountry( LocaleNameParts p, const QString& country ) { - cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale."; - QString language = languageLocale.split( '_' ).first(); - QString region; - if ( language.contains( '@' ) ) - { - auto r = language.split( '@' ); - language = r.first(); - region = r[ 1 ]; // second() - } - - // Either an exact match, or the whole language part matches - // (followed by .<encoding> or _<country> - QStringList linesForLanguage = availableLocales.filter( QRegularExpression( language + "[._]" ) ); - cDebug() << Logger::SubEntry << "Matching" << linesForLanguage; + p.country = country; + return p; +} - QString lang; - if ( linesForLanguage.isEmpty() || languageLocale.isEmpty() ) +static QPair< int, LocaleNameParts > +identifyBestLanguageMatch( const LocaleNameParts& referenceLocale, QVector< LocaleNameParts >& others ) +{ + std::sort( others.begin(), + others.end(), + [ & ]( const LocaleNameParts& lhs, const LocaleNameParts& rhs ) + { return referenceLocale.similarity( lhs ) < referenceLocale.similarity( rhs ); } ); + // The best match is at the end + LocaleNameParts best_match = others.last(); + if ( !( referenceLocale.similarity( best_match ) > LocaleNameParts::no_match ) ) { - lang = "en_US.UTF-8"; + cDebug() << Logger::SubEntry << "Got no good match for" << referenceLocale.name(); + return { LocaleNameParts::no_match, LocaleNameParts {} }; } - else if ( linesForLanguage.length() == 1 ) + else { - lang = linesForLanguage.first(); + cDebug() << Logger::SubEntry << "Got best match for" << referenceLocale.name() << "as" << best_match.name(); + return { referenceLocale.similarity( best_match ), best_match }; } +} - // lang could still be empty if we found multiple locales that satisfy myLanguage - const QString combinedLanguageAndCountry = QString( "%1_%2" ).arg( language ).arg( countryCode ); - if ( lang.isEmpty() && region.isEmpty() ) - { - auto l = linesForLanguage.filter( - QRegularExpression( combinedLanguageAndCountry + "[._]" ) ); // no regional variants - if ( l.length() == 1 ) - { - lang = l.first(); - } - } +/** @brief Returns the QString from @p availableLocales that best-matches. + */ +static LocaleNameParts +identifyBestLanguageMatch( const QString& languageLocale, + const QStringList& availableLocales, + const QString& countryCode ) +{ + const QString default_lang = QStringLiteral( "en_US.UTF-8" ); - // The following block was inspired by Ubiquity, scripts/localechooser-apply. - // No copyright statement found in file, assuming GPL v2 or later. - /* # In the special cases of Portuguese and Chinese, selecting a - # different location may imply a different dialect of the language. - # In such cases, make LANG reflect the selected language (for - # messages, character types, and collation) and make the other - # locale categories reflect the selected location. */ - if ( language == "pt" || language == "zh" ) + const LocaleNameParts self = LocaleNameParts::fromName( languageLocale ); + if ( self.isValid() && !availableLocales.isEmpty() ) { - cDebug() << Logger::SubEntry << "Special-case Portuguese and Chinese"; - QString proposedLocale = QString( "%1_%2" ).arg( language ).arg( countryCode ); - for ( const QString& line : linesForLanguage ) + QVector< LocaleNameParts > others; + others.resize( availableLocales.length() ); // Makes default structs + std::transform( availableLocales.begin(), availableLocales.end(), others.begin(), LocaleNameParts::fromName ); + + // Keep track of the best match in various attempts + int best_score = LocaleNameParts::no_match; + LocaleNameParts best_match; + + // Check with the unmodified language setting { - if ( line.contains( proposedLocale ) ) + auto [ score, match ] = identifyBestLanguageMatch( self, others ); + if ( score >= LocaleNameParts::complete_match ) + { + return match; + } + else if ( score > best_score ) { - cDebug() << Logger::SubEntry << "Country-variant" << line << "chosen."; - lang = line; - break; + best_match = match; } } - } - if ( lang.isEmpty() && !region.isEmpty() ) - { - cDebug() << Logger::SubEntry << "Special-case region @" << region; - QString proposedRegion = QString( "@%1" ).arg( region ); - for ( const QString& line : linesForLanguage ) + + + // .. but it might match **better** with the chosen location country Code { - if ( line.startsWith( language ) && line.contains( proposedRegion ) ) + auto [ score, match ] = identifyBestLanguageMatch( updateCountry( self, countryCode ), others ); + if ( score >= LocaleNameParts::complete_match ) + { + return match; + } + else if ( score > best_score ) { - cDebug() << Logger::SubEntry << "Region-variant" << line << "chosen."; - lang = line; - break; + best_match = match; } } - } - - // If we found no good way to set a default lang, do a search with the whole - // language locale and pick the first result, if any. - if ( lang.isEmpty() ) - { - for ( const QString& line : availableLocales ) + // .. or better yet with the QLocale-derived country { - if ( line.startsWith( languageLocale ) ) + const QString localeCountry = LocaleNameParts::fromName( QLocale( languageLocale ).name() ).country; + auto [ score, match ] = identifyBestLanguageMatch( updateCountry( self, localeCountry ), others ); + if ( score >= LocaleNameParts::complete_match ) + { + return match; + } + else if ( score > best_score ) { - lang = line; - break; + best_match = match; } } + + if ( best_match.isValid() ) + { + cDebug() << Logger::SubEntry << "Matched best with" << best_match.name(); + return best_match; + } } // Else we have an unrecognized or unsupported locale, all we can do is go with // en_US.UTF-8 UTF-8. This completes all default language setting guesswork. - if ( lang.isEmpty() ) - { - lang = "en_US.UTF-8"; - } + return LocaleNameParts::fromName( default_lang ); +} +LocaleConfiguration +LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, + const QStringList& availableLocales, + const QString& countryCode ) +{ + cDebug() << "Mapping" << languageLocale << "in" << countryCode << "to locale."; + const auto bestLocale = identifyBestLanguageMatch( languageLocale, availableLocales, countryCode ); // The following block was inspired by Ubiquity, scripts/localechooser-apply. // No copyright statement found in file, assuming GPL v2 or later. @@ -188,34 +197,16 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // We make a proposed locale based on the UI language and the timezone's country. There is no // guarantee that this will be a valid, supported locale (often it won't). QString lc_formats; - const QString combined = QString( "%1_%2" ).arg( language ).arg( countryCode ); - if ( lang.isEmpty() ) + const QString combined = QString( "%1_%2" ).arg( bestLocale.language ).arg( countryCode ); + if ( availableLocales.contains( bestLocale.language ) ) { - cDebug() << Logger::SubEntry << "Looking up formats for" << combinedLanguageAndCountry; - // We look up if it's a supported locale. - for ( const QString& line : availableLocales ) - { - if ( line.startsWith( combinedLanguageAndCountry ) ) - { - lang = line; - lc_formats = line; - break; - } - } + cDebug() << Logger::SubEntry << "Exact formats match for language tag" << bestLocale.language; + lc_formats = bestLocale.language; } - else + else if ( availableLocales.contains( combined ) ) { - if ( availableLocales.contains( lang ) ) - { - cDebug() << Logger::SubEntry << "Exact formats match for language tag" << lang; - lc_formats = lang; - } - else if ( availableLocales.contains( combinedLanguageAndCountry ) ) - { - cDebug() << Logger::SubEntry << "Exact formats match for combined" << combinedLanguageAndCountry; - lang = combinedLanguageAndCountry; - lc_formats = combinedLanguageAndCountry; - } + cDebug() << Logger::SubEntry << "Exact formats match for combined" << combined; + lc_formats = combined; } if ( lc_formats.isEmpty() ) @@ -303,12 +294,7 @@ LocaleConfiguration::fromLanguageAndLocation( const QString& languageLocale, // If we cannot make a good choice for a given country we go with the LANG // setting, which defaults to en_US.UTF-8 UTF-8 if all else fails. - if ( lc_formats.isEmpty() ) - { - lc_formats = lang; - } - - return LocaleConfiguration( lang, lc_formats ); + return LocaleConfiguration( bestLocale.name(), lc_formats.isEmpty() ? bestLocale.name() : lc_formats ); } diff --git a/src/modules/locale/LocaleNames.cpp b/src/modules/locale/LocaleNames.cpp new file mode 100644 index 0000000000000000000000000000000000000000..401aa4809f6307de74ad6538c67f8ec7b4625139 --- /dev/null +++ b/src/modules/locale/LocaleNames.cpp @@ -0,0 +1,90 @@ +/* === This file is part of Calamares - <https://calamares.io> === + * + * SPDX-FileCopyrightText: 2022 Adriaan de Groot <groot@kde.org> + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Calamares is Free Software: see the License-Identifier above. + * + */ + +#include "LocaleNames.h" + +#include "utils/Logger.h" + +#include <QRegularExpression> + +LocaleNameParts +LocaleNameParts::fromName( const QString& name ) +{ + auto requireAndRemoveLeadingChar = []( QChar c, QString s ) + { + if ( s.startsWith( c ) ) + { + return s.remove( 0, 1 ); + } + else + { + return QString(); + } + }; + + auto parts = QRegularExpression( "^([a-zA-Z]+)(_[a-zA-Z]+)?(\\.[-a-zA-Z0-9]+)?(@[a-zA-Z]+)?" ).match( name ); + const QString calamaresLanguage = parts.captured( 1 ); + const QString calamaresCountry = requireAndRemoveLeadingChar( '_', parts.captured( 2 ) ); + const QString calamaresEncoding = requireAndRemoveLeadingChar( '.', parts.captured( 3 ) ); + const QString calamaresRegion = requireAndRemoveLeadingChar( '@', parts.captured( 4 ) ); + + if ( calamaresLanguage.isEmpty() ) + { + return LocaleNameParts {}; + } + else + { + return LocaleNameParts { calamaresLanguage, calamaresCountry, calamaresRegion, calamaresEncoding }; + } +} + +QString +LocaleNameParts::name() const +{ + // We don't want QStringView to a temporary; force conversion + auto insertLeadingChar = []( QChar c, QString s ) -> QString + { + if ( s.isEmpty() ) + { + return QString(); + } + else + { + return c + s; + } + }; + + if ( !isValid() ) + { + return QString(); + } + else + { + return language + insertLeadingChar( '_', country ) + insertLeadingChar( '.', encoding ) + + insertLeadingChar( '@', region ); + } +} + + +int +LocaleNameParts::similarity( const LocaleNameParts& other ) const +{ + if ( !isValid() || !other.isValid() ) + { + return 0; + } + if ( language != other.language ) + { + return 0; + } + const auto matched_region = ( region == other.region ? 30 : 0 ); + const auto matched_country = ( country == other.country ? ( country.isEmpty() ? 10 : 20 ) : 0 ); + const auto no_other_country_given = ( ( country != other.country && other.country.isEmpty() ) ? 10 : 0 ); + return 50 + matched_region + matched_country + no_other_country_given; +} diff --git a/src/modules/locale/LocaleNames.h b/src/modules/locale/LocaleNames.h new file mode 100644 index 0000000000000000000000000000000000000000..8498aa28a4967f6769f3240c74a649a3cd178cfe --- /dev/null +++ b/src/modules/locale/LocaleNames.h @@ -0,0 +1,46 @@ +/* === This file is part of Calamares - <https://calamares.io> === + * + * SPDX-FileCopyrightText: 2022 Adriaan de Groot <groot@kde.org> + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Calamares is Free Software: see the License-Identifier above. + * + */ + +#ifndef LOCALENAMES_H +#define LOCALENAMES_H + +#include <QString> + +/** @brief parts of a locale-name (e.g. "ar_LY.UTF-8", split apart) + * + * These are created from lines in `/usr/share/i18n/SUPPORTED`, + * which lists all the locales supported by the system (there + * are also other sources of the same). + * + */ +struct LocaleNameParts +{ + QString language; // e.g. "ar" + QString country; // e.g. "LY" (may be empty) + QString region; // e.g. "@valencia" (may be empty) + QString encoding; // e.g. "UTF-8" (may be empty) + + bool isValid() const { return !language.isEmpty(); } + QString name() const; + + static LocaleNameParts fromName( const QString& name ); + + static inline constexpr const int no_match = 0; + static inline constexpr const int complete_match = 100; + + /** @brief Compute similarity-score with another locale-name. + * + * Similarity is driven by language and region, then country. + * Returns a number between 0 (no similarity, e.g. the + * language is different) and 100 (complete match). + */ + int similarity( const LocaleNameParts& other ) const; +}; + +#endif diff --git a/src/modules/locale/Tests.cpp b/src/modules/locale/Tests.cpp index 69a6a925854d5d05cc2fa41004a9f7e8aa4a4eaa..1e15519925ed82f42b25ce6fcb95775a0cde227c 100644 --- a/src/modules/locale/Tests.cpp +++ b/src/modules/locale/Tests.cpp @@ -9,6 +9,7 @@ #include "Config.h" #include "LocaleConfiguration.h" +#include "LocaleNames.h" #include "timezonewidget/TimeZoneImage.h" #include "CalamaresVersion.h" @@ -50,12 +51,16 @@ private Q_SLOTS: void testLanguageDetection(); void testLanguageDetectionValencia(); - // Check realistic language mapping for issue 2008 + // Check that the test-data is available and ok void testKDENeonLanguageData(); + void testLocaleNameParts(); + + // Check realistic language mapping for issue 2008 void testLanguageMappingNeon_data(); void testLanguageMappingNeon(); void testLanguageMappingFreeBSD_data(); void testLanguageMappingFreeBSD(); + void testLanguageSimilarity(); private: QStringList m_KDEneonLocales; @@ -395,6 +400,10 @@ splitTestFileIntoLines( const QString& filename ) void LocaleTests::testKDENeonLanguageData() { + if ( !m_KDEneonLocales.isEmpty() ) + { + return; + } const QStringList neonLocales = splitTestFileIntoLines( QStringLiteral( "locale-data-neon" ) ); cDebug() << "Loaded KDE neon locales test data" << neonLocales.front() << "to" << neonLocales.back(); QCOMPARE( neonLocales.length(), 318 ); // wc -l tells me 318 lines @@ -415,7 +424,7 @@ LocaleTests::MappingData() // Tired of writing QString or QStringLiteral all the time. auto l = []( const char* p ) { return QString::fromUtf8( p ); }; - auto u = [](){ return QString(); }; + auto u = []() { return QString(); }; // The KDEneon columns include the .UTF-8 from the source data // The FreeBSD columns may have u() to indicate "same as KDEneon", @@ -445,12 +454,14 @@ LocaleTests::MappingData() } -void LocaleTests::testLanguageMappingNeon_data() +void +LocaleTests::testLanguageMappingNeon_data() { MappingData(); } -void LocaleTests::testLanguageMappingFreeBSD_data() +void +LocaleTests::testLanguageMappingFreeBSD_data() { MappingData(); } @@ -458,6 +469,7 @@ void LocaleTests::testLanguageMappingFreeBSD_data() void LocaleTests::testLanguageMappingNeon() { + testKDENeonLanguageData(); QVERIFY( !m_KDEneonLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); @@ -474,6 +486,7 @@ LocaleTests::testLanguageMappingNeon() void LocaleTests::testLanguageMappingFreeBSD() { + testKDENeonLanguageData(); QVERIFY( !m_FreeBSDLocales.isEmpty() ); QFETCH( QString, selectedLanguage ); @@ -488,6 +501,84 @@ LocaleTests::testLanguageMappingFreeBSD() QCOMPARE( bsd.language(), expected ); } +void +LocaleTests::testLocaleNameParts() +{ + testKDENeonLanguageData(); + QVERIFY( !m_FreeBSDLocales.isEmpty() ); + QVERIFY( !m_KDEneonLocales.isEmpty() ); + + // Example constant locales + { + auto c_parts = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) ); + QCOMPARE( c_parts.language, QStringLiteral( "nl" ) ); + QCOMPARE( c_parts.country, QStringLiteral( "NL" ) ); + QCOMPARE( c_parts.encoding, QStringLiteral( "UTF-8" ) ); + QVERIFY( c_parts.region.isEmpty() ); + } + { + auto c_parts = LocaleNameParts::fromName( QStringLiteral( "C.UTF-8" ) ); + QCOMPARE( c_parts.language, QStringLiteral( "C" ) ); + QVERIFY( c_parts.country.isEmpty() ); + QCOMPARE( c_parts.encoding, QStringLiteral( "UTF-8" ) ); + QVERIFY( c_parts.region.isEmpty() ); + } + + // Check all the loaded test locales + for ( const auto& s : m_FreeBSDLocales ) + { + auto parts = LocaleNameParts::fromName( s ); + QVERIFY( parts.isValid() ); + QCOMPARE( parts.name(), s ); + } + + for ( const auto& s : m_KDEneonLocales ) + { + auto parts = LocaleNameParts::fromName( s ); + QVERIFY( parts.isValid() ); + QCOMPARE( parts.name(), s ); + } +} + +void +LocaleTests::testLanguageSimilarity() +{ + // Empty + { + QCOMPARE( LocaleNameParts().similarity( LocaleNameParts() ), 0 ); + } + // Some simple Dutch situations + { + auto nl_parts = LocaleNameParts::fromName( QStringLiteral( "nl_NL.UTF-8" ) ); + auto be_parts = LocaleNameParts::fromName( QStringLiteral( "nl_BE.UTF-8" ) ); + auto nl_short_parts = LocaleNameParts::fromName( QStringLiteral( "nl" ) ); + QCOMPARE( nl_parts.similarity( nl_parts ), 100 ); + QCOMPARE( nl_parts.similarity( LocaleNameParts() ), 0 ); + QCOMPARE( nl_parts.similarity( be_parts ), 80 ); // Language + (empty) region match + QCOMPARE( nl_parts.similarity( nl_short_parts ), 90 ); + } + + // Everything matches itself + { + if ( m_KDEneonLocales.isEmpty() ) + { + testKDENeonLanguageData(); + } + QVERIFY( !m_FreeBSDLocales.isEmpty() ); + QVERIFY( !m_KDEneonLocales.isEmpty() ); + for ( const auto& l : m_KDEneonLocales ) + { + auto locale_name = LocaleNameParts::fromName( l ); + auto self_similarity = locale_name.similarity( locale_name ); + if ( self_similarity != 100 ) + { + cDebug() << "Locale" << l << "is unusual."; + } + QCOMPARE( self_similarity, 100 ); + } + } +} + #include "utils/moc-warnings.h" diff --git a/src/modules/localeq/CMakeLists.txt b/src/modules/localeq/CMakeLists.txt index 85b3721d9c970a95c41278805565e88505324ca3..d9741e5065d789b72907d12ea9fa89235aeaaf94 100644 --- a/src/modules/localeq/CMakeLists.txt +++ b/src/modules/localeq/CMakeLists.txt @@ -41,8 +41,9 @@ calamares_add_plugin(localeq EXPORT_MACRO PLUGINDLLEXPORT_PRO SOURCES LocaleQmlViewStep.cpp - ${_locale}/LocaleConfiguration.cpp ${_locale}/Config.cpp + ${_locale}/LocaleConfiguration.cpp + ${_locale}/LocaleNames.cpp ${_locale}/SetTimezoneJob.cpp RESOURCES localeq.qrc