From 7de056437f1d611219abfa5316b19e8161dc1261 Mon Sep 17 00:00:00 2001 From: CPython Devleopers <> Date: Fri, 24 Feb 2023 01:15:34 +0900 Subject: [PATCH] Update locale from CPython 3.11.2 --- Lib/locale.py | 138 ++++++++++++++++++++++++++-------------- Lib/test/test_locale.py | 64 +++++++++++++++++-- 2 files changed, 146 insertions(+), 56 deletions(-) diff --git a/Lib/locale.py b/Lib/locale.py index f3d3973d0..7a7694e1b 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -28,7 +28,7 @@ __all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error", "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", "str", "atof", "atoi", "format", "format_string", "currency", "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] + "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"] def _strcoll(a,b): """ strcoll(string,string) -> int. @@ -185,8 +185,14 @@ def _format(percent, value, grouping=False, monetary=False, *additional): formatted = percent % ((value,) + additional) else: formatted = percent % value + if percent[-1] in 'eEfFgGdiu': + formatted = _localize(formatted, grouping, monetary) + return formatted + +# Transform formatted as locale number according to the locale settings +def _localize(formatted, grouping=False, monetary=False): # floats and decimal ints need special action! - if percent[-1] in 'eEfFgG': + if '.' in formatted: seps = 0 parts = formatted.split('.') if grouping: @@ -196,7 +202,7 @@ def _format(percent, value, grouping=False, monetary=False, *additional): formatted = decimal_point.join(parts) if seps: formatted = _strip_padding(formatted, seps) - elif percent[-1] in 'diu': + else: seps = 0 if grouping: formatted, seps = _group(formatted, monetary=monetary) @@ -267,7 +273,7 @@ def currency(val, symbol=True, grouping=False, international=False): raise ValueError("Currency formatting is not possible using " "the 'C' locale.") - s = _format('%%.%if' % digits, abs(val), grouping, monetary=True) + s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True) # '<' and '>' are markers if the sign must be inserted between symbol and value s = '<' + s + '>' @@ -279,6 +285,8 @@ def currency(val, symbol=True, grouping=False, international=False): if precedes: s = smb + (separated and ' ' or '') + s else: + if international and smb[-1] == ' ': + smb = smb[:-1] s = s + (separated and ' ' or '') + smb sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn'] @@ -321,6 +329,10 @@ def delocalize(string): string = string.replace(dd, '.') return string +def localize(string, grouping=False, monetary=False): + """Parses a string as locale number according to the locale settings.""" + return _localize(string, grouping, monetary) + def atof(string, func=float): "Parses a string as a float according to the locale settings." return func(delocalize(string)) @@ -492,6 +504,10 @@ def _parse_localename(localename): return tuple(code.split('.')[:2]) elif code == 'C': return None, None + elif code == 'UTF-8': + # On macOS "LC_CTYPE=UTF-8" is a valid locale setting + # for getting UTF-8 handling for text. + return None, 'UTF-8' raise ValueError('unknown locale: %s' % localename) def _build_localename(localetuple): @@ -539,6 +555,12 @@ def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')): """ + import warnings + warnings.warn( + "Use setlocale(), getencoding() and getlocale() instead", + DeprecationWarning, stacklevel=2 + ) + try: # check if it's supported by the _locale module import _locale @@ -611,55 +633,72 @@ def resetlocale(category=LC_ALL): getdefaultlocale(). category defaults to LC_ALL. """ - _setlocale(category, _build_localename(getdefaultlocale())) + import warnings + warnings.warn( + 'Use locale.setlocale(locale.LC_ALL, "") instead', + DeprecationWarning, stacklevel=2 + ) -if sys.platform.startswith("win"): - # On Win32, this will return the ANSI code page - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using.""" - if sys.flags.utf8_mode: - return 'UTF-8' - import _bootlocale - return _bootlocale.getpreferredencoding(False) -else: - # On Unix, if CODESET is available, use that. - try: - CODESET - except NameError: + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + loc = getdefaultlocale() + + _setlocale(category, _build_localename(loc)) + + +try: + from _locale import getencoding +except ImportError: + def getencoding(): if hasattr(sys, 'getandroidapilevel'): # On Android langinfo.h and CODESET are missing, and UTF-8 is # always used in mbstowcs() and wcstombs(). - def getpreferredencoding(do_setlocale = True): - return 'UTF-8' - else: - # Fall back to parsing environment variables :-( - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - by looking at environment variables.""" - if sys.flags.utf8_mode: - return 'UTF-8' - res = getdefaultlocale()[1] - if res is None: - # LANG not set, default conservatively to ASCII - res = 'ascii' - return res - else: - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - according to the system configuration.""" - if sys.flags.utf8_mode: - return 'UTF-8' - import _bootlocale - if do_setlocale: - oldloc = setlocale(LC_CTYPE) - try: - setlocale(LC_CTYPE, "") - except Error: - pass - result = _bootlocale.getpreferredencoding(False) - if do_setlocale: - setlocale(LC_CTYPE, oldloc) - return result + return 'utf-8' + encoding = getdefaultlocale()[1] + if encoding is None: + # LANG not set, default to UTF-8 + encoding = 'utf-8' + return encoding + +try: + CODESET +except NameError: + def getpreferredencoding(do_setlocale=True): + """Return the charset that the user is likely using.""" + if sys.flags.warn_default_encoding: + import warnings + warnings.warn( + "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.", + EncodingWarning, 2) + if sys.flags.utf8_mode: + return 'utf-8' + return getencoding() +else: + # On Unix, if CODESET is available, use that. + def getpreferredencoding(do_setlocale=True): + """Return the charset that the user is likely using, + according to the system configuration.""" + + if sys.flags.warn_default_encoding: + import warnings + warnings.warn( + "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.", + EncodingWarning, 2) + if sys.flags.utf8_mode: + return 'utf-8' + + if not do_setlocale: + return getencoding() + + old_loc = setlocale(LC_CTYPE) + try: + try: + setlocale(LC_CTYPE, "") + except Error: + pass + return getencoding() + finally: + setlocale(LC_CTYPE, old_loc) ### Database @@ -734,6 +773,7 @@ locale_encoding_alias = { for k, v in sorted(locale_encoding_alias.items()): k = k.replace('_', '') locale_encoding_alias.setdefault(k, v) +del k, v # # The locale_alias table maps lowercase alias names to C locale names diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 25d883f43..196dc6e3a 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -1,4 +1,5 @@ -from test.support import verbose, is_android +from decimal import Decimal +from test.support import verbose, is_android, is_emscripten, is_wasi from test.support.warnings_helper import check_warnings import unittest import locale @@ -335,8 +336,7 @@ class TestFrFRNumberFormatting(FrFRCookedTest, BaseFormattingTest): euro = '\u20ac' self._test_currency(50000, "50000,00 " + euro) self._test_currency(50000, "50 000,00 " + euro, grouping=True) - # XXX is the trailing space a bug? - self._test_currency(50000, "50 000,00 EUR ", + self._test_currency(50000, "50 000,00 EUR", grouping=True, international=True) @@ -367,7 +367,7 @@ class TestEnUSCollation(BaseLocalizedTest, TestCollation): locale_type = locale.LC_ALL def setUp(self): - enc = codecs.lookup(locale.getpreferredencoding(False) or 'ascii').name + enc = codecs.lookup(locale.getencoding() or 'ascii').name if enc not in ('utf-8', 'iso8859-1', 'cp1252'): raise unittest.SkipTest('encoding not suitable') if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or @@ -377,11 +377,19 @@ class TestEnUSCollation(BaseLocalizedTest, TestCollation): @unittest.skipIf(sys.platform.startswith('aix'), 'bpo-29972: broken test on AIX') + @unittest.skipIf( + is_emscripten or is_wasi, + "musl libc issue on Emscripten/WASI, bpo-46390" + ) def test_strcoll_with_diacritic(self): self.assertLess(locale.strcoll('à', 'b'), 0) @unittest.skipIf(sys.platform.startswith('aix'), 'bpo-29972: broken test on AIX') + @unittest.skipIf( + is_emscripten or is_wasi, + "musl libc issue on Emscripten/WASI, bpo-46390" + ) def test_strxfrm_with_diacritic(self): self.assertLess(locale.strxfrm('à'), locale.strxfrm('b')) @@ -502,7 +510,7 @@ class TestMiscellaneous(unittest.TestCase): @unittest.expectedFailure def test_defaults_UTF8(self): # Issue #18378: on (at least) macOS setting LC_CTYPE to "UTF-8" is - # valid. Futhermore LC_CTYPE=UTF is used by the UTF-8 locale coercing + # valid. Furthermore LC_CTYPE=UTF is used by the UTF-8 locale coercing # during interpreter startup (on macOS). import _locale import os @@ -524,7 +532,8 @@ class TestMiscellaneous(unittest.TestCase): os.environ['LC_CTYPE'] = 'UTF-8' - self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8')) + with check_warnings(('', DeprecationWarning)): + self.assertEqual(locale.getdefaultlocale(), (None, 'UTF-8')) finally: for k in orig_env: @@ -536,6 +545,14 @@ class TestMiscellaneous(unittest.TestCase): if orig_getlocale is not None: _locale._getdefaultlocale = orig_getlocale + def test_getencoding(self): + # Invoke getencoding to make sure it does not cause exceptions. + enc = locale.getencoding() + self.assertIsInstance(enc, str) + self.assertNotEqual(enc, "") + # make sure it is valid + codecs.lookup(enc) + def test_getpreferredencoding(self): # Invoke getpreferredencoding to make sure it does not cause exceptions. enc = locale.getpreferredencoding() @@ -573,7 +590,13 @@ class TestMiscellaneous(unittest.TestCase): loc = locale.getlocale(locale.LC_CTYPE) if verbose: print('testing with %a' % (loc,), end=' ', flush=True) - locale.setlocale(locale.LC_CTYPE, loc) + try: + locale.setlocale(locale.LC_CTYPE, loc) + except locale.Error as exc: + # bpo-37945: setlocale(LC_CTYPE) fails with getlocale(LC_CTYPE) + # and the tr_TR locale on Windows. getlocale() builds a locale + # which is not recognize by setlocale(). + self.skipTest(f"setlocale(LC_CTYPE, {loc!r}) failed: {exc!r}") self.assertEqual(loc, locale.getlocale(locale.LC_CTYPE)) def test_invalid_locale_format_in_localetuple(self): @@ -639,5 +662,32 @@ class TestfrFRDelocalizeTest(FrFRCookedTest, BaseDelocalizeTest): self._test_atoi('50 000', 50000) +class BaseLocalizeTest(BaseLocalizedTest): + + def _test_localize(self, value, out, grouping=False): + self.assertEqual(locale.localize(value, grouping=grouping), out) + + +class TestEnUSLocalize(EnUSCookedTest, BaseLocalizeTest): + + def test_localize(self): + self._test_localize('50000.00', '50000.00') + self._test_localize( + '{0:.16f}'.format(Decimal('1.15')), '1.1500000000000000') + + +class TestCLocalize(CCookedTest, BaseLocalizeTest): + + def test_localize(self): + self._test_localize('50000.00', '50000.00') + + +class TestfrFRLocalize(FrFRCookedTest, BaseLocalizeTest): + + def test_localize(self): + self._test_localize('50000.00', '50000,00') + self._test_localize('50000.00', '50 000,00', grouping=True) + + if __name__ == '__main__': unittest.main()