diff --git a/Lib/random.py b/Lib/random.py index 85bad08d57..36e3925811 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -32,6 +32,11 @@ circular uniform von Mises + discrete distributions + ---------------------- + binomial + + General notes on the underlying Mersenne Twister core generator: * The period is 2**19937-1. @@ -49,6 +54,7 @@ from warnings import warn as _warn from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from math import tau as TWOPI, floor as _floor, isfinite as _isfinite +from math import lgamma as _lgamma, fabs as _fabs, log2 as _log2 try: from os import urandom as _urandom except ImportError: @@ -72,7 +78,7 @@ import _random try: # hashlib is pretty heavy to load, try lean internal module first - from _sha512 import sha512 as _sha512 + from _sha2 import sha512 as _sha512 except ImportError: # fallback to official implementation from hashlib import sha512 as _sha512 @@ -81,6 +87,7 @@ __all__ = [ "Random", "SystemRandom", "betavariate", + "binomialvariate", "choice", "choices", "expovariate", @@ -249,7 +256,7 @@ class Random(_random.Random): "Return a random int in the range [0,n). Defined for n > 0." getrandbits = self.getrandbits - k = n.bit_length() # don't use (n-1) here because n can be 1 + k = n.bit_length() r = getrandbits(k) # 0 <= r < 2**k while r >= n: r = getrandbits(k) @@ -304,58 +311,25 @@ class Random(_random.Random): # This code is a bit messy to make it fast for the # common case while still doing adequate error checking. - try: - istart = _index(start) - except TypeError: - istart = int(start) - if istart != start: - _warn('randrange() will raise TypeError in the future', - DeprecationWarning, 2) - raise ValueError("non-integer arg 1 for randrange()") - _warn('non-integer arguments to randrange() have been deprecated ' - 'since Python 3.10 and will be removed in a subsequent ' - 'version', - DeprecationWarning, 2) + istart = _index(start) if stop is None: # We don't check for "step != 1" because it hasn't been # type checked and converted to an integer yet. if step is not _ONE: - raise TypeError('Missing a non-None stop argument') + raise TypeError("Missing a non-None stop argument") if istart > 0: return self._randbelow(istart) raise ValueError("empty range for randrange()") - # stop argument supplied. - try: - istop = _index(stop) - except TypeError: - istop = int(stop) - if istop != stop: - _warn('randrange() will raise TypeError in the future', - DeprecationWarning, 2) - raise ValueError("non-integer stop for randrange()") - _warn('non-integer arguments to randrange() have been deprecated ' - 'since Python 3.10 and will be removed in a subsequent ' - 'version', - DeprecationWarning, 2) + # Stop argument supplied. + istop = _index(stop) width = istop - istart - try: - istep = _index(step) - except TypeError: - istep = int(step) - if istep != step: - _warn('randrange() will raise TypeError in the future', - DeprecationWarning, 2) - raise ValueError("non-integer step for randrange()") - _warn('non-integer arguments to randrange() have been deprecated ' - 'since Python 3.10 and will be removed in a subsequent ' - 'version', - DeprecationWarning, 2) + istep = _index(step) # Fast path. if istep == 1: if width > 0: return istart + self._randbelow(width) - raise ValueError("empty range for randrange() (%d, %d, %d)" % (istart, istop, width)) + raise ValueError(f"empty range in randrange({start}, {stop})") # Non-unit step argument supplied. if istep > 0: @@ -365,7 +339,7 @@ class Random(_random.Random): else: raise ValueError("zero step for randrange()") if n <= 0: - raise ValueError("empty range for randrange()") + raise ValueError(f"empty range in randrange({start}, {stop}, {step})") return istart + istep * self._randbelow(n) def randint(self, a, b): @@ -531,7 +505,14 @@ class Random(_random.Random): ## -------------------- real-valued distributions ------------------- def uniform(self, a, b): - "Get a random number in the range [a, b) or [a, b] depending on rounding." + """Get a random number in the range [a, b) or [a, b] depending on rounding. + + The mean (expected value) and variance of the random variable are: + + E[X] = (a + b) / 2 + Var[X] = (b - a) ** 2 / 12 + + """ return a + (b - a) * self.random() def triangular(self, low=0.0, high=1.0, mode=None): @@ -542,6 +523,11 @@ class Random(_random.Random): http://en.wikipedia.org/wiki/Triangular_distribution + The mean (expected value) and variance of the random variable are: + + E[X] = (low + high + mode) / 3 + Var[X] = (low**2 + high**2 + mode**2 - low*high - low*mode - high*mode) / 18 + """ u = self.random() try: @@ -623,7 +609,7 @@ class Random(_random.Random): """ return _exp(self.normalvariate(mu, sigma)) - def expovariate(self, lambd): + def expovariate(self, lambd=1.0): """Exponential distribution. lambd is 1.0 divided by the desired mean. It should be @@ -632,12 +618,15 @@ class Random(_random.Random): positive infinity if lambd is positive, and from negative infinity to 0 if lambd is negative. - """ - # lambd: rate lambd = 1/mean - # ('lambda' is a Python reserved word) + The mean (expected value) and variance of the random variable are: + E[X] = 1 / lambd + Var[X] = 1 / lambd ** 2 + + """ # we use 1-random() instead of random() to preclude the # possibility of taking the log of zero. + return -_log(1.0 - self.random()) / lambd def vonmisesvariate(self, mu, kappa): @@ -693,8 +682,12 @@ class Random(_random.Random): pdf(x) = -------------------------------------- math.gamma(alpha) * beta ** alpha + The mean (expected value) and variance of the random variable are: + + E[X] = alpha * beta + Var[X] = alpha * beta ** 2 + """ - # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 # Warning: a few older sources define the gamma distribution in terms # of alpha > -1.0 @@ -753,6 +746,11 @@ class Random(_random.Random): Conditions on the parameters are alpha > 0 and beta > 0. Returned values range between 0 and 1. + The mean (expected value) and variance of the random variable are: + + E[X] = alpha / (alpha + beta) + Var[X] = alpha * beta / ((alpha + beta)**2 * (alpha + beta + 1)) + """ ## See ## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html @@ -793,6 +791,97 @@ class Random(_random.Random): return alpha * (-_log(u)) ** (1.0 / beta) + ## -------------------- discrete distributions --------------------- + + def binomialvariate(self, n=1, p=0.5): + """Binomial random variable. + + Gives the number of successes for *n* independent trials + with the probability of success in each trial being *p*: + + sum(random() < p for i in range(n)) + + Returns an integer in the range: 0 <= X <= n + + The mean (expected value) and variance of the random variable are: + + E[X] = n * p + Var[x] = n * p * (1 - p) + + """ + # Error check inputs and handle edge cases + if n < 0: + raise ValueError("n must be non-negative") + if p <= 0.0 or p >= 1.0: + if p == 0.0: + return 0 + if p == 1.0: + return n + raise ValueError("p must be in the range 0.0 <= p <= 1.0") + + random = self.random + + # Fast path for a common case + if n == 1: + return _index(random() < p) + + # Exploit symmetry to establish: p <= 0.5 + if p > 0.5: + return n - self.binomialvariate(n, 1.0 - p) + + if n * p < 10.0: + # BG: Geometric method by Devroye with running time of O(np). + # https://dl.acm.org/doi/pdf/10.1145/42372.42381 + x = y = 0 + c = _log2(1.0 - p) + if not c: + return x + while True: + y += _floor(_log2(random()) / c) + 1 + if y > n: + return x + x += 1 + + # BTRS: Transformed rejection with squeeze method by Wolfgang Hörmann + # https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.47.8407&rep=rep1&type=pdf + assert n*p >= 10.0 and p <= 0.5 + setup_complete = False + + spq = _sqrt(n * p * (1.0 - p)) # Standard deviation of the distribution + b = 1.15 + 2.53 * spq + a = -0.0873 + 0.0248 * b + 0.01 * p + c = n * p + 0.5 + vr = 0.92 - 4.2 / b + + while True: + + u = random() + u -= 0.5 + us = 0.5 - _fabs(u) + k = _floor((2.0 * a / us + b) * u + c) + if k < 0 or k > n: + continue + + # The early-out "squeeze" test substantially reduces + # the number of acceptance condition evaluations. + v = random() + if us >= 0.07 and v <= vr: + return k + + # Acceptance-rejection test. + # Note, the original paper erroneously omits the call to log(v) + # when comparing to the log of the rescaled binomial distribution. + if not setup_complete: + alpha = (2.83 + 5.1 / b) * spq + lpq = _log(p / (1.0 - p)) + m = _floor((n + 1) * p) # Mode of the distribution + h = _lgamma(m + 1) + _lgamma(n - m + 1) + setup_complete = True # Only needs to be done once + v *= alpha / (a / (us * us) + b) + if _log(v) <= h - _lgamma(k + 1) - _lgamma(n - k + 1) + (k - m) * lpq: + return k + + ## ------------------------------------------------------------------ ## --------------- Operating System Random Source ------------------ @@ -859,6 +948,7 @@ vonmisesvariate = _inst.vonmisesvariate gammavariate = _inst.gammavariate gauss = _inst.gauss betavariate = _inst.betavariate +binomialvariate = _inst.binomialvariate paretovariate = _inst.paretovariate weibullvariate = _inst.weibullvariate getstate = _inst.getstate @@ -883,15 +973,17 @@ def _test_generator(n, func, args): low = min(data) high = max(data) - print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}') + print(f'{t1 - t0:.3f} sec, {n} times {func.__name__}{args!r}') print('avg %g, stddev %g, min %g, max %g\n' % (xbar, sigma, low, high)) -def _test(N=2000): +def _test(N=10_000): _test_generator(N, random, ()) _test_generator(N, normalvariate, (0.0, 1.0)) _test_generator(N, lognormvariate, (0.0, 1.0)) _test_generator(N, vonmisesvariate, (0.0, 1.0)) + _test_generator(N, binomialvariate, (15, 0.60)) + _test_generator(N, binomialvariate, (100, 0.75)) _test_generator(N, gammavariate, (0.01, 1.0)) _test_generator(N, gammavariate, (0.1, 1.0)) _test_generator(N, gammavariate, (0.1, 2.0)) diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py index 4bf00d5dbb..70bfbf09b5 100644 --- a/Lib/test/test_random.py +++ b/Lib/test/test_random.py @@ -509,50 +509,44 @@ class SystemRandom_TestBasicOps(TestBasicOps, unittest.TestCase): self.assertEqual(rint, 0) def test_randrange_errors(self): - raises = partial(self.assertRaises, ValueError, self.gen.randrange) - # Empty range - raises(3, 3) - raises(-721) - raises(0, 100, -12) - # Non-integer start/stop - self.assertWarns(DeprecationWarning, raises, 3.14159) - self.assertWarns(DeprecationWarning, self.gen.randrange, 3.0) - self.assertWarns(DeprecationWarning, self.gen.randrange, Fraction(3, 1)) - self.assertWarns(DeprecationWarning, raises, '3') - self.assertWarns(DeprecationWarning, raises, 0, 2.71828) - self.assertWarns(DeprecationWarning, self.gen.randrange, 0, 2.0) - self.assertWarns(DeprecationWarning, self.gen.randrange, 0, Fraction(2, 1)) - self.assertWarns(DeprecationWarning, raises, 0, '2') - # Zero and non-integer step - raises(0, 42, 0) - self.assertWarns(DeprecationWarning, raises, 0, 42, 0.0) - self.assertWarns(DeprecationWarning, raises, 0, 0, 0.0) - self.assertWarns(DeprecationWarning, raises, 0, 42, 3.14159) - self.assertWarns(DeprecationWarning, self.gen.randrange, 0, 42, 3.0) - self.assertWarns(DeprecationWarning, self.gen.randrange, 0, 42, Fraction(3, 1)) - self.assertWarns(DeprecationWarning, raises, 0, 42, '3') - self.assertWarns(DeprecationWarning, self.gen.randrange, 0, 42, 1.0) - self.assertWarns(DeprecationWarning, raises, 0, 0, 1.0) + raises_value_error = partial(self.assertRaises, ValueError, self.gen.randrange) + raises_type_error = partial(self.assertRaises, TypeError, self.gen.randrange) - def test_randrange_argument_handling(self): - randrange = self.gen.randrange - with self.assertWarns(DeprecationWarning): - randrange(10.0, 20, 2) - with self.assertWarns(DeprecationWarning): - randrange(10, 20.0, 2) - with self.assertWarns(DeprecationWarning): - randrange(10, 20, 1.0) - with self.assertWarns(DeprecationWarning): - randrange(10, 20, 2.0) - with self.assertWarns(DeprecationWarning): - with self.assertRaises(ValueError): - randrange(10.5) - with self.assertWarns(DeprecationWarning): - with self.assertRaises(ValueError): - randrange(10, 20.5) - with self.assertWarns(DeprecationWarning): - with self.assertRaises(ValueError): - randrange(10, 20, 1.5) + # Empty range + raises_value_error(3, 3) + raises_value_error(-721) + raises_value_error(0, 100, -12) + + # Zero step + raises_value_error(0, 42, 0) + raises_type_error(0, 42, 0.0) + raises_type_error(0, 0, 0.0) + + # Non-integer stop + raises_type_error(3.14159) + raises_type_error(3.0) + raises_type_error(Fraction(3, 1)) + raises_type_error('3') + raises_type_error(0, 2.71827) + raises_type_error(0, 2.0) + raises_type_error(0, Fraction(2, 1)) + raises_type_error(0, '2') + raises_type_error(0, 2.71827, 2) + + # Non-integer start + raises_type_error(2.71827, 5) + raises_type_error(2.0, 5) + raises_type_error(Fraction(2, 1), 5) + raises_type_error('2', 5) + raises_type_error(2.71827, 5, 2) + + # Non-integer step + raises_type_error(0, 42, 3.14159) + raises_type_error(0, 42, 3.0) + raises_type_error(0, 42, Fraction(3, 1)) + raises_type_error(0, 42, '3') + raises_type_error(0, 42, 1.0) + raises_type_error(0, 0, 1.0) def test_randrange_step(self): # bpo-42772: When stop is None, the step argument was being ignored. @@ -1027,6 +1021,7 @@ class TestDistributions(unittest.TestCase): g.random = x[:].pop; g.uniform(1,10) g.random = x[:].pop; g.paretovariate(1.0) g.random = x[:].pop; g.expovariate(1.0) + g.random = x[:].pop; g.expovariate() g.random = x[:].pop; g.weibullvariate(1.0, 1.0) g.random = x[:].pop; g.vonmisesvariate(1.0, 1.0) g.random = x[:].pop; g.normalvariate(0.0, 1.0) @@ -1084,6 +1079,9 @@ class TestDistributions(unittest.TestCase): (g.lognormvariate, (0.0, 0.0), 1.0), (g.lognormvariate, (-float('inf'), 0.0), 0.0), (g.normalvariate, (10.0, 0.0), 10.0), + (g.binomialvariate, (0, 0.5), 0), + (g.binomialvariate, (10, 0.0), 0), + (g.binomialvariate, (10, 1.0), 10), (g.paretovariate, (float('inf'),), 1.0), (g.weibullvariate, (10.0, float('inf')), 10.0), (g.weibullvariate, (0.0, 10.0), 0.0), @@ -1091,6 +1089,59 @@ class TestDistributions(unittest.TestCase): for i in range(N): self.assertEqual(variate(*args), expected) + def test_binomialvariate(self): + B = random.binomialvariate + + # Cover all the code paths + with self.assertRaises(ValueError): + B(n=-1) # Negative n + with self.assertRaises(ValueError): + B(n=1, p=-0.5) # Negative p + with self.assertRaises(ValueError): + B(n=1, p=1.5) # p > 1.0 + self.assertEqual(B(10, 0.0), 0) # p == 0.0 + self.assertEqual(B(10, 1.0), 10) # p == 1.0 + self.assertTrue(B(1, 0.3) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.9) in {0, 1}) # n == 1 fast path + self.assertTrue(B(1, 0.0) in {0}) # n == 1 fast path + self.assertTrue(B(1, 1.0) in {1}) # n == 1 fast path + + # BG method p <= 0.5 and n*p=1.25 + self.assertTrue(B(5, 0.25) in set(range(6))) + + # BG method p >= 0.5 and n*(1-p)=1.25 + self.assertTrue(B(5, 0.75) in set(range(6))) + + # BTRS method p <= 0.5 and n*p=25 + self.assertTrue(B(100, 0.25) in set(range(101))) + + # BTRS method p > 0.5 and n*(1-p)=25 + self.assertTrue(B(100, 0.75) in set(range(101))) + + # Statistical tests chosen such that they are + # exceedingly unlikely to ever fail for correct code. + + # BG code path + # Expected dist: [31641, 42188, 21094, 4688, 391] + c = Counter(B(4, 0.25) for i in range(100_000)) + self.assertTrue(29_641 <= c[0] <= 33_641, c) + self.assertTrue(40_188 <= c[1] <= 44_188) + self.assertTrue(19_094 <= c[2] <= 23_094) + self.assertTrue(2_688 <= c[3] <= 6_688) + self.assertEqual(set(c), {0, 1, 2, 3, 4}) + + # BTRS code path + # Sum of c[20], c[21], c[22], c[23], c[24] expected to be 36,214 + c = Counter(B(100, 0.25) for i in range(100_000)) + self.assertTrue(34_214 <= c[20]+c[21]+c[22]+c[23]+c[24] <= 38_214) + self.assertTrue(set(c) <= set(range(101))) + self.assertEqual(c.total(), 100_000) + + # Demonstrate the BTRS works for huge values of n + self.assertTrue(19_000_000 <= B(100_000_000, 0.2) <= 21_000_000) + self.assertTrue(89_000_000 <= B(100_000_000, 0.9) <= 91_000_000) + + def test_von_mises_range(self): # Issue 17149: von mises variates were not consistently in the # range [0, 2*PI].