forked from Rust-related/RustPython
This reverts commit ff970b0e1c.
This commit is contained in:
871
Lib/statistics.py
vendored
871
Lib/statistics.py
vendored
File diff suppressed because it is too large
Load Diff
526
Lib/test/test_statistics.py
vendored
526
Lib/test/test_statistics.py
vendored
@@ -1,4 +1,4 @@
|
||||
x = """Test suite for statistics module, including helper NumericTestCase and
|
||||
"""Test suite for statistics module, including helper NumericTestCase and
|
||||
approx_equal function.
|
||||
|
||||
"""
|
||||
@@ -9,14 +9,13 @@ import collections.abc
|
||||
import copy
|
||||
import decimal
|
||||
import doctest
|
||||
import itertools
|
||||
import math
|
||||
import pickle
|
||||
import random
|
||||
import sys
|
||||
import unittest
|
||||
from test import support
|
||||
from test.support import import_helper, requires_IEEE_754
|
||||
from test.support import import_helper
|
||||
|
||||
from decimal import Decimal
|
||||
from fractions import Fraction
|
||||
@@ -28,12 +27,6 @@ import statistics
|
||||
|
||||
# === Helper functions and class ===
|
||||
|
||||
# Test copied from Lib/test/test_math.py
|
||||
# detect evidence of double-rounding: fsum is not always correctly
|
||||
# rounded on machines that suffer from double rounding.
|
||||
x, y = 1e16, 2.9999 # use temporary values to defeat peephole optimizer
|
||||
HAVE_DOUBLE_ROUNDING = (x + y == 1e16 + 4)
|
||||
|
||||
def sign(x):
|
||||
"""Return -1.0 for negatives, including -0.0, otherwise +1.0."""
|
||||
return math.copysign(1, x)
|
||||
@@ -698,6 +691,14 @@ class GlobalsTest(unittest.TestCase):
|
||||
'missing name "%s" in __all__' % name)
|
||||
|
||||
|
||||
class DocTests(unittest.TestCase):
|
||||
@unittest.skipIf(sys.flags.optimize >= 2,
|
||||
"Docstrings are omitted with -OO and above")
|
||||
def test_doc_tests(self):
|
||||
failed, tried = doctest.testmod(statistics, optionflags=doctest.ELLIPSIS)
|
||||
self.assertGreater(tried, 0)
|
||||
self.assertEqual(failed, 0)
|
||||
|
||||
class StatisticsErrorTest(unittest.TestCase):
|
||||
def test_has_exception(self):
|
||||
errmsg = (
|
||||
@@ -1038,6 +1039,50 @@ class FailNegTest(unittest.TestCase):
|
||||
self.assertEqual(errmsg, msg)
|
||||
|
||||
|
||||
class FindLteqTest(unittest.TestCase):
|
||||
# Test _find_lteq private function.
|
||||
|
||||
def test_invalid_input_values(self):
|
||||
for a, x in [
|
||||
([], 1),
|
||||
([1, 2], 3),
|
||||
([1, 3], 2)
|
||||
]:
|
||||
with self.subTest(a=a, x=x):
|
||||
with self.assertRaises(ValueError):
|
||||
statistics._find_lteq(a, x)
|
||||
|
||||
def test_locate_successfully(self):
|
||||
for a, x, expected_i in [
|
||||
([1, 1, 1, 2, 3], 1, 0),
|
||||
([0, 1, 1, 1, 2, 3], 1, 1),
|
||||
([1, 2, 3, 3, 3], 3, 2)
|
||||
]:
|
||||
with self.subTest(a=a, x=x):
|
||||
self.assertEqual(expected_i, statistics._find_lteq(a, x))
|
||||
|
||||
|
||||
class FindRteqTest(unittest.TestCase):
|
||||
# Test _find_rteq private function.
|
||||
|
||||
def test_invalid_input_values(self):
|
||||
for a, l, x in [
|
||||
([1], 2, 1),
|
||||
([1, 3], 0, 2)
|
||||
]:
|
||||
with self.assertRaises(ValueError):
|
||||
statistics._find_rteq(a, l, x)
|
||||
|
||||
def test_locate_successfully(self):
|
||||
for a, l, x, expected_i in [
|
||||
([1, 1, 1, 2, 3], 0, 1, 2),
|
||||
([0, 1, 1, 1, 2, 3], 0, 1, 3),
|
||||
([1, 2, 3, 3, 3], 0, 3, 4)
|
||||
]:
|
||||
with self.subTest(a=a, l=l, x=x):
|
||||
self.assertEqual(expected_i, statistics._find_rteq(a, l, x))
|
||||
|
||||
|
||||
# === Tests for public functions ===
|
||||
|
||||
class UnivariateCommonMixin:
|
||||
@@ -1072,7 +1117,7 @@ class UnivariateCommonMixin:
|
||||
def test_order_doesnt_matter(self):
|
||||
# Test that the order of data points doesn't change the result.
|
||||
|
||||
# CAUTION: due to floating-point rounding errors, the result actually
|
||||
# CAUTION: due to floating point rounding errors, the result actually
|
||||
# may depend on the order. Consider this test representing an ideal.
|
||||
# To avoid this test failing, only test with exact values such as ints
|
||||
# or Fractions.
|
||||
@@ -1165,9 +1210,6 @@ class UnivariateTypeMixin:
|
||||
def __add__(self, other):
|
||||
return type(self)(super().__add__(other))
|
||||
__radd__ = __add__
|
||||
def __mul__(self, other):
|
||||
return type(self)(super().__mul__(other))
|
||||
__rmul__ = __mul__
|
||||
return (float, Decimal, Fraction, MyFloat)
|
||||
|
||||
def test_types_conserved(self):
|
||||
@@ -1740,12 +1782,6 @@ class TestMedianGrouped(TestMedian):
|
||||
data = [x]*count
|
||||
self.assertEqual(self.func(data), float(x))
|
||||
|
||||
def test_single_value(self):
|
||||
# Override method from AverageMixin.
|
||||
# Average of a single value is the value as a float.
|
||||
for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
|
||||
self.assertEqual(self.func([x]), float(x))
|
||||
|
||||
def test_odd_fractions(self):
|
||||
# Test median_grouped works with an odd number of Fractions.
|
||||
F = Fraction
|
||||
@@ -1925,27 +1961,6 @@ class TestFMean(unittest.TestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
fmean([Inf, -Inf])
|
||||
|
||||
def test_weights(self):
|
||||
fmean = statistics.fmean
|
||||
StatisticsError = statistics.StatisticsError
|
||||
self.assertEqual(
|
||||
fmean([10, 10, 10, 50], [0.25] * 4),
|
||||
fmean([10, 10, 10, 50]))
|
||||
self.assertEqual(
|
||||
fmean([10, 10, 20], [0.25, 0.25, 0.50]),
|
||||
fmean([10, 10, 20, 20]))
|
||||
self.assertEqual( # inputs are iterators
|
||||
fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
|
||||
fmean([10, 10, 20, 20]))
|
||||
with self.assertRaises(StatisticsError):
|
||||
fmean([10, 20, 30], [1, 2]) # unequal lengths
|
||||
with self.assertRaises(StatisticsError):
|
||||
fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
|
||||
with self.assertRaises(StatisticsError):
|
||||
fmean([10, 20], [-1, 1]) # sum of weights is zero
|
||||
with self.assertRaises(StatisticsError):
|
||||
fmean(iter([10, 20]), iter([-1, 1])) # sum of weights is zero
|
||||
|
||||
|
||||
# === Tests for variances and standard deviations ===
|
||||
|
||||
@@ -2122,104 +2137,6 @@ class TestPStdev(VarianceStdevMixin, NumericTestCase):
|
||||
self.assertEqual(self.func(data), 2.5)
|
||||
self.assertEqual(self.func(data, mu=0.5), 6.5)
|
||||
|
||||
class TestSqrtHelpers(unittest.TestCase):
|
||||
|
||||
def test_integer_sqrt_of_frac_rto(self):
|
||||
for n, m in itertools.product(range(100), range(1, 1000)):
|
||||
r = statistics._integer_sqrt_of_frac_rto(n, m)
|
||||
self.assertIsInstance(r, int)
|
||||
if r*r*m == n:
|
||||
# Root is exact
|
||||
continue
|
||||
# Inexact, so the root should be odd
|
||||
self.assertEqual(r&1, 1)
|
||||
# Verify correct rounding
|
||||
self.assertTrue(m * (r - 1)**2 < n < m * (r + 1)**2)
|
||||
|
||||
@requires_IEEE_754
|
||||
@support.requires_resource('cpu')
|
||||
def test_float_sqrt_of_frac(self):
|
||||
|
||||
def is_root_correctly_rounded(x: Fraction, root: float) -> bool:
|
||||
if not x:
|
||||
return root == 0.0
|
||||
|
||||
# Extract adjacent representable floats
|
||||
r_up: float = math.nextafter(root, math.inf)
|
||||
r_down: float = math.nextafter(root, -math.inf)
|
||||
assert r_down < root < r_up
|
||||
|
||||
# Convert to fractions for exact arithmetic
|
||||
frac_root: Fraction = Fraction(root)
|
||||
half_way_up: Fraction = (frac_root + Fraction(r_up)) / 2
|
||||
half_way_down: Fraction = (frac_root + Fraction(r_down)) / 2
|
||||
|
||||
# Check a closed interval.
|
||||
# Does not test for a midpoint rounding rule.
|
||||
return half_way_down ** 2 <= x <= half_way_up ** 2
|
||||
|
||||
randrange = random.randrange
|
||||
|
||||
for i in range(60_000):
|
||||
numerator: int = randrange(10 ** randrange(50))
|
||||
denonimator: int = randrange(10 ** randrange(50)) + 1
|
||||
with self.subTest(numerator=numerator, denonimator=denonimator):
|
||||
x: Fraction = Fraction(numerator, denonimator)
|
||||
root: float = statistics._float_sqrt_of_frac(numerator, denonimator)
|
||||
self.assertTrue(is_root_correctly_rounded(x, root))
|
||||
|
||||
# Verify that corner cases and error handling match math.sqrt()
|
||||
self.assertEqual(statistics._float_sqrt_of_frac(0, 1), 0.0)
|
||||
with self.assertRaises(ValueError):
|
||||
statistics._float_sqrt_of_frac(-1, 1)
|
||||
with self.assertRaises(ValueError):
|
||||
statistics._float_sqrt_of_frac(1, -1)
|
||||
|
||||
# Error handling for zero denominator matches that for Fraction(1, 0)
|
||||
with self.assertRaises(ZeroDivisionError):
|
||||
statistics._float_sqrt_of_frac(1, 0)
|
||||
|
||||
# The result is well defined if both inputs are negative
|
||||
self.assertEqual(statistics._float_sqrt_of_frac(-2, -1), statistics._float_sqrt_of_frac(2, 1))
|
||||
|
||||
def test_decimal_sqrt_of_frac(self):
|
||||
root: Decimal
|
||||
numerator: int
|
||||
denominator: int
|
||||
|
||||
for root, numerator, denominator in [
|
||||
(Decimal('0.4481904599041192673635338663'), 200874688349065940678243576378, 1000000000000000000000000000000), # No adj
|
||||
(Decimal('0.7924949131383786609961759598'), 628048187350206338833590574929, 1000000000000000000000000000000), # Adj up
|
||||
(Decimal('0.8500554152289934068192208727'), 722594208960136395984391238251, 1000000000000000000000000000000), # Adj down
|
||||
]:
|
||||
with decimal.localcontext(decimal.DefaultContext):
|
||||
self.assertEqual(statistics._decimal_sqrt_of_frac(numerator, denominator), root)
|
||||
|
||||
# Confirm expected root with a quad precision decimal computation
|
||||
with decimal.localcontext(decimal.DefaultContext) as ctx:
|
||||
ctx.prec *= 4
|
||||
high_prec_ratio = Decimal(numerator) / Decimal(denominator)
|
||||
ctx.rounding = decimal.ROUND_05UP
|
||||
high_prec_root = high_prec_ratio.sqrt()
|
||||
with decimal.localcontext(decimal.DefaultContext):
|
||||
target_root = +high_prec_root
|
||||
self.assertEqual(root, target_root)
|
||||
|
||||
# Verify that corner cases and error handling match Decimal.sqrt()
|
||||
self.assertEqual(statistics._decimal_sqrt_of_frac(0, 1), 0.0)
|
||||
with self.assertRaises(decimal.InvalidOperation):
|
||||
statistics._decimal_sqrt_of_frac(-1, 1)
|
||||
with self.assertRaises(decimal.InvalidOperation):
|
||||
statistics._decimal_sqrt_of_frac(1, -1)
|
||||
|
||||
# Error handling for zero denominator matches that for Fraction(1, 0)
|
||||
with self.assertRaises(ZeroDivisionError):
|
||||
statistics._decimal_sqrt_of_frac(1, 0)
|
||||
|
||||
# The result is well defined if both inputs are negative
|
||||
self.assertEqual(statistics._decimal_sqrt_of_frac(-2, -1), statistics._decimal_sqrt_of_frac(2, 1))
|
||||
|
||||
|
||||
class TestStdev(VarianceStdevMixin, NumericTestCase):
|
||||
# Tests for sample standard deviation.
|
||||
def setUp(self):
|
||||
@@ -2234,7 +2151,7 @@ class TestStdev(VarianceStdevMixin, NumericTestCase):
|
||||
# Test that stdev is, in fact, the square root of variance.
|
||||
data = [random.uniform(-2, 9) for _ in range(1000)]
|
||||
expected = math.sqrt(statistics.variance(data))
|
||||
self.assertAlmostEqual(self.func(data), expected)
|
||||
self.assertEqual(self.func(data), expected)
|
||||
|
||||
def test_center_not_at_mean(self):
|
||||
data = (1.0, 2.0)
|
||||
@@ -2302,12 +2219,10 @@ class TestGeometricMean(unittest.TestCase):
|
||||
StatisticsError = statistics.StatisticsError
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([]) # empty input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([3.5, 0.0, 5.25]) # zero input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([3.5, -4.0, 5.25]) # negative input
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([0.0, -4.0, 5.25]) # negative input with zero
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean([3.5, -math.inf, 5.25]) # negative infinity
|
||||
with self.assertRaises(StatisticsError):
|
||||
geometric_mean(iter([])) # empty iterator
|
||||
with self.assertRaises(TypeError):
|
||||
@@ -2330,200 +2245,6 @@ class TestGeometricMean(unittest.TestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
geometric_mean([Inf, -Inf])
|
||||
|
||||
# Cases with zero
|
||||
self.assertEqual(geometric_mean([3, 0.0, 5]), 0.0) # Any zero gives a zero
|
||||
self.assertEqual(geometric_mean([3, -0.0, 5]), 0.0) # Negative zero allowed
|
||||
self.assertTrue(math.isnan(geometric_mean([0, NaN]))) # NaN beats zero
|
||||
self.assertTrue(math.isnan(geometric_mean([0, Inf]))) # Because 0.0 * Inf -> NaN
|
||||
|
||||
def test_mixed_int_and_float(self):
|
||||
# Regression test for b.p.o. issue #28327
|
||||
geometric_mean = statistics.geometric_mean
|
||||
expected_mean = 3.80675409583932
|
||||
values = [
|
||||
[2, 3, 5, 7],
|
||||
[2, 3, 5, 7.0],
|
||||
[2, 3, 5.0, 7.0],
|
||||
[2, 3.0, 5.0, 7.0],
|
||||
[2.0, 3.0, 5.0, 7.0],
|
||||
]
|
||||
for v in values:
|
||||
with self.subTest(v=v):
|
||||
actual_mean = geometric_mean(v)
|
||||
self.assertAlmostEqual(actual_mean, expected_mean, places=5)
|
||||
|
||||
|
||||
class TestKDE(unittest.TestCase):
|
||||
|
||||
def test_kde(self):
|
||||
kde = statistics.kde
|
||||
StatisticsError = statistics.StatisticsError
|
||||
|
||||
kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular',
|
||||
'uniform', 'triangular', 'parabolic', 'epanechnikov',
|
||||
'quartic', 'biweight', 'triweight', 'cosine']
|
||||
|
||||
sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
|
||||
|
||||
# The approximate integral of a PDF should be close to 1.0
|
||||
|
||||
def integrate(func, low, high, steps=10_000):
|
||||
"Numeric approximation of a definite function integral."
|
||||
dx = (high - low) / steps
|
||||
midpoints = (low + (i + 1/2) * dx for i in range(steps))
|
||||
return sum(map(func, midpoints)) * dx
|
||||
|
||||
for kernel in kernels:
|
||||
with self.subTest(kernel=kernel):
|
||||
f_hat = kde(sample, h=1.5, kernel=kernel)
|
||||
area = integrate(f_hat, -20, 20)
|
||||
self.assertAlmostEqual(area, 1.0, places=4)
|
||||
|
||||
# Check CDF against an integral of the PDF
|
||||
|
||||
data = [3, 5, 10, 12]
|
||||
h = 2.3
|
||||
x = 10.5
|
||||
for kernel in kernels:
|
||||
with self.subTest(kernel=kernel):
|
||||
cdf = kde(data, h, kernel, cumulative=True)
|
||||
f_hat = kde(data, h, kernel)
|
||||
area = integrate(f_hat, -20, x, 100_000)
|
||||
self.assertAlmostEqual(cdf(x), area, places=4)
|
||||
|
||||
# Check error cases
|
||||
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde([], h=1.0) # Empty dataset
|
||||
with self.assertRaises(TypeError):
|
||||
kde(['abc', 'def'], 1.5) # Non-numeric data
|
||||
with self.assertRaises(TypeError):
|
||||
kde(iter(sample), 1.5) # Data is not a sequence
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=0.0) # Zero bandwidth
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=-1.0) # Negative bandwidth
|
||||
with self.assertRaises(TypeError):
|
||||
kde(sample, h='str') # Wrong bandwidth type
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=1.0, kernel='bogus') # Invalid kernel
|
||||
with self.assertRaises(TypeError):
|
||||
kde(sample, 1.0, 'gauss', True) # Positional cumulative argument
|
||||
|
||||
# Test name and docstring of the generated function
|
||||
|
||||
h = 1.5
|
||||
kernel = 'cosine'
|
||||
f_hat = kde(sample, h, kernel)
|
||||
self.assertEqual(f_hat.__name__, 'pdf')
|
||||
self.assertIn(kernel, f_hat.__doc__)
|
||||
self.assertIn(repr(h), f_hat.__doc__)
|
||||
|
||||
# Test closed interval for the support boundaries.
|
||||
# In particular, 'uniform' should be non-zero at the boundaries.
|
||||
|
||||
f_hat = kde([0], 1.0, 'uniform')
|
||||
self.assertEqual(f_hat(-1.0), 1/2)
|
||||
self.assertEqual(f_hat(1.0), 1/2)
|
||||
|
||||
# Test online updates to data
|
||||
|
||||
data = [1, 2]
|
||||
f_hat = kde(data, 5.0, 'triangular')
|
||||
self.assertEqual(f_hat(100), 0.0)
|
||||
data.append(100)
|
||||
self.assertGreater(f_hat(100), 0.0)
|
||||
|
||||
def test_kde_kernel_invcdfs(self):
|
||||
kernel_invcdfs = statistics._kernel_invcdfs
|
||||
kde = statistics.kde
|
||||
|
||||
# Verify that cdf / invcdf will round trip
|
||||
xarr = [i/100 for i in range(-100, 101)]
|
||||
for kernel, invcdf in kernel_invcdfs.items():
|
||||
with self.subTest(kernel=kernel):
|
||||
cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
|
||||
for x in xarr:
|
||||
self.assertAlmostEqual(invcdf(cdf(x)), x, places=5)
|
||||
|
||||
@support.requires_resource('cpu')
|
||||
def test_kde_random(self):
|
||||
kde_random = statistics.kde_random
|
||||
StatisticsError = statistics.StatisticsError
|
||||
kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular',
|
||||
'uniform', 'triangular', 'parabolic', 'epanechnikov',
|
||||
'quartic', 'biweight', 'triweight', 'cosine']
|
||||
sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
|
||||
|
||||
# Smoke test
|
||||
|
||||
for kernel in kernels:
|
||||
with self.subTest(kernel=kernel):
|
||||
rand = kde_random(sample, h=1.5, kernel=kernel)
|
||||
selections = [rand() for i in range(10)]
|
||||
|
||||
# Check error cases
|
||||
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde_random([], h=1.0) # Empty dataset
|
||||
with self.assertRaises(TypeError):
|
||||
kde_random(['abc', 'def'], 1.5) # Non-numeric data
|
||||
with self.assertRaises(TypeError):
|
||||
kde_random(iter(sample), 1.5) # Data is not a sequence
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde_random(sample, h=-1.0) # Negative bandwidth
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde_random(sample, h=0.0) # Zero bandwidth
|
||||
with self.assertRaises(TypeError):
|
||||
kde_random(sample, h='str') # Wrong bandwidth type
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde_random(sample, h=1.0, kernel='bogus') # Invalid kernel
|
||||
|
||||
# Test name and docstring of the generated function
|
||||
|
||||
h = 1.5
|
||||
kernel = 'cosine'
|
||||
rand = kde_random(sample, h, kernel)
|
||||
self.assertEqual(rand.__name__, 'rand')
|
||||
self.assertIn(kernel, rand.__doc__)
|
||||
self.assertIn(repr(h), rand.__doc__)
|
||||
|
||||
# Approximate distribution test: Compare a random sample to the expected distribution
|
||||
|
||||
data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2, 7.8, 14.3, 15.1, 15.3, 15.8, 17.0]
|
||||
xarr = [x / 10 for x in range(-100, 250)]
|
||||
n = 1_000_000
|
||||
h = 1.75
|
||||
dx = 0.1
|
||||
|
||||
def p_observed(x):
|
||||
# P(x <= X < x+dx)
|
||||
i = bisect.bisect_left(big_sample, x)
|
||||
j = bisect.bisect_left(big_sample, x + dx)
|
||||
return (j - i) / len(big_sample)
|
||||
|
||||
def p_expected(x):
|
||||
# P(x <= X < x+dx)
|
||||
return F_hat(x + dx) - F_hat(x)
|
||||
|
||||
for kernel in kernels:
|
||||
with self.subTest(kernel=kernel):
|
||||
|
||||
rand = kde_random(data, h, kernel, seed=8675309**2)
|
||||
big_sample = sorted([rand() for i in range(n)])
|
||||
F_hat = statistics.kde(data, h, kernel, cumulative=True)
|
||||
|
||||
for x in xarr:
|
||||
self.assertTrue(math.isclose(p_observed(x), p_expected(x), abs_tol=0.0005))
|
||||
|
||||
# Test online updates to data
|
||||
|
||||
data = [1, 2]
|
||||
rand = kde_random(data, 5, 'triangular')
|
||||
self.assertLess(max([rand() for i in range(5000)]), 10)
|
||||
data.append(100)
|
||||
self.assertGreater(max(rand() for i in range(5000)), 10)
|
||||
|
||||
|
||||
class TestQuantiles(unittest.TestCase):
|
||||
|
||||
@@ -2634,11 +2355,6 @@ class TestQuantiles(unittest.TestCase):
|
||||
data = random.choices(range(100), k=k)
|
||||
q1, q2, q3 = quantiles(data, method='inclusive')
|
||||
self.assertEqual(q2, statistics.median(data))
|
||||
# Base case with a single data point: When estimating quantiles from
|
||||
# a sample, we want to be able to add one sample point at a time,
|
||||
# getting increasingly better estimates.
|
||||
self.assertEqual(quantiles([10], n=4), [10.0, 10.0, 10.0])
|
||||
self.assertEqual(quantiles([10], n=4, method='exclusive'), [10.0, 10.0, 10.0])
|
||||
|
||||
def test_equal_inputs(self):
|
||||
quantiles = statistics.quantiles
|
||||
@@ -2689,7 +2405,7 @@ class TestQuantiles(unittest.TestCase):
|
||||
with self.assertRaises(ValueError):
|
||||
quantiles([10, 20, 30], method='X') # method is unknown
|
||||
with self.assertRaises(StatisticsError):
|
||||
quantiles([], n=4) # not enough data points
|
||||
quantiles([10], n=4) # not enough data points
|
||||
with self.assertRaises(TypeError):
|
||||
quantiles([10, None, 30], n=4) # data is non-numeric
|
||||
|
||||
@@ -2748,95 +2464,6 @@ class TestCorrelationAndCovariance(unittest.TestCase):
|
||||
self.assertAlmostEqual(statistics.correlation(x, y), 1)
|
||||
self.assertAlmostEqual(statistics.covariance(x, y), 0.1)
|
||||
|
||||
def test_sqrtprod_helper_function_fundamentals(self):
|
||||
# Verify that results are close to sqrt(x * y)
|
||||
for i in range(100):
|
||||
x = random.expovariate()
|
||||
y = random.expovariate()
|
||||
expected = math.sqrt(x * y)
|
||||
actual = statistics._sqrtprod(x, y)
|
||||
with self.subTest(x=x, y=y, expected=expected, actual=actual):
|
||||
self.assertAlmostEqual(expected, actual)
|
||||
|
||||
x, y, target = 0.8035720646477457, 0.7957468097636939, 0.7996498651651661
|
||||
self.assertEqual(statistics._sqrtprod(x, y), target)
|
||||
self.assertNotEqual(math.sqrt(x * y), target)
|
||||
|
||||
# Test that range extremes avoid underflow and overflow
|
||||
smallest = sys.float_info.min * sys.float_info.epsilon
|
||||
self.assertEqual(statistics._sqrtprod(smallest, smallest), smallest)
|
||||
biggest = sys.float_info.max
|
||||
self.assertEqual(statistics._sqrtprod(biggest, biggest), biggest)
|
||||
|
||||
# Check special values and the sign of the result
|
||||
special_values = [0.0, -0.0, 1.0, -1.0, 4.0, -4.0,
|
||||
math.nan, -math.nan, math.inf, -math.inf]
|
||||
for x, y in itertools.product(special_values, repeat=2):
|
||||
try:
|
||||
expected = math.sqrt(x * y)
|
||||
except ValueError:
|
||||
expected = 'ValueError'
|
||||
try:
|
||||
actual = statistics._sqrtprod(x, y)
|
||||
except ValueError:
|
||||
actual = 'ValueError'
|
||||
with self.subTest(x=x, y=y, expected=expected, actual=actual):
|
||||
if isinstance(expected, str) and expected == 'ValueError':
|
||||
self.assertEqual(actual, 'ValueError')
|
||||
continue
|
||||
self.assertIsInstance(actual, float)
|
||||
if math.isnan(expected):
|
||||
self.assertTrue(math.isnan(actual))
|
||||
continue
|
||||
self.assertEqual(actual, expected)
|
||||
self.assertEqual(sign(actual), sign(expected))
|
||||
|
||||
@requires_IEEE_754
|
||||
@unittest.skipIf(HAVE_DOUBLE_ROUNDING,
|
||||
"accuracy not guaranteed on machines with double rounding")
|
||||
@support.cpython_only # Allow for a weaker sumprod() implementation
|
||||
def test_sqrtprod_helper_function_improved_accuracy(self):
|
||||
# Test a known example where accuracy is improved
|
||||
x, y, target = 0.8035720646477457, 0.7957468097636939, 0.7996498651651661
|
||||
self.assertEqual(statistics._sqrtprod(x, y), target)
|
||||
self.assertNotEqual(math.sqrt(x * y), target)
|
||||
|
||||
def reference_value(x: float, y: float) -> float:
|
||||
x = decimal.Decimal(x)
|
||||
y = decimal.Decimal(y)
|
||||
with decimal.localcontext() as ctx:
|
||||
ctx.prec = 200
|
||||
return float((x * y).sqrt())
|
||||
|
||||
# Verify that the new function with improved accuracy
|
||||
# agrees with a reference value more often than old version.
|
||||
new_agreements = 0
|
||||
old_agreements = 0
|
||||
for i in range(10_000):
|
||||
x = random.expovariate()
|
||||
y = random.expovariate()
|
||||
new = statistics._sqrtprod(x, y)
|
||||
old = math.sqrt(x * y)
|
||||
ref = reference_value(x, y)
|
||||
new_agreements += (new == ref)
|
||||
old_agreements += (old == ref)
|
||||
self.assertGreater(new_agreements, old_agreements)
|
||||
|
||||
def test_correlation_spearman(self):
|
||||
# https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide-2.php
|
||||
# Compare with:
|
||||
# >>> import scipy.stats.mstats
|
||||
# >>> scipy.stats.mstats.spearmanr(reading, mathematics)
|
||||
# SpearmanrResult(correlation=0.6686960980480712, pvalue=0.03450954165178532)
|
||||
# And Wolfram Alpha gives: 0.668696
|
||||
# https://www.wolframalpha.com/input?i=SpearmanRho%5B%7B56%2C+75%2C+45%2C+71%2C+61%2C+64%2C+58%2C+80%2C+76%2C+61%7D%2C+%7B66%2C+70%2C+40%2C+60%2C+65%2C+56%2C+59%2C+77%2C+67%2C+63%7D%5D
|
||||
reading = [56, 75, 45, 71, 61, 64, 58, 80, 76, 61]
|
||||
mathematics = [66, 70, 40, 60, 65, 56, 59, 77, 67, 63]
|
||||
self.assertAlmostEqual(statistics.correlation(reading, mathematics, method='ranked'),
|
||||
0.6686960980480712)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
statistics.correlation(reading, mathematics, method='bad_method')
|
||||
|
||||
class TestLinearRegression(unittest.TestCase):
|
||||
|
||||
@@ -2860,22 +2487,6 @@ class TestLinearRegression(unittest.TestCase):
|
||||
self.assertAlmostEqual(intercept, true_intercept)
|
||||
self.assertAlmostEqual(slope, true_slope)
|
||||
|
||||
def test_proportional(self):
|
||||
x = [10, 20, 30, 40]
|
||||
y = [180, 398, 610, 799]
|
||||
slope, intercept = statistics.linear_regression(x, y, proportional=True)
|
||||
self.assertAlmostEqual(slope, 20 + 1/150)
|
||||
self.assertEqual(intercept, 0.0)
|
||||
|
||||
def test_float_output(self):
|
||||
x = [Fraction(2, 3), Fraction(3, 4)]
|
||||
y = [Fraction(4, 5), Fraction(5, 6)]
|
||||
slope, intercept = statistics.linear_regression(x, y)
|
||||
self.assertTrue(isinstance(slope, float))
|
||||
self.assertTrue(isinstance(intercept, float))
|
||||
slope, intercept = statistics.linear_regression(x, y, proportional=True)
|
||||
self.assertTrue(isinstance(slope, float))
|
||||
self.assertTrue(isinstance(intercept, float))
|
||||
|
||||
class TestNormalDist:
|
||||
|
||||
@@ -3029,8 +2640,6 @@ class TestNormalDist:
|
||||
self.assertTrue(math.isnan(X.cdf(float('NaN'))))
|
||||
|
||||
@support.skip_if_pgo_task
|
||||
@support.requires_resource('cpu')
|
||||
@unittest.skip("TODO: RUSTPYTHON Flaky")
|
||||
def test_inv_cdf(self):
|
||||
NormalDist = self.module.NormalDist
|
||||
|
||||
@@ -3088,10 +2697,9 @@ class TestNormalDist:
|
||||
iq.inv_cdf(1.0) # p is one
|
||||
with self.assertRaises(self.module.StatisticsError):
|
||||
iq.inv_cdf(1.1) # p over one
|
||||
|
||||
# Supported case:
|
||||
with self.assertRaises(self.module.StatisticsError):
|
||||
iq = NormalDist(100, 0) # sigma is zero
|
||||
self.assertEqual(iq.inv_cdf(0.5), 100)
|
||||
iq.inv_cdf(0.5)
|
||||
|
||||
# Special values
|
||||
self.assertTrue(math.isnan(Z.inv_cdf(float('NaN'))))
|
||||
@@ -3274,19 +2882,14 @@ class TestNormalDist:
|
||||
nd = NormalDist(100, 15)
|
||||
self.assertNotEqual(nd, lnd)
|
||||
|
||||
def test_copy(self):
|
||||
def test_pickle_and_copy(self):
|
||||
nd = self.module.NormalDist(37.5, 5.625)
|
||||
nd1 = copy.copy(nd)
|
||||
self.assertEqual(nd, nd1)
|
||||
nd2 = copy.deepcopy(nd)
|
||||
self.assertEqual(nd, nd2)
|
||||
|
||||
def test_pickle(self):
|
||||
nd = self.module.NormalDist(37.5, 5.625)
|
||||
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
|
||||
with self.subTest(proto=proto):
|
||||
pickled = pickle.loads(pickle.dumps(nd, protocol=proto))
|
||||
self.assertEqual(nd, pickled)
|
||||
nd3 = pickle.loads(pickle.dumps(nd))
|
||||
self.assertEqual(nd, nd3)
|
||||
|
||||
def test_hashability(self):
|
||||
ND = self.module.NormalDist
|
||||
@@ -3325,7 +2928,6 @@ class TestNormalDistC(unittest.TestCase, TestNormalDist):
|
||||
def load_tests(loader, tests, ignore):
|
||||
"""Used for doctest/unittest integration."""
|
||||
tests.addTests(doctest.DocTestSuite())
|
||||
tests.addTests(doctest.DocTestSuite(statistics))
|
||||
return tests
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user