#!/usr/bin/python
"""Different measures of central tendency have different variability.
And it depends on the distribution of the underlying data.
Some distributions are easy to characterize from a random sample. If
your data is normally or uniformly distributed, you can get a pretty
good estimate of the mean, which is also the median, after just ten or
twenty data points.
But some other distributions are not so well-behaved. The exponential
distribution is a common one. Its median is well to the left of its
mean. Does the sample mean or the sample median have greater
variance? I hypothesize, without actually doing the math, that the
sample mean of an exponential distribution has proportionally greater
variance, and therefore the sample median is a better measure to use,
if you have to pick one.
"""
from __future__ import division
import random, math, sys
def sample_mean(sample):
    """Return the arithmetic mean of `sample`.

    `sample` must support both iteration and ``len()``.  An empty sample
    raises ZeroDivisionError.
    """
    total = sum(sample)
    return total / len(sample)
def sample_means(samples):
    """Return a list with the mean of each sample in `samples`.

    A real list is built rather than calling ``map``: on Python 3 ``map``
    returns a one-shot iterator with no ``len()``, which breaks callers
    that take the length of the result or traverse it more than once.
    On Python 2 the result is the same list ``map`` produced.
    """
    return [sample_mean(sample) for sample in samples]
def sample_median(sample):
    """Return the median of `sample`.

    For an odd number of items this is the middle item of the sorted
    data; for an even number it is the average of the two middle items.
    An empty sample raises IndexError.
    """
    ordered = sorted(sample)
    mid = len(ordered) // 2
    if len(ordered) % 2:
        return ordered[mid]
    # Even count: there is no single middle item, so average the two
    # neighbours.  Dividing by 2.0 keeps this a true division regardless
    # of the interpreter's division rules.  For an empty sample the
    # lookups below raise IndexError.
    return (ordered[mid - 1] + ordered[mid]) / 2.0
def sample_medians(samples):
    """Return a list with the median of each sample in `samples`.

    A real list is built rather than calling ``map``: on Python 3 ``map``
    returns a one-shot iterator with no ``len()``, which breaks callers
    that take the length of the result or traverse it more than once.
    On Python 2 the result is the same list ``map`` produced.
    """
    return [sample_median(sample) for sample in samples]
def uniform_sample(n):
    """Return a list of `n` random draws from the uniform distribution on [0, 1]."""
    draws = []
    for _ in range(n):
        draws.append(random.uniform(0, 1))
    return draws
def expo_sample(n):
    """Return a list of `n` random draws from the exponential distribution with rate 1."""
    draws = []
    for _ in range(n):
        draws.append(random.expovariate(1))
    return draws
def standard_deviation(sample):
    """Return the sample standard deviation of `sample`.

    This is the square root of the sum of squared deviations from the
    mean divided by ``n - 1``.

    `sample` may be any iterable of numbers.  It is copied into a list
    first, because the data is traversed twice (once for the mean, once
    for the deviations) and its length is needed, so one-shot iterators
    work too.

    Raises ZeroDivisionError when `sample` has fewer than two items.
    """
    values = list(sample)
    count = len(values)
    mean = sum(values) / count
    squared_error = sum((value - mean) ** 2 for value in values)
    return math.sqrt(squared_error / (count - 1))
def uniform_samples(n, m):
    """Return a list of `n` uniform samples, each a list of `m` draws."""
    batches = []
    for _ in range(n):
        batches.append(uniform_sample(m))
    return batches
def expo_samples(n, m):
    """Return a list of `n` exponential samples, each a list of `m` draws."""
    batches = []
    for _ in range(n):
        batches.append(expo_sample(m))
    return batches
def compare(n, m):
    """Print mean-versus-median variability for uniform and exponential data.

    Draws `n` samples of `m` items each from the uniform and then the
    exponential distribution, and prints one summary line per
    distribution via `describe`, prefixed with the distribution's name.

    Output avoids the Python-2-only print statement, so the function is
    valid on both Python 2 and Python 3 and prints the same text.

    With n < 2 or m < 1 the statistics divide by zero and
    ZeroDivisionError propagates.
    """
    print("%d samples of %d items each:" % (n, m))
    distributions = (
        ("Uniform", uniform_samples),
        ("Exponential", expo_samples),
    )
    for label, make_samples in distributions:
        # Write the label and a space but no newline, so the summary
        # printed by describe() continues on the same line.
        sys.stdout.write("%s: " % label)
        describe(make_samples(n, m))
def describe(samples):
    """Print how much the sample mean and sample median vary across `samples`.

    For the per-sample means, and then for the per-sample medians, the
    printed line reports their standard deviation and their average.

    `samples` is traversed twice (once for the means, once for the
    medians), so it must be re-iterable, e.g. a list of lists.
    """
    # list() guarantees each collection can be measured twice and has a
    # len(), even if the helper hands back a one-shot iterator (as map()
    # does on Python 3).
    means = list(sample_means(samples))
    medians = list(sample_medians(samples))
    summary = "standard deviation of mean %.2f (mean mean %.2f), of median %.2f (mean median %.2f)"
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(summary % (
        standard_deviation(means),
        sample_mean(means),
        standard_deviation(medians),
        sample_mean(medians),
    ))
if __name__ == '__main__':
    # Expects two integer arguments: the number of samples and the number
    # of items per sample.  Extra arguments are ignored.
    if len(sys.argv) < 3:
        # sys.exit with a string prints it to stderr and exits non-zero,
        # replacing the bare IndexError traceback for missing arguments.
        sys.exit("usage: %s NUM_SAMPLES ITEMS_PER_SAMPLE" % sys.argv[0])
    compare(int(sys.argv[1]), int(sys.argv[2]))