#!/usr/bin/python3 # -*- coding: utf-8 -*- """Generate deviates from a given discrete probability distribution. WOLOG these functions take a vector of floating-point probabilities and return an index into that vector with probability proportional to the value of the item at that index. If you want your distribution to be over some other kind of objects, index an array of them with the result. There are three functions here which all do exactly the same thing, unless one of them is buggy. They are all presented here to allow comparisons of their respective efficiency and clarity, or lack thereof. They are named after their implementation strategies. In a real-world situation, you’d probably want to curry them, as the fourth function, `logarithmic`, does. It runs in linear time, but returns a function that runs in logarithmic time. """ import random import bisect import numpy def linear(p): return numpy.where(random.random() * sum(p) <= numpy.cumsum(p))[0][0] def linear_bisect(p): return bisect.bisect_left(numpy.cumsum(p), random.random() * sum(p)) def builtin(p): "Use numpy’s built-in random choice function." return numpy.random.choice(range(len(p)), p=numpy.array(p, 'd')/sum(p)) def logarithmic(p): cdf = numpy.cumsum(p) return lambda: bisect.bisect_left(cdf, random.random() * cdf[-1])