Source code for astroML.density_estimation.empirical

import numpy as np
from scipy import interpolate
from sklearn.utils import check_random_state


[docs]class FunctionDistribution: """Generate random variables distributed according to an arbitrary function Parameters ---------- func : function func should take an array of x values, and return an array proportional to the probability density at each value xmin : float minimum value of interest xmax : float maximum value of interest Nx : int (optional) number of samples to draw. Default is 1000 random_state : None, int, or np.random.RandomState instance random seed or random number generator func_args : dictionary (optional) additional keyword arguments to be passed to func """
[docs] def __init__(self, func, xmin, xmax, Nx=1000, random_state=None, func_args=None): self.random_state = check_random_state(random_state) if func_args is None: func_args = {} x = np.linspace(xmin, xmax, Nx) Px = func(x, **func_args) # if there are too many zeros, interpolation will fail positive = (Px > 1E-10 * Px.max()) x = x[positive] Px = Px[positive].cumsum() Px /= Px[-1] self._tck = interpolate.splrep(Px, x)
def rvs(self, shape): """Draw random variables from the distribution Parameters ---------- shape : integer or tuple shape of desired array Returns ------- rv : ndarray, shape=shape random variables """ # generate uniform variables between 0 and 1 y = self.random_state.random_sample(shape) return interpolate.splev(y, self._tck)
[docs]class EmpiricalDistribution: """Empirically learn a distribution from one-dimensional data Parameters ---------- data : one-dimensional array input data Examples -------- >>> import numpy as np >>> np.random.seed(0) >>> x = np.random.normal(size=10000) # normally-distributed variables >>> x.mean(), x.std() (-0.018433720158265783, 0.98755656817612003) >>> x2 = EmpiricalDistribution(x).rvs(10000) >>> x2.mean(), x2.std() (-0.020293716681613363, 1.0039249294845276) Notes ----- This function works by approximating the inverse of the cumulative distribution using an efficient spline fit to the sorted values. """
[docs] def __init__(self, data): # copy, because we'll need to sort in-place data = np.array(data, copy=True) if data.ndim != 1: raise ValueError("data should be one-dimensional") data.sort() # set up spline y = np.linspace(0, 1, data.size) self._tck = interpolate.splrep(y, data)
def rvs(self, shape): """Draw random variables from the distribution Parameters ---------- shape : integer or tuple shape of desired array Returns ------- rv : ndarray, shape=shape random variables """ # generate uniform variables between 0 and 1 y = np.random.random(shape) return interpolate.splev(y, self._tck)