# Source code for astroML.stats._point_statistics

```import numpy as np
from scipy import stats

# Scale factor that converts an interquartile range (q75 - q25) into sigmaG,
# the rank-based estimate of the standard deviation used in this module:
#     sigmaG_factor = 1. / (2 * np.sqrt(2) * erfinv(0.5))
# with ``erfinv`` taken from ``scipy.special``.  The value is stored
# precomputed; the expression above is kept for reference only.
sigmaG_factor = 0.74130110925280102

def mean_sigma(a, axis=None, dtype=None, ddof=0, keepdims=False):
    """Compute mean and standard deviation for an array

    Parameters
    ----------
    a : array_like
        Array containing numbers whose mean is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the means are computed. The default is to compute
        the mean of the flattened array.
    dtype : dtype, optional
        Type to use in computing the standard deviation. For arrays of
        integer type the default is float64, for arrays of float types it is
        the same as the array type.
    ddof : int, optional
        Delta degrees of freedom, passed unchanged to ``np.std``.
        The default is 0.
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    mu : ndarray, see dtype parameter above
        array containing the mean values

    sigma : ndarray, see dtype parameter above.
        array containing the standard deviation

    See Also
    --------
    median_sigmaG : robust rank-based version of this calculation.

    Notes
    -----
    This routine simply calls ``np.mean`` and ``np.std``, passing the
    keyword arguments to them.  It is provided for ease of comparison
    with the function median_sigmaG()
    """
    reduce_kwargs = {"axis": axis, "dtype": dtype}
    if keepdims:
        # Let NumPy keep the reduced axes itself instead of reshaping by
        # hand from ``a.ndim`` / ``a.shape``: those attributes do not exist
        # on plain sequences, so list/tuple input used to fail here.
        # The keyword is only forwarded when requested so the default call
        # to np.mean / np.std is exactly what it was before.
        reduce_kwargs["keepdims"] = True

    mu = np.mean(a, **reduce_kwargs)
    sigma = np.std(a, ddof=ddof, **reduce_kwargs)

    return mu, sigma

def median_sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
    """Compute median and rank-based estimate of the standard deviation

    Parameters
    ----------
    a : array_like
        Array containing numbers whose median is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the quartiles are computed. The default is to
        compute them over the flattened array.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        ``np.percentile``. This will save memory when you do not need to
        preserve the contents of the input array. Treat the input as
        undefined, but it will probably be fully or partially sorted.
        Default is False. Note that, if `overwrite_input` is True and the
        input is not already an array, an error may be raised
        (NOTE(review): depends on the NumPy version -- confirm).
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    median : ndarray
        array containing the median values

    sigmaG : ndarray
        array containing the robust estimator of the standard deviation

    See Also
    --------
    mean_sigma : non-robust version of this calculation
    sigmaG : robust rank-based estimate of standard deviation

    Notes
    -----
    This routine uses a single call to ``np.percentile`` to find the
    quartiles along the given axis, and uses these to compute the
    median and sigmaG:

        median = q50
        sigmaG = (q75 - q25) * 0.7413

    where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
    """
    # keepdims is delegated to np.percentile rather than reshaping by hand
    # from ``a.ndim`` / ``a.shape``, which do not exist on plain sequences.
    # The percentile axis comes first in the result, so unpacking yields
    # one array (or scalar) per requested quartile.
    q25, median, q75 = np.percentile(a, [25, 50, 75],
                                     axis=axis,
                                     overwrite_input=overwrite_input,
                                     keepdims=keepdims)
    sigma_g = sigmaG_factor * (q75 - q25)

    return median, sigma_g

def sigmaG(a, axis=None, overwrite_input=False, keepdims=False):
    """Compute the rank-based estimate of the standard deviation

    Parameters
    ----------
    a : array_like
        Array containing numbers whose spread is desired. If `a` is not an
        array, a conversion is attempted.
    axis : int, optional
        Axis along which the quartiles are computed. The default is to
        compute them over the flattened array.
    overwrite_input : bool, optional
        If True, then allow use of memory of input array `a` for
        calculations. The input array will be modified by the call to
        ``np.percentile``. This will save memory when you do not need to
        preserve the contents of the input array. Treat the input as
        undefined, but it will probably be fully or partially sorted.
        Default is False. Note that, if `overwrite_input` is True and the
        input is not already an array, an error may be raised
        (NOTE(review): depends on the NumPy version -- confirm).
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left
        in the result as dimensions with size one. With this option,
        the result will broadcast correctly against the original `a`.

    Returns
    -------
    sigmaG : ndarray
        array containing the robust estimator of the standard deviation

    See Also
    --------
    median_sigmaG : robust rank-based estimate of mean and standard deviation

    Notes
    -----
    This routine uses a single call to ``np.percentile`` to find the
    quartiles along the given axis, and uses these to compute the
    sigmaG, a robust estimate of the standard deviation sigma:

        sigmaG = 0.7413 * (q75 - q25)

    where 0.7413 ~ 1 / (2 sqrt(2) erf^-1(0.5))
    """
    # keepdims is delegated to np.percentile rather than reshaping by hand
    # from ``a.ndim`` / ``a.shape``, which do not exist on plain sequences.
    q25, q75 = np.percentile(a, [25, 75],
                             axis=axis,
                             overwrite_input=overwrite_input,
                             keepdims=keepdims)

    # The result is returned directly; binding it to a local called
    # ``sigmaG`` would shadow this function's own name.
    return sigmaG_factor * (q75 - q25)

def fit_bivariate_normal(x, y, robust=False):
    """Fit bivariate normal parameters to a 2D distribution of points

    Parameters
    ----------
    x, y : array_like
        The x, y coordinates of the points.  Both must have the same shape.

    robust : boolean (optional, default=False)
        If True, then use rank-based statistics which are robust to outliers
        Otherwise, use mean/std statistics which are not robust

    Returns
    -------
    mu : list
        [x, y] location of the best-fit bivariate normal
    sigma_1, sigma_2 : float
        The best-fit gaussian widths in the uncorrelated frame
    alpha : float
        The rotation angle in radians of the uncorrelated frame
    """
    x = np.asarray(x)
    y = np.asarray(y)

    assert x.shape == y.shape

    if robust:
        # use quartiles to compute center and spread
        med_x, sigmaG_x = median_sigmaG(x)
        med_y, sigmaG_y = median_sigmaG(y)

        # define the principal variables from Shevlyakov & Smirnov (2011)
        sx = 2 * sigmaG_x
        sy = 2 * sigmaG_y

        u = (x / sx + y / sy) / np.sqrt(2)
        v = (x / sx - y / sy) / np.sqrt(2)

        # only the spreads of u and v enter the correlation estimate
        _, sigmaG_u = median_sigmaG(u)
        _, sigmaG_v = median_sigmaG(v)

        r_xy = ((sigmaG_u ** 2 - sigmaG_v ** 2) /
                (sigmaG_u ** 2 + sigmaG_v ** 2))

        # rename estimators
        mu_x, mu_y = med_x, med_y
        sigma_x, sigma_y = sigmaG_x, sigmaG_y
    else:
        mu_x = np.mean(x)
        sigma_x = np.std(x)

        mu_y = np.mean(y)
        sigma_y = np.std(y)

        # pearsonr returns (correlation coefficient, p-value).  Only the
        # coefficient is wanted: using the whole result would broadcast the
        # p-value into sigma_xy and turn every output into a 2-element array.
        r_xy = stats.pearsonr(x, y)[0]

    # We need to use the full (-180, 180) version of arctan: this is
    # np.arctan2(x, y) = np.arctan(x / y), modulo 180 degrees
    sigma_xy = r_xy * sigma_x * sigma_y
    alpha = 0.5 * np.arctan2(2 * sigma_xy, sigma_x ** 2 - sigma_y ** 2)

    # sigma1 and sigma2 share the same two terms and differ only in the
    # sign between them.
    half_sum = 0.5 * (sigma_x ** 2 + sigma_y ** 2)
    root = np.sqrt(0.25 * (sigma_x ** 2 - sigma_y ** 2) ** 2
                   + sigma_xy ** 2)

    sigma1 = np.sqrt(half_sum + root)
    sigma2 = np.sqrt(half_sum - root)

    return [mu_x, mu_y], sigma1, sigma2, alpha
```