This documentation is for astroML version 0.2

astroML Mailing List

GitHub Issue Tracker

### Videos

Scipy 2012 (15 minute talk)

Scipy 2013 (20 minute talk)

### Citing

If you use the software, please consider citing astroML.

# Extreme Deconvolution exampleΒΆ

This demonstrates extreme deconvolution on a toy dataset.

@pickle_results: using precomputed results from 'XD_toy.pkl'
# Author: Jake VanderPlas <vanderplas@astro.washington.edu>
#   The figure is an example from astroML: see http://astroML.github.com
import os
import cPickle

import numpy as np
from matplotlib import pyplot as plt
from matplotlib.patches import Ellipse

from astroML.decorators import pickle_results
from astroML.density_estimation import XDGMM
from astroML.plotting.tools import draw_ellipse

#------------------------------------------------------------
# Sample the dataset
N = 2000
np.random.seed(0)

# generate the true data
x_true = (1.4 + 2 * np.random.random(N)) ** 2
y_true = 0.1 * x_true ** 2

# add scatter to "true" distribution
dx = 0.1 + 4. / x_true ** 2
dy = 0.1 + 10. / x_true ** 2

x_true += np.random.normal(0, dx, N)
y_true += np.random.normal(0, dy, N)

# add noise to get the "observed" distribution
dx = 0.2 + 0.5 * np.random.random(N)
dy = 0.2 + 0.5 * np.random.random(N)

x = x_true + np.random.normal(0, dx)
y = y_true + np.random.normal(0, dy)

# stack the results for computation
X = np.vstack([x, y]).T
Xerr = np.zeros(X.shape + X.shape[-1:])
diag = np.arange(X.shape[-1])
Xerr[:, diag, diag] = np.vstack([dx ** 2, dy ** 2]).T

#------------------------------------------------------------
# compute and save results
@pickle_results("XD_toy.pkl")
def compute_XD_results(n_components=10, n_iter=500):
clf = XDGMM(n_components, n_iter=n_iter)
clf.fit(X, Xerr)
return clf

clf = compute_XD_results(10, 500)
sample = clf.sample(N)

#------------------------------------------------------------
# Plot the results
fig = plt.figure()
bottom=0.1, top=0.95,
wspace=0.02, hspace=0.02)

ax1.scatter(x_true, y_true, s=4, lw=0, c='k')

ax2.scatter(x, y, s=4, lw=0, c='k')

ax3.scatter(sample[:, 0], sample[:, 1], s=4, lw=0, c='k')

for i in range(clf.n_components):
draw_ellipse(clf.mu[i], clf.V[i], scales=[2], ax=ax4,
ec='k', fc='gray', alpha=0.2)

titles = ["True Distribution", "Noisy Distribution",
"Extreme Deconvolution\n  resampling",
"Extreme Deconvolution\n  cluster locations"]

ax = [ax1, ax2, ax3, ax4]

for i in range(4):
ax[i].set_xlim(-1, 13)
ax[i].set_ylim(-6, 16)

ax[i].xaxis.set_major_locator(plt.MultipleLocator(4))
ax[i].yaxis.set_major_locator(plt.MultipleLocator(5))

ax[i].text(0.05, 0.95, titles[i],
ha='left', va='top', transform=ax[i].transAxes)

if i in (0, 1):
ax[i].xaxis.set_major_formatter(plt.NullFormatter())
else:
ax[i].set_xlabel('x')

if i in (1, 3):
ax[i].yaxis.set_major_formatter(plt.NullFormatter())
else:
ax[i].set_ylabel('y')

plt.show()