Source code for astroML.linear_model.linear_regression

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso, Ridge


# ------------------------------------------------------------
#  Basis functions
def gaussian_basis(X, mu, sigma):
    """Gaussian Basis function

    Parameters
    ----------
    X : array_like
        input data: shape = (n_samples, n_features)
    mu : array_like
        means of bases, shape = (n_bases, n_features)
    sigma : float or array_like
        must broadcast to shape of mu

    Returns
    -------
    Xg : ndarray
        shape = (n_samples, n_bases)
    """
    X = np.asarray(X)
    mu = np.atleast_2d(mu)
    sigma = np.atleast_2d(sigma)

    n_samples, n_features = X.shape

    if mu.shape[1] != n_features:
        raise ValueError('shape of mu must match shape of X')

    r = (((X[:, None, :] - mu) / sigma) ** 2).sum(2)
    Xg = np.exp(-0.5 * r)
    Xg *= 1. / np.sqrt(2 * np.pi) / sigma.prod(1)

    return Xg
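

# Editor's illustrative sketch (not part of the original astroML source):
# a minimal example of evaluating gaussian_basis on 1-d input data with
# three basis centers. All values below are arbitrary.
def _gaussian_basis_example():
    X = np.linspace(0, 1, 5)[:, None]      # shape (n_samples, n_features)
    mu = np.array([[0.0], [0.5], [1.0]])   # shape (n_bases, n_features)
    Xg = gaussian_basis(X, mu, sigma=0.25)
    return Xg                              # shape (n_samples, n_bases) = (5, 3)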


class LinearRegression(BaseEstimator):
    """Simple Linear Regression with errors in y

    This is a stripped-down version of sklearn.linear_model.LinearRegression
    which correctly accounts for errors in the y variable.

    Parameters
    ----------
    fit_intercept : bool (optional)
        if True (default) then fit the intercept of the data
    regularization : string (optional)
        ['l1'|'l2'|'none'] Use L1 (Lasso) or L2 (Ridge) regression
    kwds : dict
        additional keyword arguments passed to sklearn estimators:
        LinearRegression, Lasso (L1), or Ridge (L2)

    Notes
    -----
    This implementation may be compared to that in
    sklearn.linear_model.LinearRegression. The difference is that here
    errors in y are taken into account when fitting.
    """
    _regressors = {'none': LinearRegression,
                   'l1': Lasso,
                   'l2': Ridge}

    def __init__(self, fit_intercept=True, regularization='none', kwds=None):
        if regularization.lower() not in ['l1', 'l2', 'none']:
            raise ValueError("regularization='{}' not recognized"
                             "".format(regularization))
        self.fit_intercept = fit_intercept
        self.regularization = regularization
        self.kwds = kwds

    def _transform_X(self, X):
        X = np.asarray(X)
        if self.fit_intercept:
            X = np.hstack([np.ones([X.shape[0], 1]), X])
        return X

    @staticmethod
    def _scale_by_error(X, y, y_error=1):
        """Scale regression by error on y"""
        X = np.atleast_2d(X)
        y = np.asarray(y)
        y_error = np.asarray(y_error)

        assert X.ndim == 2
        assert y.ndim == 1
        assert X.shape[0] == y.shape[0]

        if y_error.ndim == 0:
            return X / y_error, y / y_error
        elif y_error.ndim == 1:
            assert y_error.shape == y.shape
            X_out, y_out = X / y_error[:, None], y / y_error
        elif y_error.ndim == 2:
            assert y_error.shape == (y.size, y.size)
            evals, evecs = np.linalg.eigh(y_error)
            X_out = np.dot(evecs * (evals ** -0.5), np.dot(evecs.T, X))
            y_out = np.dot(evecs * (evals ** -0.5), np.dot(evecs.T, y))
        else:
            raise ValueError("shape of y_error does not match that of y")
        return X_out, y_out

    def _choose_regressor(self):
        model = self._regressors.get(self.regularization.lower(), None)
        if model is None:
            raise ValueError("regularization='{}' unrecognized"
                             "".format(self.regularization))
        return model

    def fit(self, X, y, y_error=1):
        kwds = {}
        if self.kwds is not None:
            kwds.update(self.kwds)
        kwds['fit_intercept'] = False

        model = self._choose_regressor()
        self.clf_ = model(**kwds)

        X = self._transform_X(X)
        X, y = self._scale_by_error(X, y, y_error)
        self.clf_.fit(X, y)
        return self

    def predict(self, X):
        X = self._transform_X(X)
        return self.clf_.predict(X)

    @property
    def coef_(self):
        return self.clf_.coef_
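

# Editor's illustrative sketch (not part of the original astroML source):
# fitting a straight line to data with per-point errors in y. The data,
# error values, and helper name are made up for demonstration.
def _linear_regression_example():
    rng = np.random.RandomState(0)
    x = np.linspace(0, 1, 50)[:, None]
    y_err = 0.1 * np.ones(50)
    y = 1.0 + 2.0 * x.ravel() + y_err * rng.randn(50)

    model = LinearRegression(fit_intercept=True)
    model.fit(x, y, y_error=y_err)
    return model.coef_      # [intercept, slope], approximately [1, 2]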


class PolynomialRegression(LinearRegression):
    """Polynomial Regression with errors in y

    Parameters
    ----------
    degree : int
        degree of the polynomial.
    interaction_only : bool (optional)
        If True, only interaction features are produced: features that are
        products of at most ``degree`` *distinct* input features
        (so not ``x[1] ** 2``, ``x[0] * x[2] ** 3``, etc.).
    fit_intercept : bool (optional)
        if True (default) then fit the intercept of the data
    regularization : string (optional)
        ['l1'|'l2'|'none'] Use L1 (Lasso) or L2 (Ridge) regression
    kwds : dict
        additional keyword arguments passed to sklearn estimators:
        LinearRegression, Lasso (L1), or Ridge (L2)
    """

    def __init__(self, degree=1, interaction_only=False, fit_intercept=True,
                 regularization='none', kwds=None):
        self.degree = degree
        self.interaction_only = interaction_only
        LinearRegression.__init__(self, fit_intercept, regularization, kwds)

    def _transform_X(self, X):
        trans = PolynomialFeatures(degree=self.degree,
                                   interaction_only=self.interaction_only,
                                   include_bias=self.fit_intercept)
        return trans.fit_transform(X)
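

# Editor's illustrative sketch (not part of the original astroML source):
# a quadratic fit with a single (homoscedastic) error on y. The data values
# and helper name are made up for demonstration.
def _polynomial_regression_example():
    rng = np.random.RandomState(1)
    x = np.linspace(-1, 1, 100)[:, None]
    y = 0.5 - 1.0 * x.ravel() + 2.0 * x.ravel() ** 2 + 0.05 * rng.randn(100)

    model = PolynomialRegression(degree=2)
    model.fit(x, y, y_error=0.05)
    return model.coef_      # coefficients of [1, x, x**2]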


class BasisFunctionRegression(LinearRegression):
    """Basis Function regression with errors in y

    Parameters
    ----------
    basis_func : str or function
        specify the basis function to use.  This should take an input matrix
        of size (n_samples, n_features), along with optional parameters, and
        return a matrix of size (n_samples, n_bases).
    fit_intercept : bool (optional)
        if True (default) then fit the intercept of the data
    regularization : string (optional)
        ['l1'|'l2'|'none'] Use L1 (Lasso) or L2 (Ridge) regression
    kwds : dict
        additional keyword arguments passed to sklearn estimators:
        LinearRegression, Lasso (L1), or Ridge (L2)
    """
    _basis_funcs = {'gaussian': gaussian_basis}

    def __init__(self, basis_func='gaussian', fit_intercept=True,
                 regularization='none', kwds=None, **kwargs):
        self.basis_func = basis_func
        self.kwargs = kwargs
        LinearRegression.__init__(self, fit_intercept, regularization, kwds)

    def _transform_X(self, X):
        if callable(self.basis_func):
            basis_func = self.basis_func
        else:
            basis_func = self._basis_funcs.get(self.basis_func, None)

        X = basis_func(X, **self.kwargs)

        if self.fit_intercept:
            X = np.hstack([np.ones((X.shape[0], 1)), X])
        return X
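

# Editor's illustrative sketch (not part of the original astroML source):
# regression on ten Gaussian basis functions with errors in y. The basis
# centers, widths, data, and helper name are arbitrary demonstration choices.
def _basis_function_regression_example():
    rng = np.random.RandomState(2)
    x = np.linspace(0, 1, 100)[:, None]
    y = np.sin(4 * x.ravel()) + 0.1 * rng.randn(100)

    mu = np.linspace(0, 1, 10)[:, None]     # basis centers, shape (10, 1)
    model = BasisFunctionRegression('gaussian', mu=mu, sigma=0.1)
    model.fit(x, y, y_error=0.1)
    return model.predict(x)                 # smooth fit evaluated at x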