import os
import tarfile
import numpy as np
from astropy.table import Table
from . import get_data_home
from .tools import download_with_progress_bar
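
# Remote locations of the LINEAR sample files: the target list, the tarball
# of individual light curves, and the "Geneva" table of light-curve
# attributes (periods, amplitudes, etc.).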
TARGETLIST_URL = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
                  "allLINEARfinal_targets.dat.gz")
DATA_URL = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
            "allLINEARfinal_dat.tar.gz")
GENEVA_URL = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
              "LINEARattributesFinalApr2013.dat.gz")
GENEVA_ARCHIVE = 'LINEARattributesFinalApr2013.npy'
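# dtype describing the columns of the Geneva attributes table: floats for the
# measured quantities, plus an integer light-curve type and a string LINEAR
# object ID.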
ARCHIVE_DTYPE = ([(s, 'f8') for s in ('RA', 'Dec', 'ug', 'gi', 'iK',
                                      'JK', 'logP', 'Ampl', 'skew',
                                      'kurt', 'magMed', 'nObs')]
                 + [('LCtype', 'i4'), ('LINEARobjectID', '|S20')])
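# Column names for the target-list file read in LINEARdata.__init__.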
target_names = ['objectID', 'raLIN', 'decLIN', 'raSDSS', 'decSDSS', 'r',
                'ug', 'gr', 'ri', 'iz', 'JK', '<mL>', 'std', 'rms',
                'Lchi2', 'LP1', 'phi1', 'S', 'prior']


class LINEARdata:
    """A container class for the LINEAR dataset.

    Because the dataset is often not needed all at once, this class
    offers tools to access just the needed components.

    Example
    -------
    >>> data = fetch_LINEAR_sample()  # doctest: +IGNORE_OUTPUT +REMOTE_DATA
    >>> lightcurve = data[data.ids[0]]  # doctest: +REMOTE_DATA
    """
    @staticmethod
    def _name_to_id(name):
        return int(name.split('.')[0])
    @staticmethod
    def _id_to_name(id):
        return str(id) + '.dat'
    def __init__(self, data_file, targetlist_file):
        self.targets = np.recfromtxt(targetlist_file)
        self.targets.dtype.names = target_names
        self.dataF = tarfile.open(data_file)
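        # Light curves stay inside the tar archive; member names follow the
        # '<id>.dat' pattern and are extracted on demand in __getitem__.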
        self.ids = np.array(list(map(self._name_to_id, self.dataF.getnames())))
        # rearrange targets so lists are in the same order
        self.targets = self.targets[self.targets['objectID'].argsort()]
        ind = self.targets['objectID'].searchsorted(self.ids)
        self.targets = self.targets[ind]
    def get_light_curve(self, id):
        """Get a light curve with the given id.
        Parameters
        ----------
        id: integer
            LINEAR id of the desired object
        Returns
        -------
        lightcurve: ndarray
            a size (n_observations, 3) light-curve.
            columns are [MJD, flux, flux_err]
        """
        return self[id]
    def get_target_parameter(self, id, param):
        """Get a target parameter associated with the given id.
        Parameters
        ----------
        id: integer
            LINEAR id of the desired object
        param: string
            parameter name of the desired object (see below)
        Returns
        -------
        val: scalar
            value of the requested target parameter
        Notes
        -----
        Target parameters are one of the following:
        ['objectID', 'raLIN', 'decLIN', 'raSDSS', 'decSDSS', 'r',
         'ug', 'gr', 'ri', 'iz', 'JK', '<mL>', 'std', 'rms',
         'Lchi2', 'LP1', 'phi1', 'S', 'prior']
        """
        i = np.where(self.targets['objectID'] == id)[0]
        try:
            val = self.targets[param][i[0]]
        except BaseException:
            raise KeyError(id)
        return val
    def __getitem__(self, id):
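        # Extract '<id>.dat' from the archive and parse it as a plain array;
        # any failure (e.g. an unknown id) is reported as a KeyError.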
        try:
            lc = np.loadtxt(self.dataF.extractfile(self._id_to_name(id)))
        except BaseException:
            raise KeyError(id)
        return lc


def fetch_LINEAR_sample(data_home=None, download_if_missing=True):
    """Loader for LINEAR data sample
    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data'.
    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.
    Returns
    -------
    data : LINEARdata object
        A custom object which provides access to 7010 selected LINEAR light
        curves.
    """
    data_home = get_data_home(data_home)
    targetlist_file = os.path.join(data_home, os.path.basename(TARGETLIST_URL))
    data_file = os.path.join(data_home, os.path.basename(DATA_URL))
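    # Download and cache the target list and the light-curve archive the
    # first time they are requested.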
    if not os.path.exists(targetlist_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        targets = download_with_progress_bar(TARGETLIST_URL)
        with open(targetlist_file, 'wb') as f:
            f.write(targets)
    if not os.path.exists(data_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        databuffer = download_with_progress_bar(DATA_URL)
        with open(data_file, 'wb') as f:
            f.write(databuffer)
    return LINEARdata(data_file, targetlist_file)


def fetch_LINEAR_geneva(data_home=None, download_if_missing=True):
    """Loader for LINEAR geneva data.
    This supplements the LINEAR data above with well-determined periods
    and other light curve characteristics.
    Parameters
    ----------
    data_home : optional, default=None
        Specify another download and cache folder for the datasets. By default
        all astroML data is stored in '~/astroML_data'.
    download_if_missing : optional, default=True
        If False, raise a IOError if the data is not locally available
        instead of trying to download the data from the source site.
    Returns
    -------
    data : record array
        data on 7000+ LINEAR stars from the Geneva catalog
    """
    data_home = get_data_home(data_home)
    archive_file = os.path.join(data_home, GENEVA_ARCHIVE)
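    # On first use, download the ASCII table and cache it locally as a .npy
    # archive; later calls load directly from the cached file.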
    if not os.path.exists(archive_file):
        if not download_if_missing:
            raise IOError('data not present on disk. '
                          'set download_if_missing=True to download')
        data = Table.read(GENEVA_URL, format='ascii', header_start=19)
        data = data.as_array()
        np.save(archive_file, data)
    else:
        data = np.load(archive_file)
    return data