# Source code for gatspy.periodic.lomb_scargle_fast

"""
Fast Lomb-Scargle Algorithm, following Press & Rybicki 1989
"""
from __future__ import print_function, division

__all__ = ['LombScargleFast']

import warnings
import numpy as np

from .lomb_scargle import LombScargle

# Precomputed factorials
FACTORIALS = [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

def factorial(N):
"""Compute the factorial of N.
If N <= 10, use a fast lookup table; otherwise use scipy.special.factorial
"""
if N < len(FACTORIALS):
return FACTORIALS[N]
else:
from scipy import special
return int(special.factorial(N))

def bitceil(N):
"""
Find the bit (i.e. power of 2) immediately greater than or equal to N
Note: this works for numbers up to 2 ** 64.

Roughly equivalent to int(2 ** np.ceil(np.log2(N)))
"""
# Note: for Python 2.7 and 3.x, this is faster:
# return 1 << int(N - 1).bit_length()
N = int(N) - 1
for i in [1, 2, 4, 8, 16, 32]:
N |= N >> i
return N + 1

def extirpolate(x, y, N=None, M=4):
"""
Extirpolate the values (x, y) onto an integer grid range(N),
using lagrange polynomial weights on the M nearest points.

Parameters
----------
x : array_like
array of abscissas
y : array_like
array of ordinates
N : int
number of integer bins to use. For best performance, N should be larger
than the maximum of x
M : int
number of adjoining points on which to extirpolate.

Returns
-------
yN : ndarray
N extirpolated values associated with range(N)

Example
-------
>>> rng = np.random.RandomState(0)
>>> x = 100 * rng.rand(20)
>>> y = np.sin(x)
>>> y_hat = extirpolate(x, y)
>>> x_hat = np.arange(len(y_hat))
>>> f = lambda x: np.sin(x / 10)
>>> np.allclose(np.sum(y * f(x)), np.sum(y_hat * f(x_hat)))
True

Notes
-----
This code is based on the C implementation of spread() presented in
Numerical Recipes in C, Second Edition (Press et al. 1989; p.583).
"""
x, y = map(np.ravel, np.broadcast_arrays(x, y))

if N is None:
N = int(np.max(x) + 0.5 * M + 1)

# Now use legendre polynomial weights to populate the results array;
# This is an efficient recursive implementation (See Press et al. 1989)
result = np.zeros(N, dtype=y.dtype)

# first take care of the easy cases where x is an integer
integers = (x % 1 == 0)
x, y = x[~integers], y[~integers]

# For each remaining x, find the index describing the extirpolation range.
# i.e. ilo[i] < x[i] < ilo[i] + M with x[i] in the center,
# adjusted so that the limits are within the range 0...N
ilo = np.clip((x - M // 2).astype(int), 0, N - M)
numerator = y * np.prod(x - ilo - np.arange(M)[:, np.newaxis], 0)
denominator = factorial(M - 1)

for j in range(M):
if j > 0:
denominator *= j / (j - M)
ind = ilo + (M - 1 - j)
np.add.at(result, ind, numerator / (denominator * (x - ind)))
return result

def trig_sum(t, h, df, N, f0=0, freq_factor=1,
oversampling=5, use_fft=True, Mfft=4):
"""Compute (approximate) trigonometric sums for a number of frequencies

This routine computes weighted sine and cosine sums:

S_j = sum_i { h_i * sin(2 pi * f_j * t_i) }
C_j = sum_i { h_i * cos(2 pi * f_j * t_i) }

Where f_j = freq_factor * (f0 + j * df) for the values j in 1 ... N.
The sums can be computed either by a brute force O[N^2] method, or
by an FFT-based O[Nlog(N)] method.

Parameters
----------
t : array_like
array of input times
h : array_like
array weights for the sum
df : float
frequency spacing
N : int
number of frequency bins to return
f0 : float (optional, default=0)
The low frequency to use
freq_factor : float (optional, default=1)
Factor which multiplies the frequency
use_fft : bool
if True, use the approximate FFT algorithm to compute the result.
This uses the FFT with Press & Rybicki's Lagrangian extirpolation.
oversampling : int (default = 5)
oversampling freq_factor for the approximation; roughtly the number of
time samples across the highest-frequency sinusoid. This parameter
contains the tradeoff between accuracy and speed. Not referenced
if use_fft is False.
Mfft : int
The number of adjacent points to use in the FFT approximation.
Not referenced if use_fft is False.

Returns
-------
S, C : ndarrays
summation arrays for frequencies f = df * np.arange(1, N + 1)
"""
df *= freq_factor
f0 *= freq_factor

assert df > 0
t, h = map(np.ravel, np.broadcast_arrays(t, h))

if use_fft:
Mfft = int(Mfft)
assert(Mfft > 0)

# required size of fft is the power of 2 above the oversampling rate
Nfft = bitceil(N * oversampling)
t0 = t.min()

if f0 > 0:
h = h * np.exp(2j * np.pi * f0 * (t - t0))

tnorm = ((t - t0) * Nfft * df) % Nfft
grid = extirpolate(tnorm, h, Nfft, Mfft)

fftgrid = np.fft.ifft(grid)
if t0 != 0:
f = f0 + df * np.arange(Nfft)
fftgrid *= np.exp(2j * np.pi * t0 * f)
fftgrid = fftgrid[:N]

C = Nfft * fftgrid.real
S = Nfft * fftgrid.imag
else:
f = f0 + df * np.arange(N)
C = np.dot(h, np.cos(2 * np.pi * f * t[:, np.newaxis]))
S = np.dot(h, np.sin(2 * np.pi * f * t[:, np.newaxis]))

return S, C

def lomb_scargle_fast(t, y, dy=1, f0=0, df=None, Nf=None,
center_data=True, fit_offset=True,
use_fft=True, freq_oversampling=5, nyquist_factor=2,
trig_sum_kwds=None):
"""Compute a lomb-scargle periodogram for the given data

This implements both an O[N^2] method if use_fft==False, or an
O[NlogN] method if use_fft==True.

Parameters
----------
t, y, dy : array_like
times, values, and errors of the data points. These should be
broadcastable to the same shape. If dy is not specified, a
constant error will be used.
f0, df, Nf : (float, float, int)
parameters describing the frequency grid, f = f0 + df * arange(Nf).
Defaults, with T = t.max() - t.min():
- f0 = 0
- df is set such that there are ``freq_oversampling`` points per
peak width. ``freq_oversampling`` defaults to 5.
- Nf is set such that the highest frequency is ``nyquist_factor``
times the so-called "average Nyquist frequency".
``nyquist_factor`` defaults to 2.
Note that for unevenly-spaced data, the periodogram can be sensitive
to frequencies far higher than the average Nyquist frequency.
center_data : bool (default=True)
Specify whether to subtract the mean of the data before the fit
fit_offset : bool (default=True)
If True, then compute the floating-mean periodogram; i.e. let the mean
vary with the fit.
use_fft : bool (default=True)
If True, then use the Press & Rybicki O[NlogN] algorithm to compute
the result. Otherwise, use a slower O[N^2] algorithm

Other Parameters
----------------
freq_oversampling : float (default=5)
Oversampling factor for the frequency bins. Only referenced if
``df`` is not specified
nyquist_factor : float (default=2)
Parameter controlling the highest probed frequency. Only referenced
if ``Nf`` is not specified.
trig_sum_kwds : dict or None (optional)
extra keyword arguments to pass to the ``trig_sum`` utility.
Options are ``oversampling`` and ``Mfft``. See documentation
of ``trig_sum`` for details.

Notes
-----
Note that the ``use_fft=True`` algorithm is an approximation to the true
Lomb-Scargle periodogram, and as the number of points grows this
approximation improves. On the other hand, for very small datasets
(<~50 points or so) this approximation may not be useful.

References
----------
.. [1] Press W.H. and Rybicki, G.B, "Fast algorithm for spectral analysis
of unevenly sampled data". ApJ 1:338, p277, 1989
.. [2] M. Zechmeister and M. Kurster, A&A 496, 577-584 (2009)
.. [3] W. Press et al, Numerical Recipies in C (2002)
"""
# Validate and setup input data
t, y, dy = map(np.ravel, np.broadcast_arrays(t, y, dy))
w = 1. / (dy ** 2)
w /= w.sum()

# Validate and setup frequency grid
if df is None:
peak_width = 1. / (t.max() - t.min())
df = peak_width / freq_oversampling
if Nf is None:
avg_Nyquist = 0.5 * len(t) / (t.max() - t.min())
Nf = max(16, (nyquist_factor * avg_Nyquist - f0) / df)
Nf = int(Nf)
assert(df > 0)
assert(Nf > 0)
freq = f0 + df * np.arange(Nf)

# Center the data. Even if we're fitting the offset,
# this step makes the expressions below more succinct
if center_data or fit_offset:
y = y - np.dot(w, y)

# set up arguments to trig_sum
kwargs = dict.copy(trig_sum_kwds or {})
kwargs.update(f0=f0, df=df, use_fft=use_fft, N=Nf)

#----------------------------------------------------------------------
# 1. compute functions of the time-shift tau at each frequency
Sh, Ch = trig_sum(t, w * y, **kwargs)
S2, C2 = trig_sum(t, w, freq_factor=2, **kwargs)

if fit_offset:
S, C = trig_sum(t, w, **kwargs)
with warnings.catch_warnings():
# Filter "invalid value in divide" warnings for zero-frequency
if f0 == 0:
warnings.simplefilter("ignore")
tan_2omega_tau = (S2 - 2 * S * C) / (C2 - (C * C - S * S))
# fix NaN at zero frequency
if np.isnan(tan_2omega_tau[0]):
tan_2omega_tau[0] = 0
else:
tan_2omega_tau = S2 / C2

# slower/less stable way: we'll use trig identities instead
# omega_tau = 0.5 * np.arctan(tan_2omega_tau)
# S2w, C2w = np.sin(2 * omega_tau), np.cos(2 * omega_tau)
# Sw, Cw = np.sin(omega_tau), np.cos(omega_tau)

S2w = tan_2omega_tau / np.sqrt(1 + tan_2omega_tau * tan_2omega_tau)
C2w = 1 / np.sqrt(1 + tan_2omega_tau * tan_2omega_tau)
Cw = np.sqrt(0.5) * np.sqrt(1 + C2w)
Sw = np.sqrt(0.5) * np.sign(S2w) * np.sqrt(1 - C2w)

#----------------------------------------------------------------------
# 2. Compute the periodogram, following Zechmeister & Kurster
#    and using tricks from Press & Rybicki.
YY = np.dot(w, y ** 2)
YC = Ch * Cw + Sh * Sw
YS = Sh * Cw - Ch * Sw
CC = 0.5 * (1 + C2 * C2w + S2 * S2w)
SS = 0.5 * (1 - C2 * C2w - S2 * S2w)

if fit_offset:
CC -= (C * Cw + S * Sw) ** 2
SS -= (S * Cw - C * Sw) ** 2

with warnings.catch_warnings():
# Filter "invalid value in divide" warnings for zero-frequency
if fit_offset and f0 == 0:
warnings.simplefilter("ignore")

power = (YC * YC / CC + YS * YS / SS) / YY

# fix NaN and INF at zero frequency
if np.isnan(power[0]) or np.isinf(power[0]):
power[0] = 0

return freq, power

[docs]class LombScargleFast(LombScargle): """Fast FFT-based Lomb-Scargle Periodogram Implementation This implements the O[N log N] lomb-scargle periodogram, described in Press & Rybicki (1989) [1]. To compute the periodogram via the fast algorithm, use the ``score_frequency_grid()`` method. The ``score()`` method and ``periodogram()`` method will default to the slower algorithm. See Notes below for more information about the algorithm. Parameters ---------- optimizer : PeriodicOptimizer instance Optimizer to use to find the best period. If not specified, the LinearScanOptimizer will be used. center_data : boolean (default = True) If True, then compute the weighted mean of the input data and subtract before fitting the model. fit_offset : boolean (default = True) If True, then fit a floating-mean sinusoid model. use_fft : boolean (default = True) Specify whether to use the Press & Rybicki FFT algorithm to compute the result ls_kwds : dict Dictionary of keywords to pass to the ``lomb_scargle_fast`` routine. fit_period : bool (optional) If True, then fit for the best period when fit() method is called. optimizer_kwds : dict (optional) Dictionary of keyword arguments for constructing the optimizer. For example, silence optimizer output with `optimizer_kwds={"quiet": True}`. silence_warnings : bool (default=False) If False, then warn the user when doing silly things, like calling ``score()`` rather than ``score_frequency_grid()`` or fitting this to small datasets (fewer than 50 points). Examples -------- >>> rng = np.random.RandomState(0) >>> t = 100 * rng.rand(100) >>> dy = 0.1 >>> omega = 10 >>> y = np.sin(omega * t) + dy * rng.randn(100) >>> ls = LombScargleFast().fit(t, y, dy) >>> ls.optimizer.period_range = (0.2, 1.2) >>> ls.best_period Finding optimal frequency: - Estimated peak width = 0.0639 - Using 5 steps per peak; omega_step = 0.0128 - User-specified period range: 0.2 to 1.2 - Computing periods at 2051 steps Zooming-in on 5 candidate peaks: - Computing periods at 1000 steps 0.62826265739259146 >>> ls.predict([0, 0.5]) array([-0.02019474, -0.92910567]) Notes ----- Currently, a NotImplementedError will be raised if both center_data and fit_offset are False. Note also that the fast algorithm is only an approximation to the true Lomb-Scargle periodogram, and as the number of points grows this approximation improves. On the other hand, for very small datasets (<~50 points or so) this approximation may produce incorrect results for some datasets. See Also -------- LombScargle LombScargleAstroML References ---------- .. [1] Press W.H. and Rybicki, G.B, "Fast algorithm for spectral analysis of unevenly sampled data". ApJ 1:338, p277, 1989 """ def __init__(self, optimizer=None, center_data=True, fit_offset=True, use_fft=True, ls_kwds=None, Nterms=1, fit_period=False, optimizer_kwds=None, silence_warnings=False): self.use_fft = use_fft self.ls_kwds = ls_kwds self.silence_warnings = silence_warnings if Nterms != 1: raise ValueError("LombScargleFast supports only Nterms = 1") LombScargle.__init__(self, optimizer=optimizer, center_data=center_data, fit_offset=fit_offset, Nterms=1, regularization=None, fit_period=fit_period, optimizer_kwds=optimizer_kwds) def _score_frequency_grid(self, f0, df, N): if not self.silence_warnings and self.t.size < 50: warnings.warn("For smaller datasets, the approximation used by " "LombScargleFast may not be suitable.\n" "It is recommended to use LombScargle instead.\n" "To silence this warning, set " "``silence_warnings=True``") freq, P = lomb_scargle_fast(self.t, self.y, self.dy, f0=f0, df=df, Nf=N, center_data=self.center_data, fit_offset=self.fit_offset, use_fft=self.use_fft, **(self.ls_kwds or {})) return P def _score(self, periods): if not self.silence_warnings: warnings.warn("The score() method defaults to a slower O[N^2] " "algorithm.\nUse the score_frequency_grid() method " "to access the fast FFT-based algorithm.\n" "To silence this warning, set " "``silence_warnings=True``") return LombScargle._score(self, periods)