# Search Algorithm ScalingΒΆ

Figure 2.1.

The scaling of two methods to search for an item in an ordered list: a linear method which performs a comparison on all N items, and a binary search which uses a more sophisticated algorithm. The theoretical scalings are shown by dashed lines.



# Author: Jake VanderPlas
#   The figure produced by this code is published in the textbook
#   "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
#   To report a bug or issue, use the following forum:
from time import time
import numpy as np
from matplotlib import pyplot as plt

#----------------------------------------------------------------------
# This function adjusts matplotlib settings for a uniform feel in the textbook.
# Note that with usetex=True, fonts are rendered with LaTeX.  This may
# result in an error if LaTeX is not installed on your system.  In that case,
# you can set usetex to False.
if "setup_text_plots" not in globals():
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)

#------------------------------------------------------------
# Compute the execution times as a function of array size
Nsamples = 10 ** np.linspace(6.0, 7.8, 17)
time_linear = np.zeros_like(Nsamples)
time_binary = np.zeros_like(Nsamples)

for i in range(len(Nsamples)):
# create a sorted array
x = np.arange(Nsamples[i], dtype=int)

# Linear search: choose a single item in the array
item = int(0.4 * Nsamples[i])

t0 = time()
j = np.where(x == item)
t1 = time()

time_linear[i] = t1 - t0

# Binary search: this is much faster, so choose 1000 items to search for
items = np.linspace(0, Nsamples[i], 1000).astype(int)

t0 = time()
j = np.searchsorted(x, items)
t1 = time()

time_binary[i] = (t1 - t0)

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 3.75))
ax = plt.axes(xscale='log', yscale='log')
ax.grid()

# plot the observed times
ax.plot(Nsamples, time_linear, 'ok', color='gray', markersize=5,
label=r'linear search $(\mathcal{O}[N])$')
ax.plot(Nsamples, time_binary, 'sk', color='gray', markersize=5,
label=r'efficient search $(\mathcal{O}[\log N])$')

# plot the expected scaling
scale = 10 ** np.linspace(5, 8, 100)
scaling_N = scale * time_linear[7] / Nsamples[7]
scaling_logN = np.log(scale) * time_binary[7] / np.log(Nsamples[7])
ax.plot(scale, scaling_N, '--k')
ax.plot(scale, scaling_logN, '--k')

ax.set_xlim(9E5, 1E8)