Source code for arsenal.timer

import matplotlib.pyplot as pl
import pandas as pd
import numpy as np
from sys import stderr
from time import time
from contextlib import contextmanager
from arsenal.humanreadable import htime
from arsenal.terminal import colors
from arsenal.misc import ddict
from scipy.stats import mannwhitneyu


def timers(title=None):
    """Create a new, empty `Benchmark`.

    Args:
        title: Optional title forwarded to the `Benchmark` constructor.

    Returns:
        The freshly constructed `Benchmark`.
    """
    benchmark = Benchmark(title)
    return benchmark


class Benchmark(object):
    """Named collection of `Timer` objects that can be compared and plotted.

    Timers are looked up by name with ``bench[name]``; the underlying mapping
    is a `ddict`, so a timer is created on first access.
    """

    def __init__(self, title):
        """Store `title` (used as the plot title) and an empty timer mapping."""
        self.title = title
        # NOTE(review): `compare` assumes each timer's `name` equals its key,
        # so `ddict` presumably passes the missing key to `Timer` -- confirm.
        self.timers = ddict(Timer)

    def __getitem__(self, name):
        """Return the timer registered under `name`."""
        return self.timers[name]

    def compare(self, statistic=np.median):
        """Print how the best timer compares against every other timer.

        Args:
            statistic: Callable applied to each timer's list of durations;
                the timer with the smallest value is treated as the best.

        With no timers nothing is printed; with exactly one timer that timer
        is printed.
        """
        if not self.timers:
            return
        if len(self.timers) == 1:
            [only] = self.timers.values()
            print(only)
            return
        best = min(self.timers.values(), key=lambda t: statistic(t.times))
        # NOTE(review): `best` is chosen with `statistic`, but `Timer.compare`
        # is called with its default attribute ('mean') and swaps the roles
        # itself when the other timer has the smaller mean.
        for name in sorted(self.timers):
            if name != best.name:
                best.compare(self.timers[name])

    def values(self):
        """Return the timers as a list."""
        return list(self.timers.values())

    def keys(self):
        """Return the timer names as a list."""
        return list(self.timers.keys())

    def items(self):
        """Return ``(name, timer)`` pairs as a list."""
        return list(self.timers.items())

    def __len__(self):
        return len(self.timers)

    def __iter__(self):
        """Iterate over timer names in sorted order."""
        return iter(sorted(self.keys()))

    def plot_feature(self, feature, timecol='timer', ax=None, **kw):
        """Plot every timer's time-vs-`feature` curve on one shared axes.

        Args:
            feature: Name of the recorded feature to put on the x-axis.
            timecol: Name of the column holding the durations.
            ax: Axes to draw on; a new figure is created when omitted.
            **kw: Forwarded to each `Timer.plot_feature` call.

        Returns:
            The axes that was drawn on.
        """
        if ax is None:
            ax = pl.figure().add_subplot(111)
        for t in self.timers.values():
            t.plot_feature(feature=feature, timecol=timecol, ax=ax, **kw)
        if self.title is not None:
            ax.set_title(self.title)
        ax.legend(loc=2)
        return ax

    def plot_survival(self, *args, **kwargs):
        """Show the probability each algorithm is still running.

        The axes may be given as the first positional argument or as ``ax=``.
        When it is omitted, a single new axes is created and shared by all
        timers, so the curves land on one plot (as in `plot_feature`) instead
        of one figure per timer.

        Returns:
            The axes that was drawn on.
        """
        ax = args[0] if args else kwargs.pop('ax', None)
        if ax is None:
            ax = pl.figure().add_subplot(111)
        # Timers are visited in sorted-name order, as before.
        for name in sorted(self.timers):
            self.timers[name].plot_survival(ax, *args[1:], **kwargs)
        return ax

    def run(self, methods, reps):
        """Time each callable in `methods` once per seed, in shuffled order.

        Args:
            methods: Mapping from name to zero-argument callable, or a
                list/tuple of callables (keyed by their ``__name__``).
            reps: Number of seeds (``range(reps)``) each method is run with.
        """
        from arsenal import iterview, restore_random_state
        if isinstance(methods, (tuple, list)):
            methods = {m.__name__: m for m in methods}

        jobs = [
            (name, seed)
            for seed in range(reps)   # TODO: use a better strategy for picking random seeds.
            for name in methods
        ]
        np.random.shuffle(jobs)       # shuffle jobs to avoid weird ordering correlations
        for name, seed in iterview(jobs):
            # NOTE(review): presumably seeds the RNG for the duration of the
            # block and restores the previous state afterwards -- confirm.
            with restore_random_state(seed):
                with self[name]:
                    methods[name]()


class Timer:
    """
    Accumulates the wall-clock durations of repeated ``with`` blocks.

    Calling the timer with keyword arguments (``with t(n=10): ...``) records
    those features alongside the duration of the block that follows.

    >>> from time import sleep
    >>> a = Timer('A')
    >>> b = Timer('B')
    >>> with a:
    ...     sleep(0.5)
    >>> with b:
    ...     sleep(1)
    >>> a.compare(b)          #doctest:+SKIP
    A is 2.0018x faster

    """

    def __init__(self, name=None):
        self.name = name
        self.times = []       # one duration (seconds) per completed block
        self.features = []    # one feature dict per `__call__`
        self.b4 = None        # start time of the block currently running

    def __enter__(self):
        self.b4 = time()
        # Return the timer so that `with timer as t:` binds something useful.
        return self

    def __exit__(self, *_):
        self.times.append(time() - self.b4)

    def __str__(self):
        return 'Timer(name=%s, avg=%g, std=%g)' % (self.name, self.mean, self.std)

    def __call__(self, **features):
        """Record `features` for the next timed block and return the timer."""
        self.features.append(features)
        return self

    @property
    def mean(self):
        """Mean of the recorded durations."""
        return np.mean(self.times)

    @property
    def median(self):
        """Median of the recorded durations."""
        return np.median(self.times)

    @property
    def std(self):
        """Sample standard deviation (ddof=1); 0.0 with fewer than two samples."""
        if len(self.times) <= 1:
            return 0.0
        return np.std(self.times, ddof=1)

    @property
    def total(self):
        """Sum of the recorded durations."""
        return sum(self.times)

    def compare(self, other, attr='mean', verbose=True):
        """Print which of the two timers is faster and by what factor.

        Args:
            other: The `Timer` to compare against.
            attr: Name of the attribute used as the statistic (e.g. 'mean',
                'median', 'total').
            verbose: When true, append the two-sided Mann-Whitney U p-value
                (green below 0.05, yellow otherwise) and both statistics.
        """
        if len(self.times) == 0 or len(other.times) == 0:
            print('%s %s %s' % (self.name, '???', other.name))
            return

        self_attr = getattr(self, attr)
        other_attr = getattr(other, attr)
        if self_attr > other_attr:
            # Always report from the faster timer's point of view.
            other.compare(self, attr=attr, verbose=verbose)
            return

        extra = ''
        if verbose:
            # The test is only needed for the verbose suffix, so skip the
            # work otherwise.
            # XXX: support Wilcoxon signed rank test for paired examples.
            U = mannwhitneyu(self.times, other.times, alternative='two-sided')
            pval = f'p={U.pvalue:.5f}'
            if U.pvalue < 0.05:
                pval = colors.green % pval
            else:
                pval = colors.yellow % pval
            extra = f'({pval}, {attr}: {other.name}: {other_attr:g}, {self.name}: {self_attr:g})'

        # A statistic of exactly zero would otherwise divide by zero.
        ratio = other_attr / self_attr if self_attr != 0 else float('inf')

        # A single f-string: the previous f-string-plus-% mix broke whenever a
        # timer name contained a '%' character.
        print(f'{self.name} is {ratio:6.4f}x faster than {other.name} {extra}')

    def plot_feature(self, feature, timecol='timer',
                     ax=None, label=None, scatter=False,
                     show_curve=False, **kw):
        """Plot the median time per `feature` value with a 20-80 percentile band.

        Args:
            feature: Name of the recorded feature to put on the x-axis.
            timecol: Name of the column holding the durations.
            ax: Axes to draw on; a new figure is created when omitted.
            label: Legend label; defaults to the timer's name.
            scatter: Also draw every individual measurement.
            show_curve: Also draw a power-law fit (linear fit in log-log
                space); requires strictly positive x and y values.
            **kw: Forwarded to ``ax.plot`` and (minus ``c``) ``ax.scatter``.

        Returns:
            The axes that was drawn on.
        """
        if ax is None:
            ax = pl.figure().add_subplot(111)
        df = self.dataframe(timecol)
        medians = df.groupby(feature).median()

        X = medians.index
        Y = medians[timecol]
        ax.set_xlabel(feature)
        ax.set_ylabel('time (seconds)')

        if label is None:
            label = self.name

        [line] = ax.plot(X, Y, lw=2, alpha=0.5, label=label, **kw)
        # The scatter calls below pass the line's colour explicitly.
        kw.pop('c', None)

        c = line.get_color()
        ax.scatter(X, Y, c=c, lw=0, label=None, alpha=0.25, **kw)

        if show_curve:
            xs = np.array(X)
            ys = np.array(Y)
            assert np.all(xs > 0) and np.all(ys > 0)
            slope, intercept = np.polyfit(np.log(xs), np.log(ys), deg=1)
            fit_label = r'%s $\approx {%.2f} \cdot $%s$^{%.2f}$' % (
                label, np.exp(intercept), feature, slope)
            ax.plot(xs, np.exp(intercept)*xs**slope, alpha=0.5, label=fit_label,
                    c=c, linestyle=':')

        band = []
        for f, dd in df.groupby(feature):
            band.append([
                f,
                np.percentile(dd[timecol], 20),
                np.percentile(dd[timecol], 80),
            ])

            if scatter:
                ax.scatter([f]*len(dd), dd[timecol], c=c, alpha=0.25)

        band.sort()
        fs, ls, us = zip(*band)
        ax.fill_between(fs, ls, us, alpha=0.2, color=c)

        # TODO: a box-plot mode does not work well yet: the x-axis must be
        # filled out because `feature` might not be dense, and a non-integral
        # feature should raise an error.

        return ax

    def dataframe(self, timecol='timer'):
        """Return the recorded features as a DataFrame plus a `timecol` column."""
        df = pd.DataFrame(list(self.features))
        df[timecol] = self.times
        return df

    def filter(self, f, name=None):
        """Return a new timer holding the measurements for which `f` is true.

        Args:
            f: Predicate called as ``f(time, features)``.
            name: Name of the returned timer.

        The result keeps `times` and `features` as lists, so it works when
        nothing matches and can keep recording new measurements.
        """
        t = Timer(name)
        kept = [(x, y) for (x, y) in zip(self.times, self.features) if f(x, y)]
        t.times = [x for x, _ in kept]
        t.features = [y for _, y in kept]
        return t

    def bucket_filter(self, feature_to_bucket, bucket_filter):
        """Filter the rows of `dataframe()` bucket by bucket.

        Args:
            feature_to_bucket: Column to group the measurements by.
            bucket_filter: Called as ``bucket_filter(key, group_frame)``; must
                return a boolean mask selecting the rows of the group to keep.

        Returns:
            A DataFrame of the retained rows (possibly empty).
        """
        df = self.dataframe()
        kept = [d[bucket_filter(k, d)] for k, d in df.groupby(feature_to_bucket)]
        if not kept:
            return df.iloc[0:0]
        return pd.concat(kept)

    def trim_slow(self, feature_to_bucket, threshold):
        """Keep, per bucket, the rows at or below the `threshold` time quantile."""
        return self.bucket_filter(
            feature_to_bucket,
            lambda k, d: d['timer'] <= d['timer'].quantile(threshold)
        )

    def plot_survival(self, ax=None):
        """Plot ``1 - cdf`` of the recorded durations on log-log axes.

        Args:
            ax: Axes to draw on; a new figure is created when omitted.

        Returns:
            The axes that was drawn on.
        """
        if ax is None:
            ax = pl.figure().add_subplot(111)
        from arsenal.maths import cdf
        ts = np.array(self.times)
        xs = np.linspace(0, ts.max(), 1000)
        ax.plot(xs, 1-cdf(ts)(xs), label=self.name)
        ax.legend(loc='best')
        ax.set_xscale('log')
        ax.set_yscale('log')
        return ax


@contextmanager
def timeit(name, fmt='{name} ({htime})', header=None):
    """Context manager that reports how long the enclosed block took.

    Args:
        name: Label substituted for ``{name}`` in `fmt`.
        fmt: Template formatted with ``name``, ``htime`` (human-readable
            duration) and ``sec`` (duration in seconds).
        header: Optional line printed to stdout before the block runs.

    The report goes to stderr. It is only emitted when the block finishes
    without raising, because the reporting code follows the bare ``yield``.
    """
    if header is not None:
        print(header)
    started = time()
    yield
    elapsed = time() - started
    # Durations under a minute keep four decimals; longer ones go through htime.
    pretty = '%.4f sec' % elapsed if elapsed < 60 else htime(elapsed)
    print(fmt.format(name=name, htime=pretty, sec=elapsed), file=stderr)


def main():
    """Demo: benchmark two identical sleepers, then plot time against a feature."""
    from time import sleep
    from numpy.random import uniform
    from arsenal.iterview import iterview

    # Part 1: 'A' and 'B' draw their sleep times from the same distribution.
    bench = Benchmark('A vs B')
    for _ in iterview(range(1000), bench.title):
        for label in ('A', 'B'):
            with bench[label]:
                sleep(np.random.exponential(.001))

    bench.compare()

    # Part 2: sleep time grows quadratically in `i`, plus uniform jitter.
    timer = Timer('test')
    jitter = 0.01

    for i in iterview(range(1, 20)):
        for _ in range(10):
            with timer(i=i):
                delay = i**2 * 0.0001 + uniform(-jitter, jitter)
                sleep(max(delay, 0.0))

    timer.plot_feature('i')
    pl.show()


# Run the demo only when this module is executed as a script.
if __name__ == '__main__':
    main()
Source code for arsenal.timer

arsenal

Navigation

Related Topics