Source code for pint.eventstats

"""
A collection of various routines for calculating pulsation test
test statistics on event data and helper functions.

author: M. Kerr <matthew.kerr@gmail.com>
"""

import numpy as np
from astropy.extern.six.moves import range

TWOPI = np.pi*2

[docs]def vec(func):
    from numpy import vectorize
    return vectorize(func,doc=func.__doc__)

[docs]def to_array(x):
    x = np.asarray(x)
    if len(x.shape)==0: return np.asarray([x])
    return x

[docs]def from_array(x):
    if (len(x.shape)==1) and (x.shape[0]==1): return x[0]
    return x


[docs]def sig2sigma(sig,two_tailed=True,logprob=False):
    """Convert tail probability to "sigma" units, i.e., find the value of the 
       argument for the normal distribution beyond which the integrated tail
       probability is sig.  Note that the default is to interpret this number
       as the two-tailed value, as this is the quantity that goes to 0
       when sig goes to 1.

       args
       ----
       sig     the chance probability

       kwargs
       ------
       two_tailed [True] interpret sig as two-tailed or one-tailed
                          probability in converting
       logprob [False] if True, the argument is the natural logarithm
                       of the probability
    """
    from scipy.special import erfc,erfcinv
    from scipy.optimize import fsolve

    sig     = to_array(sig)
    if logprob:
        logsig = sig.copy()
        sig = np.exp(sig)
    results = np.empty_like(sig)

    if np.any( (sig > 1) | (not logprob and sig <= 0 ) ):
        raise ValueError('Probability must be between 0 and 1.')

    if not two_tailed: sig *= 2

    def inverfc(x,*args):
        return erfc(x/2**0.5)-args[0]

    for isig,mysig in enumerate(sig):
        if mysig < 1e-120: # approx on asymptotic erfc
            if logprob:
                x0 = (-2*(logsig + np.log(np.pi**0.5)))**0.5
            else:
                x0 = (-2*np.log(mysig*(np.pi)**0.5))**0.5
            results[isig] = x0 - np.log(x0)/(1+2*x0)
        elif mysig > 1e-15:
            results[isig] = erfcinv(mysig)*2**0.5
        else:
            results[isig] = fsolve(inverfc,[8],(sig,))
    return from_array(results)


[docs]def sigma2sig(sigma,two_tailed=True):
    """Convert "sigma" units to chance probability, i.e., return the integral
       of the normal distribution from sigma to infinity, or twice that
       quantity if two_tailed.
       args
       ----
       sigma   argument for the normal distribution

       kwargs
       ------
       two_tailed [True] if True, return twice the integral, else once
    """
    # this appears to handle up to machine precision with no problem
    from scipy.special import erfc
    if two_tailed: return erfc(sigma/2**0.5)
    return 1 - 0.5*erfc(-sigma/2**0.5)

[docs]def sigma_trials(sigma,trials):
    # correct a sigmal value for a trials factor
    if sigma < 20:
        p = sigma2sig(sigma)*trials
        if p >= 1: return 0
        return sig2sigma(p)
    else:
        # use an asymptotic expansion -- this needs to be checked!
        return (sigma**2 - 2*np.log(trials))**0.5


[docs]def z2m(phases,m=2):
    """ Return the Z^2_m test for each harmonic up to the specified m.
        See de Jager et al. 1989 for definition.    
    """

    phases = np.asarray(phases)*TWOPI #phase in radians
    n = len(phases)

    if n < 5e3:  #faster for 100s to 1000s of phases, but requires ~20x memory of alternative

        s = (np.cos(np.outer(np.arange(1,m+1),phases))).sum(axis=1)**2 +\
            (np.sin(np.outer(np.arange(1,m+1),phases))).sum(axis=1)**2

    else:

        s = (np.asarray([(np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\
            (np.asarray([(np.sin(k*phases)).sum() for k in range(1,m+1)]))**2

    return (2./n)*np.cumsum(s)

[docs]def z2mw(phases,weights,m=2):
    """ Return the Z^2_m test for each harmonic up to the specified m.

        The user provides a list of weights.  In the case that they are
        well-distributed or assumed to be fixed, the CLT applies and the
        statistic remains calibrated.  Nice!
     """

    phases = np.asarray(phases)*(2*np.pi) #phase in radians

    s = (np.asarray([(np.cos(k*phases)*weights).sum() for k in range(1,m+1)]))**2 +\
        (np.asarray([(np.sin(k*phases)*weights).sum() for k in range(1,m+1)]))**2

    return np.cumsum(s) * (2./(weights**2).sum())

[docs]def sf_z2m(ts,m=2):
    """ Return the survival function (chance probability) according to the
        asymptotic calibration for the Z^2_m test.

        args
        ----
        ts      result of the Z^2_m test
    """
    from scipy.stats import chi2
    return chi2.sf(ts,2*m)

[docs]def best_m(phases,weights=None,m=100):
    z = z2mw(phases,np.ones_like(phases) if weights is None else weights,m=m) 
    return np.arange(1,m+1)[np.argmax(z-4*np.arange(0,m))]

[docs]def em_four(phases,m=2,weights=None):
    """ Return the empirical Fourier coefficients up to the mth harmonic.
        These are derived from the empirical trignometric moments."""

    phases = np.asarray(phases)*TWOPI #phase in radians

    n = len(phases) if weights is None else weights.sum()
    weights = 1. if weights is None else weights

    aks = (1./n)*np.asarray([(weights*np.cos(k*phases)).sum() for k in range(1,m+1)])
    bks = (1./n)*np.asarray([(weights*np.sin(k*phases)).sum() for k in range(1,m+1)])

    return aks,bks

[docs]def em_lc(coeffs,dom):
    """ Evaluate the light curve at the provided phases (0 to 1) for the
        provided coeffs, e.g., as estimated by em_four."""

    dom = np.asarray(dom)*(2*np.pi)

    aks,bks = coeffs
    rval = np.ones_like(dom)
    for i in range(1,len(aks)+1):
        rval += 2*(aks[i-1]*np.cos(i*dom) + bks[i-1]*np.sin(i*dom))
    return rval

[docs]def hm(phases,m=20,c=4):
    """ Calculate the H statistic (de Jager et al. 1989) for given phases.
        H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m
        m == maximum search harmonic
        c == offset for each successive harmonic
    """
    phases = np.asarray(phases)*(2*np.pi) #phase in radians

    s = (np.asarray([(np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\
        (np.asarray([(np.sin(k*phases)).sum() for k in range(1,m+1)]))**2

    return ((2./len(phases))*np.cumsum(s) - c*np.arange(0,m)).max()


[docs]def hmw(phases,weights,m=20,c=4):
    """ Calculate the H statistic (de Jager et al. 1989) and weight each
        sine/cosine with the weights in the argument.  The distribution
        is corrected such that the CLT still applies, i.e., it maintains
        the same calibration as the unweighted version."""

    phases = np.asarray(phases)*(2*np.pi) #phase in radians

    s = (np.asarray([(weights*np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\
        (np.asarray([(weights*np.sin(k*phases)).sum() for k in range(1,m+1)]))**2

    return ( (2./(weights**2).sum()) * np.cumsum(s) - c*np.arange(0,m) ).max()


#@vec
[docs]def sf_hm(h,m=20,c=4,logprob=False):
    """ Return (analytic, asymptotic) survival function (1-F(h))
        for the generalized H-test.
        For more details see:
            docstrings for hm, hmw
            M. Kerr dissertation (arXiv:1101.6072)
            Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128)

        logprob [False] return natural logarithm of probability
    """
    if h < 1e-16: return 1.
    from numpy import exp,arange,log,empty
    from scipy.special import gamma
    fact = lambda x: gamma(x+1)

    # first, calculate the integrals of unity for all needed orders
    ints = empty(m)
    for i in range(m):
        sv = i - arange(0,i) # summation vector
        ints[i]  = exp(i*log(h+i*c)-log(fact(i)))
        ints[i] -= (ints[:i]*exp(sv*log(sv*c)-log(fact(sv)))).sum()

    # next, develop the integrals in the power series
    alpha = 0.5*exp(-0.5*c)
    if not logprob:
        return exp(-0.5*h)*(alpha**arange(0,m)*ints).sum()
    else:
        #NB -- this has NOT been tested for partial underflow
        return (-0.5*h+np.log((alpha**arange(0,m)*ints).sum()))

[docs]def h2sig(h):
    """ Shortcut function for calculating sigma from the H-test."""
    return sig2sigma(sf_hm(h,logprob=True),logprob=True)

@vec
def sf_h20_dj1989(h):
    """Convert the H-test statistic to a chance probability according to the
       formula of de Jager et al. 1989 -- NB the quadratic term is NOT correct."""
    if h <= 23:
        return 0.9999755*np.exp(-0.39802*h)
    if h > 50:
        return 4e-8
    return 1.210597*np.exp(-0.45901*h + 0.0022900*h**2)

[docs]def sf_h20_dj2010(h):
    """Use the approximate asymptotic calibration from de Jager et al. 2010."""
    return np.exp(-0.4*h)

[docs]def sig2h20(sig):
    """Use approximate (de Jager 2010) relation to invert."""
    return -np.log(sig)/0.4

[docs]def sf_stackedh(k,h,l=0.398405):
    """ Return the chance probability for a stacked H test assuming the
        null df for H is exponentially distributed with scale l and that
        there are k sub-integrations yielding a total TS of h.  See, e.g.
        de Jager & Busching 2010."""
    from scipy.special import gamma
    fact = lambda x: gamma(x+1)
    p = 0
    c = l*h
    for i in range(k):
        p += c**i/fact(i)
    return p*np.exp(-c)