Source code for pint.eventstats

"""
A collection of various routines for calculating pulsation test
test statistics on event data and helper functions.

author: M. Kerr <matthew.kerr@gmail.com>
"""

import numpy as np
from astropy.extern.six.moves import range

TWOPI = np.pi*2

[docs]def vec(func): from numpy import vectorize return vectorize(func,doc=func.__doc__)
[docs]def to_array(x): x = np.asarray(x) if len(x.shape)==0: return np.asarray([x]) return x
[docs]def from_array(x): if (len(x.shape)==1) and (x.shape[0]==1): return x[0] return x
[docs]def sig2sigma(sig,two_tailed=True,logprob=False): """Convert tail probability to "sigma" units, i.e., find the value of the argument for the normal distribution beyond which the integrated tail probability is sig. Note that the default is to interpret this number as the two-tailed value, as this is the quantity that goes to 0 when sig goes to 1. args ---- sig the chance probability kwargs ------ two_tailed [True] interpret sig as two-tailed or one-tailed probability in converting logprob [False] if True, the argument is the natural logarithm of the probability """ from scipy.special import erfc,erfcinv from scipy.optimize import fsolve sig = to_array(sig) if logprob: logsig = sig.copy() sig = np.exp(sig) results = np.empty_like(sig) if np.any( (sig > 1) | (not logprob and sig <= 0 ) ): raise ValueError('Probability must be between 0 and 1.') if not two_tailed: sig *= 2 def inverfc(x,*args): return erfc(x/2**0.5)-args[0] for isig,mysig in enumerate(sig): if mysig < 1e-120: # approx on asymptotic erfc if logprob: x0 = (-2*(logsig + np.log(np.pi**0.5)))**0.5 else: x0 = (-2*np.log(mysig*(np.pi)**0.5))**0.5 results[isig] = x0 - np.log(x0)/(1+2*x0) elif mysig > 1e-15: results[isig] = erfcinv(mysig)*2**0.5 else: results[isig] = fsolve(inverfc,[8],(sig,)) return from_array(results)
[docs]def sigma2sig(sigma,two_tailed=True): """Convert "sigma" units to chance probability, i.e., return the integral of the normal distribution from sigma to infinity, or twice that quantity if two_tailed. args ---- sigma argument for the normal distribution kwargs ------ two_tailed [True] if True, return twice the integral, else once """ # this appears to handle up to machine precision with no problem from scipy.special import erfc if two_tailed: return erfc(sigma/2**0.5) return 1 - 0.5*erfc(-sigma/2**0.5)
[docs]def sigma_trials(sigma,trials): # correct a sigmal value for a trials factor if sigma < 20: p = sigma2sig(sigma)*trials if p >= 1: return 0 return sig2sigma(p) else: # use an asymptotic expansion -- this needs to be checked! return (sigma**2 - 2*np.log(trials))**0.5
[docs]def z2m(phases,m=2): """ Return the Z^2_m test for each harmonic up to the specified m. See de Jager et al. 1989 for definition. """ phases = np.asarray(phases)*TWOPI #phase in radians n = len(phases) if n < 5e3: #faster for 100s to 1000s of phases, but requires ~20x memory of alternative s = (np.cos(np.outer(np.arange(1,m+1),phases))).sum(axis=1)**2 +\ (np.sin(np.outer(np.arange(1,m+1),phases))).sum(axis=1)**2 else: s = (np.asarray([(np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\ (np.asarray([(np.sin(k*phases)).sum() for k in range(1,m+1)]))**2 return (2./n)*np.cumsum(s)
[docs]def z2mw(phases,weights,m=2): """ Return the Z^2_m test for each harmonic up to the specified m. The user provides a list of weights. In the case that they are well-distributed or assumed to be fixed, the CLT applies and the statistic remains calibrated. Nice! """ phases = np.asarray(phases)*(2*np.pi) #phase in radians s = (np.asarray([(np.cos(k*phases)*weights).sum() for k in range(1,m+1)]))**2 +\ (np.asarray([(np.sin(k*phases)*weights).sum() for k in range(1,m+1)]))**2 return np.cumsum(s) * (2./(weights**2).sum())
[docs]def sf_z2m(ts,m=2): """ Return the survival function (chance probability) according to the asymptotic calibration for the Z^2_m test. args ---- ts result of the Z^2_m test """ from scipy.stats import chi2 return chi2.sf(ts,2*m)
[docs]def best_m(phases,weights=None,m=100): z = z2mw(phases,np.ones_like(phases) if weights is None else weights,m=m) return np.arange(1,m+1)[np.argmax(z-4*np.arange(0,m))]
[docs]def em_four(phases,m=2,weights=None): """ Return the empirical Fourier coefficients up to the mth harmonic. These are derived from the empirical trignometric moments.""" phases = np.asarray(phases)*TWOPI #phase in radians n = len(phases) if weights is None else weights.sum() weights = 1. if weights is None else weights aks = (1./n)*np.asarray([(weights*np.cos(k*phases)).sum() for k in range(1,m+1)]) bks = (1./n)*np.asarray([(weights*np.sin(k*phases)).sum() for k in range(1,m+1)]) return aks,bks
[docs]def em_lc(coeffs,dom): """ Evaluate the light curve at the provided phases (0 to 1) for the provided coeffs, e.g., as estimated by em_four.""" dom = np.asarray(dom)*(2*np.pi) aks,bks = coeffs rval = np.ones_like(dom) for i in range(1,len(aks)+1): rval += 2*(aks[i-1]*np.cos(i*dom) + bks[i-1]*np.sin(i*dom)) return rval
[docs]def hm(phases,m=20,c=4): """ Calculate the H statistic (de Jager et al. 1989) for given phases. H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m m == maximum search harmonic c == offset for each successive harmonic """ phases = np.asarray(phases)*(2*np.pi) #phase in radians s = (np.asarray([(np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\ (np.asarray([(np.sin(k*phases)).sum() for k in range(1,m+1)]))**2 return ((2./len(phases))*np.cumsum(s) - c*np.arange(0,m)).max()
[docs]def hmw(phases,weights,m=20,c=4): """ Calculate the H statistic (de Jager et al. 1989) and weight each sine/cosine with the weights in the argument. The distribution is corrected such that the CLT still applies, i.e., it maintains the same calibration as the unweighted version.""" phases = np.asarray(phases)*(2*np.pi) #phase in radians s = (np.asarray([(weights*np.cos(k*phases)).sum() for k in range(1,m+1)]))**2 +\ (np.asarray([(weights*np.sin(k*phases)).sum() for k in range(1,m+1)]))**2 return ( (2./(weights**2).sum()) * np.cumsum(s) - c*np.arange(0,m) ).max()
#@vec
[docs]def sf_hm(h,m=20,c=4,logprob=False): """ Return (analytic, asymptotic) survival function (1-F(h)) for the generalized H-test. For more details see: docstrings for hm, hmw M. Kerr dissertation (arXiv:1101.6072) Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128) logprob [False] return natural logarithm of probability """ if h < 1e-16: return 1. from numpy import exp,arange,log,empty from scipy.special import gamma fact = lambda x: gamma(x+1) # first, calculate the integrals of unity for all needed orders ints = empty(m) for i in range(m): sv = i - arange(0,i) # summation vector ints[i] = exp(i*log(h+i*c)-log(fact(i))) ints[i] -= (ints[:i]*exp(sv*log(sv*c)-log(fact(sv)))).sum() # next, develop the integrals in the power series alpha = 0.5*exp(-0.5*c) if not logprob: return exp(-0.5*h)*(alpha**arange(0,m)*ints).sum() else: #NB -- this has NOT been tested for partial underflow return (-0.5*h+np.log((alpha**arange(0,m)*ints).sum()))
[docs]def h2sig(h): """ Shortcut function for calculating sigma from the H-test.""" return sig2sigma(sf_hm(h,logprob=True),logprob=True)
@vec def sf_h20_dj1989(h): """Convert the H-test statistic to a chance probability according to the formula of de Jager et al. 1989 -- NB the quadratic term is NOT correct.""" if h <= 23: return 0.9999755*np.exp(-0.39802*h) if h > 50: return 4e-8 return 1.210597*np.exp(-0.45901*h + 0.0022900*h**2)
[docs]def sf_h20_dj2010(h): """Use the approximate asymptotic calibration from de Jager et al. 2010.""" return np.exp(-0.4*h)
[docs]def sig2h20(sig): """Use approximate (de Jager 2010) relation to invert.""" return -np.log(sig)/0.4
[docs]def sf_stackedh(k,h,l=0.398405): """ Return the chance probability for a stacked H test assuming the null df for H is exponentially distributed with scale l and that there are k sub-integrations yielding a total TS of h. See, e.g. de Jager & Busching 2010.""" from scipy.special import gamma fact = lambda x: gamma(x+1) p = 0 c = l*h for i in range(k): p += c**i/fact(i) return p*np.exp(-c)