import sys, math
# Thiele extrapolation of a function, given some data points

# Maximum number of points used per extrapolation: extrapolate() truncates
# longer series, and seriesfilter() compresses a series to at most this many
THIELE_MAX_PTS = 100

# Minimum absolute difference between consecutive values; anything closer
# than this is treated as "equal" (a repeated value) by extrapolate()
THIELE_TOLERANCE = 1e-7

##
## Extrapolate series to infinite time/space/index
# given set of M points (want M even!), extrapolate to x = Inf
# we do this using the set {(exp(i), y(i))} for i=0->Inf
# which is suitable for a sequence that has exponential convergence:
#	S-S_j < O(exp(-c*j)) as j->Inf
# See "Numerical Methods Based on Sinc and Analytic Functions" by F. Stenger
#
# ALGORITHM NOTE: CANNOT USE ARITHMETIC CONVERGENCE UNLESS THE SEQUENCE
# IS MONOTONIC.  Gravity data is not, so arithmetic convergence will
# result in divide by zero errors!
#
# N.B. - Because floating-point precision is finite, we cannot simply use
# all of the data in a time series with more than ~500 points, since
# exp(500) is near the limit of overflow.  So the series must be
# truncated/filtered/compressed to at most 100 points.  DO THIS BEFORE
# CALLING extrapolate()!  See seriesfilter(), below....
# Returns the extrapolation result, or None on error!
def extrapolate(y):
  """Extrapolate the sequence y to its limit as the index goes to infinity.

  Builds Thiele's reciprocal-difference (rho) table on the points
  (x[i], y[i]) with x[i] = exp(i) and returns rho[M][0], the top entry of
  the last (even-order) column, as the estimate of y for x -> Inf.

  y: sequence of numbers.  It is copied, so the caller's data is left
     untouched.  At least 3 values must survive the removal of repeats.

  Returns the extrapolated value, or None (after printing a message) when
  too few usable points remain.  Raises ZeroDivisionError if two
  neighbouring entries of a rho column coincide.
  """
  # work on a private copy: the de-duplication below deletes entries
  y = list(y)

  # Drop values that repeat their predecessor to within THIELE_TOLERANCE;
  # nearly-equal neighbours would make the first rho column divide by ~0.
  # Sweep repeatedly until a full pass deletes nothing.
  changed = True
  while changed:
    changed = False
    i = 0
    while i < len(y) - 1:
      if math.fabs(y[i+1] - y[i]) < THIELE_TOLERANCE:
        del y[i+1]
        changed = True
      i += 1

  # keep the newest THIELE_MAX_PTS points (slice to the end of the list so
  # that the latest value is not lost and the full count is retained)
  if len(y) > THIELE_MAX_PTS:
    y = y[-THIELE_MAX_PTS:]

  # size of the system: points 0..M are used, and M must be even
  M = len(y) - 1
  if M % 2 != 0:
    M -= 1

  if M < 2:
    print("THIELE: series too short")
    return None

  # abscissae: exponential spacing, as the series is not MONOTONIC!
  x = [math.exp(i) for i in range(M+1)]

  # rho table, built column by column; only the two most recent columns
  # are needed to form the next one.
  #   rho[0][j] = y[j]
  #   rho[1][j] = (x[j+1] - x[j]) / (rho[0][j+1] - rho[0][j])
  #   rho[i][j] = rho[i-2][j+1] + (x[j+i] - x[j]) / (rho[i-1][j+1] - rho[i-1][j])
  older = y[:M+1]
  newer = [(x[j+1] - x[j]) / (older[j+1] - older[j]) for j in range(M)]
  for i in range(2, M+1):
    column = [older[j+1] + (x[j+i] - x[j]) / (newer[j+1] - newer[j])
              for j in range(M-i+1)]
    older, newer = newer, column

  # from the theory, as x->Inf, f(x)=y -> rho[M][0]
  return newer[0]

def zero(n):
  """Return a freshly allocated list of n floats, all equal to 0.0."""
  return [0.0 for _ in range(n)]

##
## Filter time series to compress to <= THIELE_MAX_PTS
# filter a raw time series into a set of averages, compressing from
# the full series length to <=THIELE_MAX_PTS points.  This is then
# suitable for the extrapolate() function above.
#
# returns the new series
def seriesfilter(raw, filtRadius):
  """Compress a raw series into at most THIELE_MAX_PTS window averages.

  The series is first despiked with despike(raw, filtRadius).  It is then
  sampled every `offset` points, offset = ceil(len(raw)/THIELE_MAX_PTS),
  and each sample is replaced by the mean of the despiked values lying
  within filtRadius indices of it (the window is clipped at both ends of
  the series).

  raw:        sequence of numbers
  filtRadius: half-width, in points, of the averaging/despiking window

  Returns the new series as a list of floats; an empty input yields [].
  """
  # nothing to compress (and offset would be 0 below)
  if len(raw) == 0:
    return []

  offset = int(math.ceil(len(raw)/float(THIELE_MAX_PTS)))

  # filter spikes, to prevent tweaking averages
  s = despike(raw, filtRadius)

  # number of compressed points
  npts = len(s) // offset

  z = zero(npts)
  for i in range(npts):
    k = i*offset	# centre of this window in the despiked series
    N = 0
    for j in range(-filtRadius, filtRadius+1):
      idx = k-j
      # index 0 is a valid point and must be included in the window
      if 0 <= idx < len(s):
        z[i] += s[idx]
        N += 1
    z[i] /= N
  return z

##
## Removes "spikes" from the data series s
# find spikes by computing a sliding window average, s.d. for each
# point.  Then look for any individual point > A*s.d. from the average;
# replace with window average. A found from Chauvenet's criterion for
# rejection, depending on actual # pts in the current window.
# Repeats the despiking until no points changed, and then 
# returns the new despiked series
def despike(s, r):
  """Return a copy of the series s with isolated spikes removed.

  For every point, the average and standard deviation of the window of
  points within r indices of it (clipped to the series) are computed.  A
  point further from its window average than chauvenet(window size)
  standard deviations is replaced by that average.  Passes are repeated
  until one changes nothing, up to a maximum of 5000 passes.

  s: sequence of numbers (not modified)
  r: window half-width, in points

  Returns the despiked series as a new list.
  """
  n = len(s)
  z = list(s)
  changed = True
  passes = 0
  while changed and passes < 5000:
    changed = False
    S = zero(n)
    sigma = zero(n)
    # compute window average and scaled s.d. for every point first, so
    # that replacements made in this pass do not affect its statistics
    for i in range(n):
      # index 0 is a valid point and belongs to the window
      w = [z[i-k] for k in range(-r, r+1) if 0 <= i-k < n]
      if len(w) < 2:
        # too few points to estimate a spread: never flag this point
        S[i] = z[i]
        sigma[i] = 0.0
        continue
      (S[i], sigma[i]) = average(w)
      sigma[i] *= chauvenet(len(w))
    # run through series, removing spikes
    for i in range(n):
      if math.fabs(z[i] - S[i]) > sigma[i]:
        z[i] = S[i]	# replace with window average
        changed = True
    passes += 1
  return z


##
## Compute average and s.d. of array y.
# computes average of sample array y, computing a s.d.
# returns (average, s.d.)
def average(y):
  """Return (average, s.d.) of the sample array y.

  The standard deviation is the sample value (N-1 in the denominator),
  computed from the running sums of y and y*y.  A single-element sample
  has no spread to estimate and yields a standard deviation of 0.0.

  Raises ZeroDivisionError if y is empty.
  """
  S = 0.0
  ss = 0.0
  N = len(y)
  for v in y:
    S += v
    ss += v*v
  ybar = S / N
  if N == 1:
    # N-1 would be zero below; one point has no measurable spread
    return (ybar, 0.0)
  if (ss - N*ybar*ybar) < 0:	# numerical precision issues
    ss = N*ybar*ybar + 1e-12	# make sigma small but real
  # standard dev. of data
  sigma = ( (ss - N*ybar*ybar)/(N-1) ) ** 0.5
  return (ybar, sigma)

##
## Interpolate Chauvenet's criterion for discarding data.
# interpolates multiplier for s.d. for discarding a point,
# based on Chauvenet's criterion for a sample of N points.
# N is number of points in window, series, etc. - more points
# makes it harder to discard data, as multiplier scales.
# returns multiplier, in the interval [1.65, 3.48]
# return value of 0 indicates error!
def chauvenet(N):
  """Return the s.d. multiplier from Chauvenet's criterion for N points.

  The multiplier is linearly interpolated in a table of
  (sample size, multiplier) pairs.  Sample sizes at or below the first
  table entry get the first multiplier (1.65); sizes at or above the last
  entry get the last one (3.48).  Returns 0 if no bracketing pair of
  entries is found (e.g. when N does not compare as a number, like NaN).
  """
  # (number of points, multiplier), in increasing order of sample size
  table = [
    (5, 1.65), (6, 1.73), (7, 1.81), (8, 1.86), (9, 1.91), (10, 1.96),
    (12, 2.04), (14, 2.1), (16, 2.15), (18, 2.20), (20, 2.24), (25, 2.33),
    (30, 2.39), (40, 2.49), (50, 2.57), (60, 2.64), (80, 2.74), (100, 2.81),
    (150, 2.93), (200, 3.02), (300, 3.14), (400, 3.23), (500, 3.29),
    (1000, 3.48),
  ]

  # clamp outside the tabulated range
  smallest_n, smallest_mult = table[0]
  if N <= smallest_n:
    return smallest_mult
  largest_n, largest_mult = table[-1]
  if N >= largest_n:
    return largest_mult

  # walk consecutive entries; interpolate in the first pair whose upper
  # sample size exceeds N
  for (n_lo, mult_lo), (n_hi, mult_hi) in zip(table, table[1:]):
    if N < n_hi:
      slope = (mult_hi - mult_lo)/(n_hi - n_lo)
      return slope * (N - n_lo) + mult_lo

  return 0
