# staircase drift function inversion

# REQUIRES SCIPY EXTENSIONS

from scipy import *
from scipy.linalg import *
from math import *

from grav_util import num_sort, print_matrix
import polyfit

def create_matrix(data, reps, weighted):
  """Build the linear system (A, W, d) for the staircase drift inversion.

  Parameters:
    data     -- mapping of observation key -> object exposing the
                attributes .time, .G and .sigma
    reps     -- mapping of observation key -> sequence of keys of its
                repeat observations; every key used must exist in data
    weighted -- if true, each row is weighted by 1/sqrt(sum of sigma**2)
                of the observations involved; otherwise all weights are 1

  Returns the tuple (A, W, d, times):
    A     -- (M, N) float64 operator matrix with entries in {-1, 0, +1}
    W     -- (M,) float64 vector holding the diagonal of the weight matrix
    d     -- (M,) float64 data vector of gravity differences
    times -- list of the N interval midpoints between consecutive
             (distinct) observation times

  Rows come first from every (station, repeat) pair, then from the
  inter-station double differences, in sorted-key order.
  """
  # local import so this block does not depend on the file's import header;
  # cmp_to_key lets the comparison function num_sort work with sorted()
  from functools import cmp_to_key

  # per-observation lookup tables; built for EVERY observation so that two
  # observations sharing a time stamp do not knock one of them out
  time = {}; grav = {}; sigma = {}
  for key in data:
    obs = data[key]
    time[key] = obs.time
    grav[key] = obs.G
    sigma[key] = obs.sigma

  # distinct observation times, ordered with the project's comparator
  t = sorted(set(time.values()), key=cmp_to_key(num_sort))

  # compute times[] as 1/2 way between endpoints
  times = [(t[i+1] + t[i])/2.0 for i in range(len(t)-1)]

  # sorted data keys for iterating; fixes the row order of the system
  keys = sorted(reps)

  # size of system: N unknowns (one per interval), M rows
  # N equals len(data)-1 whenever all observation times are distinct
  N = len(times)
  M = sum([len(reps[i]) for i in keys])

  # inter-station difference mapping, kept as an ordered list of
  # (station_i, station_j, [(repeat_of_i, repeat_of_j), ...]) so that no
  # two station pairs can ever share an entry
  pairs = []
  for i in keys:
    for j in keys:
      if i == j:
        continue
      if time[i] >= time[j]:  # only look forward in time
        continue
      links = [(ri, rj) for ri in reps[i] for rj in reps[j]
               if time[ri] < time[rj]]
      pairs.append((i, j, links))
      M = M + len(links)

  # operator matrix is A, data matrix is d
  # A only stores 0, +-1 so we really only need small ints
  # HOWEVER, we need all the precision possible because the A*A
  # is near-singular so we use 64-bit floats everywhere, just in case.
  A = zeros((M, N), float64)
  # W really should be a square MxM matrix, but it only has diagonal
  # elements so it is stored as a vector to save memory.
  W = zeros((M, ), float64)
  if not weighted:  # unweighted fit, so set all weights equal
    W[:] = 1.0
  # d is column vector, as m will turn out to be
  d = zeros((M, ), float64)

  m = 0
  # rows for each station and its repeats
  for i in keys:
    for j in reps[i]:
      d[m] = grav[j] - grav[i]
      if weighted:
        W[m] = 1.0/sqrt(sigma[j]**2 + sigma[i]**2)
      for k in range(N):
        if time[i] <= times[k] <= time[j]:
          A[m, k] = 1
      m = m+1

  # rows for the inter-station double differences
  for (i, j, links) in pairs:
    D1 = grav[j] - grav[i]
    for (ri, rj) in links:
      d[m] = (grav[rj] - grav[ri]) - D1
      if weighted:
        W[m] = 1.0/sqrt(sigma[i]**2 + sigma[j]**2 +
                        sigma[ri]**2 + sigma[rj]**2)
      # ALGORITHM NOTE OF GREAT IMPORTANCE
      # The coefficients of this row MUST be summed, not assigned: the
      # -1 span (station i -> station j) and the +1 span (repeat of i ->
      # repeat of j) can overlap, and where they do the coefficient has
      # to cancel to 0 rather than end up as +-1.
      for k in range(N):
        if time[i] < times[k] < time[j]:
          # we are subtracting these values, so need -1
          A[m, k] = A[m, k] - 1
        if time[ri] < times[k] < time[rj]:
          A[m, k] = A[m, k] + 1
      m = m+1

  return (A, W, d, times)

# show the amount of memory this process is using
#import resource
#def showmem():
#  rusage = resource.getrusage(resource.RUSAGE_SELF)
#  print rusage
#  rlimit = resource.getrlimit(resource.RLIMIT_DATA)
#  print rlimit
#  print "SHOWMEM: Max. resident set size is %d pages = %d bytes"%(rusage[2], rusage[2]*resource.getpagesize())
#  print "SHOWMEM: Shared memory size %d bytes"%rusage[3]
#  print "SHOWMEM: Unshared memory size %d bytes"%rusage[4]


def calc_model(A, W, d):
  """Solve A m = d in the weighted least-squares sense and return m.

  Parameters:
    A -- (M, N) operator matrix
    W -- length-M vector holding the diagonal of the weight matrix
    d -- length-M data vector

  Computes
               T 2  -1 T 2
      m  =  ( A W A )  A W d
  where the inverse is an SVD-based pseudo-inverse, because the normal
  matrix is near-singular.
  """
  # pinv2 is missing from newer SciPy releases; fall back to pinv there.
  # Imported locally so the function does not rely on the star imports.
  try:
    from scipy.linalg import pinv2 as svd_pinv
  except ImportError:
    from scipy.linalg import pinv as svd_pinv

  # W is never needed on its own, only W^2; flatten so that a column
  # vector W is accepted as well as a plain 1-D vector
  W2 = (W * W).reshape((W.shape[0],))

  # A^T W^2, built once and shared by both products below
  AtW2 = transpose(A) * W2[newaxis, :]

  # normal matrix and its pseudo-inverse
  Q = dot(AtW2, A)
  Qp = svd_pinv(Q)

  # unregularized model; apply A^T W^2 to d first so that the N x M
  # operator never has to be multiplied by the N x N pseudo-inverse
  m = dot(Qp, dot(AtW2, d))

  return m

def calc_dG(m, times, data):
  """Store the cumulative drift correction on every observation.

  Parameters:
    m     -- sequence of per-interval model values
    times -- sequence of interval times, indexed like m
    data  -- mapping of key -> observation object with a .time attribute

  For each observation, sums m[j] over every interval j whose time is
  not later than the observation's time and assigns the result to the
  observation's .drift_correction attribute.  Returns None.
  """
  N = len(m)
  for obs in data.values():
    # sum from start to this interval; 0.0 start keeps the result a float
    obs.drift_correction = sum(
      [m[j] for j in range(N) if obs.time >= times[j]], 0.0)

def calc_L1_L2(data, reps):
  """Compute L1/L2 repeat residuals before and after drift correction.

  Parameters:
    data -- mapping of key -> observation object with the attributes
            .G, .sigma and .drift_correction
    reps -- mapping of observation key -> sequence of keys of its repeats

  Returns (r1, r2, r1p, r2p): the L1 and L2 measures of the
  sigma-normalised repeat differences, first for the raw gravity values
  and then ("p") with the drift corrections applied.  Each is divided by
  the sum of 1/S over all repeat pairs.  If there are no repeat pairs at
  all, every residual is returned as 0.0.
  """
  L1 = 0.0; L2 = 0.0
  L1p = 0.0; L2p = 0.0
  m = 0.0
  for i in reps:
    for j in reps[i]:
      dG = data[j].G - data[i].G
      S = sqrt(data[i].sigma**2 + data[j].sigma**2)
      # same difference after removing each observation's drift correction
      dGp = dG - data[j].drift_correction + data[i].drift_correction
      # NOTE(review): the L1 terms are signed, not absolute values --
      # confirm that this is the intended measure.
      L1 = L1 + ( dG/S )
      L2 = L2 + ( dG/S )**2
      L1p = L1p + ( dGp/S )
      L2p = L2p + ( dGp/S )**2
      m = m + (1.0/S)

  # no repeat pairs: nothing to normalise by, so avoid dividing by zero
  if m == 0.0:
    return (0.0, 0.0, 0.0, 0.0)

  r1 = L1/m; r2 = sqrt(L2)/m
  r1p = L1p/m; r2p = sqrt(L2p)/m

  return (r1, r2, r1p, r2p)

def correction(data, reps, weighted=1):
  """Run the full drift inversion and return the model with residuals.

  Builds the system with create_matrix, solves it with calc_model, stores
  the per-observation corrections via calc_dG, then evaluates the
  residuals with calc_L1_L2.

  Returns (m, r1, r2, r1p, r2p): the model vector followed by the four
  residual values reported by calc_L1_L2.
  """
  operator, weights, rhs, midpoints = create_matrix(data, reps, weighted)
  model = calc_model(operator, weights, rhs)
  # writes .drift_correction onto the observations in data
  calc_dG(model, midpoints, data)
  residuals = calc_L1_L2(data, reps)

  return (model,) + tuple(residuals)

