From 7da83a7537e375299f0d18d20731cc45e79ae037 Mon Sep 17 00:00:00 2001
From: dave <dave@dtu.dk>
Date: Wed, 26 Oct 2016 12:47:42 +0200
Subject: [PATCH] wetb.prepost.misc: general re-fitting histogram method

---
 wetb/prepost/misc.py | 91 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 6 deletions(-)

diff --git a/wetb/prepost/misc.py b/wetb/prepost/misc.py
index f25b9789..d95b2b4f 100644
--- a/wetb/prepost/misc.py
+++ b/wetb/prepost/misc.py
@@ -19,22 +19,17 @@ from future import standard_library
 standard_library.install_aliases()
 from builtins import object
 
-
-
-#print(*objects, sep=' ', end='\n', file=sys.stdout)
 import os
 import sys
 import shutil
 import unittest
 import pickle
 
-#from xlrd import open_workbook
 import numpy as np
 import scipy as sp
 from scipy import optimize as opt
 from scipy import stats
-#import scipy.interpolate
-#import scipy.ndimage
+from scipy.interpolate import griddata as interp
 from matplotlib import pyplot as plt
 import pandas as pd
 
@@ -950,6 +945,9 @@ def histfit(hist, bin_edges, xnew):
     http://nbviewer.ipython.org/url/xweb.geos.ed.ac.uk/~jsteven5/blog/
     fitting_distributions_from_percentiles.ipynb
 
+    Calculate the CDF of given PDF, and fit a lognorm distribution onto the
+    CDF. This obviously only works if your PDF is lognorm.
+
     Parameters
     ----------
 
@@ -988,6 +986,87 @@ def histfit(hist, bin_edges, xnew):
     return shape_out, scale_out, pdf_fit
 
 
+def histfit_arbritrary(edges, pdf, edges_new, resolution=100):
+    """Re-bin based on the CDF of a PDF. Assume normal distribution within
+    a bin to transform the CDF to higher resolution.
+
+    Parameters
+    ----------
+
+    edges : ndarray(n+1)
+        edges of the bins, inlcuding most left and right edges.
+
+    pdf : ndarray(n)
+        probability of the bins
+
+    edges_new : ndarray(m+1)
+        edges of the new bins
+
+    resolution : int
+        resolution of the intermediate CDF used for re-fitting.
+
+
+    Returns
+    -------
+
+    centers_new : ndarray(m)
+
+    widths_new : ndarray(m)
+
+    pdf_new : ndarray(m)
+
+    """
+
+    x_hd = np.ndarray((0,))
+    cdf_hd = np.ndarray((0,))
+    binw = np.ndarray((0,))
+
+    for i in range(len(pdf)):
+        # HD grid for x
+        x_inc = np.linspace(edges[i], edges[i+1], num=resolution)
+
+        # FIXME: let the distribution in a bin be a user configurable input
+        # define a distribution within the bin: norm
+        shape = 2.5
+        scale = shape*2/10
+        x_inc = np.linspace(0, scale*10, num=resolution)
+        cdf_inc = stats.norm.cdf(x_inc, shape, scale=scale)
+
+        # scale cdf_inc and x-coordinates
+        cdf_inc_scale = pdf[i] * cdf_inc / cdf_inc[-1]
+        binw = edges[i+1] - edges[i]
+        x_inc_scale = edges[i] + (binw * x_inc / x_inc[-1])
+
+        # add to the new hd corodinates and cdf
+        x_hd = np.append(x_hd, x_inc_scale)
+        if i == 0:
+            cdf_i  = 0
+        else:
+            cdf_i = cdf_hd[-1]
+        cdf_hd = np.append(cdf_hd, cdf_inc_scale + cdf_i)
+
+#        plt.plot(x_inc, cdf_inc)
+#        plt.plot(x_inc_scale, cdf_inc_scale)
+
+    cdf_new = interp(x_hd, cdf_hd, edges_new)
+    # last point includes everything that comes after
+    cdf_new[-1] = 1
+    pdf_new = np.diff(cdf_new)
+    widths_new = np.diff(edges_new)
+    centers_new = widths_new + edges[0]
+    # the first bin also includes everything that came before
+    pdf_new[0] += cdf_new[0]
+    pdf_new /= pdf_new.sum()
+
+#    plt.plot(x_hd, cdf_hd)
+#    plt.plot(edges_new, cdf_new, 'rs')
+#
+#    plt.bar(edges_new[:-1], pdf_new, width=widths_new, color='b')
+#    plt.bar(edges[:-1], pdf, width=np.diff(edges), color='r', alpha=0.7)
+
+    return centers_new, widths_new, pdf_new
+
+
 def hist_centers2edges(centers):
     """Given the centers of bins, return its edges and bin widths.
     """
-- 
GitLab