Source code for pyacs.gts.lib.offset.test_offset_significance

"""
Test whether an offset at a given date is statistically significant.
"""

import numpy as np

from ._utils import __fmt_date, print_no_return



[docs]
def test_offset_significance(self, date, conf_level=95, lcomponent='NE', verbose=True, debug=False, mode='local'):
    """Test whether an offset is statistically significant.

    Parameters
    ----------
    date : float
        Offset date in decimal year.
    conf_level : float, optional
        Confidence level in percent. Default is 95.
    lcomponent : str, optional
        Components to test ('N','E','U'). Default is 'NE'.
    verbose : bool, optional
        Verbose mode. Default is True.
    debug : bool, optional
        Debug output. Default is False.
    mode : str, optional
        'local', 'detrend', or 'detrend_seasonal'. Default is 'local'.

    Returns
    -------
    bool
        True if significant, else False.
    """
    def f_ratio(chi_square_1, p1, chi_square_2, p2, n):
        F = ((chi_square_1 - chi_square_2) / (p2 - p1)) / (chi_square_2 / (n - p2))
        from scipy.stats import f
        return f.cdf(F, p2 - p1, n - p2)

    if verbose:
        print("-- Testing offset for site %s date %s using mode %s on components %s with confidence level %6.1lf %%" % (self.code, __fmt_date(date), mode, lcomponent, conf_level))

    OK = False
    component = {}
    component[1] = 'North'
    component[2] = 'East'
    component[3] = 'Up'
    li = []
    if 'N' in lcomponent:
        li.append(1)
    if 'E' in lcomponent:
        li.append(2)
    if 'U' in lcomponent:
        li.append(3)
    H = {}
    score = {}

    if mode == 'smooth':
        l_n_days = [5, 7, 9, 11, 13, 15, 17]
        lwindow_len = [3, 3, 5, 5, 5, 7, 7]
        coeff = [0.5, 1, 3, 4, 10, 8, 6]
        for i in sorted(li):
            print_no_return("-- n_samples/probability %5s:" % component[i])
            k = -1
            for n in l_n_days:
                k = k + 1
                before = np.array([])
                after = np.array([])
                around = np.array([])
                try:
                    before = self.extract_ndates_before_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_before_date(date, n).data[:, i]
                    after = self.extract_ndates_after_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_after_date(date, n).data[:, i]
                    around = self.extract_ndates_around_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_around_date(date, n).data[:, i]
                except Exception:
                    if debug:
                        print("!!! Could not test offset significancy for date %lf using n=%d surrounding data" % (date, n))
                    break
                if debug:
                    print("-- Testing significancy for date %lf using n=%d surrounding data" % (date, n))
                if before.shape[0] * after.shape[0] * around.shape[0] != 0.0:
                    before_std = np.std(before) * 1.E3
                    after_std = np.std(after) * 1.E3
                    around_std = np.std(around) * 1.E3
                    chi_square_2 = before.shape[0] * before_std**2 + after.shape[0] * after_std**2
                    chi_square_1 = around.shape[0] * around_std**2
                    H[i, k] = f_ratio(chi_square_1, 1, chi_square_2, 2, around.shape[0]) * 100.0
                    print_no_return(" %02d %5.2lf%% " % (n, H[i, k]))
            print("")
        for i in sorted(li):
            summ = 0.0
            score[i] = 0.0
            if H != {}:
                k = -1
                for n in l_n_days:
                    k = k + 1
                    try:
                        score[i] += H[i, k] * coeff[k]
                        summ = summ + coeff[k]
                    except Exception:
                        if debug:
                            print("-- No data for n=%d " % n)
                        pass
                score[i] = score[i] / summ
            if verbose:
                print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i]))

    if mode == 'local':
        l_n_days = range(2, 10)
        coeff = [0.5, 1, 1, 4, 10, 8, 6]
        for i in sorted(li):
            print_no_return("-- n_samples/probability %5s:" % component[i])
            for n in l_n_days:
                before = np.array([])
                after = np.array([])
                around = np.array([])
                try:
                    before = self.extract_ndates_before_date(date, n).data[:, i]
                    after = self.extract_ndates_after_date(date, n).data[:, i]
                    around = self.extract_ndates_around_date(date, n).data[:, i]
                except Exception:
                    if debug:
                        print("!!! Could not test offset significancy for date %lf using n=%d surrounding data" % (date, n))
                    break
                if debug:
                    print("-- Testing significancy for date %lf using n=%d surrounding data" % (date, n))
                if before.shape[0] * after.shape[0] * around.shape[0] != 0.0:
                    before_std = np.std(before) * 1.E3
                    after_std = np.std(after) * 1.E3
                    around_std = np.std(around) * 1.E3
                    chi_square_2 = before.shape[0] * before_std**2 + after.shape[0] * after_std**2
                    chi_square_1 = around.shape[0] * around_std**2
                    H[i, n] = f_ratio(chi_square_1, 1, chi_square_2, 2, around.shape[0]) * 100.0
                    print_no_return(" %02d %5.2lf%% " % (n, H[i, n]))
            print("")
        for i in sorted(li):
            summ = 0.0
            score[i] = 0.0
            for n in range(2, 7):
                try:
                    score[i] += H[i, n] * coeff[n]
                    summ = summ + coeff[n]
                except Exception:
                    if debug:
                        print("-- No data for n=%d " % n)
                    pass
            score[i] = score[i] / summ
            if verbose:
                print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i]))

    if mode == 'detrend':
        for i in sorted(li):
            tmp_ts = self.copy()
            tmp_ts.offsets_dates = []
            chi_square_1 = np.std(tmp_ts.detrend().data[:, i])**2 * tmp_ts.data.shape[0]
            chi_square_2 = np.std(tmp_ts.add_offsets_dates([date]).detrend().data[:, i])**2 * tmp_ts.data.shape[0]
            score[i] = f_ratio(chi_square_1, 2, chi_square_2, 3, tmp_ts.data.shape[0]) * 100.0
            if verbose:
                print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i]))

    if mode == 'detrend_seasonal':
        for i in sorted(li):
            chi_square_1 = np.sum(self.detrend().data[:, i]**2)
            chi_square_2 = np.sum(self.add_offsets_dates([date]).detrend_seasonal().data[:, i]**2)
            n = self.data.shape[0]
            score[i] = f_ratio(chi_square_1, 1, chi_square_2, 3, 2 * n) * 100.0
            if verbose:
                print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i]))

    score_max = np.max(np.array(list(score.values())))
    if score_max > conf_level:
        OK = True
    if verbose:
        if OK:
            print("-- Offset IS significant at the %5.2lf%% confidence level (threshold=%5.2lf%%) " % (score_max, conf_level))
        else:
            print("-- Offset NOT significant %5.2lf%% confidence level (threshold=%5.2lf%%) " % (score_max, conf_level))
    return OK