Source code for pyacs.gts.lib.offset.test_offset_significance

"""
Test whether an offset at a given date is statistically significant.
"""

import numpy as np

from ._utils import __fmt_date, print_no_return


[docs] def test_offset_significance(self, date, conf_level=95, lcomponent='NE', verbose=True, debug=False, mode='local'): """Test whether an offset is statistically significant. Parameters ---------- date : float Offset date in decimal year. conf_level : float, optional Confidence level in percent. Default is 95. lcomponent : str, optional Components to test ('N','E','U'). Default is 'NE'. verbose : bool, optional Verbose mode. Default is True. debug : bool, optional Debug output. Default is False. mode : str, optional 'local', 'detrend', or 'detrend_seasonal'. Default is 'local'. Returns ------- bool True if significant, else False. """ def f_ratio(chi_square_1, p1, chi_square_2, p2, n): F = ((chi_square_1 - chi_square_2) / (p2 - p1)) / (chi_square_2 / (n - p2)) from scipy.stats import f return f.cdf(F, p2 - p1, n - p2) if verbose: print("-- Testing offset for site %s date %s using mode %s on components %s with confidence level %6.1lf %%" % (self.code, __fmt_date(date), mode, lcomponent, conf_level)) OK = False component = {} component[1] = 'North' component[2] = 'East' component[3] = 'Up' li = [] if 'N' in lcomponent: li.append(1) if 'E' in lcomponent: li.append(2) if 'U' in lcomponent: li.append(3) H = {} score = {} if mode == 'smooth': l_n_days = [5, 7, 9, 11, 13, 15, 17] lwindow_len = [3, 3, 5, 5, 5, 7, 7] coeff = [0.5, 1, 3, 4, 10, 8, 6] for i in sorted(li): print_no_return("-- n_samples/probability %5s:" % component[i]) k = -1 for n in l_n_days: k = k + 1 before = np.array([]) after = np.array([]) around = np.array([]) try: before = self.extract_ndates_before_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_before_date(date, n).data[:, i] after = self.extract_ndates_after_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_after_date(date, n).data[:, i] around = self.extract_ndates_around_date(date, n).smooth(window_len=lwindow_len[k]).data[:, i] - self.extract_ndates_around_date(date, n).data[:, i] except Exception: if debug: print("!!! Could not test offset significancy for date %lf using n=%d surrounding data" % (date, n)) break if debug: print("-- Testing significancy for date %lf using n=%d surrounding data" % (date, n)) if before.shape[0] * after.shape[0] * around.shape[0] != 0.0: before_std = np.std(before) * 1.E3 after_std = np.std(after) * 1.E3 around_std = np.std(around) * 1.E3 chi_square_2 = before.shape[0] * before_std**2 + after.shape[0] * after_std**2 chi_square_1 = around.shape[0] * around_std**2 H[i, k] = f_ratio(chi_square_1, 1, chi_square_2, 2, around.shape[0]) * 100.0 print_no_return(" %02d %5.2lf%% " % (n, H[i, k])) print("") for i in sorted(li): summ = 0.0 score[i] = 0.0 if H != {}: k = -1 for n in l_n_days: k = k + 1 try: score[i] += H[i, k] * coeff[k] summ = summ + coeff[k] except Exception: if debug: print("-- No data for n=%d " % n) pass score[i] = score[i] / summ if verbose: print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i])) if mode == 'local': l_n_days = range(2, 10) coeff = [0.5, 1, 1, 4, 10, 8, 6] for i in sorted(li): print_no_return("-- n_samples/probability %5s:" % component[i]) for n in l_n_days: before = np.array([]) after = np.array([]) around = np.array([]) try: before = self.extract_ndates_before_date(date, n).data[:, i] after = self.extract_ndates_after_date(date, n).data[:, i] around = self.extract_ndates_around_date(date, n).data[:, i] except Exception: if debug: print("!!! Could not test offset significancy for date %lf using n=%d surrounding data" % (date, n)) break if debug: print("-- Testing significancy for date %lf using n=%d surrounding data" % (date, n)) if before.shape[0] * after.shape[0] * around.shape[0] != 0.0: before_std = np.std(before) * 1.E3 after_std = np.std(after) * 1.E3 around_std = np.std(around) * 1.E3 chi_square_2 = before.shape[0] * before_std**2 + after.shape[0] * after_std**2 chi_square_1 = around.shape[0] * around_std**2 H[i, n] = f_ratio(chi_square_1, 1, chi_square_2, 2, around.shape[0]) * 100.0 print_no_return(" %02d %5.2lf%% " % (n, H[i, n])) print("") for i in sorted(li): summ = 0.0 score[i] = 0.0 for n in range(2, 7): try: score[i] += H[i, n] * coeff[n] summ = summ + coeff[n] except Exception: if debug: print("-- No data for n=%d " % n) pass score[i] = score[i] / summ if verbose: print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i])) if mode == 'detrend': for i in sorted(li): tmp_ts = self.copy() tmp_ts.offsets_dates = [] chi_square_1 = np.std(tmp_ts.detrend().data[:, i])**2 * tmp_ts.data.shape[0] chi_square_2 = np.std(tmp_ts.add_offsets_dates([date]).detrend().data[:, i])**2 * tmp_ts.data.shape[0] score[i] = f_ratio(chi_square_1, 2, chi_square_2, 3, tmp_ts.data.shape[0]) * 100.0 if verbose: print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i])) if mode == 'detrend_seasonal': for i in sorted(li): chi_square_1 = np.sum(self.detrend().data[:, i]**2) chi_square_2 = np.sum(self.add_offsets_dates([date]).detrend_seasonal().data[:, i]**2) n = self.data.shape[0] score[i] = f_ratio(chi_square_1, 1, chi_square_2, 3, 2 * n) * 100.0 if verbose: print("-- Probability of %s offset for site %s date %s component %5s using model: %6.1lf %%" % (mode.upper(), self.code, __fmt_date(date), component[i], score[i])) score_max = np.max(np.array(list(score.values()))) if score_max > conf_level: OK = True if verbose: if OK: print("-- Offset IS significant at the %5.2lf%% confidence level (threshold=%5.2lf%%) " % (score_max, conf_level)) else: print("-- Offset NOT significant %5.2lf%% confidence level (threshold=%5.2lf%%) " % (score_max, conf_level)) return OK