Source code for pyacs.gts.lib.offset.find_offsets

"""
Simple empirical procedure to find offsets (multi-step: large, intermediate, small).
"""

from ._utils import __fmt_date, get_suspected_dates


def _print_dates_4digits(L):
    if isinstance(L, float):
        L = [L]
    return ["%9.4lf" % date for date in sorted(L)]


def _confirm_suspected_offsets(candidate_dates, tested_gts, outlier_gts, second_mode,
                               conf_level, lcomponent, verbose, announce_rejection=True):
    """Return the suspected offset dates that pass both significance tests.

    Each date is tested on ``tested_gts`` with ``mode='local'`` and, if that
    passes, with ``mode=second_mode``. A date failing either test is handed to
    ``outlier_gts.find_outlier_around_date`` (``n=3``); the caller reads
    ``outlier_gts.outliers`` afterwards (presumably updated in place by that
    call -- confirm against the Gts implementation).

    Parameters
    ----------
    candidate_dates : iterable of float
        Suspected offset dates.
    tested_gts : Gts
        Series on which ``test_offset_significance`` is evaluated.
    outlier_gts : Gts
        Series on which rejected dates are screened for outliers.
    second_mode : str
        Mode of the second significance test (e.g. ``'detrend'``).
    conf_level, lcomponent, verbose
        Forwarded unchanged to the Gts methods.
    announce_rejection : bool, optional
        If True (and ``verbose``), print a message for each rejected date.

    Returns
    -------
    list of float
        Confirmed dates, in the order of ``candidate_dates``.
    """
    shared_kwargs = dict(conf_level=conf_level, lcomponent=lcomponent, verbose=verbose)
    confirmed = []
    for offset_date in candidate_dates:
        if tested_gts.test_offset_significance(offset_date, mode='local', **shared_kwargs) \
                and tested_gts.test_offset_significance(offset_date, mode=second_mode, **shared_kwargs):
            confirmed.append(offset_date)
            continue
        if verbose and announce_rejection:
            print("=> Since it is not an offset, date %10.4lf is potentially an outlier" % offset_date)
        outlier_gts.find_outlier_around_date(offset_date, n=3, **shared_kwargs)
    return confirmed


def _collect_outlier_dates(gts, threshold):
    """Return the dates of the outliers known for ``gts``.

    Two sources are concatenated, in this order: the indices already stored in
    ``gts.outliers``, then the indices returned by
    ``gts.find_outliers_simple(threshold=threshold)``.

    Indices are always converted to dates with the ``data`` array of the object
    they come from. The temporary series handled by ``find_offsets`` may have
    had rows removed by ``remove_outliers()``, so their row numbers cannot be
    assumed to match the original time series.
    """
    dates = []
    if gts.outliers != []:
        dates += gts.data[gts.outliers, 0].tolist()
    flagged = gts.find_outliers_simple(threshold=threshold)
    dates += flagged.data[flagged.outliers, 0].tolist()
    return dates


def find_offsets(self, threshold=3, n_max_offsets=9, conf_level=95, lcomponent='NE', verbose=True, in_place=False):
    """Simple empirical procedure to find offsets.

    The search runs in three passes with decreasing detection thresholds:
    large offsets (threshold 10), intermediate offsets (mean of 10 and
    ``threshold``) and small offsets (``threshold``). In each pass the dates
    returned by ``suspect_offsets`` are kept only if they pass two
    ``test_offset_significance`` tests; rejected dates are screened for
    outliers with ``find_outlier_around_date``, and ``find_outliers_simple``
    is run with the threshold of the pass.

    Parameters
    ----------
    threshold : float, optional
        Threshold for preliminary offset detection. Default is 3.
    n_max_offsets : int, optional
        Maximum number of offsets to detect. Default is 9.
    conf_level : float, optional
        Confidence level (percent) to accept offset. Default is 95.
    lcomponent : str, optional
        Components for detection ('N','E','U'). Default is 'NE'.
    verbose : bool, optional
        Verbose mode. Default is True.
    in_place : bool, optional
        If True, modify self. Default is False.

    Returns
    -------
    Gts
        New Gts (or self if in_place) with offsets_dates and outliers set.
    """
    tts = self.copy()
    loutliers_dates = []
    gross_threshold = 10
    # NOTE(review): passes 2 and 3 have always asked suspect_offsets for up to
    # 10 candidates regardless of n_max_offsets; kept as is so that default
    # results do not change -- confirm whether n_max_offsets should apply here.
    n_max_refined_candidates = 10

    # ------------------------------------------------------------------
    # STEP 1: large offsets
    # ------------------------------------------------------------------
    if verbose:
        print("********************************************************************")
        print("-- %s STEP #1: trying to identify large offsets" % self.code)
        print("********************************************************************")

    tmp_gts_gross = tts.suspect_offsets(threshold=gross_threshold, verbose=verbose,
                                        lcomponent=lcomponent, n_max_offsets=n_max_offsets)

    if verbose:
        print("-- %1d suspected large offsets found" % (len(tmp_gts_gross.offsets_dates)))
        for odate in tmp_gts_gross.offsets_dates:
            print("-- %s %12.8lf" % (__fmt_date(odate), odate))

    # Significance is tested on the working copy; rejected dates are screened
    # for outliers on the series returned by suspect_offsets.
    significant_offsets = _confirm_suspected_offsets(
        tmp_gts_gross.offsets_dates, tts, tmp_gts_gross, 'detrend',
        conf_level, lcomponent, verbose)

    if verbose:
        print("=> Large offset search : %1d offsets confirmed, %1d were actually outliers"
              % (len(significant_offsets), len(tmp_gts_gross.outliers)))

    tts.offsets_dates += significant_offsets
    loutliers_dates += _collect_outlier_dates(tmp_gts_gross, gross_threshold)

    if verbose:
        print("=> Outliers search : %03d outliers found" % (len(loutliers_dates)))

    # NOTE(review): this plot is produced on every call, whatever `verbose`
    # is; it looks like a debugging leftover -- confirm before removing.
    tmp_gts_gross.plot()

    # ------------------------------------------------------------------
    # STEP 2: intermediate size offsets
    # ------------------------------------------------------------------
    if verbose:
        print("********************************************************************")
        print("-- %s STEP #2: trying to identify intermediate size offsets" % self.code)
        print("********************************************************************")

    intermediate_threshold = 0.5 * (gross_threshold + threshold)

    # Offsets estimated at the dates suspected in step 1 are applied before
    # searching for smaller ones.
    previous_offsets_values = tmp_gts_gross.remove_outliers().detrend().offsets_values
    tmp_gts_intermediate = tmp_gts_gross.remove_outliers().apply_offsets(previous_offsets_values).suspect_offsets(
        threshold=intermediate_threshold, verbose=verbose,
        lcomponent=lcomponent, n_max_offsets=n_max_refined_candidates)

    if verbose:
        str_offsets_dates = " ".join(_print_dates_4digits(tmp_gts_intermediate.offsets_dates))
        print("=> %1d potential intermediate offsets found at %s"
              % (len(tmp_gts_intermediate.offsets_dates), str_offsets_dates))

    # The candidate list is detached from the series before testing.
    lpotential_offsets_dates = tmp_gts_intermediate.offsets_dates
    tmp_gts_intermediate.offsets_dates = []

    significant_offsets = _confirm_suspected_offsets(
        lpotential_offsets_dates, tmp_gts_intermediate, tmp_gts_intermediate, 'detrend',
        conf_level, lcomponent, verbose)

    if verbose:
        print("=> Intermediate size offsets search : %1d offsets confirmed, %1d were actually outliers"
              % (len(significant_offsets), len(tmp_gts_intermediate.outliers)))

    tts.offsets_dates += significant_offsets
    loutliers_dates += _collect_outlier_dates(tmp_gts_intermediate, intermediate_threshold)

    if verbose:
        print("=> Outliers search : %03d outliers found" % (len(loutliers_dates)))

    # ------------------------------------------------------------------
    # STEP 3: small offsets
    # ------------------------------------------------------------------
    if verbose:
        print("********************************************************************")
        print("-- %s STEP #3: trying to identify small offsets" % self.code)
        print("********************************************************************")

    # NOTE(review): as in the original code, the offset values come from the
    # step-1 series again, and the significance tests below run on the
    # intermediate series rather than on tmp_gts_final -- confirm intent.
    previous_offsets_values = tmp_gts_gross.remove_outliers().detrend().offsets_values
    tmp_gts_final = tmp_gts_intermediate.remove_outliers().apply_offsets(previous_offsets_values).suspect_offsets(
        threshold=threshold, verbose=verbose,
        lcomponent=lcomponent, n_max_offsets=n_max_refined_candidates)

    if verbose:
        str_offsets_dates = " ".join(_print_dates_4digits(tmp_gts_final.offsets_dates))
        print("=> %1d potential subtle offsets found at %s"
              % (len(tmp_gts_final.offsets_dates), str_offsets_dates))

    lpotential_offsets_dates = tmp_gts_final.offsets_dates
    tmp_gts_final.offsets_dates = []

    # Second test uses 'detrend_seasonal' here; rejected dates are not announced.
    significant_offsets = _confirm_suspected_offsets(
        lpotential_offsets_dates, tmp_gts_intermediate, tmp_gts_final, 'detrend_seasonal',
        conf_level, lcomponent, verbose, announce_rejection=False)

    if verbose:
        print("=> Subtle offset search: %1d offsets confirmed, %1d were actually outliers"
              % (len(significant_offsets), len(tmp_gts_final.outliers)))

    tts.offsets_dates += significant_offsets
    offsets_dates = sorted(set(tts.offsets_dates))
    tts.offsets_dates = offsets_dates

    loutliers_dates += _collect_outlier_dates(tmp_gts_final, threshold)

    if verbose:
        print("=> Outliers search : %03d outliers found" % (len(loutliers_dates)))

    # ------------------------------------------------------------------
    # Map the outlier dates back to row indices of the working copy
    # ------------------------------------------------------------------
    if loutliers_dates:
        # Imported here as in the original code (presumably to avoid a
        # circular import at module load time).
        from pyacs.gts.Gts import get_index_from_dates
        returned_index = get_index_from_dates(loutliers_dates, tts.data, tol=0.25)
        loutliers = sorted(set(returned_index))
    else:
        loutliers = []

    if verbose:
        print("**********************************************************************************")
        # Report the indices actually flagged; the date list may hold duplicates.
        print("=> Final results of offset search: %02d offsets found; additionally %02d outliers were flagged"
              % (len(offsets_dates), len(loutliers)))
        print("**********************************************************************************")

    result = self if in_place else self.copy()
    result.offsets_dates = offsets_dates
    result.outliers = loutliers
    return result