Source code for pyacs.gts.lib.l1trend.outlier_flagging

"""
Outlier flagging using L1-trend analysis.

This module provides functions to identify and flag outliers in GPS time series
based on their deviation from L1-trend filtered representations.
"""

import numpy as np
import pyacs.message.verbose_message as VERBOSE


[docs] def flag_outliers_using_l1trend(self, l1trend, threshold=5): """ Flag outliers in the time series based on a user provided l1trend filter representation of the time series. This function identifies outliers by comparing the original time series to its L1-trend representation. Outliers are flagged based on their deviation from the trend using the Median Absolute Deviation (MAD) method. Parameters ---------- self : Gts object The time series to flag outliers in. l1trend : Gts object The l1trend representation of the time series. threshold : float, optional The threshold for flagging outliers. Default is 5 for 5 times the median absolute deviation (MAD) from the median. Returns ------- Gts The original time series object with outliers flagged in self.outliers Notes ----- The method uses the Median Absolute Deviation (MAD) which is more robust to outliers than standard deviation. Outliers are identified as points that deviate more than threshold * MAD from the median of the absolute differences between the original time series and the L1-trend representation. """ VERBOSE("Flagging outliers using L1-trend with threshold: %.1f" % threshold) # Calculate the difference between original time series and L1-trend diff_ts = self.substract_ts_daily(l1trend) # Get absolute differences for N, E, U components data_fabs = np.fabs(diff_ts.data[:, 1:4]) # Columns 1, 2, 3 for N, E, U # Calculate median of absolute differences for each component median = np.median(data_fabs, axis=0) # Calculate Median Absolute Deviation (MAD) for each component mad = np.median(np.fabs(data_fabs - median), axis=0) # Calculate threshold values for each component threshold_values = threshold * mad VERBOSE("MAD values (N, E, U): [%.3f, %.3f, %.3f] mm" % tuple(mad * 1000)) VERBOSE("Threshold values (N, E, U): [%.3f, %.3f, %.3f] mm" % tuple(threshold_values * 1000)) # Find outliers: points where any component exceeds its threshold # Check each component separately and combine results outliers_by_component = [] for i in range(3): # N, E, U components component_outliers = np.where(data_fabs[:, i] > threshold_values[i])[0] outliers_by_component.append(component_outliers) # Combine outliers from all components outliers = np.unique(np.concatenate(outliers_by_component)) # Do not flag outliers that are close to identified offsets (5 days) if len(outliers) > 0 and len(self.offsets_dates) > 0: outliers_dates = self.data[outliers, 0] outliers_close_to_offsets = np.array([], dtype=int) for offset_date in self.offsets_dates: # Find outliers that are within 5 days of this offset close_indices = np.where(np.abs(outliers_dates - offset_date) < 5/365.25)[0] outliers_close_to_offsets = np.concatenate([outliers_close_to_offsets, close_indices]) # Remove duplicates from outliers_close_to_offsets outliers_close_to_offsets = np.unique(outliers_close_to_offsets) # Remove outliers close to offsets outliers = np.setdiff1d(outliers, outliers[outliers_close_to_offsets]) # create new Gts new_gts = self.copy() new_gts.outliers = np.sort(outliers).tolist() VERBOSE("Flagged %d outliers out of %d total points (%.1f%%)" % (len(new_gts.outliers), len(self.data), 100.0 * len(new_gts.outliers) / len(self.data))) return new_gts