diff --git a/redpy/table.py b/redpy/table.py old mode 100644 new mode 100755 index de32742..c6da802 --- a/redpy/table.py +++ b/redpy/table.py @@ -468,7 +468,7 @@ def moveOrphan(rtable, otable, ftable, oindex, opt): otable.flush() -def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt): +def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt, verbose=False): """ Moves the core of the families into the dtable, deletes the rest of the members. @@ -481,6 +481,8 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt): opt: Options object describing station/run parameters """ + if verbose: print("Removing families...") + cnums = np.sort(cnums) old = list(range(len(rtable))) transform = np.zeros((len(rtable),)).astype(int) @@ -493,7 +495,8 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt): ftable.flush() ftable.attrs.nClust-=len(cnums) members = np.sort(members).astype('uint32') - + + if verbose: print("Updating cores...") cores = rtable[np.intersect1d(members, oldcores)] for core in cores: trigger = dtable.row @@ -507,20 +510,23 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt): trigger['windowAmp'] = core['windowAmp'] trigger['FI'] = core['FI'] trigger.append() - + + if verbose: print("Updating correlation table...") ids = ids[members] id2 = ctable.cols.id2[:] idxc = np.where(np.in1d(id2,ids))[0] for c in idxc[::-1]: ctable.remove_row(c) + if verbose: print("Updating repeater table...") for m in members[::-1]: rtable.remove_row(m) old.remove(m) new = range(len(rtable)) transform[old] = new - + + if verbose: print("Updating family table...") np.set_printoptions(threshold=sys.maxsize) np.set_printoptions(linewidth=sys.maxsize) for n in range(len(ftable)): @@ -529,12 +535,109 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt): ftable.cols.members[n] = np.array2string(transform[fmembers])[1:-1] ftable.cols.core[n] = transform[core] ftable.flush() - + + if verbose: print("Flushing tables...") ftable.cols.printme[-1] = 1 rtable.flush() dtable.flush() +def removeSmallFamilies(rtable, ctable, dtable, ftable, ttable, minmembers, maxdays, seedtime, opt, verbose=False, list_only=False): + """ + Searches for families that have less than minmembers and are more than maxdays old. Sends the cluster numbers to + removeFamilies for deletion. + + rtable: Repeater table + ctable: Correlation matrix table + dtable: Deleted table + ftable: Families table + ttable: Trigger table + minmembers: Minimum number of members needed to keep a family + maxdays: Maximum number of days + seedtime: Date from which to measure maxdays + opt: Options object describing station/run parameters + verbose: Boolen value for printing + + :return + cnums: List of family numbers that were removed + """ + + """ + Programmer's notes: + ftable[i]["startTime"] : matplotlib integer : start of Family i + ftable[i]["longevity"] : int : longevity of family i in days + seedTime = UTCDateTime.nowutc() + startTime = UTCDateTime(num2date(ftable[i]["startTime"])) # UTCDateTime object + #longevity = ftable[i]["longevity"]*86400 # int (seconds), addable to UTCDateTime object + #lastTime = startTime + longevity # UTCDateTime object + #ageOfFamily = (nowTime - lastTime)/86400 # returns ageOfFamily in days (subtracting 2 UTCDateTime objects returns seconds) + ageOfFamily = (nowTime - startTime)/86400 # returns ageOfFamily in days (subtracting 2 UTCDateTime objects returns seconds) + a = ageOfFamily # age of Family i in days + + Note: A negative age means the family started after the seed time. It will be kept. + """ + + from matplotlib.dates import num2date + from obspy import UTCDateTime + + if seedtime: + seedtime = UTCDateTime(seedtime) + else: + seedtime = UTCDateTime(num2date(ttable[-1]["startTimeMPL"])) # UTC time of last trigger + + # If using list_only mode, automatically invoke verbose mode too + if list_only: + verbose = True + + if verbose: + print("::: table.removeSmallFamilies()") + print("::: - minmembers : {}".format(minmembers)) + print("::: - maxdays : {}".format(maxdays)) + print("::: - seedtime : {}".format(seedtime)) + print() + + cnums = [] # list of cluster numbers to be removed + nremoved = 0 # number of repeaters removed + nFamiliesRemoved = 0 # number of familiese to be removed + if verbose: + print("::: Member count per Family :::") + print("#{:>12s} | {:>12s} | {:>12s} | {:<12s}".format("Family #", "Members", "Age (d)", "Fate")) + for i in range(len(ftable)): + fate = "keep" # initialize fate of family as "keep," for printing purposes only + n = len(np.fromstring(ftable[i]['members'], dtype=int, sep=' ')) # number of repeaters in the family + a = (seedtime - (UTCDateTime(num2date(ftable[i]["startTime"])))) / 86400 # a = age in days (measured from family beginning), see explanation above + if n < minmembers and a > maxdays: # if family has too few members and is too old + cnums.append(i) # append it to the list to remove + fate = "REMOVE" # update fate as "REMOVE" + nremoved += n # keep track of total number of repeaters removed + nFamiliesRemoved += 1 # keep track of number of families removed + if verbose: + print("#{:>12d} | {:12d} | {:>12.2f} | {:<12s}".format(i, n, a, fate)) + + if verbose: + print() + print("Remove families : {}".format(cnums)) + print("# Families removed : {}/{}".format(len(cnums), len(ftable))) + percent_removed = nremoved/len(rtable)*100 + print("# Repeaters removed : {}/{} ({:2.1f}%)".format(nremoved, len(rtable), percent_removed)) + #print("# Repeaters removed : {}/{} ({:2.1f}%)".format(nremoved, len(rtable), nremoved/len(rtable)*100)) # throws a divide by zero error if no repeaters + print() + + if list_only: + # If list_only (-l), do not execute removeFamilies() + print("Families listed but not removed. Remove -l flag to actually modify table.") + else: + # removeFamilies() if there are families to remove + if nFamiliesRemoved: + # print("Removing families...") # This verbosity message exists in removeFamilies() + removeFamilies(rtable, ctable, dtable, ftable, cnums, opt, verbose=verbose) + else: + if verbose: + print("No families to remove.") + + return cnums + + def clearExpiredOrphans(otable, opt, tend): """ @@ -766,4 +869,5 @@ def checkMPL(rtable, ftable, ttable, otable, dtable, opt): # Fix families for i in range(len(ftable.cols.startTime[:])): if ftable.cols.startTime[i] < reftime: - ftable.cols.startTime[i] = ftable.cols.startTime[i] + epoch \ No newline at end of file + ftable.cols.startTime[i] = ftable.cols.startTime[i] + epoch + diff --git a/removeSmallFamily.py b/removeSmallFamily.py new file mode 100755 index 0000000..698c329 --- /dev/null +++ b/removeSmallFamily.py @@ -0,0 +1,88 @@ +# REDPy - Repeating Earthquake Detector in Python +# Copyright (C) 2016-2020 Alicia Hotovec-Ellis (ahotovec-ellis@usgs.gov) +# Licensed under GNU GPLv3 (see LICENSE.txt) + +import redpy.config +import redpy.table +import argparse + +""" +Run this script to remove small families/clusters (i.e., families that have less than M members and are more than D days +old). Reclusters and remakes images when done. This module works by determining the families that need to be removed, +and then it passes those families to removeFamily.py. Note: Removing families from datasets with manny families and +repeaters may take a significant amount of time. + +usage: removeSmallFamily.py [-h] [-v] [-c CONFIGFILE] [-m MINMEMBERS] [-d MAXDAYS] [-t SEEDTIME] + +optional arguments: + -h, --help show this help message and exit + -v, --verbose increase written print statements + -m, --MINMEMBERS minimum family size to keep (default: 5) + -d, --MAXDAYS maximum age of a family (days) to keep; measured from first member in family + in other words: keep families less than or equal to d days old + (default: 0; i.e., removes all small families) + -t --SEEDTIME Time from which to compute families' ages (default: last trigger time UTC) + If a family started after the seedtime, it will be kept + -l --LIST Lists families to keep and remove, but does not actually modify anything. Automatically uses + verbose mode. + -c CONFIGFILE, --configfile CONFIGFILE + use configuration file named CONFIGFILE instead of + default settings.cfg +""" + + +parser = argparse.ArgumentParser(description= + "Run this script to manually remove small families/clusters") +parser.add_argument("-v", "--verbose", action="count", default=0, + help="increase written print statements") +parser.add_argument("-c", "--configfile", + help="use configuration file named CONFIGFILE instead of default settings.cfg") +parser.add_argument("-m", "--minmembers", type=int, default=5, + help="minimum family size to keep") +parser.add_argument("-d", "--maxdays", type=int, default=0, + help="maximum age of a family to be saved (default: 0, i.e., removes every small family regardless of age") +parser.add_argument("-t", "--seedtime", default=False, + help="time from which to compute families' repose times (YYYY-MM-DDTHH:MM:SS) (deafult: last trigger time UTC)") +parser.add_argument("-l", "--list", action="store_true", default=False, + help="list families to keep and remove, but do not execute") +args = parser.parse_args() + + +def main(args): + + if args.configfile: + opt = redpy.config.Options(args.configfile) + if args.verbose: print("Using config file: {0}".format(args.configfile)) + else: + opt = redpy.config.Options("settings.cfg") + if args.verbose: print("Using config file: settings.cfg") + + if args.verbose: print("Opening hdf5 table: {0}".format(opt.filename)) + h5file, rtable, otable, ttable, ctable, jtable, dtable, ftable = redpy.table.openTable(opt) + + # Check for MPL version mismatch + redpy.table.checkMPL(rtable, ftable, ttable, otable, dtable, opt) + + oldnClust = ftable.attrs.nClust + + # Determines which families to remove, sends to table.removeFamilies(), outputs number of families removed + cnums = redpy.table.removeSmallFamilies(rtable, ctable, dtable, ftable, ttable, args.minmembers, args.maxdays, + args.seedtime, opt, list_only=args.list, verbose=args.verbose) + + if len(cnums) > 0: + # Only update plots if there are families removed + if args.verbose: print("Creating plots...") + redpy.plotting.createPlots(rtable, ftable, ttable, ctable, otable, opt) + + if args.verbose: print("Cleaning up old .html & .png files...") + redpy.plotting.cleanHTML(oldnClust, ftable.attrs.nClust, opt) + else: + if args.verbose: print("No families removed. No plots to update...") + + if args.verbose: print("Closing table...") + h5file.close() + if args.verbose: print("Done") + + +if __name__ == "__main__": + main(args)