Skip to content

Commit

Permalink
removeSmallFamily (#23)
Browse files Browse the repository at this point in the history
Added removeSmallFamily.py, redpy.table.removeSmallFamilies(), and additional verbosity option in redpy.table.removeFamily()
  • Loading branch information
jwellik authored Nov 22, 2022
1 parent daa0421 commit 4ae2a9c
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 6 deletions.
116 changes: 110 additions & 6 deletions redpy/table.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def moveOrphan(rtable, otable, ftable, oindex, opt):
otable.flush()


def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt):
def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt, verbose=False):

"""
Moves the core of the families into the dtable, deletes the rest of the members.
Expand All @@ -481,6 +481,8 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt):
opt: Options object describing station/run parameters
"""

if verbose: print("Removing families...")

cnums = np.sort(cnums)
old = list(range(len(rtable)))
transform = np.zeros((len(rtable),)).astype(int)
Expand All @@ -493,7 +495,8 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt):
ftable.flush()
ftable.attrs.nClust-=len(cnums)
members = np.sort(members).astype('uint32')


if verbose: print("Updating cores...")
cores = rtable[np.intersect1d(members, oldcores)]
for core in cores:
trigger = dtable.row
Expand All @@ -507,20 +510,23 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt):
trigger['windowAmp'] = core['windowAmp']
trigger['FI'] = core['FI']
trigger.append()


if verbose: print("Updating correlation table...")
ids = ids[members]
id2 = ctable.cols.id2[:]
idxc = np.where(np.in1d(id2,ids))[0]
for c in idxc[::-1]:
ctable.remove_row(c)

if verbose: print("Updating repeater table...")
for m in members[::-1]:
rtable.remove_row(m)
old.remove(m)

new = range(len(rtable))
transform[old] = new


if verbose: print("Updating family table...")
np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(linewidth=sys.maxsize)
for n in range(len(ftable)):
Expand All @@ -529,12 +535,109 @@ def removeFamilies(rtable, ctable, dtable, ftable, cnums, opt):
ftable.cols.members[n] = np.array2string(transform[fmembers])[1:-1]
ftable.cols.core[n] = transform[core]
ftable.flush()


if verbose: print("Flushing tables...")
ftable.cols.printme[-1] = 1
rtable.flush()
dtable.flush()


def removeSmallFamilies(rtable, ctable, dtable, ftable, ttable, minmembers, maxdays, seedtime, opt, verbose=False, list_only=False):
"""
Searches for families that have less than minmembers and are more than maxdays old. Sends the cluster numbers to
removeFamilies for deletion.
rtable: Repeater table
ctable: Correlation matrix table
dtable: Deleted table
ftable: Families table
ttable: Trigger table
minmembers: Minimum number of members needed to keep a family
maxdays: Maximum number of days
seedtime: Date from which to measure maxdays
opt: Options object describing station/run parameters
verbose: Boolen value for printing
:return
cnums: List of family numbers that were removed
"""

"""
Programmer's notes:
ftable[i]["startTime"] : matplotlib integer : start of Family i
ftable[i]["longevity"] : int : longevity of family i in days
seedTime = UTCDateTime.nowutc()
startTime = UTCDateTime(num2date(ftable[i]["startTime"])) # UTCDateTime object
#longevity = ftable[i]["longevity"]*86400 # int (seconds), addable to UTCDateTime object
#lastTime = startTime + longevity # UTCDateTime object
#ageOfFamily = (nowTime - lastTime)/86400 # returns ageOfFamily in days (subtracting 2 UTCDateTime objects returns seconds)
ageOfFamily = (nowTime - startTime)/86400 # returns ageOfFamily in days (subtracting 2 UTCDateTime objects returns seconds)
a = ageOfFamily # age of Family i in days
Note: A negative age means the family started after the seed time. It will be kept.
"""

from matplotlib.dates import num2date
from obspy import UTCDateTime

if seedtime:
seedtime = UTCDateTime(seedtime)
else:
seedtime = UTCDateTime(num2date(ttable[-1]["startTimeMPL"])) # UTC time of last trigger

# If using list_only mode, automatically invoke verbose mode too
if list_only:
verbose = True

if verbose:
print("::: table.removeSmallFamilies()")
print("::: - minmembers : {}".format(minmembers))
print("::: - maxdays : {}".format(maxdays))
print("::: - seedtime : {}".format(seedtime))
print()

cnums = [] # list of cluster numbers to be removed
nremoved = 0 # number of repeaters removed
nFamiliesRemoved = 0 # number of familiese to be removed
if verbose:
print("::: Member count per Family :::")
print("#{:>12s} | {:>12s} | {:>12s} | {:<12s}".format("Family #", "Members", "Age (d)", "Fate"))
for i in range(len(ftable)):
fate = "keep" # initialize fate of family as "keep," for printing purposes only
n = len(np.fromstring(ftable[i]['members'], dtype=int, sep=' ')) # number of repeaters in the family
a = (seedtime - (UTCDateTime(num2date(ftable[i]["startTime"])))) / 86400 # a = age in days (measured from family beginning), see explanation above
if n < minmembers and a > maxdays: # if family has too few members and is too old
cnums.append(i) # append it to the list to remove
fate = "REMOVE" # update fate as "REMOVE"
nremoved += n # keep track of total number of repeaters removed
nFamiliesRemoved += 1 # keep track of number of families removed
if verbose:
print("#{:>12d} | {:12d} | {:>12.2f} | {:<12s}".format(i, n, a, fate))

if verbose:
print()
print("Remove families : {}".format(cnums))
print("# Families removed : {}/{}".format(len(cnums), len(ftable)))
percent_removed = nremoved/len(rtable)*100
print("# Repeaters removed : {}/{} ({:2.1f}%)".format(nremoved, len(rtable), percent_removed))
#print("# Repeaters removed : {}/{} ({:2.1f}%)".format(nremoved, len(rtable), nremoved/len(rtable)*100)) # throws a divide by zero error if no repeaters
print()

if list_only:
# If list_only (-l), do not execute removeFamilies()
print("Families listed but not removed. Remove -l flag to actually modify table.")
else:
# removeFamilies() if there are families to remove
if nFamiliesRemoved:
# print("Removing families...") # This verbosity message exists in removeFamilies()
removeFamilies(rtable, ctable, dtable, ftable, cnums, opt, verbose=verbose)
else:
if verbose:
print("No families to remove.")

return cnums


def clearExpiredOrphans(otable, opt, tend):

"""
Expand Down Expand Up @@ -766,4 +869,5 @@ def checkMPL(rtable, ftable, ttable, otable, dtable, opt):
# Fix families
for i in range(len(ftable.cols.startTime[:])):
if ftable.cols.startTime[i] < reftime:
ftable.cols.startTime[i] = ftable.cols.startTime[i] + epoch
ftable.cols.startTime[i] = ftable.cols.startTime[i] + epoch

88 changes: 88 additions & 0 deletions removeSmallFamily.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# REDPy - Repeating Earthquake Detector in Python
# Copyright (C) 2016-2020 Alicia Hotovec-Ellis ([email protected])
# Licensed under GNU GPLv3 (see LICENSE.txt)

import redpy.config
import redpy.table
import argparse

"""
Run this script to remove small families/clusters (i.e., families that have less than M members and are more than D days
old). Reclusters and remakes images when done. This module works by determining the families that need to be removed,
and then it passes those families to removeFamily.py. Note: Removing families from datasets with manny families and
repeaters may take a significant amount of time.
usage: removeSmallFamily.py [-h] [-v] [-c CONFIGFILE] [-m MINMEMBERS] [-d MAXDAYS] [-t SEEDTIME]
optional arguments:
-h, --help show this help message and exit
-v, --verbose increase written print statements
-m, --MINMEMBERS minimum family size to keep (default: 5)
-d, --MAXDAYS maximum age of a family (days) to keep; measured from first member in family
in other words: keep families less than or equal to d days old
(default: 0; i.e., removes all small families)
-t --SEEDTIME Time from which to compute families' ages (default: last trigger time UTC)
If a family started after the seedtime, it will be kept
-l --LIST Lists families to keep and remove, but does not actually modify anything. Automatically uses
verbose mode.
-c CONFIGFILE, --configfile CONFIGFILE
use configuration file named CONFIGFILE instead of
default settings.cfg
"""


parser = argparse.ArgumentParser(description=
"Run this script to manually remove small families/clusters")
parser.add_argument("-v", "--verbose", action="count", default=0,
help="increase written print statements")
parser.add_argument("-c", "--configfile",
help="use configuration file named CONFIGFILE instead of default settings.cfg")
parser.add_argument("-m", "--minmembers", type=int, default=5,
help="minimum family size to keep")
parser.add_argument("-d", "--maxdays", type=int, default=0,
help="maximum age of a family to be saved (default: 0, i.e., removes every small family regardless of age")
parser.add_argument("-t", "--seedtime", default=False,
help="time from which to compute families' repose times (YYYY-MM-DDTHH:MM:SS) (deafult: last trigger time UTC)")
parser.add_argument("-l", "--list", action="store_true", default=False,
help="list families to keep and remove, but do not execute")
args = parser.parse_args()


def main(args):

if args.configfile:
opt = redpy.config.Options(args.configfile)
if args.verbose: print("Using config file: {0}".format(args.configfile))
else:
opt = redpy.config.Options("settings.cfg")
if args.verbose: print("Using config file: settings.cfg")

if args.verbose: print("Opening hdf5 table: {0}".format(opt.filename))
h5file, rtable, otable, ttable, ctable, jtable, dtable, ftable = redpy.table.openTable(opt)

# Check for MPL version mismatch
redpy.table.checkMPL(rtable, ftable, ttable, otable, dtable, opt)

oldnClust = ftable.attrs.nClust

# Determines which families to remove, sends to table.removeFamilies(), outputs number of families removed
cnums = redpy.table.removeSmallFamilies(rtable, ctable, dtable, ftable, ttable, args.minmembers, args.maxdays,
args.seedtime, opt, list_only=args.list, verbose=args.verbose)

if len(cnums) > 0:
# Only update plots if there are families removed
if args.verbose: print("Creating plots...")
redpy.plotting.createPlots(rtable, ftable, ttable, ctable, otable, opt)

if args.verbose: print("Cleaning up old .html & .png files...")
redpy.plotting.cleanHTML(oldnClust, ftable.attrs.nClust, opt)
else:
if args.verbose: print("No families removed. No plots to update...")

if args.verbose: print("Closing table...")
h5file.close()
if args.verbose: print("Done")


if __name__ == "__main__":
main(args)

0 comments on commit 4ae2a9c

Please sign in to comment.