Skip to content

Commit

Permalink
Merge pull request #1189 from cmu-delphi/ds/format100
Browse files Browse the repository at this point in the history
style(black): format acquisition with `black`, line-length=100
  • Loading branch information
dshemetov authored Jun 26, 2023
2 parents 7fd6a90 + f93f020 commit 0609700
Show file tree
Hide file tree
Showing 33 changed files with 4,051 additions and 3,502 deletions.
22 changes: 22 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# EditorConfig helps developers define and maintain consistent
# coding styles between different editors and IDEs
# editorconfig.org

root = true

[*]
# We recommend keeping these settings unchanged
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true


[*.py]
# Change these settings to your own preference
indent_style = space
indent_size = 4


[*.md]
trim_trailing_whitespace = false
24 changes: 24 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# style(black): format cdc acquisition
980b0b7e80c7923b79e14fee620645e680785703
# style(black): format covidcast_nowcast acquisition
9e6ff16f599e8feec34a08dd1bddbc5eae347b55
# style(black): format ecdc acquisition
d1141d904da4e62992b97c92d5caebd8fadffd42
# style(black): format flusurv acquisition
08af0f6b7bff85bbc2b193b63b5abf6a16ba03e4
# style(black): format fluview acquisition
0133ef2042c4df8867e91595eb1f64873edb4632
# style(black): format ght acquisition
b8900a0bc846888885310911efd6e26459effa99
# style(black): format kcdc acquisition
a849384c884934b3b7c3c67b68aa6240277d6b6d
# style(black): format nidss acquisition
d04af3c02fda7708a16bec0952b1aa7475acaec7
# style(black): format paho acquisition
7f60fbba572c1b6e5153a9ef216895bdc2f7f5b3
# style(black): format quidel acquisition
b9ceb400d9248c8271e8342275664ac5524e335d
# style(black): format twitter acquisition
07ed83e5768f717ab0f9a62a9209e4e2cffa058d
# style(black): format wiki acquisition
923852eafa86b8f8b182d499489249ba8f815843
25 changes: 23 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@

[tool.black]
# line-length was 200 before this commit
line-length = 100
target-version = ['py38']
include = 'server,tests/server'

[tool.pylint]
[tool.pylint.'MESSAGES CONTROL']
max-line-length = 100
disable = [
'logging-format-interpolation',
# Allow pytest functions to be part of a class
'no-self-use',
'too-many-locals',
'too-many-arguments',
# Allow pytest classes to have one test
'too-few-public-methods',
]

[tool.pylint.'BASIC']
# Allow arbitrarily short-named variables.
variable-rgx = ['[a-z_][a-z0-9_]*']
argument-rgx = [ '[a-z_][a-z0-9_]*' ]
attr-rgx = ['[a-z_][a-z0-9_]*']

[tool.pylint.'DESIGN']
ignored-argument-names = ['(_.*|run_as_module)']
212 changes: 106 additions & 106 deletions src/acquisition/cdcp/cdc_dropbox_receiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,128 +29,128 @@


# location constants
# DROPBOX_BASE_DIR: dropbox folder that is listed for new files.
# DELPHI_BASE_DIR: local directory in which the downloaded zip file is created.
DROPBOX_BASE_DIR = "/cdc_page_stats"
DELPHI_BASE_DIR = "/common/cdc_stage"


def get_timestamp_string():
    """
    Return the current local date and time as a string.

    The format is "%Y%m%d_%H%M%S".
    """
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S")


def trigger_further_processing():
    """Add CDCP processing scripts to the Automation run queue.

    Connects to the "automation" database with the credentials in
    ``secrets.db.auto``, calls the stored procedure ``automation.RunStep(46)``,
    and commits. The cursor and the connection are closed even if the call or
    the commit raises.
    """

    # connect
    u, p = secrets.db.auto
    cnx = mysql.connector.connect(user=u, password=p, database="automation")
    try:
        cur = cnx.cursor()
        try:
            # add step "Process CDCP Data" to queue
            cur.execute("CALL automation.RunStep(46)")
        finally:
            cur.close()
        cnx.commit()
    finally:
        # disconnect
        cnx.close()


def fetch_data():
    """
    Check for new files on dropbox, download them, zip them, cleanup dropbox, and
    trigger further processing of new data.

    Returns early, without creating a zip file or triggering processing, when
    the dropbox folder lists no names ending in ".csv" or ".zip".

    Raises:
        Exception: if a download response status is not 200, if the size
            reported by dropbox differs from the "Content-Length" header (a
            missing header is treated as -1) or from the downloaded payload
            length, or if a moved file's path does not contain the archive
            directory.
    """

    # initialize dropbox api
    dbx = dropbox.Dropbox(secrets.cdcp.dropbox_token)

    # look for new CDC data files
    # NOTE(review): only the entries returned by this single listing call are
    # examined; confirm whether large folders need paginated listing.
    print(f"checking dropbox: {DROPBOX_BASE_DIR}")
    save_list = []
    for entry in dbx.files_list_folder(DROPBOX_BASE_DIR).entries:
        name = entry.name
        if name.endswith((".csv", ".zip")):
            print(f" download: {name}")
            save_list.append(name)
        else:
            print(f" skip: {name}")

    # determine if there's anything to be done
    if not save_list:
        print("did not find any new data files")
        return

    # download new files, saving them inside of a new zip file
    timestamp = get_timestamp_string()
    zip_path = f"{DELPHI_BASE_DIR}/dropbox_{timestamp}.zip"
    print(f"downloading into delphi:{zip_path}")
    with ZipFile(zip_path, "w", ZIP_DEFLATED) as zf:
        for name in save_list:
            # location of the file on dropbox
            dropbox_path = f"{DROPBOX_BASE_DIR}/{name}"
            print(f" {dropbox_path}")

            # start the download
            meta, resp = dbx.files_download(dropbox_path)

            # check status and length
            if resp.status_code != 200:
                raise Exception(["resp.status_code", resp.status_code])
            dropbox_len = meta.size
            print(f" need {int(dropbox_len)} bytes...")
            content_len = int(resp.headers.get("Content-Length", -1))
            if dropbox_len != content_len:
                info = ["dropbox_len", dropbox_len, "content_len", content_len]
                raise Exception(info)

            # finish the download, holding the data in this variable
            filedata = resp.content

            # check the length again
            payload_len = len(filedata)
            print(" downloaded")
            if dropbox_len != payload_len:
                info = ["dropbox_len", dropbox_len, "payload_len", payload_len]
                raise Exception(info)

            # add the downloaded file to the zip file
            zf.writestr(name, filedata)
            print(" added")

    # At this point, all the data is stored and awaiting further processing on
    # the delphi server.
    print(f"saved all new data in {zip_path}")

    # on dropbox, archive downloaded files so they won't be downloaded again
    archive_dir = f"archived_reports/processed_{timestamp}"
    print("archiving files...")
    for name in save_list:
        # source and destination
        dropbox_src = f"{DROPBOX_BASE_DIR}/{name}"
        dropbox_dst = f"{DROPBOX_BASE_DIR}/{archive_dir}/{name}"
        print(f" {dropbox_src} -> {dropbox_dst}")

        # move the file
        meta = dbx.files_move(dropbox_src, dropbox_dst)

        # sanity check
        if archive_dir not in meta.path_lower:
            raise Exception(f"failed to move {name}")

    # finally, trigger the usual processing flow
    print("triggering processing flow")
    trigger_further_processing()
    print("done")


def main():
    """Script entry point: run a single fetch of new data."""
    # fetch new data
    fetch_data()


# Run the fetch only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Loading

0 comments on commit 0609700

Please sign in to comment.