Skip to content

Commit

Permalink
HALO flight decomposition (eurec4a#6)
Browse files Browse the repository at this point in the history
adds:

* utility for downloading and loading JOANNE level 3 dataset
* functions to download and parse HALO flight segment yaml-files into xarray dataset
* function to annotate dataset with HALO flight segment information
   so that this can be used to create aggregations on dataset later (for
   example calculate mean profiles).
  • Loading branch information
leifdenby authored Jul 28, 2020
1 parent 631157f commit d6ca587
Show file tree
Hide file tree
Showing 7 changed files with 188 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.pyc
.*.sw*
.DS_Store
*.nc
data/
Empty file.
47 changes: 47 additions & 0 deletions eurec4a_env/decomposition/time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import xarray as xr

from ..source_data import get_halo_flight_legs


def annotate_with_halo_flight_legs(ds, time="time", legtype="circle"):
"""
Add halo flight-leg information to the dataset provided by matching on
`time` coordinate, HALO flight leg variables start with `halo_`. Once added
these extra variables can be used to aggregate over flight legs, for
example:
>> ds_joanne = open_joanne_dataset()
>> ds = annotate_with_halo_flight_legs(ds_joanne, time="launch_time", legtype="circle")
>> ds.groupby('halo_segment_id')
See [xarray.groupby](http://xarray.pydata.org/en/stable/groupby.html) on
how to calculate e.g. mean per-leg or apply other functions. You can also
simply call `list(annotate_with_halo_flight_legs(...))` to get individual
datasets per leg
"""
ds_legs = get_halo_flight_legs(legtype=legtype)

# ensure we have time as a coordinate
org_dim = None
try:
ds.isel(**{time: 0})
except ValueError:
org_dim = ds[time].dims[0]
ds = ds.swap_dims({org_dim: time})

annotated = []
for segment_id in ds_legs.segment.values:
ds_segment = ds_legs.sel(segment=segment_id)
ds_ = ds.sel(**{time: slice(ds_segment.start, ds_segment.end)})
# only keep segments which contain data
if ds_[time].count() == 0:
continue
for v in ds_segment.data_vars:
ds_[f"halo_{v}"] = ds_segment[v]
annotated.append(ds_)

ds_annotated = xr.concat(annotated, dim="launch_time")

if org_dim is not None:
ds_annotated = ds_annotated.swap_dims({time: org_dim})
return ds_annotated
110 changes: 110 additions & 0 deletions eurec4a_env/source_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from pathlib import Path
from datetime import datetime, timedelta

from tqdm import tqdm
import requests
import xarray as xr
import yaml


JOANNE_LEV3_URL = "https://owncloud.gwdg.de/index.php/s/uy2eBvEI2pHRVKf/download?path=%2FLevel_3&files=EUREC4A_JOANNE_Dropsonde-RD41_Level_3_v0.5.7-alpha%2B0.g45fe69d.dirty.nc"

HALO_FLIGHT_DECOMP_DATE_FORMAT = "%Y%m%d"
HALO_FLIGHT_DECOMP_URL = "https://raw.githubusercontent.com/eurec4a/halo-flight-phase-separation/master/flight_phase_files/EUREC4A_HALO_Flight-Segments_{date}_v1.0.yaml"

HALO_FLIGHT_DAYS = [
"2020-01-19",
"2020-01-22",
"2020-01-24",
"2020-01-26",
"2020-01-28",
"2020-01-30",
"2020-01-31",
"2020-02-02",
"2020-02-05",
"2020-02-07",
"2020-02-09",
"2020-02-11",
"2020-02-13",
"2020-02-15",
]


def _download_file(url, block_size=8192):
"""
Download a remote file and show a progress bar during download
Based on https://stackoverflow.com/a/16696317/271776 and
https://stackoverflow.com/a/37573701/271776
"""
local_filename = url.split("/")[-1]
if "=" in local_filename:
local_filename = local_filename.split("=")[-1]

# put files in data/ locally
local_filename = Path("data") / local_filename

if Path(local_filename).exists():
pass
else:
local_filename.parent.mkdir(exist_ok=True, parents=True)
with requests.get(url, stream=True) as r:
r.raise_for_status()
total_size_in_bytes = int(r.headers.get("content-length", 0))
progress_bar = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True)
with open(local_filename, "wb") as f:
for chunk in r.iter_content(block_size):
progress_bar.update(len(chunk))
f.write(chunk)
return local_filename


def open_joanne_dataset(level=3):
"""
Quick-hack routine to download whole JOANNE dataset to local path and open it with xarray
This will be replaced with intake-based OPeNDAP download once JOANNE is on AERIS
"""
if not level == 3:
raise NotImplementedError(level)
filename = _download_file(JOANNE_LEV3_URL)
ds = xr.open_dataset(filename)
return ds.sortby("launch_time")


def _open_halo_decomp_dataset(date):
date_str = date.strftime(HALO_FLIGHT_DECOMP_DATE_FORMAT)
url = HALO_FLIGHT_DECOMP_URL.format(date=date_str)
filename = _download_file(url)
with open(filename) as f:
return yaml.load(f, Loader=yaml.FullLoader)


def get_halo_flight_legs(legtype="all"):
"""
Turn the HALO flight legs files into a xarray Dataset
"""
segment_variables_to_keep = {"start", "end", "segment_id", "name"}
datasets = []
for date_str in HALO_FLIGHT_DAYS:
t = datetime.strptime(date_str, "%Y-%m-%d")
try:
flightinfo = _open_halo_decomp_dataset(date=t)
except requests.exceptions.HTTPError:
# some days don't have measurements, should probably handle this better...
pass
t += timedelta(hours=24)
dss_segments = []
segments = flightinfo["segments"]
if legtype != "all":
segments = filter(lambda s: legtype in s["kinds"], segments)

for segment in segments:
ds_segment = xr.Dataset(coords=dict(segment=segment["segment_id"]))
for v in segment_variables_to_keep:
ds_segment[v] = segment[v]
dss_segments.append(ds_segment)

if len(dss_segments) > 0:
ds_flight = xr.concat(dss_segments, dim="segment")
ds_flight["flight_num"] = flightinfo["name"]
datasets.append(ds_flight)
return xr.concat(datasets, dim="segment")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
xarray
netcdf4
typhon
tqdm
12 changes: 12 additions & 0 deletions tests/test_decomposition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from eurec4a_env.decomposition import time as time_decomposition
from eurec4a_env.source_data import open_joanne_dataset


def test_time_decomposition():
ds_joanne = open_joanne_dataset()
ds = time_decomposition.annotate_with_halo_flight_legs(
ds_joanne, time="launch_time", legtype="circle"
)

ds_mean_per_segment = ds.groupby("halo_segment_id").mean()
assert ds_mean_per_segment.halo_segment_id.count() > 0
16 changes: 16 additions & 0 deletions tests/test_source_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import eurec4a_env.source_data


def test_download_joanne_level3():
ds = eurec4a_env.source_data.open_joanne_dataset()
assert ds.height.count() > 0
assert ds.sounding.count() > 0


def test_download_halo_flight_segments():
ds = eurec4a_env.source_data.get_halo_flight_legs()
N_segments = ds.flight_num.count()
assert N_segments > 0

ds = eurec4a_env.source_data.get_halo_flight_legs(legtype="circle")
ds.flight_num.count() < N_segments

0 comments on commit d6ca587

Please sign in to comment.