forked from eurec4a/eurec4a-environment
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HALO flight decomposition (eurec4a#6)
adds: * utility for downloading and loading JOANNE level 3 dataset * functions to download and parse HALO flight segment yaml-files into xarray dataset * function to annotate dataset with HALO flight segment information so that this can be used to create aggregations on dataset later (for example calculate mean profiles).
- Loading branch information
Showing
7 changed files
with
188 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
*.pyc | ||
.*.sw* | ||
.DS_Store | ||
*.nc | ||
data/ |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import xarray as xr | ||
|
||
from ..source_data import get_halo_flight_legs | ||
|
||
|
||
def annotate_with_halo_flight_legs(ds, time="time", legtype="circle"): | ||
""" | ||
Add halo flight-leg information to the dataset provided by matching on | ||
`time` coordinate, HALO flight leg variables start with `halo_`. Once added | ||
these extra variables can be used to aggregate over flight legs, for | ||
example: | ||
>> ds_joanne = open_joanne_dataset() | ||
>> ds = annotate_with_halo_flight_legs(ds_joanne, time="launch_time", legtype="circle") | ||
>> ds.groupby('halo_segment_id') | ||
See [xarray.groupby](http://xarray.pydata.org/en/stable/groupby.html) on | ||
how to calculate e.g. mean per-leg or apply other functions. You can also | ||
simply call `list(annotate_with_halo_flight_legs(...))` to get individual | ||
datasets per leg | ||
""" | ||
ds_legs = get_halo_flight_legs(legtype=legtype) | ||
|
||
# ensure we have time as a coordinate | ||
org_dim = None | ||
try: | ||
ds.isel(**{time: 0}) | ||
except ValueError: | ||
org_dim = ds[time].dims[0] | ||
ds = ds.swap_dims({org_dim: time}) | ||
|
||
annotated = [] | ||
for segment_id in ds_legs.segment.values: | ||
ds_segment = ds_legs.sel(segment=segment_id) | ||
ds_ = ds.sel(**{time: slice(ds_segment.start, ds_segment.end)}) | ||
# only keep segments which contain data | ||
if ds_[time].count() == 0: | ||
continue | ||
for v in ds_segment.data_vars: | ||
ds_[f"halo_{v}"] = ds_segment[v] | ||
annotated.append(ds_) | ||
|
||
ds_annotated = xr.concat(annotated, dim="launch_time") | ||
|
||
if org_dim is not None: | ||
ds_annotated = ds_annotated.swap_dims({time: org_dim}) | ||
return ds_annotated |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
from pathlib import Path | ||
from datetime import datetime, timedelta | ||
|
||
from tqdm import tqdm | ||
import requests | ||
import xarray as xr | ||
import yaml | ||
|
||
|
||
JOANNE_LEV3_URL = "https://owncloud.gwdg.de/index.php/s/uy2eBvEI2pHRVKf/download?path=%2FLevel_3&files=EUREC4A_JOANNE_Dropsonde-RD41_Level_3_v0.5.7-alpha%2B0.g45fe69d.dirty.nc" | ||
|
||
HALO_FLIGHT_DECOMP_DATE_FORMAT = "%Y%m%d" | ||
HALO_FLIGHT_DECOMP_URL = "https://raw.githubusercontent.com/eurec4a/halo-flight-phase-separation/master/flight_phase_files/EUREC4A_HALO_Flight-Segments_{date}_v1.0.yaml" | ||
|
||
HALO_FLIGHT_DAYS = [ | ||
"2020-01-19", | ||
"2020-01-22", | ||
"2020-01-24", | ||
"2020-01-26", | ||
"2020-01-28", | ||
"2020-01-30", | ||
"2020-01-31", | ||
"2020-02-02", | ||
"2020-02-05", | ||
"2020-02-07", | ||
"2020-02-09", | ||
"2020-02-11", | ||
"2020-02-13", | ||
"2020-02-15", | ||
] | ||
|
||
|
||
def _download_file(url, block_size=8192): | ||
""" | ||
Download a remote file and show a progress bar during download | ||
Based on https://stackoverflow.com/a/16696317/271776 and | ||
https://stackoverflow.com/a/37573701/271776 | ||
""" | ||
local_filename = url.split("/")[-1] | ||
if "=" in local_filename: | ||
local_filename = local_filename.split("=")[-1] | ||
|
||
# put files in data/ locally | ||
local_filename = Path("data") / local_filename | ||
|
||
if Path(local_filename).exists(): | ||
pass | ||
else: | ||
local_filename.parent.mkdir(exist_ok=True, parents=True) | ||
with requests.get(url, stream=True) as r: | ||
r.raise_for_status() | ||
total_size_in_bytes = int(r.headers.get("content-length", 0)) | ||
progress_bar = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) | ||
with open(local_filename, "wb") as f: | ||
for chunk in r.iter_content(block_size): | ||
progress_bar.update(len(chunk)) | ||
f.write(chunk) | ||
return local_filename | ||
|
||
|
||
def open_joanne_dataset(level=3): | ||
""" | ||
Quick-hack routine to download whole JOANNE dataset to local path and open it with xarray | ||
This will be replaced with intake-based OPeNDAP download once JOANNE is on AERIS | ||
""" | ||
if not level == 3: | ||
raise NotImplementedError(level) | ||
filename = _download_file(JOANNE_LEV3_URL) | ||
ds = xr.open_dataset(filename) | ||
return ds.sortby("launch_time") | ||
|
||
|
||
def _open_halo_decomp_dataset(date): | ||
date_str = date.strftime(HALO_FLIGHT_DECOMP_DATE_FORMAT) | ||
url = HALO_FLIGHT_DECOMP_URL.format(date=date_str) | ||
filename = _download_file(url) | ||
with open(filename) as f: | ||
return yaml.load(f, Loader=yaml.FullLoader) | ||
|
||
|
||
def get_halo_flight_legs(legtype="all"): | ||
""" | ||
Turn the HALO flight legs files into a xarray Dataset | ||
""" | ||
segment_variables_to_keep = {"start", "end", "segment_id", "name"} | ||
datasets = [] | ||
for date_str in HALO_FLIGHT_DAYS: | ||
t = datetime.strptime(date_str, "%Y-%m-%d") | ||
try: | ||
flightinfo = _open_halo_decomp_dataset(date=t) | ||
except requests.exceptions.HTTPError: | ||
# some days don't have measurements, should probably handle this better... | ||
pass | ||
t += timedelta(hours=24) | ||
dss_segments = [] | ||
segments = flightinfo["segments"] | ||
if legtype != "all": | ||
segments = filter(lambda s: legtype in s["kinds"], segments) | ||
|
||
for segment in segments: | ||
ds_segment = xr.Dataset(coords=dict(segment=segment["segment_id"])) | ||
for v in segment_variables_to_keep: | ||
ds_segment[v] = segment[v] | ||
dss_segments.append(ds_segment) | ||
|
||
if len(dss_segments) > 0: | ||
ds_flight = xr.concat(dss_segments, dim="segment") | ||
ds_flight["flight_num"] = flightinfo["name"] | ||
datasets.append(ds_flight) | ||
return xr.concat(datasets, dim="segment") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
xarray | ||
netcdf4 | ||
typhon | ||
tqdm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from eurec4a_env.decomposition import time as time_decomposition | ||
from eurec4a_env.source_data import open_joanne_dataset | ||
|
||
|
||
def test_time_decomposition(): | ||
ds_joanne = open_joanne_dataset() | ||
ds = time_decomposition.annotate_with_halo_flight_legs( | ||
ds_joanne, time="launch_time", legtype="circle" | ||
) | ||
|
||
ds_mean_per_segment = ds.groupby("halo_segment_id").mean() | ||
assert ds_mean_per_segment.halo_segment_id.count() > 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import eurec4a_env.source_data | ||
|
||
|
||
def test_download_joanne_level3(): | ||
ds = eurec4a_env.source_data.open_joanne_dataset() | ||
assert ds.height.count() > 0 | ||
assert ds.sounding.count() > 0 | ||
|
||
|
||
def test_download_halo_flight_segments(): | ||
ds = eurec4a_env.source_data.get_halo_flight_legs() | ||
N_segments = ds.flight_num.count() | ||
assert N_segments > 0 | ||
|
||
ds = eurec4a_env.source_data.get_halo_flight_legs(legtype="circle") | ||
ds.flight_num.count() < N_segments |