Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor remaining functions from Issue #11 #31

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a755119
adding a messy code for iteration
annalea-albright Jul 29, 2020
edc6ebc
add messy file, BL height
annalea-albright Jul 29, 2020
9bb5db2
minor change
annalea-albright Jul 29, 2020
eec0c98
minor edit
annalea-albright Jul 29, 2020
0e63e1f
calculate density
annalea-albright Jul 30, 2020
3322c86
Update calculate_density.py
annalea-albright Jul 30, 2020
686adf1
test for calc_density function
annalea-albright Jul 30, 2020
f2312e6
fixed coordinates
annalea-albright Jul 30, 2020
22ba43e
minor edit
annalea-albright Jul 30, 2020
364a2c4
add test file
annalea-albright Jul 30, 2020
f7e1075
updating files
annalea-albright Jul 30, 2020
675a4b2
remove redundant code
annalea-albright Jul 30, 2020
3a198e8
Merge branch 'master' of https://github.com/eurec4a/eurec4a-environme…
annalea-albright Jul 30, 2020
1436af2
Update eurec4a_environment/variables/calc_density.py
annalea-albright Jul 31, 2020
5924a80
Delete mixed_layer_height_from_gradient.py
annalea-albright Aug 12, 2020
13c4d4f
refactor remaining functions
annalea-albright Aug 20, 2020
c0bbb7c
minor changes
annalea-albright Aug 24, 2020
fdb0668
Merge branch 'master' into define_vertical_structure
leifdenby Aug 31, 2020
d836e54
Merge branch 'master' into define_vertical_structure
leifdenby Aug 31, 2020
6795d30
Merge branch 'master' into define_vertical_structure
leifdenby Feb 3, 2021
b619ec1
apply black and fix flake8 errors
leifdenby Feb 3, 2021
cbd6bb3
start refactoring mixed-layer peak RH linfit calculation
leifdenby Feb 3, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions eurec4a_environment/plot/profile.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import matplotlib.dates as mdates
import seaborn as sns
import typhon

sns.set(
context="notebook",
Expand Down
90 changes: 82 additions & 8 deletions eurec4a_environment/variables/boundary_layer/mixed_layer_height.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import statsmodels.api as sm


def calc_peak_RH(ds, altitude="alt", rh="RH", z_min=200.0, z_max=900.0):
def calc_peak_RH(ds, altitude="height", rh="rh", z_min=200.0, z_max=900.0):
"""
Calculate height at maximum relative humidity values
"""
Expand All @@ -28,9 +28,9 @@ def calc_peakRH_linearize(
ds,
altitude="height",
rh="rh",
time_dim="sounding",
time="sounding",
z_min=200.0,
z_max=900.0,
z_max=1500.0,
z_min_lin=50.0,
z_max_lin=400.0,
):
Expand All @@ -41,7 +41,7 @@ def calc_peakRH_linearize(
assume linear profile as an idealization
Inputs:
-- ds: dataset
-- altitude, rh, time_dim: variable names
-- altitude, rh, time: variable names
-- z_min and z_max: lower and upper bounds for mixed layer height
-- z_min_lin and z_max_lin: bounds for linearization of observed RH profile
Outputs:
Expand All @@ -51,16 +51,14 @@ def calc_peakRH_linearize(

dz = int(ds[altitude].diff(dim=altitude)[1]) # dz=10m

mixed_layer = np.logical_and(
ds[altitude] >= z_min, ds[altitude] <= 1500
) # enforce z_max later
mixed_layer = np.logical_and(ds[altitude] >= z_min, ds[altitude] <= z_max)
ml_linfit = np.logical_and(
ds[altitude] >= z_min_lin, ds[altitude] <= z_max_lin
) # for linearized RH profile

for i in range(len(ds[rh])):

rh_profile = ds[rh].isel({time_dim: i}).interpolate_na(dim=altitude)
rh_profile = ds[rh].isel({time: i}).interpolate_na(dim=altitude)
X_height = rh_profile[ml_linfit][altitude]
X = sm.add_constant(X_height.values) # add intercept
model = sm.OLS(
Expand Down Expand Up @@ -93,3 +91,79 @@ def calc_peakRH_linearize(
da.attrs["long_name"] = "mixed layer height (from RH peak and linearization)"
da.attrs["units"] = "m"
return da


def calc_from_gradient(
ds, var, threshold, z_min=200, altitude="height", time="sounding"
):
"""
Find mixed layer height as layer over which x(z+1) - x(z) < threshold

Inputs:
-- ds: Dataset
-- var: variable name
-- threshold: in units of variable (i.e. g/kg or K). Maximum difference allowed in one vertical step, dz=10m, i.e. threshold_q = 0.4 g/kg,
or vertical gradient dq/dz < 0.04 g/kg/m

Outputs: DataArray containing mixed layer height from gradient method

Note that function is slow and should be optimized
"""

def calculateHmix_var(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to define this outside of the function in case it could be used elsewhere? If you don't want it as a generally visible function you can rename it with an underscore in front (i.e. _calculateHmix_var)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raising an issue for higher precision tests sounds great -- thank you!

Copy link
Collaborator Author

@annalea-albright annalea-albright Aug 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And yes that sounds good with _calculateHmix_var, will do. Thanks for the tip

density_profile,
var_profile,
threshold,
z_min,
altitude="height",
time="sounding",
):

var_diff = 0
numer = 0
denom = 0
var_mix = 0

k = int(z_min / 10) # enforce lower limit = 200m. k is index of lower limit

# var(z) - weighted_mean(z-1) < threshold
while abs(var_diff) < threshold:

numer += 0.5 * (
density_profile[k + 1] * var_profile[k + 1]
+ density_profile[k] * var_profile[k]
)
denom += 0.5 * (density_profile[k + 1] + density_profile[k])
var_mix = numer / denom
k += 1
var_diff = (var_profile[k] - var_mix).values

hmix = var_profile[altitude].values[k]
return hmix

# call inner function
from eurec4a_environment.variables.calc_density import calc_density
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better to move this import to top of module, that way you're sure the import is successful before we attempt to call the function


da_density = calc_density(ds)

hmix_vec = np.zeros(len(ds[var]))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change this slightly so that it doesn't necessarily assume there is just one other dimension (here that is time). You could do

dims = ds.dims
del dims[altiude]

# and then apply your function that works on one column
def _calc_height(ds_column):
   # add function definition here
   
da_height = ds.stack(dict(n=dims)).groupby("n").apply(_calc_height).unstack("n")

This is a general thing across all the functions we've implemented I think. But just wondering what you think?

Copy link
Collaborator Author

@annalea-albright annalea-albright Aug 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good! I'm wondering how to do something similar to the for-loop to load individual variable and density profiles?

da_height = ds.stack(dict(n=dims)).groupby("n").apply(_calc_height).unstack("n")

vs.

hmix_vec = np.zeros(len(ds[var]))
    for i in range((len(ds[var]))):
        density_profile = da_density.isel({time: i})
        var_profile = ds[var].isel({time: i})
        hmix_vec[i] = _calc_height(
            density_profile,
            var_profile,
            threshold,
            z_min,
            altitude="height",
            time="sounding",
        )

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think this looks good. I hadn't really considered this earlier, but most of the calculations we do operate on a single-column at a time, so this is probably a general pattern we want to follow. Where the calculation only works if a single column is used at a time we can use stack(..).groupby(...).apply(...).unstack(...).

Also, where you're defining a nested function (here _calc_height) inside another function you don't need to pass in the variables already available (for example z_min). Actually it's best to avoid doing that in case you accidentally redefine what a variable means within a function (by passing in the wrong thing for for example z_min or setting it to something else in the call).

for i in range((len(ds[var]))):
density_profile = da_density.isel({time: i})
var_profile = ds[var].isel({time: i})
hmix_vec[i] = calculateHmix_var(
density_profile,
var_profile,
threshold,
z_min,
altitude="height",
time="sounding",
)

dims = list(ds.dims.keys())
# drop the height coord
del dims[dims.index(altitude)]
da = xr.DataArray(hmix_vec, dims=dims, coords={d: ds[d] for d in dims})
da.attrs["long_name"] = f"mixed layer height (from gradient method using {var})"
da.attrs["units"] = "m"

return da
42 changes: 42 additions & 0 deletions eurec4a_environment/variables/calc_density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import xarray as xr
from ..constants import Rd, Rv, eps
#from constants import Rd, Rv, eps


def calc_density(ds, pres="p", temp="T", specific_humidity="q", altitude="height"):

# equation: rho = P/(Rd * Tv), where Tv = T(1 + mr/eps)/(1+mr)

# convert pressure from hPa to Pa
if ds[pres].max().values < 1200:
pressure = ds[pres] * 100
else:
pressure = ds[pres]
# convert temperature from Celsius to Kelvin
if ds[temp].max().values < 100:
temp_K = ds[temp] + 273.15
else:
temp_K = ds[temp]
# convert specific humidity from g/kg to kg/kg
if ds[specific_humidity].max().values > 10:
q = ds[specific_humidity] / 1000
else:
q = ds[specific_humidity]

mixing_ratio = q / (1 - q)

density = (pressure) / (
Rd * (temp_K) * (1 + (mixing_ratio / eps)) / (1 + mixing_ratio)
)
density = density.transpose(transpose_coords=True)

dims = list(ds.dims.keys())
da = xr.DataArray(density, dims=dims, coords={d: ds[d] for d in dims})
da.attrs["long_name"] = "density of air"
da.attrs["units"] = "kg/m3"

return da
42 changes: 42 additions & 0 deletions eurec4a_environment/variables/calc_static_stability.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import xarray as xr


def calc_static_stability(ds, pres="p", temp="T", theta="theta", altitude="height"):

""" Equation from: https://www.ncl.ucar.edu/Document/Functions/Contributed/static_stability.shtml
S = -(T/theta)*d(theta)/dp. From Bluestein (1992), pg 197, eqn 4.3.8

Static stability measures the gravitational resistance of an atmosphere to vertical displacements.
It results from fundamental buoyant adjustments, and so it is determined by the vertical stratification
of density or potential temperature.
"""

# !! edit these lines based on units function !!
# Celsius to Kelvin
if ds[temp].max().values < 100:
temp_K = ds[temp] + 273.15
else:
temp_K = ds[temp]

# convert Pa to hPa
if ds[pres].max().values > 1400:
ds[pres] = ds[pres] / 100

static_stability = -(temp_K / ds[theta]) * (
ds[theta].diff(dim=altitude) / ds[pres].diff(dim=altitude)
)
static_stability = static_stability.transpose(
transpose_coords=True
) # same dimension order as original dataset

dims = list(static_stability.dims)
da = xr.DataArray(
static_stability, dims=dims, coords={d: static_stability[d] for d in dims}
)
da.attrs["long_name"] = "static stability"
da.attrs["units"] = "K/hPa"

return da
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import xarray as xr


def calc_inversion_static_stability(
ds, time="sounding", altitude="height", z_min=1000, z_max=3000
):

"""
Returns inversion height defined as the height where static stability profiles
first exceed 0.1 K/hPa between given heights
Adapted from Bony and Stevens 2018, Measuring Area-Averaged Vertical Motions with Dropsondes, p 772
To do: combine static stability criterion with one for moisture (i.e. hydrolapse)
"""

from eurec4a_environment.variables.calc_static_stability import (
calc_static_stability,
)

ds_inversion = ds.sel({altitude: slice(z_min, z_max)})
da_static_stability = calc_static_stability(ds_inversion)

inversion_height_stability = np.zeros(len(da_static_stability[time]))
for i in range(len(da_static_stability[time])):
stability_sounding = da_static_stability.isel({time: i}).values
idx_stability = np.argmax(stability_sounding > 0.1)
inversion_height_stability[i] = da_static_stability[altitude][idx_stability]

dims = list(da_static_stability.dims)
# drop the height coord
del dims[dims.index(altitude)]

da = xr.DataArray(
inversion_height_stability,
dims=dims,
coords={d: da_static_stability[d] for d in dims},
)
da.attrs["long_name"] = "inversion base height, static stability > 0.1 K/hPa"
da.attrs["units"] = "m"

return da


def calc_inversion_grad_RH_T(
ds,
time="sounding",
altitude="height",
temperature="T",
rh="rh",
z_min=1000,
z_max=2500,
):

"""
Inversion base following Grindinger et al, 1992; Cao et al 2007
dT/dz > 0 and dRH/dz < 0
To do: find all inversion bases and heights, not just first inversion base
"""

ds_inversion = ds.sel({altitude: slice(z_min, z_max)})
gradient_RH = ds_inversion[rh].differentiate(coord=altitude)
gradient_T = ds_inversion[temperature].differentiate(coord=altitude)
inversion_levels = gradient_T[altitude].where(
(gradient_RH <= 0) & (gradient_T >= 0)
)

num_iters = len(inversion_levels[time])
inv_base_height_gradients = np.zeros(num_iters)
for i in range(num_iters):
alt_inv_da = inversion_levels.isel({time: i})
inversion_alts = alt_inv_da[~np.isnan(alt_inv_da)]
if inversion_alts[altitude].size == 0:
print(f"sounding {i} has no inversion below {z_max} m")
continue
inv_base_height_gradients[i] = inversion_alts[0] # find first inversion base
inv_base_height_gradients[inv_base_height_gradients == 0] = np.nan

dims = list(ds.dims.keys())
# drop the height coord
del dims[dims.index(altitude)]
da = xr.DataArray(
inv_base_height_gradients, dims=dims, coords={d: ds[d] for d in dims}
)
da.attrs["long_name"] = "inversion base height from RH and T gradient"
da.attrs["units"] = "m"
return da

# sns.distplot(inv_base_height_gradients)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this accidental leftovers or something you want to incorporate later?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was thinking we could possibly incorporate this as a plot, but it's not very high priority

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think doing plotting in a different module sounds like a good idea. You could always add a second pull-request with some plots for this :)


# plot vertical profiles dRH/dz and dT/dz

# fig, ax = plt.subplots(1,2,figsize=(20,8))
# for i in range(5): # len(gradient_RH)
# ax[0].plot(gradient_RH.isel({time: i}), gradient_RH[altitude].values,color="lightgrey", linewidth=2,alpha=0.5)
# ax[0].plot(gradient_RH.mean(dim=time),gradient_RH[altitude].values,linewidth=4, color='black')
# ax[0].spines['right'].set_visible(False)
# ax[0].spines['top'].set_visible(False)
# ax[0].set_xlabel('$\partial$RH/$\partial$z')
# ax[0].set_ylabel('Altitude / m')
# ax[0].set_ylim([0,z_max])
# ax[0].axvline(x=0, color='black')
# ax[0].set_xlim([ -1, 1])

# for i in range(5):
# ax[1].plot(gradient_T.isel({time: i}), gradient_T[altitude].values,color="lightgrey", linewidth=2,alpha=0.5)
# ax[1].plot(gradient_T.mean(dim=time),gradient_T[altitude].values,linewidth=4, color='black')
# ax[1].spines['right'].set_visible(False)
# ax[1].spines['top'].set_visible(False)
# ax[1].set_xlabel('$\partial$T/$\partial$z (dz=10m)' )
# #ax[1].set_ylabel('Altitude / m')
# ax[1].set_ylim([0,z_max])
# ax[1].axvline(x=0, color='black')
# ax[1].set_xlim([-0.03, 0.03])
18 changes: 15 additions & 3 deletions tests/test_boundary_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ def test_LCL_Bolton(ds_isentropic_test_profiles):
def test_mixed_layer_height_RHmax(ds_isentropic_test_profiles):
ds = ds_isentropic_test_profiles
# set the RH profile so we know where the peak should be
z0 = 600.
z0 = 600.0
z = ds.alt
ds['rh'] = 1.0 - np.maximum(np.abs(z0 - z), 0) / z0
ds["rh"] = 1.0 - np.maximum(np.abs(z0 - z), 0) / z0
da_rh_peak = boundary_layer.mixed_layer_height.calc_peak_RH(
ds=ds, altitude="alt", rh="rh"
)
Expand All @@ -29,4 +29,16 @@ def test_mixed_layer_height_RH_lin():
ds = ds.isel(sounding=slice(0, 10))
da_lin = boundary_layer.mixed_layer_height.calc_peakRH_linearize(ds)
assert len(da_lin) == 10
assert np.all(da_lin < 1500.)
assert np.all(da_lin < 1500.0)


def test_mixed_layer_height_gradient():
ds = eurec4a_environment.source_data.open_joanne_dataset()
ds = ds.isel(sounding=slice(0, 10))
# adjust threshold based on units 0.4 g/kg/10m or 0.0004 kg/kg/10m
da_gradient = boundary_layer.mixed_layer_height.calc_from_gradient(
ds, var="q", threshold=0.4 / 1000
)
assert len(da_gradient) == 10
assert np.all(da_gradient < 1500.0)
assert da_gradient.units == "m"
16 changes: 16 additions & 0 deletions tests/test_density.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-

import numpy as np

from eurec4a_environment.variables.calc_density import calc_density
import eurec4a_environment.source_data


def test_density_calculation():
ds = eurec4a_environment.source_data.open_joanne_dataset()
ds = ds.isel(sounding=slice(0, 10))
da_density = calc_density(ds)
# check that we get some sensible numbers out for the density
assert da_density.units == "kg/m3"
assert da_density.max() < 2.5
assert da_density.min() > 0.0
Loading