Merge pull request #24 from strongio/develop
Develop
jwdink authored Sep 3, 2024
2 parents 4cb937b + e3b3d42 · commit 148af61
Showing 23 changed files with 712 additions and 431 deletions.
11 changes: 11 additions & 0 deletions docs/README.md
@@ -0,0 +1,11 @@
+To compile the docs, you must install the package with the `docs` extra:
+
+```bash
+pip install git+https://github.com/strongio/torchcast.git#egg=torchcast[docs]
+```
+
+Then from project root run:
+
+```bash
+sphinx-build -b html ./docs ./docs/_html
+```
2 changes: 1 addition & 1 deletion docs/api/kalman_filter.rst
@@ -1,7 +1,7 @@
 Kalman Filter
 =============

-.. automodule:: torchcast.kalman_filter
+.. automodule:: torchcast.kalman_filter.kalman_filter
    :members: KalmanFilter
    :exclude-members: ss_step_cls
    :show-inheritance:
15 changes: 10 additions & 5 deletions docs/examples/air_quality.py
@@ -81,7 +81,9 @@
 def inverse_transform(df):
     df = df.copy()
     # bias-correction for log-transform (see https://otexts.com/fpp2/transformations.html#bias-adjustments)
-    df['mean'] = df['mean'] + .5 * (df['upper'] - df['lower']) / 1.96 ** 2
+    df['mean'] = df['mean'] + .5 * df['std'] ** 2
+    df['lower'] = df['mean'] - 1.96 * df['std']
+    df['upper'] = df['mean'] + 1.96 * df['std']
     # inverse the log10:
     df[['actual', 'mean', 'upper', 'lower']] = 10 ** df[['actual', 'mean', 'upper', 'lower']]
     df['measure'] = df['measure'].str.replace('_log10', '')
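A note on the bias-correction fix above: the replaced expression was linear in the interval width, but the adjustment from the linked fpp2 section needs half the *variance* on the log scale. The new code therefore works from `std` directly (exposed via `conf=None` below) and rebuilds the interval around the adjusted mean. A minimal standalone sketch of the same back-transform, with illustrative numbers:

```python
mu, sigma = 1.2, 0.3  # illustrative forecast mean/std on the log10 scale

mu_adj = mu + .5 * sigma ** 2  # half-variance shift, as in the new lines above
lower, upper = mu_adj - 1.96 * sigma, mu_adj + 1.96 * sigma

print(10 ** mu, 10 ** mu_adj)    # naive vs. bias-adjusted point forecast
print(10 ** lower, 10 ** upper)  # interval back-transformed the same way
```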
@@ -94,7 +96,7 @@ def inverse_transform(df):
     out_timesteps=dataset_pm_univariate.tensors[0].shape[1]
 )

-df_forecast = inverse_transform(forecast.to_dataframe(dataset_pm_univariate))
+df_forecast = inverse_transform(forecast.to_dataframe(dataset_pm_univariate, conf=None))
 print(forecast.plot(df_forecast, max_num_groups=3, split_dt=SPLIT_DT))

 # %% [markdown]
@@ -117,7 +119,7 @@ def inverse_transform(df):
 )

 df_univariate_error = pred_4step.\
-    to_dataframe(dataset_pm_univariate, group_colname='station', time_colname='week').\
+    to_dataframe(dataset_pm_univariate, group_colname='station', time_colname='week', conf=None).\
     pipe(inverse_transform).\
     merge(df_aq.loc[:,['station', 'week', 'PM']]).\
     assign(
@@ -234,7 +236,7 @@ def mc_preds_to_dataframe(preds: Predictions,
 df_multivariate_error.groupby('validation')['error'].agg(['mean','std'])

 # %% [markdown]
-# We see that this approach has reduced our error: substantially in the training period, and moderately in the validation period. We can look at the per-site differences to reduce common sources of noise and see that the reduction is consistent (it holds for all but one site):
+# We see that this approach has reduced our error in the validation period. We can look at the per-site differences to reduce noise:

 # %%
 df_multivariate_error.\
@@ -304,7 +306,10 @@ def mc_preds_to_dataframe(preds: Predictions,
     start_offsets=dataset_pm_lm.start_datetimes,
     n_step=4
 )
-pred_4step.plot(pred_4step.to_dataframe(dataset_pm_lm, type='components').query("process.str.contains('lm')"), split_dt=SPLIT_DT)
+pred_4step.plot(
+    pred_4step.to_dataframe(dataset_pm_lm, type='components').query("process.str.contains('lm')"),
+    split_dt=SPLIT_DT
+)

 # %% [markdown]
 # Now let's look at error:
2 changes: 1 addition & 1 deletion docs/quick_start.py
@@ -110,7 +110,7 @@
 # `Predictions` can easily be converted to Pandas `DataFrames` for ease of inspecting predictions, comparing them to actuals, and visualizing:

 # %%
-df_pred = pred.to_dataframe(dataset_all, multi=None)
+df_pred = pred.to_dataframe(dataset_all, conf=None)
 # bias-correction for log-transform (see https://otexts.com/fpp2/transformations.html#bias-adjustments)
 df_pred['mean'] += .5 * df_pred['std'] ** 2
 df_pred['lower'] = df_pred['mean'] - 1.96 * df_pred['std']
1 change: 1 addition & 0 deletions setup.py
@@ -12,6 +12,7 @@
     install_requires=[
         'torch>=1.12',
         'numpy>=1.4',
+        'scipy>=1.10'
     ],
     extras_require={
         'tests': ['parameterized>=0.7', 'filterpy>=1.4', 'pandas>=1.0'],
18 changes: 8 additions & 10 deletions tests/test_kalman_filter.py
@@ -1,8 +1,8 @@
 import copy
 import itertools
 from collections import defaultdict
-from typing import Callable, Optional, Dict
-from unittest import TestCase
+from typing import Callable, Dict
+from unittest import TestCase, expectedFailure

 import torch
 from parameterized import parameterized
@@ -66,7 +66,7 @@ def test_nans(self, ndim: int = 3, n_step: int = 1):
             processes=[LocalLevel(id=f'lm{i}', measure=str(i)) for i in range(ndim)],
             measures=[str(i) for i in range(ndim)]
         )
-        kf = torch.jit.script(kf)
+        # kf = torch.jit.script(kf)
         obs_means, obs_covs = kf(data, n_step=n_step)
         self.assertFalse(torch.isnan(obs_means).any())
         self.assertFalse(torch.isnan(obs_covs).any())
@@ -150,7 +150,7 @@ def test_equations_decay(self):
         # confirm decay works in forward pass
         # also tests that kf.forward works with `out_timesteps > input.shape[1]`
         pred = torch_kf(
-            initial_state=torch_kf._prepare_initial_state((None, None), start_offsets=np.zeros(1)),
+            initial_state=torch_kf._prepare_initial_state(None, start_offsets=np.zeros(1)),
             X=torch.randn(1, num_times, 3),
             out_timesteps=num_times
         )
@@ -168,7 +168,6 @@ def test_equations(self):
             processes=[LocalTrend(id='lt', decay_velocity=None, measure='y', velocity_multi=1.)],
             measures=['y']
         )
-        kf = torch.jit.script(torch_kf)
         expectedF = torch.tensor([[1., 1.], [0., 1.]])
         expectedH = torch.tensor([[1., 0.]])
         kwargs_per_process = torch_kf._parse_design_kwargs(input=data, out_timesteps=num_times)
@@ -184,7 +183,7 @@

         # make filterpy kf:
         filter_kf = filterpy_KalmanFilter(dim_x=2, dim_z=1)
-        filter_kf.x, filter_kf.P = torch_kf._prepare_initial_state((None, None))
+        filter_kf.x, filter_kf.P = torch_kf._prepare_initial_state(None)
         filter_kf.x = filter_kf.x.detach().numpy().T
         filter_kf.P = filter_kf.P.detach().numpy().squeeze(0)
         filter_kf.Q = Q.numpy()
@@ -228,7 +227,7 @@ def __init__(self, *args, **kwargs):
             ],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(2)

         kf.state_dict()['initial_mean'][:] = torch.tensor([1.5, -0.5])
         kf.state_dict()['measure_covariance.cholesky_log_diag'][0] = np.log(.1 ** .5)
@@ -348,7 +347,7 @@ def _build_h_mat(self, inputs: Dict[str, Tensor], num_groups: int, num_times: in
             processes=[Season(id='s1')],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(1)
         data = torch.arange(7).view(1, -1, 1).to(torch.float32)
         for init_state in [0., 1.]:
             kf.state_dict()['initial_mean'][:] = torch.ones(1) * init_state
@@ -401,11 +400,10 @@ def test_no_proc_variance(self):
         self.assertTrue((cov == 0).all())

     @parameterized.expand([
-        (torch.float64, 2, True),
         (torch.float64, 2, False)
     ])
     @torch.no_grad()
-    def test_dtype(self, dtype: torch.dtype, ndim: int = 2, compiled: bool = True):
+    def test_dtype(self, dtype: torch.dtype, ndim: int = 2, compiled: bool = False):
         data = torch.zeros((2, 5, ndim), dtype=dtype)
         kf = KalmanFilter(
             processes=[LocalLevel(id=f'll{i}', measure=str(i)) for i in range(ndim)],
2 changes: 1 addition & 1 deletion tests/test_process.py
@@ -16,7 +16,7 @@ def test_fourier_season(self):
             processes=[Season(id='day_of_week', period='7D', dt_unit='D', K=3, fixed=True)],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(6)
         kf.state_dict()['initial_mean'][:] = torch.tensor([1., 0., 0., 0., 0., 0.])
         kf.state_dict()['measure_covariance.cholesky_log_diag'] -= 2
         pred = kf(data, start_offsets=start_datetimes)
31 changes: 7 additions & 24 deletions tests/test_training.py
@@ -14,21 +14,6 @@


 class TestTraining(unittest.TestCase):
-    @parameterized.expand([(1,), (2,), (3,)])
-    @torch.no_grad()
-    def test_gaussian_log_prob(self, ndim: int = 1):
-        data = torch.zeros((2, 5, ndim))
-        kf = KalmanFilter(
-            processes=[LocalLevel(id=f'lm{i}', measure=str(i)) for i in range(ndim)],
-            measures=[str(i) for i in range(ndim)]
-        )
-        dist = kf.ss_step.get_distribution()
-        pred = kf(data)
-        log_lik1 = dist(*pred).log_prob(data)
-        from torch.distributions import MultivariateNormal
-        mv = MultivariateNormal(*pred)
-        log_lik2 = mv.log_prob(data)
-        self.assertAlmostEqual(log_lik1.sum().item(), log_lik2.sum().item())

     @parameterized.expand([(1,), (2,), (3,)])
     @torch.no_grad()
@@ -46,8 +31,6 @@ def test_log_prob_with_missings(self, ndim: int = 1, num_groups: int = 1, num_ti
         lp_method1 = pred.log_prob(data)
         lp_method1_sum = lp_method1.sum().item()

-        dist = kf.ss_step.get_distribution()
-
         lp_method2_sum = 0
         for g in range(num_groups):
             data_g = data[[g]]
@@ -59,20 +42,20 @@
                 if not isvalid_gt.any():
                     continue
                 if isvalid_gt.all():
-                    lp_gt = dist(*pred_gt).log_prob(data_gt).item()
+                    lp_gt = torch.distributions.MultivariateNormal(*pred_gt).log_prob(data_gt).item()
                 else:
                     pred_gtm = pred_gt.observe(
                         state_means=pred_gt.state_means,
                         state_covs=pred_gt.state_covs,
                         R=pred_gt.R[..., isvalid_gt, :][..., isvalid_gt],
                         H=pred_gt.H[..., isvalid_gt, :]
                     )
-                    lp_gt = dist(*pred_gtm).log_prob(data_gt[..., isvalid_gt]).item()
+                    lp_gt = torch.distributions.MultivariateNormal(*pred_gtm).log_prob(data_gt[..., isvalid_gt]).item()
                 self.assertAlmostEqual(lp_method1[g, t].item(), lp_gt, places=4)
                 lp_method2_sum += lp_gt
         self.assertAlmostEqual(lp_method1_sum, lp_method2_sum, places=3)

-    def test_training1(self, ndim: int = 2, num_groups: int = 150, num_times: int = 24, compile: bool = True):
+    def test_training1(self, ndim: int = 2, num_groups: int = 150, num_times: int = 24, compile: bool = False):
         """
         simulated data with known parameters, fitted loss should approach the loss given known params
         """
@@ -101,8 +84,8 @@ def _make_kf():
         X = torch.randn((num_groups, num_times, 5))
         kf_generator = _make_kf()
         with torch.no_grad():
-            sim = kf_generator.simulate(out_timesteps=num_times, num_sims=num_groups, X=X)
-            y = sim.sample()
+            sim = kf_generator.simulate(out_timesteps=num_times, num_groups=num_groups, X=X)
+            y = torch.distributions.MultivariateNormal(*sim).sample()
         assert not y.requires_grad

         # train:
@@ -136,7 +119,7 @@ def closure():
         oracle_loss = -kf_generator(y, X=X).log_prob(y).mean()
         self.assertAlmostEqual(oracle_loss.item(), loss.item(), places=1)

-    def test_training2(self, num_groups: int = 50, compile: bool = True):
+    def test_training2(self, num_groups: int = 50, compile: bool = False):
         """
         # manually generated data (sin-wave, trend, etc.) with virtually no noise: MSE should be near zero
         """
@@ -199,7 +182,7 @@ def closure():
         # trend should be identified:
         self.assertAlmostEqual(pred.state_means[:, :, 1].mean().item(), 5., places=1)

-    def test_training3(self, compile: bool = True):
+    def test_training3(self, compile: bool = False):
         """
         Test TBATS and TimeSeriesDataset integration
         """
2 changes: 1 addition & 1 deletion torchcast/__init__.py
@@ -1 +1 @@
-__version__ = '0.3.0'
+__version__ = '0.3.1'
2 changes: 0 additions & 2 deletions torchcast/exp_smooth/exp_smooth.py
@@ -67,8 +67,6 @@ class ExpSmoother(StateSpaceModel):
     :param processes: A list of :class:`.Process` modules.
     :param measures: A list of strings specifying the names of the dimensions of the time-series being measured.
     :param measure_covariance: A module created with ``Covariance.from_measures(measures)``.
-    :param predict_smoothing: A ``torch.nn.Module`` which predicts the smoothing parameters. The module should predict
-        these as real-values and they will be constrained to 0-1 internally.
     """
     ss_step_cls = ExpSmoothStep

15 changes: 15 additions & 0 deletions torchcast/internals/utils.py
@@ -5,6 +5,12 @@
 import numpy as np


+def transpose_last_dims(x: torch.Tensor) -> torch.Tensor:
+    args = list(range(len(x.shape)))
+    args[-2], args[-1] = args[-1], args[-2]
+    return x.permute(*args)
+
+
 def get_nan_groups(isnan: torch.Tensor) -> List[Tuple[torch.Tensor, Optional[torch.Tensor]]]:
     """
     Iterable of (group_idx, valid_idx) tuples that can be passed to torch.meshgrid. If no valid, then not returned; if
@@ -148,3 +154,12 @@ def true1d_idx(arr: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
 def is_near_zero(tens: torch.Tensor, rtol: float = 1e-05, atol: float = 1e-08, equal_nan: bool = False) -> torch.Tensor:
     z = torch.zeros(1, dtype=tens.dtype, device=tens.device)
     return torch.isclose(tens, other=z, rtol=rtol, atol=atol, equal_nan=equal_nan)
+
+
+def repeat(x: Union[torch.Tensor, np.ndarray], times: int, dim: int) -> Union[torch.Tensor, np.ndarray]:
+    reps = [1 for _ in x.shape]
+    reps[dim] = times
+    if isinstance(x, np.ndarray):
+        return np.tile(x, reps=reps)
+    else:
+        return x.repeat(*reps)
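The two new helpers are small; a quick sketch of the behavior they promise (imports assume the module path shown above):

```python
import numpy as np
import torch

from torchcast.internals.utils import repeat, transpose_last_dims

x = torch.randn(4, 3, 2)
assert transpose_last_dims(x).shape == (4, 2, 3)  # swaps only the last two dims

# `repeat` gives tensors and ndarrays one API (Tensor.repeat vs. np.tile):
assert repeat(torch.ones(1, 5), times=3, dim=0).shape == (3, 5)
assert repeat(np.ones((1, 5)), times=3, dim=0).shape == (3, 5)
```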
1 change: 1 addition & 0 deletions torchcast/kalman_filter/__init__.py
@@ -0,0 +1 @@
+from .kalman_filter import KalmanFilter
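This new package `__init__.py` is what the `docs/api/kalman_filter.rst` change above accounts for: `kalman_filter` is now a sub-package, and the re-export keeps the public import path stable:

```python
from torchcast.kalman_filter import KalmanFilter  # unchanged public path, via the re-export
```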
(Diffs for the remaining changed files did not load.)