Merge pull request #24 from strongio/develop
Develop
jwdink authored Sep 3, 2024
2 parents 4cb937b + e3b3d42 · commit 148af61
Showing 23 changed files with 712 additions and 431 deletions.
11 changes: 11 additions & 0 deletions docs/README.md
@@ -0,0 +1,11 @@
+To compile the docs, you must install the package with the `docs` extra:
+
+```bash
+pip install git+https://github.com/strongio/torchcast.git#egg=torchcast[docs]
+```
+
+Then from project root run:
+
+```bash
+sphinx-build -b html ./docs ./docs/_html
+```
2 changes: 1 addition & 1 deletion docs/api/kalman_filter.rst
@@ -1,7 +1,7 @@
 Kalman Filter
 =============

-.. automodule:: torchcast.kalman_filter
+.. automodule:: torchcast.kalman_filter.kalman_filter
    :members: KalmanFilter
    :exclude-members: ss_step_cls
    :show-inheritance:
15 changes: 10 additions & 5 deletions docs/examples/air_quality.py
@@ -81,7 +81,9 @@
 def inverse_transform(df):
     df = df.copy()
     # bias-correction for log-transform (see https://otexts.com/fpp2/transformations.html#bias-adjustments)
-    df['mean'] = df['mean'] + .5 * (df['upper'] - df['lower']) / 1.96 ** 2
+    df['mean'] = df['mean'] + .5 * df['std'] ** 2
+    df['lower'] = df['mean'] - 1.96 * df['std']
+    df['upper'] = df['mean'] + 1.96 * df['std']
     # inverse the log10:
     df[['actual', 'mean', 'upper', 'lower']] = 10 ** df[['actual', 'mean', 'upper', 'lower']]
     df['measure'] = df['measure'].str.replace('_log10', '')
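A note on the bias-correction fix above: the replaced expression was linear in the interval width, but the adjustment from the linked fpp2 section needs half the *variance* on the log scale. The new code therefore works from `std` directly (exposed via `conf=None` below) and rebuilds the interval around the adjusted mean. A minimal standalone sketch of the same back-transform, with illustrative numbers:

```python
mu, sigma = 1.2, 0.3  # illustrative forecast mean/std on the log10 scale

mu_adj = mu + .5 * sigma ** 2  # half-variance shift, as in the new lines above
lower, upper = mu_adj - 1.96 * sigma, mu_adj + 1.96 * sigma

print(10 ** mu, 10 ** mu_adj)    # naive vs. bias-adjusted point forecast
print(10 ** lower, 10 ** upper)  # interval back-transformed the same way
```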
@@ -94,7 +96,7 @@ def inverse_transform(df):
     out_timesteps=dataset_pm_univariate.tensors[0].shape[1]
 )

-df_forecast = inverse_transform(forecast.to_dataframe(dataset_pm_univariate))
+df_forecast = inverse_transform(forecast.to_dataframe(dataset_pm_univariate, conf=None))
 print(forecast.plot(df_forecast, max_num_groups=3, split_dt=SPLIT_DT))

 # %% [markdown]
@@ -117,7 +119,7 @@ def inverse_transform(df):
 )

 df_univariate_error = pred_4step.\
-    to_dataframe(dataset_pm_univariate, group_colname='station', time_colname='week').\
+    to_dataframe(dataset_pm_univariate, group_colname='station', time_colname='week', conf=None).\
     pipe(inverse_transform).\
     merge(df_aq.loc[:,['station', 'week', 'PM']]).\
     assign(
@@ -234,7 +236,7 @@ def mc_preds_to_dataframe(preds: Predictions,
 df_multivariate_error.groupby('validation')['error'].agg(['mean','std'])

 # %% [markdown]
-# We see that this approach has reduced our error: substantially in the training period, and moderately in the validation period. We can look at the per-site differences to reduce common sources of noise and see that the reduction is consistent (it holds for all but one site):
+# We see that this approach has reduced our error in the validation period. We can look at the per-site differences to reduce noise:

 # %%
 df_multivariate_error.\
@@ -304,7 +306,10 @@ def mc_preds_to_dataframe(preds: Predictions,
     start_offsets=dataset_pm_lm.start_datetimes,
     n_step=4
 )
-pred_4step.plot(pred_4step.to_dataframe(dataset_pm_lm, type='components').query("process.str.contains('lm')"), split_dt=SPLIT_DT)
+pred_4step.plot(
+    pred_4step.to_dataframe(dataset_pm_lm, type='components').query("process.str.contains('lm')"),
+    split_dt=SPLIT_DT
+)

 # %% [markdown]
 # Now let's look at error:
2 changes: 1 addition & 1 deletion docs/quick_start.py
@@ -110,7 +110,7 @@
 # `Predictions` can easily be converted to Pandas `DataFrames` for ease of inspecting predictions, comparing them to actuals, and visualizing:

 # %%
-df_pred = pred.to_dataframe(dataset_all, multi=None)
+df_pred = pred.to_dataframe(dataset_all, conf=None)
 # bias-correction for log-transform (see https://otexts.com/fpp2/transformations.html#bias-adjustments)
 df_pred['mean'] += .5 * df_pred['std'] ** 2
 df_pred['lower'] = df_pred['mean'] - 1.96 * df_pred['std']
1 change: 1 addition & 0 deletions setup.py
@@ -12,6 +12,7 @@
     install_requires=[
         'torch>=1.12',
         'numpy>=1.4',
+        'scipy>=1.10'
     ],
     extras_require={
         'tests': ['parameterized>=0.7', 'filterpy>=1.4', 'pandas>=1.0'],
18 changes: 8 additions & 10 deletions tests/test_kalman_filter.py
@@ -1,8 +1,8 @@
 import copy
 import itertools
 from collections import defaultdict
-from typing import Callable, Optional, Dict
-from unittest import TestCase
+from typing import Callable, Dict
+from unittest import TestCase, expectedFailure

 import torch
 from parameterized import parameterized
@@ -66,7 +66,7 @@ def test_nans(self, ndim: int = 3, n_step: int = 1):
             processes=[LocalLevel(id=f'lm{i}', measure=str(i)) for i in range(ndim)],
             measures=[str(i) for i in range(ndim)]
         )
-        kf = torch.jit.script(kf)
+        # kf = torch.jit.script(kf)
         obs_means, obs_covs = kf(data, n_step=n_step)
         self.assertFalse(torch.isnan(obs_means).any())
         self.assertFalse(torch.isnan(obs_covs).any())
@@ -150,7 +150,7 @@ def test_equations_decay(self):
         # confirm decay works in forward pass
         # also tests that kf.forward works with `out_timesteps > input.shape[1]`
         pred = torch_kf(
-            initial_state=torch_kf._prepare_initial_state((None, None), start_offsets=np.zeros(1)),
+            initial_state=torch_kf._prepare_initial_state(None, start_offsets=np.zeros(1)),
             X=torch.randn(1, num_times, 3),
             out_timesteps=num_times
         )
@@ -168,7 +168,6 @@ def test_equations(self):
             processes=[LocalTrend(id='lt', decay_velocity=None, measure='y', velocity_multi=1.)],
             measures=['y']
         )
-        kf = torch.jit.script(torch_kf)
         expectedF = torch.tensor([[1., 1.], [0., 1.]])
         expectedH = torch.tensor([[1., 0.]])
         kwargs_per_process = torch_kf._parse_design_kwargs(input=data, out_timesteps=num_times)
@@ -184,7 +183,7 @@

         # make filterpy kf:
         filter_kf = filterpy_KalmanFilter(dim_x=2, dim_z=1)
-        filter_kf.x, filter_kf.P = torch_kf._prepare_initial_state((None, None))
+        filter_kf.x, filter_kf.P = torch_kf._prepare_initial_state(None)
         filter_kf.x = filter_kf.x.detach().numpy().T
         filter_kf.P = filter_kf.P.detach().numpy().squeeze(0)
         filter_kf.Q = Q.numpy()
@@ -228,7 +227,7 @@ def __init__(self, *args, **kwargs):
             ],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(2)

         kf.state_dict()['initial_mean'][:] = torch.tensor([1.5, -0.5])
         kf.state_dict()['measure_covariance.cholesky_log_diag'][0] = np.log(.1 ** .5)
@@ -348,7 +347,7 @@ def _build_h_mat(self, inputs: Dict[str, Tensor], num_groups: int, num_times: in
             processes=[Season(id='s1')],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(1)
         data = torch.arange(7).view(1, -1, 1).to(torch.float32)
         for init_state in [0., 1.]:
             kf.state_dict()['initial_mean'][:] = torch.ones(1) * init_state
@@ -401,11 +400,10 @@ def test_no_proc_variance(self):
         self.assertTrue((cov == 0).all())

     @parameterized.expand([
-        (torch.float64, 2, True),
         (torch.float64, 2, False)
     ])
     @torch.no_grad()
-    def test_dtype(self, dtype: torch.dtype, ndim: int = 2, compiled: bool = True):
+    def test_dtype(self, dtype: torch.dtype, ndim: int = 2, compiled: bool = False):
         data = torch.zeros((2, 5, ndim), dtype=dtype)
         kf = KalmanFilter(
             processes=[LocalLevel(id=f'll{i}', measure=str(i)) for i in range(ndim)],
2 changes: 1 addition & 1 deletion tests/test_process.py
@@ -16,7 +16,7 @@ def test_fourier_season(self):
             processes=[Season(id='day_of_week', period='7D', dt_unit='D', K=3, fixed=True)],
             measures=['y']
         )
-        kf._scale_by_measure_var = False
+        kf._get_measure_scaling = lambda: torch.ones(6)
         kf.state_dict()['initial_mean'][:] = torch.tensor([1., 0., 0., 0., 0., 0.])
         kf.state_dict()['measure_covariance.cholesky_log_diag'] -= 2
         pred = kf(data, start_offsets=start_datetimes)
31 changes: 7 additions & 24 deletions tests/test_training.py
@@ -14,21 +14,6 @@


 class TestTraining(unittest.TestCase):
-    @parameterized.expand([(1,), (2,), (3,)])
-    @torch.no_grad()
-    def test_gaussian_log_prob(self, ndim: int = 1):
-        data = torch.zeros((2, 5, ndim))
-        kf = KalmanFilter(
-            processes=[LocalLevel(id=f'lm{i}', measure=str(i)) for i in range(ndim)],
-            measures=[str(i) for i in range(ndim)]
-        )
-        dist = kf.ss_step.get_distribution()
-        pred = kf(data)
-        log_lik1 = dist(*pred).log_prob(data)
-        from torch.distributions import MultivariateNormal
-        mv = MultivariateNormal(*pred)
-        log_lik2 = mv.log_prob(data)
-        self.assertAlmostEqual(log_lik1.sum().item(), log_lik2.sum().item())

     @parameterized.expand([(1,), (2,), (3,)])
     @torch.no_grad()
@@ -46,8 +31,6 @@ def test_log_prob_with_missings(self, ndim: int = 1, num_groups: int = 1, num_ti
         lp_method1 = pred.log_prob(data)
         lp_method1_sum = lp_method1.sum().item()

-        dist = kf.ss_step.get_distribution()
-
         lp_method2_sum = 0
         for g in range(num_groups):
             data_g = data[[g]]
@@ -59,20 +42,20 @@
                 if not isvalid_gt.any():
                     continue
                 if isvalid_gt.all():
-                    lp_gt = dist(*pred_gt).log_prob(data_gt).item()
+                    lp_gt = torch.distributions.MultivariateNormal(*pred_gt).log_prob(data_gt).item()
                 else:
                     pred_gtm = pred_gt.observe(
                         state_means=pred_gt.state_means,
                         state_covs=pred_gt.state_covs,
                         R=pred_gt.R[..., isvalid_gt, :][..., isvalid_gt],
                         H=pred_gt.H[..., isvalid_gt, :]
                     )
-                    lp_gt = dist(*pred_gtm).log_prob(data_gt[..., isvalid_gt]).item()
+                    lp_gt = torch.distributions.MultivariateNormal(*pred_gtm).log_prob(data_gt[..., isvalid_gt]).item()
                 self.assertAlmostEqual(lp_method1[g, t].item(), lp_gt, places=4)
                 lp_method2_sum += lp_gt
         self.assertAlmostEqual(lp_method1_sum, lp_method2_sum, places=3)

-    def test_training1(self, ndim: int = 2, num_groups: int = 150, num_times: int = 24, compile: bool = True):
+    def test_training1(self, ndim: int = 2, num_groups: int = 150, num_times: int = 24, compile: bool = False):
         """
         simulated data with known parameters, fitted loss should approach the loss given known params
         """
@@ -101,8 +84,8 @@ def _make_kf():
         X = torch.randn((num_groups, num_times, 5))
         kf_generator = _make_kf()
         with torch.no_grad():
-            sim = kf_generator.simulate(out_timesteps=num_times, num_sims=num_groups, X=X)
-            y = sim.sample()
+            sim = kf_generator.simulate(out_timesteps=num_times, num_groups=num_groups, X=X)
+            y = torch.distributions.MultivariateNormal(*sim).sample()
         assert not y.requires_grad

         # train:
@@ -136,7 +119,7 @@ def closure():
         oracle_loss = -kf_generator(y, X=X).log_prob(y).mean()
         self.assertAlmostEqual(oracle_loss.item(), loss.item(), places=1)

-    def test_training2(self, num_groups: int = 50, compile: bool = True):
+    def test_training2(self, num_groups: int = 50, compile: bool = False):
         """
         # manually generated data (sin-wave, trend, etc.) with virtually no noise: MSE should be near zero
         """
@@ -199,7 +182,7 @@ def closure():
         # trend should be identified:
         self.assertAlmostEqual(pred.state_means[:, :, 1].mean().item(), 5., places=1)

-    def test_training3(self, compile: bool = True):
+    def test_training3(self, compile: bool = False):
         """
         Test TBATS and TimeSeriesDataset integration
         """
2 changes: 1 addition & 1 deletion torchcast/__init__.py
@@ -1 +1 @@
-__version__ = '0.3.0'
+__version__ = '0.3.1'
2 changes: 0 additions & 2 deletions torchcast/exp_smooth/exp_smooth.py
@@ -67,8 +67,6 @@ class ExpSmoother(StateSpaceModel):
     :param processes: A list of :class:`.Process` modules.
     :param measures: A list of strings specifying the names of the dimensions of the time-series being measured.
     :param measure_covariance: A module created with ``Covariance.from_measures(measures)``.
-    :param predict_smoothing: A ``torch.nn.Module`` which predicts the smoothing parameters. The module should predict
-        these as real-values and they will be constrained to 0-1 internally.
     """
     ss_step_cls = ExpSmoothStep

15 changes: 15 additions & 0 deletions torchcast/internals/utils.py
@@ -5,6 +5,12 @@
 import numpy as np


+def transpose_last_dims(x: torch.Tensor) -> torch.Tensor:
+    args = list(range(len(x.shape)))
+    args[-2], args[-1] = args[-1], args[-2]
+    return x.permute(*args)
+
+
 def get_nan_groups(isnan: torch.Tensor) -> List[Tuple[torch.Tensor, Optional[torch.Tensor]]]:
     """
     Iterable of (group_idx, valid_idx) tuples that can be passed to torch.meshgrid. If no valid, then not returned; if
@@ -148,3 +154,12 @@ def true1d_idx(arr: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
 def is_near_zero(tens: torch.Tensor, rtol: float = 1e-05, atol: float = 1e-08, equal_nan: bool = False) -> torch.Tensor:
     z = torch.zeros(1, dtype=tens.dtype, device=tens.device)
     return torch.isclose(tens, other=z, rtol=rtol, atol=atol, equal_nan=equal_nan)
+
+
+def repeat(x: Union[torch.Tensor, np.ndarray], times: int, dim: int) -> Union[torch.Tensor, np.ndarray]:
+    reps = [1 for _ in x.shape]
+    reps[dim] = times
+    if isinstance(x, np.ndarray):
+        return np.tile(x, reps=reps)
+    else:
+        return x.repeat(*reps)
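The two new helpers are small; a quick sketch of the behavior they promise (imports assume the module path shown above):

```python
import numpy as np
import torch

from torchcast.internals.utils import repeat, transpose_last_dims

x = torch.randn(4, 3, 2)
assert transpose_last_dims(x).shape == (4, 2, 3)  # swaps only the last two dims

# `repeat` gives tensors and ndarrays one API (Tensor.repeat vs. np.tile):
assert repeat(torch.ones(1, 5), times=3, dim=0).shape == (3, 5)
assert repeat(np.ones((1, 5)), times=3, dim=0).shape == (3, 5)
```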
1 change: 1 addition & 0 deletions torchcast/kalman_filter/__init__.py
@@ -0,0 +1 @@
+from .kalman_filter import KalmanFilter
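This new package `__init__.py` is what the `docs/api/kalman_filter.rst` change above accounts for: `kalman_filter` is now a sub-package, and the re-export keeps the public import path stable:

```python
from torchcast.kalman_filter import KalmanFilter  # unchanged public path, via the re-export
```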
(Diffs for the remaining changed files did not load.)