Some changes for the run settings of pyblastafterglow.

ThibeauWouters · Dec 10, 2024 · a09e86c · a09e86c
1 parent 8e6f126
commit a09e86c
Show file tree

Hide file tree

Showing 9 changed files with 170 additions and 86 deletions.
diff --git a/flux_models/afterglowpy_tophat/train_afterglowpy_tophat.py b/flux_models/afterglowpy_tophat/train_afterglowpy_tophat.py
@@ -79,7 +79,7 @@
         X_example = f["val"]["X"][-1]
         y_raw = f["val"]["y"][-1, data_manager.mask]
 
-    y_raw = y_raw.reshape(256, len(lc_model.times))
+    y_raw = y_raw.reshape(len(lc_model.nus), len(lc_model.times))
     y_raw = np.exp(y_raw)
     y_raw = np.array([np.interp(filt.nu, lc_model.metadata["nus"], column) for column in y_raw.T]) 
     y_raw = -48.6 + -1 * np.log10(y_raw*1e-3 / 1e23) * 2.5

diff --git a/flux_models/pyblastafterglow_tophat/benchmark_pyblastafterglow_tophat.py b/flux_models/pyblastafterglow_tophat/benchmark_pyblastafterglow_tophat.py
@@ -0,0 +1,45 @@
+import numpy as np 
+import matplotlib.pyplot as plt
+
+from fiesta.train.BenchmarkerFluxes import Benchmarker
+from fiesta.inference.lightcurve_model import AfterglowpyPCA
+from fiesta.utils import Filter
+
+
+name = "tophat"
+model_dir = f"./model/"
+FILTERS = ["radio-3GHz", "radio-6GHz", "bessellv"]
+
+for metric_name in ["$\\mathcal{L}_2$", "$\\mathcal{L}_\infty$"]:   
+    if metric_name == "$\\mathcal{L}_2$":
+        file_ending = "L2"
+    else:
+        file_ending = "Linf"
+
+    B = Benchmarker(name = name,
+                model_dir = model_dir,
+                MODEL = AfterglowpyPCA,
+                filters = FILTERS,
+                metric_name = metric_name,
+                )
+
+
+    for filt in FILTERS:
+
+        fig, ax = B.plot_lightcurves_mismatch(filter =filt, parameter_labels = ["$\\iota$", "$\log_{10}(E_0)$", "$\\theta_{\\mathrm{c}}$", "$\log_{10}(n_{\mathrm{ism}})$", "$p$", "$\\epsilon_E$", "$\\epsilon_B$", "$\\Gamma_0$"])
+        fig.savefig(f"./benchmarks/benchmark_{filt}_{file_ending}.pdf", dpi = 200)
+
+        B.print_correlations(filter = filt)    
+
+    fig, ax = B.plot_worst_lightcurves()
+    fig.savefig(f"./benchmarks/worst_lightcurves_{file_ending}.pdf", dpi = 200)
+
+
+fig, ax = B.plot_error_distribution()
+fig.savefig(f"./benchmarks/error_distribution.pdf", dpi = 200)
+
+fig, ax = B.plot_error_over_time()
+fig.savefig(f"./benchmarks/error_over_time.pdf", dpi = 200)
+
+
+
diff --git a/flux_models/pyblastafterglow_tophat/create_pyblastafterglow_tophat.py b/flux_models/pyblastafterglow_tophat/create_pyblastafterglow_tophat.py
@@ -1,5 +1,4 @@
 import numpy as np 
-import matplotlib.pyplot as plt
 
 from fiesta.train.AfterglowData import PyblastafterglowData
 from mpi4py import MPI
@@ -70,4 +69,4 @@
                           n_nu = n_nu,
                           parameter_distributions = parameter_distributions,
                           rank = rank,
-                          path_to_exec = "/home/aya/work/hkoehn/fiesta/PyBlastAfterglowMag/src/pba.out")
+                          path_to_exec = "/hppfs/scratch/06/di35kuf/pyblastafterglow/PyBlastAfterglowMag/src/pba.out")
diff --git a/flux_models/pyblastafterglow_tophat/join_h5files.py b/flux_models/pyblastafterglow_tophat/join_h5files.py
@@ -0,0 +1,37 @@
+import os
+import h5py
+import numpy as np
+import shutil
+import tqdm
+
+#####
+
+directory = "./model"
+
+#####
+
+outfile = os.path.join(directory, "pyblastafterglow_raw_data.h5")
+file_list = [f for f in os.listdir(directory) if f.endswith(".h5")]
+shutil.copy(os.path.join(directory, file_list[0]), outfile)
+
+with h5py.File(outfile, "a") as f:
+
+    for file in tqdm.tqdm(file_list[1:]):
+        file = h5py.File(os.path.join(directory, file))
+        for group in ["train", "val", "test"]:
+            X = file[group]["X"]
+            Xset = f[group]["X"]
+            Xset.resize(Xset.shape[0]+X.shape[0], axis = 0)
+            Xset[-X.shape[0]:] = X
+
+            y = file[group]["y"]
+            yset = f[group]["y"]
+            yset.resize(yset.shape[0]+y.shape[0], axis = 0)
+            yset[-y.shape[0]:] = y
+        file.close()
+
+    print("train: ", f["train"]["y"].shape[0])
+    print("val: ", f["val"]["y"].shape[0])
+    print("test: ", f["test"]["y"].shape[0])
+
+
diff --git a/flux_models/pyblastafterglow_tophat/plot_training_data_distribution.py b/flux_models/pyblastafterglow_tophat/plot_training_data_distribution.py
@@ -0,0 +1,36 @@
+import numpy as np
+import corner
+import matplotlib.pyplot as plt
+import h5py
+
+default_corner_kwargs = dict(bins=40, 
+                        smooth=1., 
+                        label_kwargs=dict(fontsize=16),
+                        title_kwargs=dict(fontsize=16),
+                        #color = "blue"
+                        # quantiles=[],
+                        # levels=[0.9],
+                        plot_density=False, 
+                        plot_datapoints=True, 
+                        plot_contours = False,
+                        fill_contours=False,
+                        max_n_ticks=4, 
+                        min_n_ticks=3,
+                        save=False,
+                        truth_color="red")
+
+#######
+
+file = "./model/pyblastafterglow_raw_data.h5"
+
+#######
+
+
+with h5py.File(file, "r") as f:
+    X = f["train"]["X"][:]
+    parameter_names = f["parameter_names"][()]
+
+
+corner.corner(X, **default_corner_kwargs, labels = parameter_names)
+plt.show()
+breakpoint()
diff --git a/flux_models/pyblastafterglow_tophat/train_pyblastafterglow_tophat.py b/flux_models/pyblastafterglow_tophat/train_pyblastafterglow_tophat.py
@@ -1,8 +1,8 @@
 import numpy as np 
 import matplotlib.pyplot as plt
+import h5py
 
-from fiesta.train.FluxTrainer import PCATrainer, PyblastafterglowData, DataManager
-from fiesta.train.BenchmarkerFluxes import Benchmarker
+from fiesta.train.FluxTrainer import PCATrainer, DataManager
 from fiesta.inference.lightcurve_model import AfterglowpyPCA
 from fiesta.train.neuralnets import NeuralnetConfig
 from fiesta.utils import Filter
@@ -11,101 +11,52 @@
 ### SETUP ###
 #############
 
-tmin = 0.1 # days
+tmin = 1 # days
 tmax = 2000
-n_times = 200
 
 
 numin = 1e9 # Hz 
-numax = 2.5e18
-n_nu = 256
+numax = 1e17
 
+n_training = 50_000
+n_val = 5000
+n_pca = 100
 
-parameter_distributions = {
-    'inclination_EM': (0, np.pi/2, "uniform"),
-    'log10_E0': (47, 57, "uniform"), 
-    'thetaCore': (0.01, np.pi/5, "loguniform"),
-    'log10_n0': (-6, 2, "uniform"),
-    'p': (2.01, 3, "uniform"),
-    'log10_epsilon_e': (-4, 0, "uniform"),
-    'log10_epsilon_B': (-8,0, "uniform"),
-    'Gamma0': (100, 1000, "uniform")
-}
-
-
-
-jet_name = "tophat"
-jet_conversion = {"tophat": -1,
-                  "gaussian": 0,
-                  "powerlaw": 4}
-
-n_training = 10
-n_val = 4
-n_test = 4
-
-n_pool = 1
-
-retrain_weights = None
-
-
-#######################
-### CREATE RAW DATA ###
-#######################
-name = jet_name
-outdir = f"./pyblastafterglow/{name}/"
-
-jet_type = jet_conversion[jet_name]
-
-
-
-creator = PyblastafterglowData(outdir = outdir,
-                          jet_type = jet_type,
-                          n_training = n_training, 
-                          n_val = n_val,
-                          n_test = n_test,
-                          n_pool = n_pool,
-                          tmin = tmin,
-                          tmax = tmax,
-                          n_times = n_times,
-                          numin = numin,
-                          numax = numax,
-                          n_nu = n_nu,
-                          parameter_distributions = parameter_distributions)
-
-exit()
+name = "tophat"
+outdir = f"./model/"
+file = outdir + "pyblastafterglow_raw_data.h5"
 
+config = NeuralnetConfig(output_size=n_pca,
+                         nb_epochs=100_000,
+                         hidden_layer_sizes = [256, 512, 256],
+                         learning_rate =8e-3)
 
 ###############
 ### TRAINER ###
 ###############
 
 
-data_manager = DataManager(outdir = outdir,
-                           n_training= 20_000, 
-                           n_val= 2500, 
-                           tmin= 10,
-                           tmax= 1000,
-                           numin = 1e9,
-                           numax = 2.5e18,
-                           retrain_weights = retrain_weights)
+data_manager = DataManager(file = file,
+                           n_training= n_training,
+                           n_val= n_val, 
+                           tmin= tmin,
+                           tmax= tmax,
+                           numin = numin,
+                           numax = numax,
+                           special_training=[])
 
 trainer = PCATrainer(name,
                      outdir,
                      data_manager = data_manager,
-                     plots_dir=f"./benchmarks/{name}",
-                     n_pca = 50,
+                     plots_dir=f"./benchmarks/",
+                     n_pca = n_pca,
                      save_preprocessed_data=False
                      )
 
 ###############
 ### FITTING ###
 ###############
 
-config = NeuralnetConfig(output_size=trainer.n_pca,
-                         nb_epochs=100_000,
-                         hidden_layer_sizes = [64, 128, 64],
-                         learning_rate =8e-3)
-
 trainer.fit(config=config)
 trainer.save()
 
@@ -121,10 +72,11 @@
                           filters = FILTERS)
 
 for filt in lc_model.Filters:
-    X_example = trainer.val_X_raw[-1]
+    with h5py.File(file, "r") as f:
+        X_example = f["val"]["X"][-1]
+        y_raw = f["val"]["y"][-1, data_manager.mask]
 
-    y_raw = trainer.val_y_raw[-1]
-    y_raw = y_raw.reshape(256, len(lc_model.times))
+    y_raw = y_raw.reshape(len(lc_model.nus), len(lc_model.times))
     y_raw = np.exp(y_raw)
     y_raw = np.array([np.interp(filt.nu, lc_model.metadata["nus"], column) for column in y_raw.T]) 
     y_raw = -48.6 + -1 * np.log10(y_raw*1e-3 / 1e23) * 2.5
@@ -145,5 +97,5 @@
     plt.legend()
     plt.gca().invert_yaxis()
 
-    plt.savefig(f"./pyblastafterglow/benchmarks/{name}/pyblastafterglow_{name}_{filt.name}_example.png")
+    plt.savefig(f"./benchmarks/pyblastafterglow_{name}_{filt.name}_example.png")
     plt.close()
diff --git a/src/fiesta/train/AfterglowData.py b/src/fiesta/train/AfterglowData.py
@@ -255,7 +255,14 @@ def run_afterglow_model(self, X):
                 idx, out = pbag(j)
                 y[idx] = out
             except:
-                y[j] = np.full(len(self.times)*len(self.nus), np.nan)      
+                try: 
+                    pbag.n_tb = 3000 # increase blast wave evolution time grid if there is an error
+                    idx, out = pbag(j)
+                    y[idx] = out
+                    pbag.n_tb = 1000
+                except:
+                    y[j] = np.full(len(self.times)*len(self.nus), np.nan)
+
         return X, y
 
 
@@ -354,6 +361,7 @@ def __init__(self, jet_type, times, nus, X, parameter_names, fixed_parameters =
         self.rank = rank
         self.path_to_exec = path_to_exec
         self.grb_resolution = grb_resolution
+        self.n_tb = 1000 # set default blast wave evolution timegrid to 1000
 
     def _call_pyblastafterglow(self,
                          params_dict: dict[str, float]):
@@ -393,7 +401,7 @@ def _call_pyblastafterglow(self,
                     theta_obs= params_dict["inclination_EM"], # observer angle [rad] (from pol to jet axis)  
                     lc_freqs= self.lc_freqs, # frequencies for light curve calculation
                     lc_times= self.lc_times, # times for light curve calculation
-                    tb0=1e1, tb1=1e9, ntb=1000, # burster frame time grid boundary, resolution, for the simulation
+                    tb0=1e1, tb1=1e11, ntb=self.n_tb, # burster frame time grid boundary, resolution, for the simulation
                 ),
 
                 # ejecta parameters; FS only -- 3 free parameters 

diff --git a/src/fiesta/train/BenchmarkerFluxes.py b/src/fiesta/train/BenchmarkerFluxes.py
@@ -66,14 +66,17 @@ def load_filters(self, filters: list[str]):
 
     def get_test_data(self,):
 
-        with h5py.File(os.path.join(self.model_dir, "afterglowpy_raw_data.h5"), "r") as f:
+        file = [f for f in os.listdir(self.model_dir) if f.endswith("_raw_data.h5")][0]
+
+        with h5py.File(os.path.join(self.model_dir, file), "r") as f:
             self.parameter_distributions = ast.literal_eval(f["parameter_distributions"][()].decode('utf-8'))
             self.parameter_names =  f["parameter_names"][:].astype(str).tolist()
             self.test_X_raw = f["test"]["X"][:]
             y_raw = f["test"]["y"][:]
             y_raw = y_raw.reshape(len(self.test_X_raw), len(f["nus"]), len(f["times"]) ) 
             y_raw = interp1d(f["times"][:], y_raw, axis = 2)(self.times) # interpolate the test data over the time range of the model
-            self.fluxes_raw = y_raw.reshape(len(self.test_X_raw), len(f["nus"]) * len(self.times) )
+            y_raw = interp1d(f["nus"][:], y_raw, axis = 1)(self.nus) # interpolate the test data over the frequency range of the model
+            self.fluxes_raw = y_raw.reshape(len(self.test_X_raw), len(self.nus) * len(self.times) )
 
     def lightcurve_test_data(self, ):
 

diff --git a/src/fiesta/train/FluxTrainer.py b/src/fiesta/train/FluxTrainer.py
@@ -279,12 +279,16 @@ def preprocess_data_from_file(self, n_components: int)->None:
                 train_X_raw = np.concatenate((train_X_raw, f["special_train"][label]["X"][:]))
             train_X = Xscaler.fit_transform(train_X_raw)
 
-
-            yscaler.fit(f["train"]["y"][:15_000, self.mask]) # only load 15k cause otherwise the array might get too large
+            loaded = f["train"]["y"][:15_000, self.mask]
+            if np.any(np.isinf(loaded)):
+                raise ValueError(f"Found inftys in training data.")
+            yscaler.fit(loaded) # only load 15k cause otherwise the array might get too large
             train_y = np.empty((self.n_training, n_components))
             n_loaded = 0
             for chunk in f["train"]["y"].iter_chunks():
                 loaded = f["train"]["y"][chunk][:, self.mask]
+                if np.any(np.isinf(loaded)):
+                    raise ValueError(f"Found inftys in training data.")
                 train_y[n_loaded:n_loaded+len(loaded)] = yscaler.transform(loaded)
                 n_loaded += len(loaded)
                 if n_loaded >= self.n_training: