Commit 6696aee

Merge pull request #96 from torresramiro350/fixing_uproot

Fixing uproot

xiaojieww authored May 24, 2024
2 parents cd2b4b5 + 542a493 commit 6696aee
Showing 5 changed files with 58 additions and 90 deletions.
3 changes: 2 additions & 1 deletion README.md
@@ -93,7 +93,8 @@ response = ... # This can be either a ROOT or hdf5 file
hawc = HAL("HAWC",
maptree,
response,
roi)
roi,
n_workers=3) # enables faster loading of ROOT files via multiprocessing

# Use from bin 1 to bin 9
hawc.set_active_measurements(1, 9)
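A note on choosing the worker count: one less than the number of available
cores is the heuristic suggested later in from_root_file.py. A minimal sketch,
assuming the HealpixConeROI helper from hawc_hal and hypothetical placeholder
paths:

import os

from hawc_hal import HAL, HealpixConeROI

# leave one core free for the main process
n_workers = max(1, (os.cpu_count() or 2) - 1)

roi = HealpixConeROI(data_radius=3.0, model_radius=8.0, ra=83.63, dec=22.02)

hawc = HAL("HAWC",
           "maptree.root",   # hypothetical maptree path
           "response.root",  # hypothetical response path
           roi,
           n_workers=n_workers)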
89 changes: 25 additions & 64 deletions hawc_hal/HAL.py
@@ -22,17 +22,12 @@
from threeML.utils.statistics.gammaln import logfactorial
from tqdm.auto import tqdm

from hawc_hal.convolved_source import (
ConvolvedExtendedSource2D,
ConvolvedExtendedSource3D,
ConvolvedPointSource,
ConvolvedSourcesContainer,
)
from hawc_hal.healpix_handling import (
FlatSkyToHealpixTransform,
SparseHealpix,
get_gnomonic_projection,
)
from hawc_hal.convolved_source import (ConvolvedExtendedSource2D,
ConvolvedExtendedSource3D,
ConvolvedPointSource,
ConvolvedSourcesContainer)
from hawc_hal.healpix_handling import (FlatSkyToHealpixTransform,
SparseHealpix, get_gnomonic_projection)
from hawc_hal.log_likelihood import log_likelihood
from hawc_hal.maptree import map_tree_factory
from hawc_hal.maptree.data_analysis_bin import DataAnalysisBin
@@ -53,6 +48,9 @@ class HAL(PluginPrototype):
:param response: Response of HAWC (either ROOT or hd5 format)
:param roi: a ROI instance describing the Region Of Interest
:param flat_sky_pixels_size: size of the pixel for the flat sky projection (Hammer Aitoff)
:param n_workers: number of workers to use for parallelized reading of
ROOT files, default=1
:param set_transits: specifies the number of transits to use for the given maptree.
"""

def __init__(
@@ -283,9 +281,7 @@ def set_active_measurements(self, bin_id_min=None, bin_id_max=None, bin_list=Non
for this_bin in range(bin_id_min, bin_id_max + 1):
this_bin = str(this_bin)
if this_bin not in self._all_planes:
raise ValueError(
f"Bin {this_bin} is not contained in this maptree."
)
raise ValueError(f"Bin {this_bin} is not contained in this maptree.")

self._active_planes.append(this_bin)

@@ -301,9 +297,7 @@ def set_active_measurements(self, bin_id_min=None, bin_id_max=None, bin_list=Non
for this_bin in bin_list:
# if not this_bin in self._all_planes:
if this_bin not in self._all_planes:
raise ValueError(
f"Bin {this_bin} is not contained in this maptree."
)
raise ValueError(f"Bin {this_bin} is not contained in this maptree.")

self._active_planes.append(this_bin)
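
For reference, the two calling conventions validated above look like this at
the call site (hawc is the plugin instance from the README example):

hawc.set_active_measurements(1, 9)                      # contiguous range of bins
hawc.set_active_measurements(bin_list=["1", "3", "5"])  # explicit list of bin names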

@@ -433,10 +427,7 @@ def get_excess_background(self, ra: float, dec: float, radius: float):
this_nside = data_analysis_bin.observation_map.nside

radial_bin_pixels = hp.query_disc(
this_nside,
center,
radius_radians,
inclusive=False,
this_nside, center, radius_radians, inclusive=False
)

# calculate the areas per bin by the product
@@ -445,9 +436,7 @@ def get_excess_background(self, ra: float, dec: float, radius: float):

# NOTE: select active pixels according to each radial bin
bin_active_pixel_indexes = np.intersect1d(
self._active_pixels[energy_id],
radial_bin_pixels,
return_indices=True,
self._active_pixels[energy_id], radial_bin_pixels, return_indices=True
)[1]

# obtain the excess, background, and expected excess at
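
The [1] index above relies on np.intersect1d with return_indices=True, whose
second return value gives the positions of the common pixels within the first
array. A minimal sketch with made-up pixel ids:

import numpy as np

active_pixels = np.array([10, 11, 12, 20, 21])  # hypothetical active pixels in a bin
disc_pixels = np.array([11, 21, 30])            # hypothetical hp.query_disc output

common, idx_active, idx_disc = np.intersect1d(
    active_pixels, disc_pixels, return_indices=True
)
print(idx_active)  # [1 4] -> positions of pixels 11 and 21 in active_pixels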
@@ -523,32 +512,23 @@ def get_radial_profile(
# The area of each ring is then given by the difference between two
# subsequent circle areas.
area = np.array(
[
self.get_excess_background(ra, dec, r + offset * delta_r)[0]
for r in radii
]
[self.get_excess_background(ra, dec, r + offset * delta_r)[0] for r in radii]
)

temp = area[1:] - area[:-1]
area[1:] = temp

# signals
signal = np.array(
[
self.get_excess_background(ra, dec, r + offset * delta_r)[1]
for r in radii
]
[self.get_excess_background(ra, dec, r + offset * delta_r)[1] for r in radii]
)

temp = signal[1:] - signal[:-1]
signal[1:] = temp

# backgrounds
bkg = np.array(
[
self.get_excess_background(ra, dec, r + offset * delta_r)[2]
for r in radii
]
[self.get_excess_background(ra, dec, r + offset * delta_r)[2] for r in radii]
)

temp = bkg[1:] - bkg[:-1]
@@ -559,10 +539,7 @@ def get_radial_profile(
# model
# convert 'top hat' excess into 'ring' excesses.
model = np.array(
[
self.get_excess_background(ra, dec, r + offset * delta_r)[3]
for r in radii
]
[self.get_excess_background(ra, dec, r + offset * delta_r)[3] for r in radii]
)

temp = model[1:] - model[:-1]
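
Each of the differencing blocks above converts cumulative disc ("top hat")
totals into per-ring totals with the same in-place idiom; a tiny sketch with
made-up numbers:

import numpy as np

disc_totals = np.array([4.0, 9.0, 16.0])  # hypothetical totals within r1 < r2 < r3

rings = disc_totals.copy()
rings[1:] = disc_totals[1:] - disc_totals[:-1]
print(rings)  # [4. 5. 7.] -> innermost disc, then each successive annulus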
@@ -696,9 +673,7 @@ def plot_radial_profile(

plt.plot(radii, excess_model, color="red", label="Model")

plt.legend(
bbox_to_anchor=(1.0, 1.0), loc="upper right", numpoints=1, fontsize=16
)
plt.legend(bbox_to_anchor=(1.0, 1.0), loc="upper right", numpoints=1, fontsize=16)
plt.axhline(0, color="deepskyblue", linestyle="--")

x_limits = [0, max_radius]
@@ -805,9 +780,7 @@ def display_spectrum(self):

yerr = [yerr_high, yerr_low]

return self._plot_spectrum(
net_counts, yerr, model_only, residuals, residuals_err
)
return self._plot_spectrum(net_counts, yerr, model_only, residuals, residuals_err)

def _plot_spectrum(self, net_counts, yerr, model_only, residuals, residuals_err):
fig, subs = plt.subplots(
@@ -1080,9 +1053,7 @@ def _get_expectation(
)

# Now multiply by the pixel area of the new map to go back to flux
this_model_map_hpx *= hp.nside2pixarea(
data_analysis_bin.nside, degrees=True
)
this_model_map_hpx *= hp.nside2pixarea(data_analysis_bin.nside, degrees=True)

else:
# No sources
@@ -1207,9 +1178,7 @@ def display_fit(self, smoothing_kernel_sigma=0.1, display_colorbar=False):
subs[i][0].set_title("model, bin {}".format(data_analysis_bin.name))

# Plot data map
images[1] = subs[i][1].imshow(
proj_data, origin="lower", vmin=vmin, vmax=vmax
)
images[1] = subs[i][1].imshow(proj_data, origin="lower", vmin=vmin, vmax=vmax)
subs[i][1].set_title("excess, bin {}".format(data_analysis_bin.name))

# Plot background map.
@@ -1329,9 +1298,7 @@ def _get_model_map(self, plane_id, n_pt_src, n_ext_src):
raise ValueError(f"{plane_id} not a plane in the current model")

model_map = SparseHealpix(
self._get_expectation(
self._maptree[plane_id], plane_id, n_pt_src, n_ext_src
),
self._get_expectation(self._maptree[plane_id], plane_id, n_pt_src, n_ext_src),
self._active_pixels[plane_id],
self._maptree[plane_id].observation_map.nside,
)
@@ -1404,17 +1371,13 @@ def _write_a_map(self, file_name, which, fluctuate=False, return_map=False):
if return_map:
return new_map_tree

def write_model_map(
self, file_name, poisson_fluctuate=False, test_return_map=False
):
def write_model_map(self, file_name, poisson_fluctuate=False, test_return_map=False):
"""
This function writes the model map to a file.
The interface is based on HAWCLike for consistency
"""
if test_return_map:
log.warning(
"test_return_map=True should only be used for testing purposes!"
)
log.warning("test_return_map=True should only be used for testing purposes!")
return self._write_a_map(file_name, "model", poisson_fluctuate, test_return_map)

def write_residual_map(self, file_name, test_return_map=False):
@@ -1423,7 +1386,5 @@ def write_residual_map(self, file_name, test_return_map=False):
The interface is based on HAWCLike for consistency
"""
if test_return_map:
log.warning(
"test_return_map=True should only be used for testing purposes!"
)
log.warning("test_return_map=True should only be used for testing purposes!")
return self._write_a_map(file_name, "residual", False, test_return_map)
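
A hedged usage sketch of the two writers above (file names are hypothetical;
hawc is the plugin instance from the README example):

hawc.write_model_map("model_map.hd5", poisson_fluctuate=False)
hawc.write_residual_map("residual_map.hd5")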
33 changes: 17 additions & 16 deletions hawc_hal/maptree/from_root_file.py
@@ -1,4 +1,4 @@
from __future__ import absolute_import,annotations
from __future__ import absolute_import, annotations

import collections
import multiprocessing
@@ -61,9 +61,7 @@ def _counts_npixels(self) -> int:
if self._legacy_convention
else self.analysis_bin_names[0]
)
return self.maptree_ttree_directory[f"nHit{bin_id}/data/count"].member(
"fEntries"
)
return self.maptree_ttree_directory[f"nHit{bin_id}/data/count"].member("fEntries")

@property
def _bkg_npixels(self) -> int:
@@ -73,9 +71,7 @@ def _bkg_npixels(self) -> int:
if self._legacy_convention
else self.analysis_bin_names[0]
)
return self.maptree_ttree_directory[f"nHit{bin_id}/bkg/count"].member(
"fEntries"
)
return self.maptree_ttree_directory[f"nHit{bin_id}/bkg/count"].member("fEntries")

@property
def nside_cnt(self) -> int:
@@ -121,9 +117,9 @@ def get_array_from_file(

if roi is not None:
# NOTE: load only the pixels within the ROI
return bin_id, map_infile[
f"nHit{current_bin_id}/data/count"
].array().to_numpy()[hpx_map > 0.0]
return bin_id, map_infile[f"nHit{current_bin_id}/data/count"].array().to_numpy()[
hpx_map > 0.0
]

return bin_id, map_infile[f"nHit{current_bin_id}/data/count"].array().to_numpy()
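
The ROI selection above is a plain boolean mask over the full-sky array; a
tiny sketch with hypothetical values:

import numpy as np

counts = np.array([5.0, 0.0, 7.0, 2.0])   # hypothetical per-pixel counts
hpx_map = np.array([1.0, 0.0, 1.0, 0.0])  # hypothetical ROI map; > 0 inside the ROI

roi_counts = counts[hpx_map > 0.0]  # keep only ROI pixels -> [5. 7.]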

@@ -210,7 +206,15 @@ def from_root_file(
# cannot perform operations on histograms

# Read the maptree
with uproot.open(str(map_tree_file)) as map_infile:
with multiprocessing.Pool(processes=n_workers) as pool, uproot.open(
map_tree_file.as_posix(),
handler=uproot.MemmapSource,
num_fallback_workers=n_workers,
) as map_infile:
# the MemmapSource handler maps the file into memory and loads it as needed,
# suggested as the best option for large local files;
# for remote files use MultithreadedFileSource instead,
# which requires setting the use_threads option to True
log.info("Reading Maptree!")

maptree_metadata = MaptreeMetaData(map_infile)
Expand Down Expand Up @@ -246,11 +250,8 @@ def from_root_file(
# NOTE: it is suggested to keep the number of workers at one less
# than the number of available cores in the system.

with multiprocessing.Pool(processes=n_workers) as executor:
result_data = list(executor.starmap(get_array_from_file, signal_data_info))
result_bkg = list(
executor.starmap(get_bkg_array_from_file, signal_data_info)
)
result_data = list(pool.starmap(get_array_from_file, signal_data_info))
result_bkg = list(pool.starmap(get_bkg_array_from_file, signal_data_info))

# Processes are not guaranteed to preserve the order of analysis bin names,
# so organize the results into a dictionary for proper readout
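The key change above merges the process pool and the uproot file handle into a
single with statement, so the pool stays available for the whole read instead
of being created after the file is parsed. A minimal sketch of the pattern
under the same assumptions as this diff (uproot 5 MemmapSource handler; the
file name and worker function are hypothetical):

import multiprocessing

import uproot


def read_bin(bin_name: str) -> tuple:
    # hypothetical per-bin worker; the real workers each read one analysis bin
    return bin_name, len(bin_name)


if __name__ == "__main__":
    with multiprocessing.Pool(processes=3) as pool, uproot.open(
        "maptree.root",               # hypothetical local ROOT file
        handler=uproot.MemmapSource,  # memory-mapped reads for local files
        num_fallback_workers=3,
    ) as infile:
        # order of results is not guaranteed, hence the dict readout
        results = dict(pool.starmap(read_bin, [("1",), ("2",)]))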
19 changes: 12 additions & 7 deletions hawc_hal/response/response.py
@@ -406,7 +406,15 @@ def from_root_file(cls, response_file_name: Path, n_workers: int = 1):
f"Response {response_file_name} does not exist or is not readable"
)

with uproot.open(response_file_name) as response_file_directory:
with multiprocessing.Pool(processes=n_workers) as pool, uproot.open(
response_file_name,
handler=uproot.MemmapSource,
num_fallback_workers=n_workers,
) as response_file_directory:
# the MemmapSource handler maps the file into memory and loads it as needed,
# suggested as the best option for large local files;
# for remote files use MultithreadedFileSource instead,
# which requires setting the use_threads option to True
resp_metadata = ResponseMetaData(response_file_directory)

# NOTE: Get the Response function basic information
@@ -426,12 +434,9 @@ def from_root_file(cls, response_file_name: Path, n_workers: int = 1):
for bin_id in analysis_bins_arr
]

with multiprocessing.Pool(processes=n_workers) as executor:
results = list(executor.starmap(resp_metadata.get_energy_hist, args))
results_bkg = list(
executor.starmap(resp_metadata.get_energy_bkg_hist, args)
)
psf_param = list(executor.starmap(resp_metadata.get_psf_params, args))
results = list(pool.starmap(resp_metadata.get_energy_hist, args))
results_bkg = list(pool.starmap(resp_metadata.get_energy_bkg_hist, args))
psf_param = list(pool.starmap(resp_metadata.get_psf_params, args))

energy_hists = cls.create_dict_from_results(results)
energy_bkgs = cls.create_dict_from_results(results_bkg)
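response.py applies the same pool-plus-open pattern as from_root_file.py. A
hedged call-site sketch, assuming the classmethod above belongs to the
HAWCResponse class in hawc_hal/response/response.py (the response path is a
hypothetical placeholder):

from pathlib import Path

from hawc_hal.response.response import HAWCResponse

response = HAWCResponse.from_root_file(Path("response.root"), n_workers=3)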
4 changes: 2 additions & 2 deletions setup.py
@@ -46,15 +46,15 @@ def find_data_files(directory):
"threeml",
"astromodels",
"pandas",
"healpy",
"pytest",
"six",
"astropy",
"scipy",
"matplotlib",
"numba",
"reproject",
"tqdm",
"uproot==5.1.2",
"uproot",
"awkward",
"mplhep",
"hist",
