From 29f1374fde9135b8f98164d5db5f268c85c6b466 Mon Sep 17 00:00:00 2001 From: Grey Christoforo Date: Wed, 27 Feb 2019 00:34:18 +0000 Subject: [PATCH] make csv dumper --- setup.py | 3 +- util/h52csv | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 util/h52csv diff --git a/setup.py b/setup.py index b802f9f..4ac0b86 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="mutovis-analysis", - version="3.0.0", + version="3.0.1", author="Grey Christoforo", author_email="grey@mutovis.com", description="Software for analyzing solar cell i-v curves", @@ -14,6 +14,7 @@ url="https://github.com/mutovis/analysis-software", packages=setuptools.find_packages(), entry_points={'gui_scripts': ['mutovis-analysis = batch_iv_analysis:main', ],}, + data_files=[('bin',['util/h52csv'])], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: GPL-3.0", diff --git a/util/h52csv b/util/h52csv new file mode 100644 index 0000000..9747b09 --- /dev/null +++ b/util/h52csv @@ -0,0 +1,88 @@ +#!/usr/bin/env python + +# hdf5 --> csv converter +# written by grey@mutovis.com + +import h5py +import argparse +import os +import glob +import numpy + +tool_revision = '1.0.0' + +def is_dir(dirname): + """Checks if a path is an actual directory""" + if (not os.path.isdir(dirname)): + msg = "{0} is not a directory".format(dirname) + raise argparse.ArgumentTypeError(msg) + else: + return dirname + +parser = argparse.ArgumentParser(description='hdf5 --> csv converter') + +parser.add_argument('-i', '--input-folder', type=is_dir, required=True, help='Input directory containing .h5 files') +parser.add_argument('-o', '--output-folder', type=is_dir, required=True, help='Output directory for resulting .csv files') + +args = parser.parse_args() + +# recursively get all the files in the input folder that end in .h5 +h5s = glob.glob(args.input_folder + '/**/*.h5', recursive=True) + +def decode_try(this): + """if this is decodeable, do it""" + if hasattr(this, 'decode'): + return this.decode() + else: + return this + +# loop through them and convert them to csv +for file in h5s: + if h5py.is_hdf5(file): + try: + with h5py.File(file, 'r') as f: + run_level_attributes = f.attrs + substrate_names = f.keys() + # step through the substrates + for substrate_name in substrate_names: + substrate = f[substrate_name] + substrate_level_attributes = substrate.attrs + pixel_names = substrate.keys() + # step through the pixels + for pixel_name in pixel_names: + pixel = substrate[pixel_name] + pixel_level_attributes = pixel.attrs + mds = pixel['all_measurements'] # the all measurements dataset + region_names = mds.attrs # the region refrences for this dataset are stored as its attributes + d_regions = [] + for region_name in region_names: + region = mds[mds.attrs[region_name]] # a region of interest + d_regions.append({'name': region_name, 'data': region, 'col_names': region.dtype.names}) + origin_file = file.replace(os.path.sep, '_').lstrip('_') + csv_file_name = "{:}_{:}_{:}.csv".format(origin_file, substrate_name, pixel_name) + csv_full_path = args.output_folder + os.path.sep + csv_file_name + # now we have everything we need to dump the csv + with open (csv_full_path, mode='x') as cf: + print("# created with h52csv version {:}".format(tool_revision), file=cf) + print("# from origin file = {:}".format(file), file=cf) + print("# ===run level attributes===", file=cf) + for rla in run_level_attributes: + print("# {:} = {:}".format(rla, decode_try(f.attrs[rla])), file=cf) + print("# ===substrate level attributes===", file=cf) + for sla in substrate_level_attributes: + print("# {:} = {:}".format(sla, decode_try(substrate.attrs[sla])), file=cf) + print("# ===pixel level attributes===", file=cf) + for pla in pixel_level_attributes: + print("# {:} = {:}".format(pla, decode_try(pixel.attrs[pla])), file=cf) + for d_region in d_regions: + print("", file=cf) + print(d_region['name'], file=cf) + print(','.join(d_region['col_names']), file=cf) + numpy.savetxt(cf, d_region['data']) + print("Created {:}".format(csv_full_path)) + print("Done processing {:}".format(file)) + except Exception as inst: + print("Failed to process {:} with error:".format(file)) + print(inst) + print("") +print("Task complete.") \ No newline at end of file