From 29f1374fde9135b8f98164d5db5f268c85c6b466 Mon Sep 17 00:00:00 2001
From: Grey Christoforo <grey@christoforo.net>
Date: Wed, 27 Feb 2019 00:34:18 +0000
Subject: [PATCH] make csv dumper

---
 setup.py    |  3 +-
 util/h52csv | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 util/h52csv

diff --git a/setup.py b/setup.py
index b802f9f..4ac0b86 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="mutovis-analysis",
-    version="3.0.0",
+    version="3.0.1",
     author="Grey Christoforo",
     author_email="grey@mutovis.com",
     description="Software for analyzing solar cell i-v curves",
@@ -14,6 +14,7 @@
     url="https://github.com/mutovis/analysis-software",
     packages=setuptools.find_packages(),
     entry_points={'gui_scripts': ['mutovis-analysis = batch_iv_analysis:main', ],},
+    data_files=[('bin',['util/h52csv'])],
     classifiers=[
         "Programming Language :: Python :: 3",
         "License :: OSI Approved :: GPL-3.0",
diff --git a/util/h52csv b/util/h52csv
new file mode 100644
index 0000000..9747b09
--- /dev/null
+++ b/util/h52csv
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+# hdf5 --> csv converter
+# written by grey@mutovis.com
+
+import h5py
+import argparse
+import os
+import glob
+import numpy
+
+tool_revision = '1.0.0'
+
+def is_dir(dirname):
+  """argparse type= check: hand dirname back unchanged when it names an
+  existing directory, otherwise raise argparse.ArgumentTypeError carrying
+  the message '<dirname> is not a directory'."""
+  if os.path.isdir(dirname):
+    return dirname
+  raise argparse.ArgumentTypeError("{0} is not a directory".format(dirname))
+
+# command line: both folders are mandatory and are validated by is_dir
+parser = argparse.ArgumentParser(description='hdf5 --> csv converter')
+for flags, help_text in (
+    (('-i', '--input-folder'), 'Input directory containing .h5 files'),
+    (('-o', '--output-folder'), 'Output directory for resulting .csv files')):
+  parser.add_argument(*flags, type=is_dir, required=True, help=help_text)
+args = parser.parse_args()
+# every file ending in .h5 at any depth below the input folder
+h5s = glob.glob(args.input_folder + '/**/*.h5', recursive=True)
+
+def decode_try(this):
+  """Return this.decode() for anything that offers a decode attribute
+  (bytes, for instance); every other value is returned untouched."""
+  if not hasattr(this, 'decode'):
+    return this
+  return this.decode()
+
+# Convert every discovered file: one csv per pixel, walking run > substrate > pixel.
+# Any failure (including an output csv that already exists, because of mode='x')
+# is reported and abandons the remainder of that one .h5 file only.
+for file in h5s:
+  if h5py.is_hdf5(file):
+    try:
+      with h5py.File(file, 'r') as f:
+        run_level_attributes = f.attrs
+        #  step through the substrates
+        for substrate_name in f.keys():
+          substrate = f[substrate_name]
+          substrate_level_attributes = substrate.attrs
+          # step through the pixels
+          for pixel_name in substrate.keys():
+            pixel = substrate[pixel_name]
+            pixel_level_attributes = pixel.attrs
+            mds = pixel['all_measurements']  # the all measurements dataset
+            region_names = mds.attrs  # the region references for this dataset are stored as its attributes
+            d_regions = []
+            for region_name in region_names:
+              region = mds[mds.attrs[region_name]]  # a region of interest
+              d_regions.append({'name': region_name, 'data': region, 'col_names': region.dtype.names})
+            origin_file = file.replace(os.path.sep, '_').lstrip('_')
+            csv_file_name = "{:}_{:}_{:}.csv".format(origin_file, substrate_name, pixel_name)
+            csv_full_path = args.output_folder + os.path.sep + csv_file_name
+            # now we have everything we need to dump the csv
+            with open (csv_full_path, mode='x') as cf:
+              print("# created with h52csv version {:}".format(tool_revision), file=cf)
+              print("# from origin file = {:}".format(file), file=cf)
+              print("# ===run level attributes===", file=cf)
+              for rla in run_level_attributes:
+                print("# {:} = {:}".format(rla, decode_try(f.attrs[rla])), file=cf)
+              print("# ===substrate level attributes===", file=cf)
+              for sla in substrate_level_attributes:
+                print("# {:} = {:}".format(sla, decode_try(substrate.attrs[sla])), file=cf)
+              print("# ===pixel level attributes===", file=cf)
+              for pla in pixel_level_attributes:
+                print("# {:} = {:}".format(pla, decode_try(pixel.attrs[pla])), file=cf)
+              for d_region in d_regions:
+                print("", file=cf)
+                print(d_region['name'], file=cf)
+                print(','.join(d_region['col_names']), file=cf)
+                numpy.savetxt(cf, d_region['data'], delimiter=',')  # savetxt defaults to spaces; match the comma separated header row
+              print("Created {:}".format(csv_full_path))
+      print("Done processing {:}".format(file))
+    except Exception as inst:
+      print("Failed to process {:} with error:".format(file))
+      print(inst)
+    print("")
+print("Task complete.")
\ No newline at end of file