(issue 724): bypass pandas using pytables directly to work with HDF5 files #761

Open · wants to merge 4 commits into master
1 change: 1 addition & 0 deletions doc/source/api.rst
@@ -706,6 +706,7 @@ ReportSheet
ReportSheet.add_graphs
ReportSheet.newline


.. _api-misc:

Miscellaneous
17 changes: 8 additions & 9 deletions larray/core/array.py
@@ -62,7 +62,7 @@
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
renamed_to, deprecate_kwarg, lazy_attribute, unique_multi, SequenceZip,
Repeater, Product, ensure_no_numpy_type, PY2)
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION

@@ -6701,7 +6701,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
series = self.to_series(value_name, dropna is not None)
series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs)

def to_hdf(self, filepath, key):
def to_hdf(self, filepath, key, engine='auto'):
r"""
Writes array to a HDF file.

@@ -6714,6 +6714,9 @@ def to_hdf(self, filepath, key):
Path where the hdf file has to be written.
key : str or Group
Key (path) of the array within the HDF file (see Notes below).
engine : {'auto', 'tables', 'pandas'}, optional
Dump using `engine`. Use 'pandas' to update an HDF file generated by LArray versions prior to 0.31.
Defaults to 'auto' (use the default engine if you do not know which LArray version produced the HDF file).

Contributor (review comment): change "used to produced" to either "used to produce" or "which produced".

Notes
-----
@@ -6734,13 +6737,9 @@ def to_hdf(self, filepath, key):

>>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP
"""
key = _translate_group_key_hdf(key)
with LHDFStore(filepath) as store:
store.put(key, self.to_frame())
attrs = store.get_storer(key).attrs
attrs.type = 'Array'
attrs.writer = 'LArray'
self.meta.to_hdf(store, key)
from larray.inout.hdf import LHDFStore
with LHDFStore(filepath, engine=engine) as store:
store.put(key, self)

def to_stata(self, filepath_or_buffer, **kwargs):
r"""
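
As a usage sketch of the new `engine` argument on LArray.to_hdf (the array and file names below are hypothetical; only the signature shown in this diff is assumed):

>>> from larray import ndtest
>>> arr = ndtest((2, 3))
>>> # default: 'auto' lets LArray pick the engine
>>> arr.to_hdf('new_file.h5', 'arrays/arr')                       # doctest: +SKIP
>>> # force the pandas engine to update a file written by LArray < 0.31
>>> arr.to_hdf('legacy_file.h5', 'arrays/arr', engine='pandas')   # doctest: +SKIP
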
19 changes: 8 additions & 11 deletions larray/core/axis.py
@@ -16,7 +16,7 @@
_range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
from larray.util.oset import *
from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi, Product)
renamed_to, common_type, lazy_attribute, _isnoneslice, unique_multi, Product)


np_frompyfunc = np.frompyfunc
@@ -1304,7 +1304,7 @@ def align(self, other, join='outer'):
else:
return self

def to_hdf(self, filepath, key=None):
def to_hdf(self, filepath, key=None, engine='auto'):
r"""
Writes axis to a HDF file.

@@ -1319,6 +1319,9 @@ def to_hdf(self, filepath, key=None):
Key (path) of the axis within the HDF file (see Notes below).
If None, the name of the axis is used.
Defaults to None.
engine : {'auto', 'tables', 'pandas'}, optional
Dump using `engine`. Use 'pandas' to update an HDF file generated by LArray versions prior to 0.31.
Defaults to 'auto' (use the default engine if you do not know which LArray version produced the HDF file).

Notes
-----
@@ -1344,19 +1347,13 @@ def to_hdf(self, filepath, key=None):

>>> a.to_hdf('test.h5', 'axes/a') # doctest: +SKIP
"""
from larray.inout.hdf import LHDFStore
if key is None:
if self.name is None:
raise ValueError("Argument key must be provided explicitly in case of anonymous axis")
key = self.name
key = _translate_group_key_hdf(key)
dtype_kind = self.labels.dtype.kind
data = np.char.encode(self.labels, 'utf-8') if dtype_kind == 'U' else self.labels
s = pd.Series(data=data, name=self.name)
with LHDFStore(filepath) as store:
store.put(key, s)
store.get_storer(key).attrs.type = 'Axis'
store.get_storer(key).attrs.dtype_kind = dtype_kind
store.get_storer(key).attrs.wildcard = self.iswildcard
with LHDFStore(filepath, engine=engine) as store:
store.put(key, self)

@property
def dtype(self):
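
A similar sketch for Axis.to_hdf (axis and file names are made up); per the docstring above, `key` defaults to the axis name, so a named axis can be dumped without specifying it:

>>> from larray import Axis
>>> age = Axis('age=0..9')
>>> # key defaults to the axis name ('age')
>>> age.to_hdf('test_axes.h5')                 # doctest: +SKIP
>>> # pandas engine, e.g. to update a pre-0.31 file
>>> age.to_hdf('legacy.h5', engine='pandas')   # doctest: +SKIP
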
27 changes: 8 additions & 19 deletions larray/core/group.py
@@ -13,7 +13,7 @@
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
from larray.util.oset import *
from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, _isintstring,
renamed_to, LHDFStore)
renamed_to)


def _slice_to_str(key, repr_func=str):
@@ -1396,7 +1396,7 @@ def containing(self, substring):
substring = substring.eval()
return LGroup([v for v in self.eval() if substring in v], axis=self.axis)

def to_hdf(self, filepath, key=None, axis_key=None):
def to_hdf(self, filepath, key=None, axis_key=None, engine='auto'):
r"""
Writes group to a HDF file.

@@ -1417,6 +1417,9 @@ def to_hdf(self, filepath, key=None, axis_key=None):
Key (path) of the associated axis in the HDF file (see Notes below).
If None, the name of the axis associated with the group is used.
Defaults to None.
engine : {'auto', 'tables', 'pandas'}, optional
Dump using `engine`. Use 'pandas' to update an HDF file generated by LArray versions prior to 0.31.
Defaults to 'auto' (use the default engine if you do not know which LArray version produced the HDF file).

Notes
-----
@@ -1453,27 +1456,13 @@ def to_hdf(self, filepath, key=None, axis_key=None):
>>> # save both the group 'b01' and the associated axis 'b'
>>> b01.to_hdf('test.h5') # doctest: +SKIP
"""
from larray.inout.hdf import LHDFStore
if key is None:
if self.name is None:
raise ValueError("Argument key must be provided explicitly in case of anonymous group")
key = self.name
key = _translate_group_key_hdf(key)
if axis_key is None:
if self.axis.name is None:
raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous")
axis_key = self.axis.name
data = self.eval()
dtype_kind = data.dtype.kind if isinstance(data, np.ndarray) else ''
if dtype_kind == 'U':
data = np.char.encode(data, 'utf-8')
s = pd.Series(data=data, name=self.name)
with LHDFStore(filepath) as store:
store.put(key, s)
store.get_storer(key).attrs.type = 'Group'
store.get_storer(key).attrs.dtype_kind = dtype_kind
if axis_key not in store:
self.axis.to_hdf(store, key=axis_key)
store.get_storer(key).attrs.axis_key = axis_key
with LHDFStore(filepath, engine=engine) as store:
store.put(key, self, axis_key=axis_key)

# this makes range(LGroup(int)) possible
def __index__(self):
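
And for LGroup.to_hdf, a sketch assuming the usual larray `>>` naming operator and hypothetical file names; per the docstring above, the associated axis is written alongside the group under `axis_key`:

>>> from larray import Axis
>>> a = Axis('a=a0..a2')
>>> a01 = a['a0,a1'] >> 'a01'
>>> # key defaults to the group name, axis_key to the associated axis name ('a')
>>> a01.to_hdf('test_groups.h5')               # doctest: +SKIP
>>> a01.to_hdf('legacy.h5', engine='pandas')   # doctest: +SKIP
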
14 changes: 0 additions & 14 deletions larray/core/metadata.py
@@ -162,17 +162,3 @@ def _convert_value(value):
return value

return Metadata([(key, _convert_value(value)) for key, value in zip(array.axes.labels[0], array.data)])

# ---------- IO methods ----------
def to_hdf(self, hdfstore, key=None):
if len(self):
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
attrs.metadata = self

@classmethod
def from_hdf(cls, hdfstore, key=None):
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
if 'metadata' in attrs:
return attrs.metadata
else:
return None
55 changes: 29 additions & 26 deletions larray/core/session.py
@@ -1,22 +1,41 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

import fnmatch
import os
import sys
import re
import fnmatch
import sys
import warnings
from collections import OrderedDict, Iterable

import numpy as np

from larray.core.metadata import Metadata
from larray.core.group import Group
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence
from larray.core.axis import Axis
from larray.core.constants import nan
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring
from larray.core.group import Group
from larray.core.metadata import Metadata
from larray.inout.session import ext_default_engine, get_file_handler
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring


def _get_handler(engine, fname, overwrite, **kwargs):
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
if engine == 'hdf':
engine_hdf = 'auto'
if '_hdf' in engine:
engine_hdf, engine = engine.split('_')
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, overwrite, kwargs['sep'])
elif engine == 'hdf':
handler = handler_cls(fname, overwrite, engine=engine_hdf)
else:
handler = handler_cls(fname, overwrite)
return handler
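
For clarity, a rough trace of how the new _get_handler helper resolves the hdf engine variants. The ext_default_engine mapping is defined in larray.inout.session and not shown in this diff, so the 'auto' case below is an assumption:

>>> # 'tables_hdf' is split into engine_hdf='tables' and engine='hdf'
>>> _get_handler('tables_hdf', 'dump.h5', True)    # doctest: +SKIP
>>> # 'pandas_hdf' is split into engine_hdf='pandas' and engine='hdf'
>>> _get_handler('pandas_hdf', 'dump.h5', True)    # doctest: +SKIP
>>> # assuming ext_default_engine['h5'] == 'hdf', 'auto' falls back to engine_hdf='auto'
>>> _get_handler('auto', 'dump.h5', True)          # doctest: +SKIP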


# XXX: inherit from OrderedDict or LArray?
@@ -358,7 +377,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
List of objects to load.
If `fname` is None, list of paths to CSV files.
Defaults to all valid objects present in the file/directory.
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
Load using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
display : bool, optional
Whether or not to display which file is being worked on. Defaults to False.
@@ -415,15 +434,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
engine = ext_default_engine['csv']
else:
raise ValueError("List of paths to only CSV files expected. Got {}".format(names))
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, kwargs['sep'])
else:
handler = handler_cls(fname)
handler = _get_handler(engine, fname, False, **kwargs)
metadata, objects = handler.read(names, display=display, **kwargs)
for k, v in objects.items():
self[k] = v
@@ -442,7 +453,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
List of names of LArray/Axis/Group objects to dump.
If `fname` is None, list of paths to CSV files.
Defaults to all objects present in the Session.
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
Dump using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
overwrite: bool, optional
Whether or not to overwrite an existing file, if any. Ignored for CSV files and 'pandas_excel' engine.
@@ -482,15 +493,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
>>> # replace arr1 and add arr4 in file output.h5
>>> s2.save('output.h5', overwrite=False) # doctest: +SKIP
"""
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, overwrite, kwargs['sep'])
else:
handler = handler_cls(fname, overwrite)
handler = _get_handler(engine, fname, overwrite, **kwargs)
meta = self.meta if overwrite else None
items = self.items()
if names is not None:
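
Finally, a sketch of the new 'tables_hdf' / 'pandas_hdf' choices at the Session level (session contents and file names below are hypothetical):

>>> from larray import Session, ndtest
>>> ses = Session()
>>> ses['arr1'] = ndtest((2, 3))
>>> # pytables-based writer
>>> ses.save('output.h5', engine='tables_hdf')   # doctest: +SKIP
>>> # pandas-based writer, e.g. to update a file produced by LArray < 0.31
>>> ses.save('legacy.h5', engine='pandas_hdf')   # doctest: +SKIP
>>> ses2 = Session()
>>> ses2.load('output.h5', display=True)         # doctest: +SKIP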