From f88090a0283299fee5d897eb31924f3582e7f00b Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 15 Apr 2019 16:25:47 +0200 Subject: [PATCH 1/4] (issue 724) : - moved LHDFStore to inout/hdf.py - implemented PandasStorer and PytablesStorer - updated LArray/Axis/Group.to_hdf - removed Metadata.to_hdf and Metadata.from_hdf - renamed PandasHDFHandler as HDFHandler --- doc/source/api.rst | 19 ++ larray/__init__.py | 3 +- larray/core/array.py | 10 +- larray/core/axis.py | 12 +- larray/core/group.py | 20 +- larray/core/metadata.py | 14 - larray/core/session.py | 55 ++-- larray/inout/hdf.py | 622 +++++++++++++++++++++++++++++++---- larray/tests/test_array.py | 11 +- larray/tests/test_session.py | 44 ++- larray/util/misc.py | 22 -- 11 files changed, 667 insertions(+), 165 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index a23f3794b..6b6f54e62 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -706,6 +706,25 @@ ReportSheet ReportSheet.add_graphs ReportSheet.newline +HDF +=== + +.. autosummary:: + :toctree: _generated/ + + LHDFStore + +.. autosummary:: + :toctree: _generated/ + + LHDFStore.filename + LHDFStore.is_open + LHDFStore.keys + LHDFStore.items + LHDFStore.summary + LHDFStore.close + + .. _api-misc: Miscellaneous diff --git a/larray/__init__.py b/larray/__init__.py index ab65cf548..6e011a042 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -26,7 +26,7 @@ from larray.inout.pandas import from_frame, from_series from larray.inout.csv import read_csv, read_tsv, read_eurostat from larray.inout.excel import read_excel -from larray.inout.hdf import read_hdf +from larray.inout.hdf import read_hdf, LHDFStore from larray.inout.sas import read_sas from larray.inout.stata import read_stata from larray.inout.xw_excel import open_excel, Workbook @@ -78,6 +78,7 @@ 'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv', 'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata', 'open_excel', 'Workbook', 'ExcelReport', 'ReportSheet', + 'LHDFStore', # utils 'get_options', 'set_options', # viewer diff --git a/larray/core/array.py b/larray/core/array.py index 17612fa91..1e07e5a77 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -62,7 +62,7 @@ from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates, float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type, - renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip, + renamed_to, deprecate_kwarg, lazy_attribute, unique_multi, SequenceZip, Repeater, Product, ensure_no_numpy_type, PY2) from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION @@ -6734,13 +6734,9 @@ def to_hdf(self, filepath, key): >>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP """ - key = _translate_group_key_hdf(key) + from larray.inout.hdf import LHDFStore with LHDFStore(filepath) as store: - store.put(key, self.to_frame()) - attrs = store.get_storer(key).attrs - attrs.type = 'Array' - attrs.writer = 'LArray' - self.meta.to_hdf(store, key) + store.put(key, self) def to_stata(self, filepath_or_buffer, **kwargs): r""" diff --git a/larray/core/axis.py b/larray/core/axis.py index 0c11f0997..f7ee22e38 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -16,7 +16,7 @@ _range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups) from 
larray.util.oset import * from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id, - renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi, Product) + renamed_to, common_type, lazy_attribute, _isnoneslice, unique_multi, Product) np_frompyfunc = np.frompyfunc @@ -1344,19 +1344,13 @@ def to_hdf(self, filepath, key=None): >>> a.to_hdf('test.h5', 'axes/a') # doctest: +SKIP """ + from larray.inout.hdf import LHDFStore if key is None: if self.name is None: raise ValueError("Argument key must be provided explicitly in case of anonymous axis") key = self.name - key = _translate_group_key_hdf(key) - dtype_kind = self.labels.dtype.kind - data = np.char.encode(self.labels, 'utf-8') if dtype_kind == 'U' else self.labels - s = pd.Series(data=data, name=self.name) with LHDFStore(filepath) as store: - store.put(key, s) - store.get_storer(key).attrs.type = 'Axis' - store.get_storer(key).attrs.dtype_kind = dtype_kind - store.get_storer(key).attrs.wildcard = self.iswildcard + store.put(key, self) @property def dtype(self): diff --git a/larray/core/group.py b/larray/core/group.py index b73417381..faf58a5cb 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -13,7 +13,7 @@ from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray from larray.util.oset import * from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, _isintstring, - renamed_to, LHDFStore) + renamed_to) def _slice_to_str(key, repr_func=str): @@ -1453,27 +1453,13 @@ def to_hdf(self, filepath, key=None, axis_key=None): >>> # save both the group 'b01' and the associated axis 'b' >>> b01.to_hdf('test.h5') # doctest: +SKIP """ + from larray.inout.hdf import LHDFStore if key is None: if self.name is None: raise ValueError("Argument key must be provided explicitly in case of anonymous group") key = self.name - key = _translate_group_key_hdf(key) - if axis_key is None: - if self.axis.name is None: - raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous") - axis_key = self.axis.name - data = self.eval() - dtype_kind = data.dtype.kind if isinstance(data, np.ndarray) else '' - if dtype_kind == 'U': - data = np.char.encode(data, 'utf-8') - s = pd.Series(data=data, name=self.name) with LHDFStore(filepath) as store: - store.put(key, s) - store.get_storer(key).attrs.type = 'Group' - store.get_storer(key).attrs.dtype_kind = dtype_kind - if axis_key not in store: - self.axis.to_hdf(store, key=axis_key) - store.get_storer(key).attrs.axis_key = axis_key + store.put(key, self, axis_key=axis_key) # this makes range(LGroup(int)) possible def __index__(self): diff --git a/larray/core/metadata.py b/larray/core/metadata.py index c0d9f32b5..beb3d07e9 100644 --- a/larray/core/metadata.py +++ b/larray/core/metadata.py @@ -162,17 +162,3 @@ def _convert_value(value): return value return Metadata([(key, _convert_value(value)) for key, value in zip(array.axes.labels[0], array.data)]) - - # ---------- IO methods ---------- - def to_hdf(self, hdfstore, key=None): - if len(self): - attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs - attrs.metadata = self - - @classmethod - def from_hdf(cls, hdfstore, key=None): - attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs - if 'metadata' in attrs: - return attrs.metadata - else: - return None diff --git a/larray/core/session.py b/larray/core/session.py index 
110c0d1e3..6c597f854 100644 --- a/larray/core/session.py +++ b/larray/core/session.py @@ -1,22 +1,41 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function +import fnmatch import os -import sys import re -import fnmatch +import sys import warnings from collections import OrderedDict, Iterable import numpy as np -from larray.core.metadata import Metadata -from larray.core.group import Group +from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence from larray.core.axis import Axis from larray.core.constants import nan -from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray -from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring +from larray.core.group import Group +from larray.core.metadata import Metadata from larray.inout.session import ext_default_engine, get_file_handler +from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring + + +def _get_handler(engine, fname, overwrite, **kwargs): + if engine == 'auto': + _, ext = os.path.splitext(fname) + ext = ext.strip('.') if '.' in ext else 'csv' + engine = ext_default_engine[ext] + if engine == 'hdf': + engine_hdf = 'auto' + if '_hdf' in engine: + engine_hdf, engine = engine.split('_') + handler_cls = get_file_handler(engine) + if engine == 'pandas_csv' and 'sep' in kwargs: + handler = handler_cls(fname, overwrite, kwargs['sep']) + elif engine == 'hdf': + handler = handler_cls(fname, overwrite, engine=engine_hdf) + else: + handler = handler_cls(fname, overwrite) + return handler # XXX: inherit from OrderedDict or LArray? @@ -358,7 +377,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): List of objects to load. If `fname` is None, list of paths to CSV files. Defaults to all valid objects present in the file/directory. - engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional + engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional Load using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension). display : bool, optional Whether or not to display which file is being worked on. Defaults to False. @@ -415,15 +434,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs): engine = ext_default_engine['csv'] else: raise ValueError("List of paths to only CSV files expected. Got {}".format(names)) - if engine == 'auto': - _, ext = os.path.splitext(fname) - ext = ext.strip('.') if '.' in ext else 'csv' - engine = ext_default_engine[ext] - handler_cls = get_file_handler(engine) - if engine == 'pandas_csv' and 'sep' in kwargs: - handler = handler_cls(fname, kwargs['sep']) - else: - handler = handler_cls(fname) + handler = _get_handler(engine, fname, False, **kwargs) metadata, objects = handler.read(names, display=display, **kwargs) for k, v in objects.items(): self[k] = v @@ -442,7 +453,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, List of names of LArray/Axis/Group objects to dump. If `fname` is None, list of paths to CSV files. Defaults to all objects present in the Session. 
- engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional + engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional Dump using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension). overwrite: bool, optional Whether or not to overwrite an existing file, if any. Ignored for CSV files and 'pandas_excel' engine. @@ -482,15 +493,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False, >>> # replace arr1 and add arr4 in file output.h5 >>> s2.save('output.h5', overwrite=False) # doctest: +SKIP """ - if engine == 'auto': - _, ext = os.path.splitext(fname) - ext = ext.strip('.') if '.' in ext else 'csv' - engine = ext_default_engine[ext] - handler_cls = get_file_handler(engine) - if engine == 'pandas_csv' and 'sep' in kwargs: - handler = handler_cls(fname, overwrite, kwargs['sep']) - else: - handler = handler_cls(fname, overwrite) + handler = _get_handler(engine, fname, overwrite, **kwargs) meta = self.meta if overwrite else None items = self.items() if names is not None: diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 92bbc7516..14d6d6b46 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -1,20 +1,564 @@ from __future__ import absolute_import, print_function +import os import warnings import numpy as np -from pandas import HDFStore +from pandas import Series, HDFStore from larray.core.array import LArray from larray.core.axis import Axis from larray.core.constants import nan from larray.core.group import Group, LGroup, _translate_group_key_hdf from larray.core.metadata import Metadata -from larray.util.misc import LHDFStore -from larray.inout.session import register_file_handler +from larray.example import get_example_filepath from larray.inout.common import FileHandler from larray.inout.pandas import df_aslarray -from larray.example import get_example_filepath +from larray.inout.session import register_file_handler + + +class ClosedFileError(Exception): + pass + + +class AbstractStorer(object): + def __init__(self, filepath, mode=None, complevel=None, complib=None, fletcher32=False, **kwargs): + pandas_hdfstore = HDFStore(filepath, mode, complevel, complib, fletcher32, **kwargs) + self._pandas_hdfstore = pandas_hdfstore + self._path = pandas_hdfstore._path + self._mode = pandas_hdfstore._mode + self._handle = pandas_hdfstore._handle + self._complevel = pandas_hdfstore._complevel + self._complib = pandas_hdfstore._complib + self._fletcher32 = pandas_hdfstore._fletcher32 + self._filters = pandas_hdfstore._filters + + @property + def root(self): + """ return the root node """ + return self._pandas_hdfstore.root + + @property + def attrs(self): + return self.root._v_attrs + + @property + def is_open(self): + """ + return a boolean indicating whether the file is open + """ + return self._pandas_hdfstore.is_open + + def __contains__(self, key): + return key in self._pandas_hdfstore + + def __len__(self): + return len(self._pandas_hdfstore) + + def get_node(self, key): + return self._pandas_hdfstore.get_node(key) + + def close(self): + self._pandas_hdfstore.close() + + def remove(self, key): + """ + Remove LArray object. + + Parameters + ---------- + key : str + Key associated to the object to be removed. + """ + s = self._pandas_hdfstore.get_storer(key) + s.group._f_remove(recursive=True) + + def groups(self): + """ + return a list of all groups containing an LArray object. 
+ """ + raise NotImplementedError() + + def _check_if_open(self): + if not self.is_open: + raise ClosedFileError("{} file is not open!".format(self._path)) + + def _get(self, key, **kwargs): + raise NotImplementedError() + + def get(self, key, **kwargs): + key = _translate_group_key_hdf(key) + return self._get(key, **kwargs) + + def _put(self, key, value, **kwargs): + raise NotImplementedError() + + def put(self, key, value, **kwargs): + key = _translate_group_key_hdf(key) + self._put(key, value, **kwargs) + + +class PytablesStorer(AbstractStorer): + """ + Read and write LArray objects into HDF5 file using pytables. + """ + def __init__(self, filepath, mode=None, complevel=None, complib=None, fletcher32=False, **kwargs): + AbstractStorer.__init__(self, filepath, mode, complevel, complib, fletcher32, **kwargs) + + def groups(self): + import tables + self._check_if_open() + return [g for g in self._handle.walk_groups() + if (not isinstance(g, tables.link.Link) and 'type' in g._v_attrs)] + + def _read_data(self, group, name, attrs): + dtype = np.dtype(attrs['dtype']) + data = group[name].read() + if dtype.kind == 'U': + data = np.char.decode(data, 'utf-8') + if dtype.kind == 'O': + data = data[0] + data = data.astype(dtype) + return data + + def _read_group(self, group): + def _get_name(attrs): + name = attrs['name'] + return name if name is None else str(name) + + attrs = group._v_attrs + _type = attrs.type if 'type' in attrs else 'Array' + _meta = attrs.metadata if 'metadata' in attrs else None + res = None + if _type == 'Array': + axes_keys = [n._v_pathname for n in group if n._v_name.startswith('axis')] + axes = [self._get(axis_key) for axis_key in axes_keys] + data = self._read_data(group, 'data', attrs) + res = LArray(data=data, axes=axes) + if _meta is not None: + res.meta = _meta + elif _type == 'Axis': + name = _get_name(attrs) + labels = self._read_data(group, 'labels', attrs) + res = Axis(labels=labels, name=name) + res._iswildcard = attrs['wildcard'] + elif _type == 'Group': + axis = self._get(attrs['axis_key']) + name = _get_name(attrs) + key = self._read_data(group, 'key', attrs) + res = LGroup(key=key, name=name, axis=axis) + return res + + def _get(self, key, **kwargs): + group = self.get_node(key) + if group is None: + raise KeyError('No object named {} in the file'.format(key)) + return self._read_group(group) + + def _dump_data(self, group, name, data, attrs): + import tables + data = np.asarray(data) + dtype = data.dtype + attrs['dtype'] = dtype + # https://www.pytables.org/MIGRATING_TO_3.x.html#unicode-all-the-strings + # Warning: In Python 3, all strings are natively in Unicode. + # This introduces some difficulties, as the native HDF5 string format is not Unicode-compatible. + # To minimize explicit conversion troubles when writing, especially when creating data sets + # from existing Python objects, string objects are implicitly cast to non-Unicode byte strings + # for HDF5 storage by default. + # To avoid such problem, one way is to use the VLArray class and dump unicode string arrays + # as object arrays. 
+ if dtype.kind == 'O': + vlarr = self._handle.create_vlarray(group, name=name, filters=self._filters, atom=tables.ObjectAtom()) + vlarr.append(data) + else: + if dtype.kind == 'U': + data = np.char.encode(data, 'utf-8') + self._handle.create_carray(group, name=name, obj=data, filters=self._filters) + + def _write_obj(self, group, value, **kwargs): + if isinstance(value, LArray): + attrs = group._v_attrs + attrs['type'] = 'Array' + # dump axes + for axis in value.axes: + axis_key = 'axis_{}'.format(value.axes.axis_id(axis)) + axis_group = self._handle.create_group(group, axis_key) + self._write_obj(axis_group, axis) + # dump data + self._dump_data(group, name='data', data=value.data, attrs=attrs) + # dump metadata + self._write_obj(group, value.meta) + elif isinstance(value, Axis): + attrs = group._v_attrs + attrs['type'] = 'Axis' + attrs['name'] = value.name + attrs['wildcard'] = value.iswildcard + self._dump_data(group, name='labels', data=value.labels, attrs=attrs) + elif isinstance(value, Group): + axis_key = kwargs.pop('axis_key', None) + if axis_key is None: + if value.axis.name is None: + raise ValueError( + "Argument axis_key must be provided explicitly if the associated axis is anonymous") + axis_key = value.axis.name + if self.get_node(axis_key) is None: + self._put(axis_key, value.axis) + attrs = group._v_attrs + attrs['type'] = 'Group' + attrs['name'] = value.name + attrs['axis_key'] = axis_key + self._dump_data(group, name='key', data=value.eval(), attrs=attrs) + elif isinstance(value, Metadata): + if len(value): + group._v_attrs['metadata'] = value + else: + warnings.warn('{}: Type {} is currently not supported'.format(group._v_name, type(value))) + + def _write_group(self, key, value, **kwargs): + # remove the group if exists already + group = self.get_node(key) + if group is not None: + self._handle.remove_node(group, recursive=True) + paths = key.split('/') + # recursively create the parent groups + path = '/' + for p in paths: + if not len(p): + continue + new_path = path + if not path.endswith('/'): + new_path += '/' + new_path += p + group = self.get_node(new_path) + if group is None: + group = self._handle.create_group(path, p) + path = new_path + self._write_obj(group, value, **kwargs) + + def _put(self, key, value, **kwargs): + key = _translate_group_key_hdf(key) + self._write_group(key, value, **kwargs) + + +class PandasStorer(AbstractStorer): + """ + Read and write LArray objects into HDF5 file using pandas. 
+ """ + def __init__(self, filepath, mode=None, complevel=None, complib=None, fletcher32=False, **kwargs): + AbstractStorer.__init__(self, filepath, mode, complevel, complib, fletcher32, **kwargs) + + def groups(self): + return self._pandas_hdfstore.groups() + + def _get(self, key, **kwargs): + name = kwargs.pop('name', None) + pd_obj = self._pandas_hdfstore.get(key) + attrs = self._pandas_hdfstore.get_storer(key).attrs + _writer = attrs.writer if 'writer' in attrs else None + # for backward compatibility but any object read from an hdf file should have an attribute 'type' + _type = attrs.type if 'type' in attrs else 'Array' + _meta = attrs.metadata if 'metadata' in attrs else None + res = None + if _type == 'Array': + sort_rows = kwargs.pop('sort_rows', False) + sort_columns = kwargs.pop('sort_columns', False) + fill_value = kwargs.pop('fill_value', nan) + # cartesian product is not necessary if the array was written by LArray + cartesian_prod = _writer != 'LArray' + res = df_aslarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, + parse_header=False, cartesian_prod=cartesian_prod) + if _meta is not None: + res.meta = _meta + elif _type == 'Axis': + if name is None: + name = str(pd_obj.name) + if name == 'None': + name = None + labels = pd_obj.values + dtype = attrs['dtype'] if 'dtype' in attrs else None + if dtype is not None and dtype.kind == 'U': + labels = np.char.decode(labels, 'utf-8') + res = Axis(labels=labels, name=name) + res._iswildcard = attrs['wildcard'] + elif _type == 'Group': + if name is None: + name = str(pd_obj.name) + if name == 'None': + name = None + key = pd_obj.values + dtype = attrs['dtype'] if 'dtype' in attrs else None + if dtype is not None and dtype.kind == 'U': + key = np.char.decode(key, 'utf-8') + axis = self._get(attrs['axis_key']) + res = LGroup(key=key, name=name, axis=axis) + return res + + def _put(self, key, value, **kwargs): + pd_store = self._pandas_hdfstore + if isinstance(value, LArray): + pd_store.put(key, value.to_frame()) + attrs = pd_store.get_storer(key).attrs + attrs.type = 'Array' + attrs.writer = 'LArray' + self._put(key, value.meta) + elif isinstance(value, Axis): + dtype = value.dtype + labels = np.char.encode(value.labels, 'utf-8') if dtype.kind == 'U' else value.labels + s = Series(data=labels, name=value.name) + pd_store.put(key, s) + attrs = pd_store.get_storer(key).attrs + attrs.type = 'Axis' + attrs.dtype = dtype + attrs.wildcard = value.iswildcard + elif isinstance(value, Group): + axis_key = kwargs.pop('axis_key', None) + if axis_key is None: + if value.axis.name is None: + raise ValueError( + "Argument axis_key must be provided explicitly if the associated axis is anonymous") + axis_key = value.axis.name + if axis_key not in pd_store: + self._put(axis_key, value.axis) + data = value.eval() + dtype = data.dtype if isinstance(data, np.ndarray) else None + if dtype is not None and dtype.kind == 'U': + data = np.char.encode(data, 'utf-8') + s = Series(data=data, name=value.name) + pd_store.put(key, s) + attrs = pd_store.get_storer(key).attrs + attrs.type = 'Group' + attrs.dtype = dtype + attrs.axis_key = axis_key + elif isinstance(value, Metadata): + if len(value): + pd_store.get_storer(key).attrs.metadata = value + else: + warnings.warn('{}: Type {} is currently not supported'.format(key, type(value))) + + +_hdf_store_cls = {'pandas': PandasStorer, 'tables': PytablesStorer} + + +class LHDFStore(object): + """Context manager for reading and writing LArray objects. 
+
+    Parameters
+    ----------
+    filepath : str or PathLike object
+        File path to HDF5 file
+    mode : {'a', 'w', 'r', 'r+'}, default 'a'
+
+        ``'r'``
+            Read-only; no data can be modified.
+        ``'w'``
+            Write; a new file is created (an existing file with the same
+            name would be deleted).
+        ``'a'``
+            Append; an existing file is opened for reading and writing,
+            and if the file does not exist it is created.
+        ``'r+'``
+            It is similar to ``'a'``, but the file must already exist.
+    complevel : int, 0-9, default None
+        Specifies a compression level for data.
+        A value of 0 disables compression.
+    complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
+        Specifies the compression library to be used.
+    fletcher32 : bool, default False
+        If applying compression use the fletcher32 checksum
+    engine: {'auto', 'tables', 'pandas'}, optional
+        Load using `engine`. Use 'pandas' to read an HDF file generated with a LArray version previous to 0.31.
+        Defaults to 'auto' (use default engine if you don't know the LArray version used to produce the HDF file).
+
+    Examples
+    --------
+    # TODO : write examples
+    """
+    def __init__(self, filepath, mode=None, complevel=None, complib=None,
+                 fletcher32=False, engine='auto', **kwargs):
+        try:
+            import tables
+        except ImportError:
+            raise ImportError('LHDFStore requires PyTables to be installed')
+
+        is_new_file = not os.path.exists(filepath)
+        if is_new_file and mode in ['r', 'r+']:
+            raise ValueError('The file {} has not been found.'.format(filepath))
+
+        if engine == 'auto':
+            if is_new_file:
+                engine = 'tables'
+            else:
+                import tables
+                handle = tables.open_file(filepath, mode='r')
+                # for backward compatibility, we assume that the used engine is 'pandas'
+                # if not found among root attributes
+                engine = getattr(handle.root._v_attrs, 'engine', 'pandas')
+                handle.close()
+        if engine not in _hdf_store_cls.keys():
+            raise ValueError("Value of the 'engine' argument must be in list: "
+                             + ", ".join(['auto'] + list(_hdf_store_cls.keys())))
+
+        storer = _hdf_store_cls[engine](filepath, mode, complevel, complib, fletcher32, **kwargs)
+
+        if is_new_file or mode == 'w':
+            storer.attrs['engine'] = engine
+
+        if getattr(storer.attrs, 'engine', 'pandas') != engine:
+            raise Exception("Cannot {action} file {file}. Passed value for 'engine' argument was {engine_arg} "
+                            "while the file {file} was originally created using "
+                            "{engine}".format(action="read from" if mode == 'r' else "write into", file=filepath,
+                                              engine_arg=engine, engine=storer.attrs['engine']))
+
+        self._storer = storer
+
+    def __fspath__(self):
+        return self._storer._path
+
+    @property
+    def filename(self):
+        """ File path to HDF5 file """
+        return self._storer._path
+
+    @property
+    def is_open(self):
+        """
+        Return a boolean indicating whether the file is open
+        """
+        return self._storer.is_open
+
+    @property
+    def meta(self):
+        return getattr(self._storer.attrs, 'metadata', Metadata())
+
+    @meta.setter
+    def meta(self, meta):
+        self._storer.attrs.metadata = meta
+
+    def __getitem__(self, key):
+        return self.get(key)
+
+    def __setitem__(self, key, value):
+        self.put(key, value)
+
+    def __delitem__(self, key):
+        return self._storer.remove(key)
+
+    # TODO: not sure about this. Should be implemented in LazySession.
+ def __getattr__(self, key): + """ allow attribute access to get stores """ + if key in self.keys(): + return self.get(key) + else: + raise AttributeError("'{}' object has no attribute '{}'".format(self.__class__.__name__, key)) + + def __contains__(self, key): + return key in self._storer + + def __len__(self): + return len(self._storer) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def close(self): + """ + Close the PyTables file handle + """ + self._storer.close() + + def keys(self): + """ + Return a (potentially unordered) list of the keys corresponding to the + objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. + have the leading '/' + """ + return [n._v_pathname for n in self._storer.groups()] + + def __iter__(self): + return iter(self.keys()) + + def items(self): + """ + Iterate on key->group + """ + for g in self._storer.groups(): + yield g._v_pathname, g + + iteritems = items + + def summary(self): + """ + Return a list of LArray stored in the HDF5 file. + + Examples + -------- + TODO: write examples + """ + if self.is_open: + res = "" + for name, group in self.items(): + _type = getattr(group._v_attrs, 'type', 'Unknown') + res += "{}: {}\n".format(name, _type) + return res + else: + return "File {} is CLOSED".format(self.filename) + + def get(self, key, **kwargs): + """ + Retrieve a larray object stored in file. + + Parameters + ---------- + key : str + Name of the object to read. + **kwargs + + * fill_value : scalar or LArray, optional + Value used to fill cells corresponding to label combinations which are not present in the input. + Defaults to NaN. + * sort_rows : bool, optional + Whether or not to sort the rows alphabetically (sorting is more efficient than not sorting). + Defaults to False. + * sort_columns : bool, optional + Whether or not to sort the columns alphabetically (sorting is more efficient than not sorting). + Defaults to False. + * name : str, optional + Name of the axis or group to return. If None, name is set to passed key. + Defaults to None. + + Returns + ------- + obj : same type as object stored in file. + + Examples + -------- + TODO : write examples + """ + return self._storer.get(key, **kwargs) + + def put(self, key, value, **kwargs): + """ + Dump a larray object in file. + + Parameters + ---------- + key: str + Name of the object to dump. + value: LArray, Axis or Group + Object to dump. + **kwargs + + * ??? + + Examples + -------- + TODO : write examples + """ + self._storer.put(key, value, **kwargs) def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, sort_columns=False, @@ -23,10 +567,10 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s Parameters ---------- - filepath_or_buffer : str or pandas.HDFStore + filepath_or_buffer : str or LArrayHDFStore Path and name where the HDF5 file is stored or a HDFStore object. key : str or Group - Name of the array. + Name of the object to read. fill_value : scalar or LArray, optional Value used to fill cells corresponding to label combinations which are not present in the input. Defaults to NaN. @@ -66,56 +610,25 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s fill_value = na warnings.warn("read_hdf `na` argument has been renamed to `fill_value`. 
Please use that instead.", FutureWarning, stacklevel=2) - - key = _translate_group_key_hdf(key) - res = None - with LHDFStore(filepath_or_buffer) as store: - pd_obj = store.get(key) - attrs = store.get_storer(key).attrs - writer = attrs.writer if 'writer' in attrs else None - # for backward compatibility but any object read from an hdf file should have an attribute 'type' - _type = attrs.type if 'type' in attrs else 'Array' - _meta = attrs.metadata if 'metadata' in attrs else None - if _type == 'Array': - # cartesian product is not necessary if the array was written by LArray - cartesian_prod = writer != 'LArray' - res = df_aslarray(pd_obj, sort_rows=sort_rows, sort_columns=sort_columns, fill_value=fill_value, - parse_header=False, cartesian_prod=cartesian_prod) - if _meta is not None: - res.meta = _meta - elif _type == 'Axis': - if name is None: - name = str(pd_obj.name) - if name == 'None': - name = None - labels = pd_obj.values - if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U': - labels = np.char.decode(labels, 'utf-8') - res = Axis(labels=labels, name=name) - res._iswildcard = attrs['wildcard'] - elif _type == 'Group': - if name is None: - name = str(pd_obj.name) - if name == 'None': - name = None - key = pd_obj.values - if 'dtype_kind' in attrs and attrs['dtype_kind'] == 'U': - key = np.char.decode(key, 'utf-8') - axis = read_hdf(filepath_or_buffer, attrs['axis_key']) - res = LGroup(key=key, name=name, axis=axis) + with LHDFStore(filepath_or_buffer, **kwargs) as store: + res = store.get(key, fill_value=fill_value, sort_rows=sort_rows, sort_columns=sort_columns, name=name) return res -@register_file_handler('pandas_hdf', ['h5', 'hdf']) -class PandasHDFHandler(FileHandler): +@register_file_handler('hdf', ['h5', 'hdf']) +class HDFHandler(FileHandler): r""" Handler for HDF5 files using Pandas. 
""" + def __init__(self, fname, overwrite_file=False, engine='auto'): + super(HDFHandler, self).__init__(fname, overwrite_file) + self.engine = engine + def _open_for_read(self): - self.handle = HDFStore(self.fname, mode='r') + self.handle = LHDFStore(self.fname, mode='r', engine=self.engine) def _open_for_write(self): - self.handle = HDFStore(self.fname) + self.handle = LHDFStore(self.fname, engine=self.engine) def list_items(self): keys = [key.strip('/') for key in self.handle.keys()] @@ -138,30 +651,25 @@ def _read_item(self, key, type, *args, **kwargs): kwargs['name'] = key else: raise TypeError() - return read_hdf(self.handle, hdf_key, *args, **kwargs) + return self.handle.get(hdf_key, **kwargs) def _dump_item(self, key, value, *args, **kwargs): if isinstance(value, LArray): hdf_key = '/' + key - value.to_hdf(self.handle, hdf_key, *args, **kwargs) elif isinstance(value, Axis): hdf_key = '__axes__/' + key - value.to_hdf(self.handle, hdf_key, *args, **kwargs) elif isinstance(value, Group): hdf_key = '__groups__/' + key - hdf_axis_key = '__axes__/' + value.axis.name - value.to_hdf(self.handle, hdf_key, hdf_axis_key, *args, **kwargs) + kwargs['axis_key'] = '__axes__/' + value.axis.name else: raise TypeError() + self.handle.put(hdf_key, value, **kwargs) def _read_metadata(self): - metadata = Metadata.from_hdf(self.handle) - if metadata is None: - metadata = Metadata() - return metadata + return self.handle.meta def _dump_metadata(self, metadata): - metadata.to_hdf(self.handle) + self.handle.meta = metadata def close(self): self.handle.close() diff --git a/larray/tests/test_array.py b/larray/tests/test_array.py index 0615f987e..50b51f01f 100644 --- a/larray/tests/test_array.py +++ b/larray/tests/test_array.py @@ -23,7 +23,8 @@ from_lists, from_string, open_excel, from_frame, sequence, nan, IGroup) from larray.inout.pandas import from_series from larray.core.axis import _to_ticks, _to_key -from larray.util.misc import StringIO, LHDFStore +from larray.util.misc import StringIO +from larray.inout.hdf import LHDFStore from larray.core.metadata import Metadata @@ -119,11 +120,9 @@ def test_read_set_update_delete_metadata(meta, tmpdir): def test_metadata_hdf(meta, tmpdir): key = 'meta' fname = os.path.join(tmpdir.strpath, 'test_metadata.hdf') - with LHDFStore(fname) as store: - ndtest(3).to_hdf(store, key) - meta.to_hdf(store, key) - meta2 = Metadata.from_hdf(store, key) - assert meta2 == meta + ndtest(3, meta=meta).to_hdf(fname, key) + arr = read_hdf(fname, key) + assert arr.meta == meta def test_meta_arg_array_creation(array): diff --git a/larray/tests/test_session.py b/larray/tests/test_session.py index e62f29e33..2652677a3 100644 --- a/larray/tests/test_session.py +++ b/larray/tests/test_session.py @@ -153,13 +153,13 @@ def test_names(session): assert session.names == ['a', 'a01', 'b', 'b12', 'c', 'd', 'e', 'f', 'g', 'h', 'i'] -def test_h5_io(tmpdir, session, meta): +def test_h5_pandas_io(tmpdir, session, meta): fpath = tmp_path(tmpdir, 'test_session.h5') session.meta = meta - session.save(fpath) + session.save(fpath, engine='pandas_hdf') s = Session() - s.load(fpath) + s.load(fpath, engine='pandas_hdf') # HDF does *not* keep ordering (ie, keys are always sorted + # read Axis objects, then Groups objects and finally LArray objects) assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] @@ -169,9 +169,41 @@ def test_h5_io(tmpdir, session, meta): a2 = Axis('a=0..2') a2_01 = a2['0,1'] >> 'a01' e2 = ndtest((a2, 'b=b0..b2')) - Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False) + 
Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False, engine='pandas_hdf') + s = Session() + s.load(fpath, engine='pandas_hdf') + assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] + assert s['a'].equals(a2) + assert all(s['a01'] == a2_01) + assert_array_nan_equal(s['e'], e2) + assert s.meta == meta + + # load only some objects + s = Session() + s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='pandas_hdf') + assert list(s.keys()) == ['a', 'a01', 'e', 'f'] + assert s.meta == meta + + +def test_h5_tables_io(tmpdir, session, meta): + fpath = tmp_path(tmpdir, 'test_session.h5') + session.meta = meta + session.save(fpath, engine='tables_hdf') + + s = Session() + s.load(fpath, engine='tables_hdf') + # HDF does *not* keep ordering (ie, keys are always sorted + + # read Axis objects, then Groups objects and finally LArray objects) + assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] + assert s.meta == meta + + # update a Group + an Axis + an array (overwrite=False) + a2 = Axis('a=0..2') + a2_01 = a2['0,1'] >> 'a01' + e2 = ndtest((a2, 'b=b0..b2')) + Session(a=a2, a01=a2_01, e=e2).save(fpath, overwrite=False, engine='tables_hdf') s = Session() - s.load(fpath) + s.load(fpath, engine='tables_hdf') assert list(s.keys()) == ['a', 'b', 'a01', 'b12', 'e', 'f', 'g'] assert s['a'].equals(a2) assert all(s['a01'] == a2_01) @@ -180,7 +212,7 @@ def test_h5_io(tmpdir, session, meta): # load only some objects s = Session() - s.load(fpath, names=['a', 'a01', 'e', 'f']) + s.load(fpath, names=['a', 'a01', 'e', 'f'], engine='tables_hdf') assert list(s.keys()) == ['a', 'a01', 'e', 'f'] assert s.meta == meta diff --git a/larray/util/misc.py b/larray/util/misc.py index b61547a0f..5aab1b25a 100644 --- a/larray/util/misc.py +++ b/larray/util/misc.py @@ -26,8 +26,6 @@ except TypeError: pass -import pandas as pd - if sys.version_info[0] < 3: basestring = basestring bytes = str @@ -772,26 +770,6 @@ def common_type(arrays): return object -class LHDFStore(object): - """Context manager for pandas HDFStore""" - def __init__(self, filepath_or_buffer, **kwargs): - if isinstance(filepath_or_buffer, pd.HDFStore): - if not filepath_or_buffer.is_open: - raise IOError('The HDFStore must be open for reading.') - self.store = filepath_or_buffer - self.close_store = False - else: - self.store = pd.HDFStore(filepath_or_buffer, **kwargs) - self.close_store = True - - def __enter__(self): - return self.store - - def __exit__(self, type_, value, traceback): - if self.close_store: - self.store.close() - - class SequenceZip(object): """ Represents the "combination" of several sequences. From 18f0e77753d20ef90b92aee3b8d4e007e4c60f35 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 26 Aug 2019 09:22:50 +0200 Subject: [PATCH 2/4] removed LHDFStore from api.rst file and __init__.py module --- doc/source/api.rst | 18 ------------------ larray/__init__.py | 3 +-- larray/inout/hdf.py | 8 ++++---- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 6b6f54e62..31239c395 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -706,24 +706,6 @@ ReportSheet ReportSheet.add_graphs ReportSheet.newline -HDF -=== - -.. autosummary:: - :toctree: _generated/ - - LHDFStore - -.. autosummary:: - :toctree: _generated/ - - LHDFStore.filename - LHDFStore.is_open - LHDFStore.keys - LHDFStore.items - LHDFStore.summary - LHDFStore.close - .. 
_api-misc: diff --git a/larray/__init__.py b/larray/__init__.py index 6e011a042..ab65cf548 100644 --- a/larray/__init__.py +++ b/larray/__init__.py @@ -26,7 +26,7 @@ from larray.inout.pandas import from_frame, from_series from larray.inout.csv import read_csv, read_tsv, read_eurostat from larray.inout.excel import read_excel -from larray.inout.hdf import read_hdf, LHDFStore +from larray.inout.hdf import read_hdf from larray.inout.sas import read_sas from larray.inout.stata import read_stata from larray.inout.xw_excel import open_excel, Workbook @@ -78,7 +78,6 @@ 'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv', 'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata', 'open_excel', 'Workbook', 'ExcelReport', 'ReportSheet', - 'LHDFStore', # utils 'get_options', 'set_options', # viewer diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 14d6d6b46..8d77edb26 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -360,12 +360,12 @@ class LHDFStore(object): ``'r+'`` It is similar to ``'a'``, but the file must already exist. complevel : int, 0-9, default None - Specifies a compression level for data. - A value of 0 disables compression. + Specifies a compression level for data. + A value of 0 disables compression. complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' - Specifies the compression library to be used. + Specifies the compression library to be used. fletcher32 : bool, default False - If applying compression use the fletcher32 checksum + If applying compression use the fletcher32 checksum engine: {'auto', 'tables', 'pandas'}, optional Load using `engine`. Use 'pandas' to read an HDF file generated with a LArray version previous to 0.31. Defaults to 'auto' (use default engine if you don't know the LArray version used to produced the HDF file). From 397012de14b7a7abc8fbe7ee8cce58434a584fc6 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 26 Aug 2019 10:07:43 +0200 Subject: [PATCH 3/4] added engine argument to the read_hdf method and all to_hdf methods --- larray/core/array.py | 7 +++++-- larray/core/axis.py | 7 +++++-- larray/core/group.py | 7 +++++-- larray/inout/hdf.py | 13 ++++++++----- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/larray/core/array.py b/larray/core/array.py index 1e07e5a77..a07d48ff9 100644 --- a/larray/core/array.py +++ b/larray/core/array.py @@ -6701,7 +6701,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr series = self.to_series(value_name, dropna is not None) series.to_csv(filepath, sep=sep, na_rep=na_rep, header=True, **kwargs) - def to_hdf(self, filepath, key): + def to_hdf(self, filepath, key, engine='auto'): r""" Writes array to a HDF file. @@ -6714,6 +6714,9 @@ def to_hdf(self, filepath, key): Path where the hdf file has to be written. key : str or Group Key (path) of the array within the HDF file (see Notes below). + engine: {'auto', 'tables', 'pandas'}, optional + Dump using `engine`. Use 'pandas' to update an HDF file generated with a LArray version previous to 0.31. + Defaults to 'auto' (use default engine if you don't know the LArray version used to produced the HDF file). 
Notes ----- @@ -6735,7 +6738,7 @@ def to_hdf(self, filepath, key): >>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP """ from larray.inout.hdf import LHDFStore - with LHDFStore(filepath) as store: + with LHDFStore(filepath, engine=engine) as store: store.put(key, self) def to_stata(self, filepath_or_buffer, **kwargs): diff --git a/larray/core/axis.py b/larray/core/axis.py index f7ee22e38..74de282f2 100644 --- a/larray/core/axis.py +++ b/larray/core/axis.py @@ -1304,7 +1304,7 @@ def align(self, other, join='outer'): else: return self - def to_hdf(self, filepath, key=None): + def to_hdf(self, filepath, key=None, engine='auto'): r""" Writes axis to a HDF file. @@ -1319,6 +1319,9 @@ def to_hdf(self, filepath, key=None): Key (path) of the axis within the HDF file (see Notes below). If None, the name of the axis is used. Defaults to None. + engine: {'auto', 'tables', 'pandas'}, optional + Dump using `engine`. Use 'pandas' to update an HDF file generated with a LArray version previous to 0.31. + Defaults to 'auto' (use default engine if you don't know the LArray version used to produced the HDF file). Notes ----- @@ -1349,7 +1352,7 @@ def to_hdf(self, filepath, key=None): if self.name is None: raise ValueError("Argument key must be provided explicitly in case of anonymous axis") key = self.name - with LHDFStore(filepath) as store: + with LHDFStore(filepath, engine=engine) as store: store.put(key, self) @property diff --git a/larray/core/group.py b/larray/core/group.py index faf58a5cb..313360bb8 100644 --- a/larray/core/group.py +++ b/larray/core/group.py @@ -1396,7 +1396,7 @@ def containing(self, substring): substring = substring.eval() return LGroup([v for v in self.eval() if substring in v], axis=self.axis) - def to_hdf(self, filepath, key=None, axis_key=None): + def to_hdf(self, filepath, key=None, axis_key=None, engine='auto'): r""" Writes group to a HDF file. @@ -1417,6 +1417,9 @@ def to_hdf(self, filepath, key=None, axis_key=None): Key (path) of the associated axis in the HDF file (see Notes below). If None, the name of the axis associated with the group is used. Defaults to None. + engine: {'auto', 'tables', 'pandas'}, optional + Dump using `engine`. Use 'pandas' to update an HDF file generated with a LArray version previous to 0.31. + Defaults to 'auto' (use default engine if you don't know the LArray version used to produced the HDF file). Notes ----- @@ -1458,7 +1461,7 @@ def to_hdf(self, filepath, key=None, axis_key=None): if self.name is None: raise ValueError("Argument key must be provided explicitly in case of anonymous group") key = self.name - with LHDFStore(filepath) as store: + with LHDFStore(filepath, engine=engine) as store: store.put(key, self, axis_key=axis_key) # this makes range(LGroup(int)) possible diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 8d77edb26..9e031158a 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -346,7 +346,7 @@ class LHDFStore(object): Parameters ---------- filepath : str or PathLike object - File path to HDF5 file + File path to HDF5 file. 
mode : {'a', 'w', 'r', 'r+'}, default 'a' ``'r'`` @@ -562,13 +562,13 @@ def put(self, key, value, **kwargs): def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, sort_columns=False, - name=None, **kwargs): + name=None, engine='auto', **kwargs): r"""Reads an axis or group or array named key from a HDF5 file in filepath (path+name) Parameters ---------- - filepath_or_buffer : str or LArrayHDFStore - Path and name where the HDF5 file is stored or a HDFStore object. + filepath_or_buffer : str or PathLike object + Path and name where the HDF5 file is stored. key : str or Group Name of the object to read. fill_value : scalar or LArray, optional @@ -585,6 +585,9 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s name : str, optional Name of the axis or group to return. If None, name is set to passed key. Defaults to None. + engine: {'auto', 'tables', 'pandas'}, optional + Load using `engine`. Use 'pandas' to read an HDF file generated with a LArray version previous to 0.31. + Defaults to 'auto' (use default engine if you don't know the LArray version used to produced the HDF file). Returns ------- @@ -610,7 +613,7 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s fill_value = na warnings.warn("read_hdf `na` argument has been renamed to `fill_value`. Please use that instead.", FutureWarning, stacklevel=2) - with LHDFStore(filepath_or_buffer, **kwargs) as store: + with LHDFStore(filepath_or_buffer, engine=engine, **kwargs) as store: res = store.get(key, fill_value=fill_value, sort_rows=sort_rows, sort_columns=sort_columns, name=name) return res From fa1c2221ed08f0c43f2931df6e7cc6a10a046511 Mon Sep 17 00:00:00 2001 From: Alix Damman Date: Mon, 26 Aug 2019 11:25:05 +0200 Subject: [PATCH 4/4] added doctests for LHDFStore + updated LHDFStore.summary() method --- larray/inout/hdf.py | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py index 9e031158a..bfa440864 100644 --- a/larray/inout/hdf.py +++ b/larray/inout/hdf.py @@ -372,7 +372,33 @@ class LHDFStore(object): Examples -------- - # TODO : write examples + >>> from larray import ndtest + >>> with LHDFStore('hdf_file.h5') as s: + ... # dump and read an axis + ... s['a'] = Axis("a=a0..a2") + ... a = s['a'] + ... # dump and read a group + ... s['a01'] = a['a0,a1'] >> 'a01' + ... a01 = s['a01'] + ... # dump and read an array + ... s['arr'] = ndtest((3, 3)) + ... arr = s['arr'] + ... # add and read top level metadata + ... s.meta.author = 'John Smith' + ... metadata = s.meta + ... # get filepath + ... s.filename + ... # display list of items stored in the hdf file + ... s.keys() + ... # display list of items and their type + ... print(s.summary()) + 'hdf_file.h5' + ['/a', '/a01', '/arr', '/arr/axis_a', '/arr/axis_b'] + /a: Axis + /a01: Group + /arr: Array + /arr/axis_a: Axis + /arr/axis_b: Axis """ def __init__(self, filepath, mode=None, complevel=None, complib=None, fletcher32=False, engine='auto', **kwargs): @@ -475,6 +501,10 @@ def keys(self): Return a (potentially unordered) list of the keys corresponding to the objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. have the leading '/' + + See Also + -------- + LHDFStore """ return [n._v_pathname for n in self._storer.groups()] @@ -494,16 +524,13 @@ def summary(self): """ Return a list of LArray stored in the HDF5 file. 
-        Examples
+        See Also
         --------
-        TODO: write examples
+        LHDFStore
         """
         if self.is_open:
-            res = ""
-            for name, group in self.items():
-                _type = getattr(group._v_attrs, 'type', 'Unknown')
-                res += "{}: {}\n".format(name, _type)
-            return res
+            return '\n'.join(["{}: {}".format(name, getattr(group._v_attrs, 'type', 'Unknown'))
+                              for name, group in self.items()])
         else:
             return "File {} is CLOSED".format(self.filename)
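
As a quick orientation for reviewers, below is a minimal usage sketch of the API this series introduces, mirroring the doctest added in PATCH 4/4. The file name 'demo.h5' and the axis/array definitions are illustrative only, not part of the patches:

    from larray import Axis, ndtest, read_hdf
    from larray.inout.hdf import LHDFStore  # no longer exported from larray/__init__.py since PATCH 2/4

    # dump an array; engine='tables' selects the new PytablesStorer backend
    arr = ndtest((3, 3))
    arr.to_hdf('demo.h5', 'arr', engine='tables')

    # engine='auto' (the default) inspects the file's root attributes to pick the
    # right storer, falling back to 'pandas' for files written by older versions
    arr2 = read_hdf('demo.h5', 'arr')

    # the LHDFStore context manager gives dict-like access to the file
    with LHDFStore('demo.h5') as store:
        store['a'] = Axis('a=a0..a2')                # dump an Axis
        store['a01'] = store['a']['a0,a1'] >> 'a01'  # dump a Group (axis 'a' is reused)
        print(store.keys())                          # absolute path-names of stored objects
        print(store.summary())                       # one "name: type" line per object

    # Session-level dumps can force an engine too, as in the updated tests:
    # session.save('output.h5', engine='tables_hdf')  # or 'pandas_hdf'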