
Commit 03e2fc4

(issue 724):
- moved LHDFStore to inout/hdf.py
- implemented PandasStorer and PytablesStorer
- updated LArray/Axis/Group.to_hdf
- removed Metadata.to_hdf and Metadata.from_hdf
- renamed PandasHDFHandler to HDFHandler
alixdamman committed Aug 23, 2019
1 parent 40c8dd9 commit 03e2fc4
Showing 11 changed files with 660 additions and 165 deletions.
19 changes: 19 additions & 0 deletions doc/source/api.rst
@@ -706,6 +706,25 @@ ReportSheet
ReportSheet.add_graphs
ReportSheet.newline

HDF
===

.. autosummary::
:toctree: _generated/

LHDFStore

.. autosummary::
:toctree: _generated/

LHDFStore.filename
LHDFStore.is_open
LHDFStore.keys
LHDFStore.items
LHDFStore.summary
LHDFStore.close


.. _api-misc:

Miscellaneous
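The LHDFStore entries added to api.rst above only list names; as a rough usage sketch (store.put() appears in the to_hdf() implementations further down in this commit, the rest is assumed from the summary list, including whether summary is a method):

    from larray import LHDFStore, ndtest

    # open (or create) an HDF5 file; it is closed automatically at the end of the with block
    with LHDFStore('test.h5') as store:
        store.put('arrays/a', ndtest((2, 3)))   # store an LArray under a key
        print(store.filename, store.is_open)    # basic information about the store
        print(store.keys())                     # keys currently present in the file
        print(store.summary())                  # overview of stored objects (assumed to be a method)
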
3 changes: 2 additions & 1 deletion larray/__init__.py
@@ -26,7 +26,7 @@
from larray.inout.pandas import from_frame, from_series
from larray.inout.csv import read_csv, read_tsv, read_eurostat
from larray.inout.excel import read_excel
from larray.inout.hdf import read_hdf
from larray.inout.hdf import read_hdf, LHDFStore
from larray.inout.sas import read_sas
from larray.inout.stata import read_stata
from larray.inout.xw_excel import open_excel, Workbook
@@ -78,6 +78,7 @@
'from_lists', 'from_string', 'from_frame', 'from_series', 'read_csv', 'read_tsv',
'read_eurostat', 'read_excel', 'read_hdf', 'read_sas', 'read_stata',
'open_excel', 'Workbook', 'ExcelReport', 'ReportSheet',
'LHDFStore',
# utils
'get_options', 'set_options',
# viewer
10 changes: 3 additions & 7 deletions larray/core/array.py
@@ -62,7 +62,7 @@
from larray.core.axis import Axis, AxisReference, AxisCollection, X, _make_axis
from larray.util.misc import (table2str, size2str, basestring, izip, rproduct, ReprString, duplicates,
float_error_handler_factory, _isnoneslice, light_product, unique_list, common_type,
renamed_to, deprecate_kwarg, LHDFStore, lazy_attribute, unique_multi, SequenceZip,
renamed_to, deprecate_kwarg, lazy_attribute, unique_multi, SequenceZip,
Repeater, Product, ensure_no_numpy_type, PY2)
from larray.util.options import _OPTIONS, DISPLAY_MAXLINES, DISPLAY_EDGEITEMS, DISPLAY_WIDTH, DISPLAY_PRECISION

@@ -6734,13 +6734,9 @@ def to_hdf(self, filepath, key):
>>> a.to_hdf('test.h5', 'arrays/a') # doctest: +SKIP
"""
key = _translate_group_key_hdf(key)
from larray.inout.hdf import LHDFStore
with LHDFStore(filepath) as store:
store.put(key, self.to_frame())
attrs = store.get_storer(key).attrs
attrs.type = 'Array'
attrs.writer = 'LArray'
self.meta.to_hdf(store, key)
store.put(key, self)

def to_stata(self, filepath_or_buffer, **kwargs):
r"""
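With the change above, LArray.to_hdf() is reduced to a single LHDFStore.put() call. A quick roundtrip sketch using the key layout from the docstring example (read_hdf is already part of the public API):

    from larray import ndtest, read_hdf

    a = ndtest((2, 3))
    a.to_hdf('test.h5', 'arrays/a')        # now just LHDFStore.put(key, array) under the hood
    a2 = read_hdf('test.h5', 'arrays/a')   # read the array back
    assert a.equals(a2)
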
12 changes: 3 additions & 9 deletions larray/core/axis.py
@@ -16,7 +16,7 @@
_range_to_slice, _seq_group_to_name, _translate_group_key_hdf, remove_nested_groups)
from larray.util.oset import *
from larray.util.misc import (basestring, PY2, unicode, long, duplicates, array_lookup2, ReprString, index_by_id,
renamed_to, common_type, LHDFStore, lazy_attribute, _isnoneslice, unique_multi, Product)
renamed_to, common_type, lazy_attribute, _isnoneslice, unique_multi, Product)


np_frompyfunc = np.frompyfunc
@@ -1344,19 +1344,13 @@ def to_hdf(self, filepath, key=None):
>>> a.to_hdf('test.h5', 'axes/a') # doctest: +SKIP
"""
from larray.inout.hdf import LHDFStore
if key is None:
if self.name is None:
raise ValueError("Argument key must be provided explicitly in case of anonymous axis")
key = self.name
key = _translate_group_key_hdf(key)
dtype_kind = self.labels.dtype.kind
data = np.char.encode(self.labels, 'utf-8') if dtype_kind == 'U' else self.labels
s = pd.Series(data=data, name=self.name)
with LHDFStore(filepath) as store:
store.put(key, s)
store.get_storer(key).attrs.type = 'Axis'
store.get_storer(key).attrs.dtype_kind = dtype_kind
store.get_storer(key).attrs.wildcard = self.iswildcard
store.put(key, self)

@property
def dtype(self):
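Same idea for Axis.to_hdf() above: only the key resolution stays in the method, the actual storage goes through LHDFStore.put(). A small sketch of that key handling (the ValueError is the one raised by the code kept above):

    from larray import Axis

    a = Axis('a=a0..a2')
    a.to_hdf('test.h5', 'axes/a')            # named axis: the key could also be omitted (defaults to 'a')

    anon = Axis(['a0', 'a1', 'a2'])          # axis without a name
    # anon.to_hdf('test.h5')                 # would raise ValueError: key required for an anonymous axis
    anon.to_hdf('test.h5', key='axes/anon')  # fine once the key is given explicitly
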
20 changes: 3 additions & 17 deletions larray/core/group.py
@@ -13,7 +13,7 @@
from larray.core.abstractbases import ABCAxis, ABCAxisReference, ABCLArray
from larray.util.oset import *
from larray.util.misc import (basestring, PY2, unique, find_closing_chr, _parse_bound, _seq_summary, _isintstring,
renamed_to, LHDFStore)
renamed_to)


def _slice_to_str(key, repr_func=str):
@@ -1453,27 +1453,13 @@ def to_hdf(self, filepath, key=None, axis_key=None):
>>> # save both the group 'b01' and the associated axis 'b'
>>> b01.to_hdf('test.h5') # doctest: +SKIP
"""
from larray.inout.hdf import LHDFStore
if key is None:
if self.name is None:
raise ValueError("Argument key must be provided explicitly in case of anonymous group")
key = self.name
key = _translate_group_key_hdf(key)
if axis_key is None:
if self.axis.name is None:
raise ValueError("Argument axis_key must be provided explicitly if the associated axis is anonymous")
axis_key = self.axis.name
data = self.eval()
dtype_kind = data.dtype.kind if isinstance(data, np.ndarray) else ''
if dtype_kind == 'U':
data = np.char.encode(data, 'utf-8')
s = pd.Series(data=data, name=self.name)
with LHDFStore(filepath) as store:
store.put(key, s)
store.get_storer(key).attrs.type = 'Group'
store.get_storer(key).attrs.dtype_kind = dtype_kind
if axis_key not in store:
self.axis.to_hdf(store, key=axis_key)
store.get_storer(key).attrs.axis_key = axis_key
store.put(key, self, axis_key=axis_key)

# this makes range(LGroup(int)) possible
def __index__(self):
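Likewise for Group.to_hdf() above: the group and, if not already present, its axis are both written through a single LHDFStore.put(key, group, axis_key=...) call. A sketch based on the docstring example (the 'b01' group name and the keys are illustrative):

    from larray import Axis

    b = Axis('b=b0..b3')
    b01 = b['b0:b1'] >> 'b01'                # named group defined on axis 'b'
    b01.to_hdf('test.h5')                    # saves both the group 'b01' and the axis 'b'
    b01.to_hdf('test.h5', key='groups/b01', axis_key='axes/b')   # explicit keys
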
14 changes: 0 additions & 14 deletions larray/core/metadata.py
@@ -162,17 +162,3 @@ def _convert_value(value):
return value

return Metadata([(key, _convert_value(value)) for key, value in zip(array.axes.labels[0], array.data)])

# ---------- IO methods ----------
def to_hdf(self, hdfstore, key=None):
if len(self):
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
attrs.metadata = self

@classmethod
def from_hdf(cls, hdfstore, key=None):
attrs = hdfstore.get_storer(key).attrs if key is not None else hdfstore.root._v_attrs
if 'metadata' in attrs:
return attrs.metadata
else:
return None
55 changes: 29 additions & 26 deletions larray/core/session.py
@@ -1,22 +1,41 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

import fnmatch
import os
import sys
import re
import fnmatch
import sys
import warnings
from collections import OrderedDict, Iterable

import numpy as np

from larray.core.metadata import Metadata
from larray.core.group import Group
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence
from larray.core.axis import Axis
from larray.core.constants import nan
from larray.core.array import LArray, get_axes, ndtest, zeros, zeros_like, sequence, aslarray
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring
from larray.core.group import Group
from larray.core.metadata import Metadata
from larray.inout.session import ext_default_engine, get_file_handler
from larray.util.misc import float_error_handler_factory, is_interactive_interpreter, renamed_to, inverseop, basestring


def _get_handler(engine, fname, overwrite, **kwargs):
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
if engine == 'hdf':
engine_hdf = 'auto'
if '_hdf' in engine:
engine_hdf, engine = engine.split('_')
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, overwrite, kwargs['sep'])
elif engine == 'hdf':
handler = handler_cls(fname, overwrite, engine=engine_hdf)
else:
handler = handler_cls(fname, overwrite)
return handler


# XXX: inherit from OrderedDict or LArray?
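The new _get_handler() helper centralises the engine resolution that load() and save() previously duplicated (see the two hunks below). The '*_hdf' splitting works roughly like this; the actual contents of ext_default_engine are not visible in this diff, so the 'hdf' default is an assumption:

    # standalone illustration of the '*_hdf' splitting performed by _get_handler()
    engine = 'tables_hdf'          # one of the engines documented for load()/save() below
    engine_hdf = 'auto'
    if '_hdf' in engine:
        engine_hdf, engine = engine.split('_')
    print(engine, engine_hdf)      # -> hdf tables: the HDF handler is created with engine='tables',
                                   #    which presumably selects the new PytablesStorer
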
@@ -358,7 +377,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
List of objects to load.
If `fname` is None, list of paths to CSV files.
Defaults to all valid objects present in the file/directory.
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
Load using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
display : bool, optional
Whether or not to display which file is being worked on. Defaults to False.
@@ -415,15 +434,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
engine = ext_default_engine['csv']
else:
raise ValueError("List of paths to only CSV files expected. Got {}".format(names))
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, kwargs['sep'])
else:
handler = handler_cls(fname)
handler = _get_handler(engine, fname, False, **kwargs)
metadata, objects = handler.read(names, display=display, **kwargs)
for k, v in objects.items():
self[k] = v
@@ -442,7 +453,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
List of names of LArray/Axis/Group objects to dump.
If `fname` is None, list of paths to CSV files.
Defaults to all objects present in the Session.
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
engine : {'auto', 'pandas_csv', 'pandas_hdf', 'tables_hdf', 'pandas_excel', 'xlwings_excel', 'pickle'}, optional
Dump using `engine`. Defaults to 'auto' (use default engine for the format guessed from the file extension).
overwrite: bool, optional
Whether or not to overwrite an existing file, if any. Ignored for CSV files and 'pandas_excel' engine.
@@ -482,15 +493,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
>>> # replace arr1 and add arr4 in file output.h5
>>> s2.save('output.h5', overwrite=False) # doctest: +SKIP
"""
if engine == 'auto':
_, ext = os.path.splitext(fname)
ext = ext.strip('.') if '.' in ext else 'csv'
engine = ext_default_engine[ext]
handler_cls = get_file_handler(engine)
if engine == 'pandas_csv' and 'sep' in kwargs:
handler = handler_cls(fname, overwrite, kwargs['sep'])
else:
handler = handler_cls(fname, overwrite)
handler = _get_handler(engine, fname, overwrite, **kwargs)
meta = self.meta if overwrite else None
items = self.items()
if names is not None:
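From the user side, load() and save() now also accept the 'tables_hdf' engine documented above. A minimal sketch (the keyword-argument Session constructor is only used for brevity):

    from larray import Session, ndtest

    s = Session(arr1=ndtest((2, 3)), arr2=ndtest(4))
    s.save('output.h5')                        # engine='auto': resolved from the .h5 extension
    s.save('output.h5', engine='tables_hdf')   # force the PyTables-backed HDF engine

    s2 = Session()
    s2.load('output.h5', engine='tables_hdf')
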
(diff for the remaining 4 changed files not shown)
