Skip to content

Commit

Permalink
- added module to generate the files contained in the test/data/ directory
Browse files Browse the repository at this point in the history

- renamed the 'population_session' directory and files to 'demography_eurostat'
- made 'demography_eurostat' available as a new dataset in function load_example_data()
- fix larray-project#785
  • Loading branch information
alixdamman committed Aug 5, 2019
1 parent 8ea78b8 commit 07a3db7
Show file tree
Hide file tree
Showing 38 changed files with 455 additions and 73 deletions.
6 changes: 3 additions & 3 deletions doc/source/tutorial/tutorial_IO.ipyml
Original file line number Diff line number Diff line change
Expand Up @@ -574,17 +574,17 @@ cells:
- code: |
# create a new Session object and load all arrays, axes, groups and metadata
# from all CSV files located in the passed directory
csv_dir = get_example_filepath('population_session')
csv_dir = get_example_filepath('demography_eurostat')
session = Session(csv_dir)

# create a new Session object and load all arrays, axes, groups and metadata
# stored in the passed Excel file
filepath_excel = get_example_filepath('population_session.xlsx')
filepath_excel = get_example_filepath('demography_eurostat.xlsx')
session = Session(filepath_excel)

# create a new Session object and load all arrays, axes, groups and metadata
# stored in the passed HDF5 file
filepath_hdf = get_example_filepath('population_session.h5')
filepath_hdf = get_example_filepath('demography_eurostat.h5')
session = Session(filepath_hdf)

print(session.summary())
Expand Down
6 changes: 3 additions & 3 deletions doc/source/tutorial/tutorial_IO.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -835,17 +835,17 @@
"source": [
"# create a new Session object and load all arrays, axes, groups and metadata \n",
"# from all CSV files located in the passed directory\n",
"csv_dir = get_example_filepath('population_session')\n",
"csv_dir = get_example_filepath('demography_eurostat')\n",
"session = Session(csv_dir)\n",
"\n",
"# create a new Session object and load all arrays, axes, groups and metadata\n",
"# stored in the passed Excel file\n",
"filepath_excel = get_example_filepath('population_session.xlsx')\n",
"filepath_excel = get_example_filepath('demography_eurostat.xlsx')\n",
"session = Session(filepath_excel)\n",
"\n",
"# create a new Session object and load all arrays, axes, groups and metadata\n",
"# stored in the passed HDF5 file\n",
"filepath_hdf = get_example_filepath('population_session.h5')\n",
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
"session = Session(filepath_hdf)\n",
"\n",
"print(session.summary())"
Expand Down
4 changes: 2 additions & 2 deletions doc/source/tutorial/tutorial_sessions.ipyml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ cells:

- code: |
# load a session representing the results of a demographic model
filepath_hdf = get_example_filepath('population_session.h5')
filepath_hdf = get_example_filepath('demography_eurostat.h5')
s_pop = Session(filepath_hdf)

# print the content of the session
Expand Down Expand Up @@ -188,7 +188,7 @@ cells:

- code: |
# load a session representing the results of a demographic model
filepath_hdf = get_example_filepath('population_session.h5')
filepath_hdf = get_example_filepath('demography_eurostat.h5')
s_pop = Session(filepath_hdf)

# create a copy of the original session
Expand Down
4 changes: 2 additions & 2 deletions doc/source/tutorial/tutorial_sessions.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"outputs": [],
"source": [
"# load a session representing the results of a demographic model\n",
"filepath_hdf = get_example_filepath('population_session.h5')\n",
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
"s_pop = Session(filepath_hdf)\n",
"\n",
"# print the content of the session\n",
Expand Down Expand Up @@ -319,7 +319,7 @@
"outputs": [],
"source": [
"# load a session representing the results of a demographic model\n",
"filepath_hdf = get_example_filepath('population_session.h5')\n",
"filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
"s_pop = Session(filepath_hdf)\n",
"\n",
"# create a copy of the original session\n",
Expand Down
43 changes: 25 additions & 18 deletions larray/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
_TEST_DIR = os.path.join(os.path.dirname(__file__), 'tests')

EXAMPLE_FILES_DIR = os.path.join(_TEST_DIR, 'data')
# TODO : replace 'demography.h5' by 'population_session.h5' and remove 'demo' ?
AVAILABLE_EXAMPLE_DATA = {
'demo': os.path.join(EXAMPLE_FILES_DIR, 'population_session.h5'),
'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5')
'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5'),
'demography_eurostat': os.path.join(EXAMPLE_FILES_DIR, 'demography_eurostat.h5')
}
AVAILABLE_EXAMPLE_FILES = os.listdir(EXAMPLE_FILES_DIR)

Expand Down Expand Up @@ -43,6 +42,7 @@ def get_example_filepath(fname):
return fpath


# TODO : replace # doctest: +SKIP by # doctest: +NORMALIZE_WHITESPACE once Python 2 has been dropped
def load_example_data(name):
r"""Load arrays used in the tutorial so that all examples in it can be reproduced.
Expand All @@ -52,29 +52,36 @@ def load_example_data(name):
Example data to load. Available example datasets are:
- demography
- demography_eurostat
Returns
-------
Session
Session containing one or several arrays
Session containing one or several arrays.
Examples
--------
>>> demo = load_example_data('demography')
>>> demo.pop.info # doctest: +SKIP
26 x 3 x 121 x 2 x 2
time [26]: 1991 1992 1993 ... 2014 2015 2016
geo [3]: 'BruCap' 'Fla' 'Wal'
age [121]: 0 1 2 ... 118 119 120
sex [2]: 'M' 'F'
nat [2]: 'BE' 'FO'
>>> demo.qx.info # doctest: +SKIP
26 x 3 x 121 x 2 x 2
time [26]: 1991 1992 1993 ... 2014 2015 2016
geo [3]: 'BruCap' 'Fla' 'Wal'
age [121]: 0 1 2 ... 118 119 120
sex [2]: 'M' 'F'
nat [2]: 'BE' 'FO'
>>> print(demo.summary()) # doctest: +NORMALIZE_WHITESPACE
hh: time, geo, hh_type (26 x 3 x 7) [int64]
pop: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [int64]
qx: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [float64]
>>> demo = load_example_data('demography_eurostat')
>>> print(demo.summary()) # doctest: +SKIP
Metadata:
title: Demographic datasets for a small selection of countries in Europe
source: demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat
citizen: citizen ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
country: country ['Belgium' 'France' 'Germany'] (3)
gender: gender ['Male' 'Female'] (2)
partner: partner ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
time: time [2013 2014 2015] (3)
even_years: time[2014] >> even_years (1)
odd_years: time[2013 2015] >> odd_years (2)
births: country, gender, time (3 x 2 x 3) [int32]
deaths: country, gender, time (3 x 2 x 3) [int32]
marriages: country, partner, citizen, time (3 x 5 x 5 x 3) [int32]
pop: country, gender, time (3 x 2 x 3) [int32]
"""
if name is None:
name = 'demography'
Expand Down
24 changes: 12 additions & 12 deletions larray/inout/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
country,gender\time,2013,2014,2015
Belgium,Male,5472856,5493792,5524068
Belgium,Female,5665118,5687048,5713206
France,Male,31772665,31936596,32175328
France,Female,33827685,34005671,34280951
France,Male,31772665,32045129,32174258
France,Female,33827685,34120851,34283895
Germany,Male,39380976,39556923,39835457
Germany,Female,41142770,41210540,41362080
Expand All @@ -93,8 +93,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
country gender\time 2013 2014 2015
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 31772665 31936596 32175328
France Female 33827685 34005671 34280951
France Male 31772665 32045129 32174258
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 41142770 41210540 41362080
Expand All @@ -108,7 +108,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
country,gender\time,2013,2014,2015
Belgium,Male,5472856,5493792,5524068
Belgium,Female,5665118,5687048,5713206
France,Female,33827685,34005671,34280951
France,Female,33827685,34120851,34283895
Germany,Male,39380976,39556923,39835457
>>> # by default, cells associated with missing label combinations are filled with NaN.
>>> # In that case, an int array is converted to a float array.
Expand All @@ -117,7 +117,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
Belgium Male 5472856.0 5493792.0 5524068.0
Belgium Female 5665118.0 5687048.0 5713206.0
France Male nan nan nan
France Female 33827685.0 34005671.0 34280951.0
France Female 33827685.0 34120851.0 34283895.0
Germany Male 39380976.0 39556923.0 39835457.0
Germany Female nan nan nan
>>> # using argument 'fill_value', you can choose which value to use to fill missing cells.
Expand All @@ -126,7 +126,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 0 0 0
France Female 33827685 34005671 34280951
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 0 0 0
Expand All @@ -140,8 +140,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
country,gender,2013,2014,2015
Belgium,Male,5472856,5493792,5524068
Belgium,Female,5665118,5687048,5713206
France,Male,31772665,31936596,32175328
France,Female,33827685,34005671,34280951
France,Male,31772665,32045129,32174258
France,Female,33827685,34120851,34283895
Germany,Male,39380976,39556923,39835457
Germany,Female,41142770,41210540,41362080
>>> # read the array stored in the CSV file as is
Expand Down Expand Up @@ -177,13 +177,13 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
Belgium,2014,11180840
Belgium,2015,11237274
France,2013,65600350
France,2014,65942267
France,2015,66456279
France,2014,66165980
France,2015,66458153
>>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_csv
>>> read_csv(fname, wide=False)
country\time 2013 2014 2015
Belgium 11137974 11180840 11237274
France 65600350 65942267 66456279
France 65600350 66165980 66458153
"""
if not np.isnan(na):
fill_value = na
Expand Down
20 changes: 10 additions & 10 deletions larray/inout/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
country gender\time 2013 2014 2015
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 31772665 31936596 32175328
France Female 33827685 34005671 34280951
France Male 31772665 32045129 32174258
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 41142770 41210540 41362080
Expand All @@ -109,7 +109,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
country gender\time 2013 2014 2015
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Female 33827685 34005671 34280951
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
By default, cells associated with missing label combinations are filled with NaN. In that case, an int array
Expand All @@ -120,7 +120,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
Belgium Male 5472856.0 5493792.0 5524068.0
Belgium Female 5665118.0 5687048.0 5713206.0
France Male nan nan nan
France Female 33827685.0 34005671.0 34280951.0
France Female 33827685.0 34120851.0 34283895.0
Germany Male 39380976.0 39556923.0 39835457.0
Germany Female nan nan nan
Expand All @@ -131,7 +131,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 0 0 0
France Female 33827685 34005671 34280951
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 0 0 0
Expand All @@ -142,8 +142,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
country gender 2013 2014 2015
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 31772665 31936596 32175328
France Female 33827685 34005671 34280951
France Male 31772665 32045129 32174258
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 41142770 41210540 41362080
Expand Down Expand Up @@ -177,14 +177,14 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
Belgium 2014 11180840
Belgium 2015 11237274
France 2013 65600350
France 2014 65942267
France 2015 66456279
France 2014 66165980
France 2015 66458153
>>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_excel
>>> read_excel(fname, 'pop_narrow_format', wide=False)
country\time 2013 2014 2015
Belgium 11137974 11180840 11237274
France 65600350 65942267 66456279
France 65600350 66165980 66458153
Extract array from a given range (xlwings only)
Expand Down
4 changes: 2 additions & 2 deletions larray/inout/hdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
country gender\time 2013 2014 2015
Belgium Male 5472856 5493792 5524068
Belgium Female 5665118 5687048 5713206
France Male 31772665 31936596 32175328
France Female 33827685 34005671 34280951
France Male 31772665 32045129 32174258
France Female 33827685 34120851 34283895
Germany Male 39380976 39556923 39835457
Germany Female 41142770 41210540 41362080
"""
Expand Down
12 changes: 6 additions & 6 deletions larray/inout/xw_reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def template(self):
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
Passing the name of the template (only if a template directory has been set)
Expand Down Expand Up @@ -245,7 +245,7 @@ def add_graph(self, data, title=None, template=None, width=None, height=None):
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
>>> sheet_be = report.new_sheet('Belgium')
Expand Down Expand Up @@ -297,7 +297,7 @@ def add_graphs(self, array_per_title, axis_per_loop_variable, template=None, wid
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
>>> sheet_pop = report.new_sheet('Population')
Expand Down Expand Up @@ -348,7 +348,7 @@ class AbstractExcelReport(AbstractReportItem):
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
Set a new destination sheet
Expand Down Expand Up @@ -423,7 +423,7 @@ def new_sheet(self, sheet_name):
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
>>> # prepare new output sheet named 'Belgium'
Expand Down Expand Up @@ -466,7 +466,7 @@ def to_excel(self, filepath, data_sheet_name='__data__', overwrite=True):
Examples
--------
>>> demo = load_example_data('demo')
>>> demo = load_example_data('demography_eurostat')
>>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
>>> report.template = 'Line_Marker'
Expand Down
Binary file modified larray/tests/data/births_and_deaths.xlsx
Binary file not shown.
Binary file added larray/tests/data/demography_eurostat.h5
Binary file not shown.
Binary file added larray/tests/data/demography_eurostat.xlsx
Binary file not shown.
6 changes: 6 additions & 0 deletions larray/tests/data/demography_eurostat/__axes__.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
country,gender,time,partner,citizen
Belgium,Male,2013,Total,Total
France,Female,2014,Reporting_country,Reporting_country
Germany,,2015,Foreign,Foreign
,,,Stateless,Stateless
,,,Unknown,Unknown
3 changes: 3 additions & 0 deletions larray/tests/data/demography_eurostat/__metadata__.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
metadata,
title,Demographic datasets for a small selection of countries in Europe
source,"demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat"
Loading

0 comments on commit 07a3db7

Please sign in to comment.