- added module to generate the files contained in the tests/data/ directory - renamed 'population_session' directory and files as 'demography_eurostat' - made 'demography_eurostat' a new available dataset in function load_example_data() - fix larray-project#785
alixdamman · Aug 5, 2019 · 07a3db7 · 07a3db7
1 parent 8ea78b8
commit 07a3db7
Show file tree

Hide file tree

Showing 38 changed files with 455 additions and 73 deletions.
diff --git a/doc/source/tutorial/tutorial_IO.ipyml b/doc/source/tutorial/tutorial_IO.ipyml
@@ -574,17 +574,17 @@ cells:
 - code: |
     # create a new Session object and load all arrays, axes, groups and metadata 
     # from all CSV files located in the passed directory
-    csv_dir = get_example_filepath('population_session')
+    csv_dir = get_example_filepath('demography_eurostat')
     session = Session(csv_dir)
 
     # create a new Session object and load all arrays, axes, groups and metadata
     # stored in the passed Excel file
-    filepath_excel = get_example_filepath('population_session.xlsx')
+    filepath_excel = get_example_filepath('demography_eurostat.xlsx')
     session = Session(filepath_excel)
 
     # create a new Session object and load all arrays, axes, groups and metadata
     # stored in the passed HDF5 file
-    filepath_hdf = get_example_filepath('population_session.h5')
+    filepath_hdf = get_example_filepath('demography_eurostat.h5')
     session = Session(filepath_hdf)
 
     print(session.summary())

diff --git a/doc/source/tutorial/tutorial_IO.ipynb b/doc/source/tutorial/tutorial_IO.ipynb
@@ -835,17 +835,17 @@
    "source": [
     "# create a new Session object and load all arrays, axes, groups and metadata \n",
     "# from all CSV files located in the passed directory\n",
-    "csv_dir = get_example_filepath('population_session')\n",
+    "csv_dir = get_example_filepath('demography_eurostat')\n",
     "session = Session(csv_dir)\n",
     "\n",
     "# create a new Session object and load all arrays, axes, groups and metadata\n",
     "# stored in the passed Excel file\n",
-    "filepath_excel = get_example_filepath('population_session.xlsx')\n",
+    "filepath_excel = get_example_filepath('demography_eurostat.xlsx')\n",
     "session = Session(filepath_excel)\n",
     "\n",
     "# create a new Session object and load all arrays, axes, groups and metadata\n",
     "# stored in the passed HDF5 file\n",
-    "filepath_hdf = get_example_filepath('population_session.h5')\n",
+    "filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
     "session = Session(filepath_hdf)\n",
     "\n",
     "print(session.summary())"

diff --git a/doc/source/tutorial/tutorial_sessions.ipyml b/doc/source/tutorial/tutorial_sessions.ipyml
@@ -43,7 +43,7 @@ cells:
 
 - code: |
     # load a session representing the results of a demographic model
-    filepath_hdf = get_example_filepath('population_session.h5')
+    filepath_hdf = get_example_filepath('demography_eurostat.h5')
     s_pop = Session(filepath_hdf)
 
     # print the content of the session
@@ -188,7 +188,7 @@ cells:
 
 - code: |
     # load a session representing the results of a demographic model
-    filepath_hdf = get_example_filepath('population_session.h5')
+    filepath_hdf = get_example_filepath('demography_eurostat.h5')
     s_pop = Session(filepath_hdf)
 
     # create a copy of the original session

diff --git a/doc/source/tutorial/tutorial_sessions.ipynb b/doc/source/tutorial/tutorial_sessions.ipynb
@@ -78,7 +78,7 @@
    "outputs": [],
    "source": [
     "# load a session representing the results of a demographic model\n",
-    "filepath_hdf = get_example_filepath('population_session.h5')\n",
+    "filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
     "s_pop = Session(filepath_hdf)\n",
     "\n",
     "# print the content of the session\n",
@@ -319,7 +319,7 @@
    "outputs": [],
    "source": [
     "# load a session representing the results of a demographic model\n",
-    "filepath_hdf = get_example_filepath('population_session.h5')\n",
+    "filepath_hdf = get_example_filepath('demography_eurostat.h5')\n",
     "s_pop = Session(filepath_hdf)\n",
     "\n",
     "# create a copy of the original session\n",

diff --git a/larray/example.py b/larray/example.py
@@ -5,10 +5,9 @@
 _TEST_DIR = os.path.join(os.path.dirname(__file__), 'tests')
 
 EXAMPLE_FILES_DIR = os.path.join(_TEST_DIR, 'data')
-# TODO : replace 'demography.h5' by 'population_session.h5' and remove 'demo' ?
 AVAILABLE_EXAMPLE_DATA = {
-    'demo': os.path.join(EXAMPLE_FILES_DIR, 'population_session.h5'),
-    'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5')
+    'demography': os.path.join(EXAMPLE_FILES_DIR, 'demography.h5'),
+    'demography_eurostat': os.path.join(EXAMPLE_FILES_DIR, 'demography_eurostat.h5')
 }
 AVAILABLE_EXAMPLE_FILES = os.listdir(EXAMPLE_FILES_DIR)
 
@@ -43,6 +42,7 @@ def get_example_filepath(fname):
     return fpath
 
 
+# TODO: replace '# doctest: +SKIP' with '# doctest: +NORMALIZE_WHITESPACE' once Python 2 support has been dropped
 def load_example_data(name):
     r"""Load arrays used in the tutorial so that all examples in it can be reproduced.
 
@@ -52,29 +52,36 @@ def load_example_data(name):
         Example data to load. Available example datasets are:
 
         - demography
+        - demography_eurostat
 
     Returns
     -------
     Session
-        Session containing one or several arrays
+        Session containing one or several arrays.
 
     Examples
     --------
     >>> demo = load_example_data('demography')
-    >>> demo.pop.info # doctest: +SKIP
-    26 x 3 x 121 x 2 x 2
-     time [26]: 1991 1992 1993 ... 2014 2015 2016
-     geo [3]: 'BruCap' 'Fla' 'Wal'
-     age [121]: 0 1 2 ... 118 119 120
-     sex [2]: 'M' 'F'
-     nat [2]: 'BE' 'FO'
-    >>> demo.qx.info # doctest: +SKIP
-    26 x 3 x 121 x 2 x 2
-     time [26]: 1991 1992 1993 ... 2014 2015 2016
-     geo [3]: 'BruCap' 'Fla' 'Wal'
-     age [121]: 0 1 2 ... 118 119 120
-     sex [2]: 'M' 'F'
-     nat [2]: 'BE' 'FO'
+    >>> print(demo.summary())   # doctest: +NORMALIZE_WHITESPACE
+    hh: time, geo, hh_type (26 x 3 x 7) [int64]
+    pop: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [int64]
+    qx: time, geo, age, sex, nat (26 x 3 x 121 x 2 x 2) [float64]
+    >>> demo = load_example_data('demography_eurostat')
+    >>> print(demo.summary())   # doctest: +SKIP
+    Metadata:
+       title: Demographic datasets for a small selection of countries in Europe
+       source: demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat
+    citizen: citizen ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
+    country: country ['Belgium' 'France' 'Germany'] (3)
+    gender: gender ['Male' 'Female'] (2)
+    partner: partner ['Total' 'Reporting_country' 'Foreign' 'Stateless' 'Unknown'] (5)
+    time: time [2013 2014 2015] (3)
+    even_years: time[2014] >> even_years (1)
+    odd_years: time[2013 2015] >> odd_years (2)
+    births: country, gender, time (3 x 2 x 3) [int32]
+    deaths: country, gender, time (3 x 2 x 3) [int32]
+    marriages: country, partner, citizen, time (3 x 5 x 5 x 3) [int32]
+    pop: country, gender, time (3 x 2 x 3) [int32]
     """
     if name is None:
         name = 'demography'

diff --git a/larray/inout/csv.py b/larray/inout/csv.py
@@ -78,8 +78,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
         country,gender\time,2013,2014,2015
         Belgium,Male,5472856,5493792,5524068
         Belgium,Female,5665118,5687048,5713206
-        France,Male,31772665,31936596,32175328
-        France,Female,33827685,34005671,34280951
+        France,Male,31772665,32045129,32174258
+        France,Female,33827685,34120851,34283895
         Germany,Male,39380976,39556923,39835457
         Germany,Female,41142770,41210540,41362080
 
@@ -93,8 +93,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     country  gender\time      2013      2014      2015
     Belgium         Male   5472856   5493792   5524068
     Belgium       Female   5665118   5687048   5713206
-     France         Male  31772665  31936596  32175328
-     France       Female  33827685  34005671  34280951
+     France         Male  31772665  32045129  32174258
+     France       Female  33827685  34120851  34283895
     Germany         Male  39380976  39556923  39835457
     Germany       Female  41142770  41210540  41362080
 
@@ -108,7 +108,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     country,gender\time,2013,2014,2015
     Belgium,Male,5472856,5493792,5524068
     Belgium,Female,5665118,5687048,5713206
-    France,Female,33827685,34005671,34280951
+    France,Female,33827685,34120851,34283895
     Germany,Male,39380976,39556923,39835457
     >>> # by default, cells associated with missing label combinations are filled with NaN.
     >>> # In that case, an int array is converted to a float array.
@@ -117,7 +117,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     Belgium         Male   5472856.0   5493792.0   5524068.0
     Belgium       Female   5665118.0   5687048.0   5713206.0
      France         Male         nan         nan         nan
-     France       Female  33827685.0  34005671.0  34280951.0
+     France       Female  33827685.0  34120851.0  34283895.0
     Germany         Male  39380976.0  39556923.0  39835457.0
     Germany       Female         nan         nan         nan
     >>> # using argument 'fill_value', you can choose which value to use to fill missing cells.
@@ -126,7 +126,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     Belgium         Male   5472856   5493792   5524068
     Belgium       Female   5665118   5687048   5713206
      France         Male         0         0         0
-     France       Female  33827685  34005671  34280951
+     France       Female  33827685  34120851  34283895
     Germany         Male  39380976  39556923  39835457
     Germany       Female         0         0         0
 
@@ -140,8 +140,8 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     country,gender,2013,2014,2015
     Belgium,Male,5472856,5493792,5524068
     Belgium,Female,5665118,5687048,5713206
-    France,Male,31772665,31936596,32175328
-    France,Female,33827685,34005671,34280951
+    France,Male,31772665,32045129,32174258
+    France,Female,33827685,34120851,34283895
     Germany,Male,39380976,39556923,39835457
     Germany,Female,41142770,41210540,41362080
     >>> # read the array stored in the CSV file as is
@@ -177,13 +177,13 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     Belgium,2014,11180840
     Belgium,2015,11237274
     France,2013,65600350
-    France,2014,65942267
-    France,2015,66456279
+    France,2014,66165980
+    France,2015,66458153
     >>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_csv
     >>> read_csv(fname, wide=False)
     country\time      2013      2014      2015
          Belgium  11137974  11180840  11237274
-          France  65600350  65942267  66456279
+          France  65600350  66165980  66458153
     """
     if not np.isnan(na):
         fill_value = na

diff --git a/larray/inout/excel.py b/larray/inout/excel.py
@@ -84,8 +84,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
     country  gender\time      2013      2014      2015
     Belgium         Male   5472856   5493792   5524068
     Belgium       Female   5665118   5687048   5713206
-     France         Male  31772665  31936596  32175328
-     France       Female  33827685  34005671  34280951
+     France         Male  31772665  32045129  32174258
+     France       Female  33827685  34120851  34283895
     Germany         Male  39380976  39556923  39835457
     Germany       Female  41142770  41210540  41362080
 
@@ -109,7 +109,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
         country  gender\time      2013      2014      2015
         Belgium         Male   5472856   5493792   5524068
         Belgium       Female   5665118   5687048   5713206
-         France       Female  33827685  34005671  34280951
+         France       Female  33827685  34120851  34283895
         Germany         Male  39380976  39556923  39835457
 
     By default, cells associated with missing label combinations are filled with NaN. In that case, an int array
@@ -120,7 +120,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
     Belgium         Male   5472856.0   5493792.0   5524068.0
     Belgium       Female   5665118.0   5687048.0   5713206.0
      France         Male         nan         nan         nan
-     France       Female  33827685.0  34005671.0  34280951.0
+     France       Female  33827685.0  34120851.0  34283895.0
     Germany         Male  39380976.0  39556923.0  39835457.0
     Germany       Female         nan         nan         nan
 
@@ -131,7 +131,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
     Belgium         Male   5472856   5493792   5524068
     Belgium       Female   5665118   5687048   5713206
      France         Male         0         0         0
-     France       Female  33827685  34005671  34280951
+     France       Female  33827685  34120851  34283895
     Germany         Male  39380976  39556923  39835457
     Germany       Female         0         0         0
 
@@ -142,8 +142,8 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
         country  gender      2013      2014      2015
         Belgium    Male   5472856   5493792   5524068
         Belgium  Female   5665118   5687048   5713206
-         France    Male  31772665  31936596  32175328
-         France  Female  33827685  34005671  34280951
+         France    Male  31772665  32045129  32174258
+         France  Female  33827685  34120851  34283895
         Germany    Male  39380976  39556923  39835457
         Germany  Female  41142770  41210540  41362080
 
@@ -177,14 +177,14 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
         Belgium  2014  11180840
         Belgium  2015  11237274
          France  2013  65600350
-         France  2014  65942267
-         France  2015  66456279
+         France  2014  66165980
+         France  2015  66458153
 
     >>> # to read arrays stored in 'narrow' format, you must pass wide=False to read_excel
     >>> read_excel(fname, 'pop_narrow_format', wide=False)
     country\time      2013      2014      2015
          Belgium  11137974  11180840  11237274
-          France  65600350  65942267  66456279
+          France  65600350  66165980  66458153
 
     Extract array from a given range (xlwings only)
 

diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py
@@ -57,8 +57,8 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
     country  gender\time      2013      2014      2015
     Belgium         Male   5472856   5493792   5524068
     Belgium       Female   5665118   5687048   5713206
-     France         Male  31772665  31936596  32175328
-     France       Female  33827685  34005671  34280951
+     France         Male  31772665  32045129  32174258
+     France       Female  33827685  34120851  34283895
     Germany         Male  39380976  39556923  39835457
     Germany       Female  41142770  41210540  41362080
     """

diff --git a/larray/inout/xw_reporting.py b/larray/inout/xw_reporting.py
@@ -79,7 +79,7 @@ def template(self):
 
         Examples
         --------
-        >>> demo = load_example_data('demo')
+        >>> demo = load_example_data('demography_eurostat')
 
         Passing the name of the template (only if a template directory has been set)
 
@@ -245,7 +245,7 @@ def add_graph(self, data, title=None, template=None, width=None, height=None):
 
         Examples
         --------
-        >>> demo = load_example_data('demo')
+        >>> demo = load_example_data('demography_eurostat')
         >>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
 
         >>> sheet_be = report.new_sheet('Belgium')
@@ -297,7 +297,7 @@ def add_graphs(self, array_per_title, axis_per_loop_variable, template=None, wid
 
         Examples
         --------
-        >>> demo = load_example_data('demo')
+        >>> demo = load_example_data('demography_eurostat')
         >>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
 
         >>> sheet_pop = report.new_sheet('Population')
@@ -348,7 +348,7 @@ class AbstractExcelReport(AbstractReportItem):
 
     Examples
     --------
-    >>> demo = load_example_data('demo')
+    >>> demo = load_example_data('demography_eurostat')
     >>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
 
     Set a new destination sheet
@@ -423,7 +423,7 @@ def new_sheet(self, sheet_name):
 
         Examples
         --------
-        >>> demo = load_example_data('demo')
+        >>> demo = load_example_data('demography_eurostat')
         >>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
 
         >>> # prepare new output sheet named 'Belgium'
@@ -466,7 +466,7 @@ def to_excel(self, filepath, data_sheet_name='__data__', overwrite=True):
 
         Examples
         --------
-        >>> demo = load_example_data('demo')
+        >>> demo = load_example_data('demography_eurostat')
         >>> report = ExcelReport(EXAMPLE_EXCEL_TEMPLATES_DIR)
         >>> report.template = 'Line_Marker'
 

diff --git a/larray/tests/data/births_and_deaths.xlsx b/larray/tests/data/births_and_deaths.xlsx
diff --git a/larray/tests/data/demography_eurostat.h5 b/larray/tests/data/demography_eurostat.h5
diff --git a/larray/tests/data/demography_eurostat.xlsx b/larray/tests/data/demography_eurostat.xlsx
diff --git a/larray/tests/data/demography_eurostat/__axes__.csv b/larray/tests/data/demography_eurostat/__axes__.csv
@@ -0,0 +1,6 @@
+country,gender,time,partner,citizen
+Belgium,Male,2013,Total,Total
+France,Female,2014,Reporting_country,Reporting_country
+Germany,,2015,Foreign,Foreign
+,,,Stateless,Stateless
+,,,Unknown,Unknown
diff --git a/...ts/data/population_session/__groups__.csv → ...s/data/demography_eurostat/__groups__.csv b/...ts/data/population_session/__groups__.csv → ...s/data/demography_eurostat/__groups__.csv
diff --git a/larray/tests/data/demography_eurostat/__metadata__.csv b/larray/tests/data/demography_eurostat/__metadata__.csv
@@ -0,0 +1,3 @@
+metadata,
+title,Demographic datasets for a small selection of countries in Europe
+source,"demo_jpan, demo_fasec, demo_magec and demo_marcz tables from Eurostat"
diff --git a/.../tests/data/population_session/births.csv → ...tests/data/demography_eurostat/births.csv b/.../tests/data/population_session/births.csv → ...tests/data/demography_eurostat/births.csv
diff --git a/.../tests/data/population_session/deaths.csv → ...tests/data/demography_eurostat/deaths.csv b/.../tests/data/population_session/deaths.csv → ...tests/data/demography_eurostat/deaths.csv