From 0a64a896f902116bef29e9d77291b3b812e781d9 Mon Sep 17 00:00:00 2001
From: "smanousopoulos@gmail.com" <smanousopoulos@gmail.com>
Date: Fri, 5 Jun 2015 22:57:19 +0300
Subject: [PATCH 1/3] Add free keywords to the schema, types and to/from
 XML conversion

---
 .../lib/metadata/schemata/inspire_metadata.py |  9 ++++++
 .../lib/metadata/types/inspire_metadata.py    | 25 +++++++++++++--
 .../templates/package/inspire_iso.xml         | 32 +++++++++++++++++++
 ckanext/publicamundi/tests/test_inspire.py    |  4 +++
 4 files changed, 67 insertions(+), 3 deletions(-)

diff --git a/ckanext/publicamundi/lib/metadata/schemata/inspire_metadata.py b/ckanext/publicamundi/lib/metadata/schemata/inspire_metadata.py
index 8e023e9..468610d 100644
--- a/ckanext/publicamundi/lib/metadata/schemata/inspire_metadata.py
+++ b/ckanext/publicamundi/lib/metadata/schemata/inspire_metadata.py
@@ -119,6 +119,15 @@ def check_keywords(obj):
                 raise zope.interface.Invalid(
                     'You need to select at least one keyword from INSPIRE data themes')
 
+    free_keywords = zope.schema.List(
+            title= u'Free Keywords',
+            description = u"The keyword value is a commonly used word, formalised word or phrase used to describe the subject. While the topic category is too coarse for detailed queries, keywords help narrowing a full text search and they allow for structured keyword search.",
+            required = False,
+            max_length = 10,
+            value_type = zope.schema.Object(IFreeKeyword,
+                title = u'Free Keyword'))
+    free_keywords.setTaggedValue('format:markup', { 'descend-if-dictized': False })
+
     # Geographic
 
     bounding_box = zope.schema.List(
diff --git a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
index 40a7289..c0f4668 100644
--- a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
+++ b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
@@ -63,6 +63,7 @@ class InspireMetadata(BaseMetadata):
     topic_category = list
 
     keywords = KeywordsFactory()
+    free_keywords = list
 
     bounding_box = list
 
@@ -177,9 +178,18 @@ def to_resp_party(alist):
             topic_list.append(topic)
         
         keywords_dict = {}
+        free_keywords = []
+        print 'start here'
         for it in md.identification.keywords:
+            print it
             thes_title = it['thesaurus']['title']
-            if thes_title is not None:
+            if thes_title is None:
+                date = to_date(it['thesaurus']['date'])
+                datetype= it['thesaurus']['datetype']
+                title = it['thesaurus']['title']
+                for t in it['keywords']:
+                    free_keywords.append(FreeKeyword(value=t, reference_date=date, date_type=datetype, originating_vocabulary=title ))
+            else:
                 thes_split = thes_title.split(',')
                 # TODO thes_split[1] (=version) can be used in a get_by_title_and_version() 
                 # to enforce a specific thesaurus version.
@@ -193,8 +203,15 @@ def to_resp_party(alist):
                     if thes:
                         kw = ThesaurusTerms(thesaurus=thes, terms=term_list)
                         keywords_dict.update({thes_name:kw})
-                except:
-                    pass
+
+                except ValueError:
+                    print 'free keywords with name'
+                    date = to_date(it['thesaurus']['date'])
+                    datetype= it['thesaurus']['datetype']
+                    title = it['thesaurus']['title']
+                    for t in it['keywords']:
+                        free_keywords.append(FreeKeyword(value=t, reference_date=date, date_type=datetype, originating_vocabulary=title ))
+
         temporal_extent = []
         if md.identification.temporalextent_start or md.identification.temporalextent_end:
             temporal_extent = [TemporalExtent(
@@ -301,6 +318,8 @@ def to_resp_party(alist):
         #obj.resource_language = md.identification.resourcelanguage
         obj.topic_category = topic_list
         obj.keywords = keywords_dict
+        print free_keywords
+        obj.free_keywords = free_keywords
         obj.bounding_box = bbox
         obj.temporal_extent = temporal_extent
         obj.creation_date = creation_date
diff --git a/ckanext/publicamundi/templates/package/inspire_iso.xml b/ckanext/publicamundi/templates/package/inspire_iso.xml
index ecce79b..acb831c 100644
--- a/ckanext/publicamundi/templates/package/inspire_iso.xml
+++ b/ckanext/publicamundi/templates/package/inspire_iso.xml
@@ -175,6 +175,38 @@
           </gmd:thesaurusName>
         </gmd:MD_Keywords>
       </gmd:descriptiveKeywords>
+      {% endfor -%}
+      {% for k in data.free_keywords -%}
+       <gmd:descriptiveKeywords>
+        <gmd:MD_Keywords>
+          <gmd:keyword>
+            <gco:CharacterString>{{ k['value'] }}</gco:CharacterString>
+          </gmd:keyword>
+          {% if k['originating_vocabulary'] -%}
+          <gmd:thesaurusName>
+            <gmd:CI_Citation>
+              <gmd:title>
+                <gco:CharacterString>{{ k['originating_vocabulary'] }}</gco:CharacterString>
+              </gmd:title>
+              <gmd:date>
+                <gmd:CI_Date>
+                  <gmd:date>
+                    <gco:Date>{{ k['reference_date'] }}</gco:Date>
+                  </gmd:date>
+                  <gmd:dateType>
+                    <gmd:CI_DateTypeCode
+                      codeList="http://www.isotc211.org/2005/resources/Codelist/gmxCodelists.xml#CI_DateTypeCode"
+                      codeListValue={{ "\"%s\""|safe % k['date_type'] }}
+                      >{{ k['date_type'] }}</gmd:CI_DateTypeCode>
+                  </gmd:dateType>
+                </gmd:CI_Date>
+              </gmd:date>
+            </gmd:CI_Citation>
+        </gmd:thesaurusName>
+        {% endif -%}
+        </gmd:MD_Keywords>
+      </gmd:descriptiveKeywords>
+ 
       {% endfor -%}
       {% for rc in data.access_constraints -%}
       <gmd:resourceConstraints>
diff --git a/ckanext/publicamundi/tests/test_inspire.py b/ckanext/publicamundi/tests/test_inspire.py
index f820128..5173c3f 100644
--- a/ckanext/publicamundi/tests/test_inspire.py
+++ b/ckanext/publicamundi/tests/test_inspire.py
@@ -253,6 +253,10 @@
 insp10 = copy.deepcopy(fixtures.inspire1)
 insp10.keywords = {}
 
+# A free keyword is supplied
+insp11 = copy.deepcopy(fixtures.inspire1)
+insp11.free_keywords = FreeKeywords(value="free-keyword")
+
 #
 # Tests
 #

From 89b3b70ba7d6bf3da0b5f816a64b3a3ede8266cd Mon Sep 17 00:00:00 2001
From: Michail Alexakis <alexakis@imis.athena-innovation.gr>
Date: Sat, 6 Jun 2015 20:10:20 -0400
Subject: [PATCH 2/3] Rebuild INSPIRE vocabularies as of #138 (wip)

Rewrite the `from_xml()` part to properly store keywords either as
thesaurus terms or as free keywords.
---
 ckanext/publicamundi/controllers/api.py       |  8 +--
 ckanext/publicamundi/lib/metadata/base.py     |  4 +-
 .../publicamundi/lib/metadata/types/baz.py    |  2 +-
 .../lib/metadata/types/inspire_metadata.py    | 55 +++++++++++++------
 .../lib/metadata/types/thesaurus.py           | 10 ++--
 .../metadata/vocabularies/babel_extractors.py |  2 +-
 .../lib/metadata/vocabularies/json_loader.py  |  7 ++-
 ckanext/publicamundi/tests/fixtures.py        |  4 +-
 8 files changed, 57 insertions(+), 35 deletions(-)

diff --git a/ckanext/publicamundi/controllers/api.py b/ckanext/publicamundi/controllers/api.py
index 8565de5..e638af4 100644
--- a/ckanext/publicamundi/controllers/api.py
+++ b/ckanext/publicamundi/controllers/api.py
@@ -121,15 +121,15 @@ def vocabulary_get(self, name):
         name = str(name)
         r = None
         
-        vocab = vocabularies.get_by_name(name)
+        vocab = vocabularies.get_by_name(name) 
         if vocab:
-            terms = vocab['vocabulary'].by_value
             r = {
                 'date_type': vocab.get('date_type'),
                 'reference_date': vocab.get('reference_date'),
                 'title': vocab.get('title'),
                 'name': vocab.get('name'),
-                'terms': [{ 'value': k, 'title': terms[k].title } for k in terms],
+                'terms': [{'token': t.token, 'value': t.value, 'title': t.title} 
+                    for t in vocab['vocabulary']],
             }
                 
         response.headers['Content-Type'] = content_types['json']
@@ -149,7 +149,7 @@ def dataset_export(self, name_or_id):
         return
     
     def dataset_import(self):
-
+        
         post = request.params
 
         # Forward to the dataset_import action
diff --git a/ckanext/publicamundi/lib/metadata/base.py b/ckanext/publicamundi/lib/metadata/base.py
index a05e0b9..2caf6bd 100644
--- a/ckanext/publicamundi/lib/metadata/base.py
+++ b/ckanext/publicamundi/lib/metadata/base.py
@@ -58,9 +58,9 @@ def flatten_field(field):
             'Only zope.schema.Choice supported for key_type'
         res = {}
         res1 = flatten_field(field.value_type)
-        for v in field.key_type.vocabulary:
+        for t in field.key_type.vocabulary:
             for k1, field1 in res1.items():
-                res[(v.token,) + k1] = field1
+                res[(t.value,) + k1] = field1
     else:
         res = { (): field }
     
diff --git a/ckanext/publicamundi/lib/metadata/types/baz.py b/ckanext/publicamundi/lib/metadata/types/baz.py
index 4fe5a0b..99e91e3 100644
--- a/ckanext/publicamundi/lib/metadata/types/baz.py
+++ b/ckanext/publicamundi/lib/metadata/types/baz.py
@@ -7,7 +7,7 @@
 from ckanext.publicamundi.lib.metadata.types import Thesaurus, ThesaurusTerms
 from ckanext.publicamundi.lib.metadata.types._common import *
 
-thesaurus_gemet_themes = Thesaurus.make('keywords-gemet-themes')
+thesaurus_gemet_themes = Thesaurus.lookup('keywords-gemet-themes')
 
 class KeywordsFactory(object):
     
diff --git a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
index 40a7289..786bc75 100644
--- a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
+++ b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
@@ -1,4 +1,5 @@
 import os
+import re
 import uuid
 import zope.interface
 import zope.schema
@@ -27,7 +28,7 @@ def __init__(self, thesaurus_name='keywords-gemet-inspire-themes'):
     def __call__(self):
         keywords = {}
         keywords[self._name] = ThesaurusTerms(
-            terms=[], thesaurus=Thesaurus.make(self._name))
+            terms=[], thesaurus=Thesaurus.lookup(self._name))
         return keywords
 
 class TemporalExtentFactory(object):
@@ -162,6 +163,8 @@ def to_resp_party(alist):
                     role = it.role))
             return result
 
+        # Parse object
+
         md = MD_Metadata(e)
 
         datestamp = to_date(md.datestamp)
@@ -176,25 +179,43 @@ def to_resp_party(alist):
         for topic in md.identification.topiccategory:
             topic_list.append(topic)
         
-        keywords_dict = {}
+        free_keywords = []
+        keywords = {}
         for it in md.identification.keywords:
             thes_title = it['thesaurus']['title']
-            if thes_title is not None:
-                thes_split = thes_title.split(',')
-                # TODO thes_split[1] (=version) can be used in a get_by_title_and_version() 
-                # to enforce a specific thesaurus version.
-                thes_title = thes_split[0]
+            # Lookup and instantiate a named thesaurus
+            thes = None
+            if thes_title:
                 try:
-                    thes_name = vocabularies.munge('Keywords-' + thes_title)
-                    term_list = []
-                    for t in it['keywords']:
-                        term_list.append(t)
-                    thes = Thesaurus.make(thes_name)
-                    if thes:
-                        kw = ThesaurusTerms(thesaurus=thes, terms=term_list)
-                        keywords_dict.update({thes_name:kw})
+                    thes_title, thes_version = thes_title.split(',')
                 except:
-                    pass
+                    thes_version = None
+                else:
+                    thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1', thes_version)
+                thes_name = 'keywords-' + vocabularies.munge(thes_title)
+                # Note thes_version can be used to enforce a specific thesaurus version
+                try:
+                    thes = Thesaurus.lookup(thes_name)
+                except ValueError:
+                    thes = None
+            # Treat present keywords depending on if they belong to a thesaurus
+            if thes:
+                # Treat as thesaurus terms; discard unknown terms
+                terms = []
+                for keyword in it['keywords']:
+                    term = thes.vocabulary.by_value.get(keyword)
+                    if not term:
+                        term = thes.vocabulary.by_token.get(keyword)
+                    if term:
+                        terms.append(term.value)
+                keywords[thes.name] = ThesaurusTerms(thesaurus=thes, terms=terms)
+            else:
+                # Treat as free keywords
+                # Todo Build a list of FreeKeyword items
+                for keyword in it['keywords']:
+                    # Todo Maybe convert keyword to a canonical form (e.g. munge)
+                    free_keywords.append(keyword)
+                
         temporal_extent = []
         if md.identification.temporalextent_start or md.identification.temporalextent_end:
             temporal_extent = [TemporalExtent(
@@ -300,7 +321,7 @@ def to_resp_party(alist):
         obj.locator = url_list
         #obj.resource_language = md.identification.resourcelanguage
         obj.topic_category = topic_list
-        obj.keywords = keywords_dict
+        obj.keywords = keywords
         obj.bounding_box = bbox
         obj.temporal_extent = temporal_extent
         obj.creation_date = creation_date
diff --git a/ckanext/publicamundi/lib/metadata/types/thesaurus.py b/ckanext/publicamundi/lib/metadata/types/thesaurus.py
index 4c94817..6fe7fca 100644
--- a/ckanext/publicamundi/lib/metadata/types/thesaurus.py
+++ b/ckanext/publicamundi/lib/metadata/types/thesaurus.py
@@ -28,12 +28,12 @@ def vocabulary(self):
     # Factory for Thesaurus
 
     @classmethod
-    def make(cls, name):
-        '''Create a new Thesaurus instance from it's machine-name name.
-        The metadata for this thesaurus are queried from vocabularies module.
-
-        Note: Maybe rename this class-method to lookup
+    def lookup(cls, name):
+        '''Lookup a thesaurus by it's name and return a Thesaurus instance.
+        The metadata for a newly created thesaurus are queried from vocabularies 
+        module.
         '''
+
         spec = vocabularies.get_by_name(name)
         if spec:
             kwargs = {
diff --git a/ckanext/publicamundi/lib/metadata/vocabularies/babel_extractors.py b/ckanext/publicamundi/lib/metadata/vocabularies/babel_extractors.py
index d5d1005..3d6a02c 100644
--- a/ckanext/publicamundi/lib/metadata/vocabularies/babel_extractors.py
+++ b/ckanext/publicamundi/lib/metadata/vocabularies/babel_extractors.py
@@ -8,7 +8,7 @@
 # Babel string extraction functions
 
 def extract_json(fileobj, keywords, comment_tags, options):
-    """Extract messages from XXX files.
+    """Extract messages from files.
     :param fileobj: the file-like object the messages should be extracted from
     :param keywords: a list of keywords (i.e. function names) that should be recognized as translation functions
     :param comment_tags: a list of translator tags to search for and include in the results
diff --git a/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py b/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
index 75674eb..cb7e33a 100644
--- a/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
+++ b/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
@@ -29,18 +29,19 @@ def make_vocabulary(data):
     The input data can be one of the following:
      * a list of human-readable terms or a
      * a dict that maps machine-readable to human-readable terms.
-    
     '''
+
+    # Note: A SimpleTerm is a tuple (value, token, title) 
     
     terms = []
     if isinstance(data, list):
         for t in data:
             k = munge(t)
-            terms.append(SimpleTerm(k, k, t))
+            terms.append(SimpleTerm(k, t, t))
     elif isinstance(data, dict):     
         for k, t in data.items():
             #k = munge(k)
-            terms.append(SimpleTerm(k, k, t))
+            terms.append(SimpleTerm(k, t, t))
     return SimpleVocabulary(terms, swallow_duplicates=True)
 
 def make_vocabularies(data_file):
diff --git a/ckanext/publicamundi/tests/fixtures.py b/ckanext/publicamundi/tests/fixtures.py
index f05619a..5e6cce3 100644
--- a/ckanext/publicamundi/tests/fixtures.py
+++ b/ckanext/publicamundi/tests/fixtures.py
@@ -144,9 +144,9 @@
     date_type = 'creation'
 )
 
-thesaurus_gemet_themes = Thesaurus.make('keywords-gemet-themes')
+thesaurus_gemet_themes = Thesaurus.lookup('keywords-gemet-themes')
 
-thesaurus_gemet_inspire_data_themes = Thesaurus.make('keywords-gemet-inspire-themes')
+thesaurus_gemet_inspire_data_themes = Thesaurus.lookup('keywords-gemet-inspire-themes')
 
 # Baz 
 

From d5330468d1664f009a6ea279696850b9bd1decfc Mon Sep 17 00:00:00 2001
From: Michail Alexakis <alexakis@imis.athena-innovation.gr>
Date: Sun, 7 Jun 2015 15:24:36 -0400
Subject: [PATCH 3/3] Rewrite load/normalize/lookup logic for INSPIRE
 vocabularies

This should resolve #138. It also addresses #137 but tests have to be added.
---
 .../lib/metadata/types/_common.py             | 11 ++++++
 .../lib/metadata/types/inspire_metadata.py    |  4 +-
 .../lib/metadata/types/thesaurus.py           | 39 ++++++++++++-------
 .../lib/metadata/vocabularies/__init__.py     |  8 ++--
 .../lib/metadata/vocabularies/json_loader.py  | 26 +++++++------
 5 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/ckanext/publicamundi/lib/metadata/types/_common.py b/ckanext/publicamundi/lib/metadata/types/_common.py
index 68e06b9..8746eee 100644
--- a/ckanext/publicamundi/lib/metadata/types/_common.py
+++ b/ckanext/publicamundi/lib/metadata/types/_common.py
@@ -68,6 +68,17 @@ class FreeKeyword(Object):
     reference_date = None
     date_type = None
 
+    @classmethod
+    def normalize_keyword(cls, s):
+        from inflection import dasherize, underscore
+        return dasherize(underscore(unicode(s)))
+    
+    def __init__(self, **kwargs):
+        value = kwargs.get('value')
+        if value:
+            kwargs['value'] = self.normalize_keyword(value)
+        super(FreeKeyword, self).__init__(**kwargs)
+        
 @object_null_adapter()
 class GeographicBoundingBox(Object):
     
diff --git a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
index 808fe98..575052e 100644
--- a/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
+++ b/ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
@@ -192,10 +192,9 @@ def to_responsible_party(alist):
                     thes_version = None
                 else:
                     thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1', thes_version)
-                thes_name = 'keywords-' + vocabularies.munge(thes_title)
                 # Note thes_version can be used to enforce a specific thesaurus version
                 try:
-                    thes = Thesaurus.lookup(name=thes_name)
+                    thes = Thesaurus.lookup(title=thes_title, for_keywords=True)
                 except ValueError:
                     thes = None
             # Treat present keywords depending on if they belong to a thesaurus
@@ -214,7 +213,6 @@ def to_responsible_party(alist):
                 vocab_date = to_date(it['thesaurus']['date'])
                 vocab_datetype = it['thesaurus']['datetype']
                 for keyword in it['keywords']:
-                    # Todo Maybe convert keyword to a canonical form (e.g. munge)
                     free_keywords.append(FreeKeyword(
                         value = keyword,
                         reference_date = vocab_date,
diff --git a/ckanext/publicamundi/lib/metadata/types/thesaurus.py b/ckanext/publicamundi/lib/metadata/types/thesaurus.py
index 6fe7fca..eeef5ed 100644
--- a/ckanext/publicamundi/lib/metadata/types/thesaurus.py
+++ b/ckanext/publicamundi/lib/metadata/types/thesaurus.py
@@ -22,31 +22,40 @@ class Thesaurus(Object):
 
     @property
     def vocabulary(self):
-        spec = vocabularies.get_by_name(self.name)
-        return spec.get('vocabulary') if spec else None
+        vocab = vocabularies.get_by_name(self.name)
+        return vocab.get('vocabulary') if vocab else None
 
     # Factory for Thesaurus
 
     @classmethod
-    def lookup(cls, name):
-        '''Lookup a thesaurus by it's name and return a Thesaurus instance.
-        The metadata for a newly created thesaurus are queried from vocabularies 
-        module.
+    def lookup(cls, name=None, title=None, for_keywords=False):
+        '''Lookup by name or title and return a Thesaurus instance.
+
+        This is a factory method that tries to instantiate a Thesaurus object
+        from a collection of well-known (mostly related to INSPIRE) vocabularies.
         '''
+        
+        vocab = None
+        
+        if (name is None) and title:
+            name = vocabularies.normalize_thesaurus_title(title, for_keywords)
+        
+        if name:
+            vocab = vocabularies.get_by_name(name)
+        else:
+            raise ValueError('Expected a name/title lookup')
 
-        spec = vocabularies.get_by_name(name)
-        if spec:
+        if vocab:
             kwargs = {
-               'title': spec.get('title'),
-               'name': spec.get('name'),
-               'reference_date': spec.get('reference_date'),
-               'version' : spec.get('version'),
-               'date_type': spec.get('date_type'),
+               'title': vocab.get('title'),
+               'name': vocab.get('name'),
+               'reference_date': vocab.get('reference_date'),
+               'version' : vocab.get('version'),
+               'date_type': vocab.get('date_type'),
             }
             return cls(**kwargs)
         else:
-            raise ValueError(
-                'Cannot find an INSPIRE thesaurus named "%s"' %(name))
+            raise ValueError('Cannot find a thesaurus named "%s"' %(name))
 
 @object_null_adapter()
 class ThesaurusTerms(Object):
diff --git a/ckanext/publicamundi/lib/metadata/vocabularies/__init__.py b/ckanext/publicamundi/lib/metadata/vocabularies/__init__.py
index 2efb3ff..611d5bb 100644
--- a/ckanext/publicamundi/lib/metadata/vocabularies/__init__.py
+++ b/ckanext/publicamundi/lib/metadata/vocabularies/__init__.py
@@ -8,15 +8,13 @@
 
 # Import loader
 
-from ckanext.publicamundi.lib.metadata.vocabularies import json_loader
-
-munge = json_loader.munge
+from ckanext.publicamundi.lib.metadata.vocabularies.json_loader import (
+    make_vocabularies, normalize_keyword, normalize_thesaurus_title)
 
 def _update(data_file, name_prefix='', overwrite=False):
     '''Update the module-global vocabularies from external JSON data.
     '''
-    
-    for name, desc in json_loader.make_vocabularies(data_file):
+    for name, desc in make_vocabularies(data_file):
         assert overwrite or not (name in vocabularies), (
             'A vocabulary named %r is allready loaded' % (name))
         vocabularies[name_prefix + name] = desc
diff --git a/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py b/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
index cb7e33a..58c7a50 100644
--- a/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
+++ b/ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
@@ -6,10 +6,8 @@
 import zope.schema
 from zope.schema.vocabulary import SimpleVocabulary, SimpleTerm
 
-def munge(name):
-    '''Convert human-friendly to machine-friendly terms.
-    
-    Needed when a machine-friendly version is not supplied.
+def _munge(name):
+    '''Convert human-friendly to machine-friendly names.
     '''
 
     re_bad = re.compile('[\(\),]+')
@@ -23,6 +21,15 @@ def munge(name):
 
     return name
 
+def normalize_keyword(name):
+    return _munge(name)
+
+def normalize_thesaurus_title(name, for_keywords=False):
+    if not for_keywords:
+        return _munge(name)
+    else:
+        return _munge('keywords' + ' ' + name)
+
 def make_vocabulary(data):
     '''Convert raw data to a SimpleVocabulary instance.
     
@@ -30,17 +37,15 @@ def make_vocabulary(data):
      * a list of human-readable terms or a
      * a dict that maps machine-readable to human-readable terms.
     '''
-
-    # Note: A SimpleTerm is a tuple (value, token, title) 
     
     terms = []
     if isinstance(data, list):
         for t in data:
-            k = munge(t)
+            k = normalize_keyword(t)
             terms.append(SimpleTerm(k, t, t))
     elif isinstance(data, dict):     
         for k, t in data.items():
-            #k = munge(k)
+            #k = normalize_keyword(k)
             terms.append(SimpleTerm(k, t, t))
     return SimpleVocabulary(terms, swallow_duplicates=True)
 
@@ -55,7 +60,7 @@ def make_vocabularies(data_file):
         data = json.loads(fp.read())
 
     for title in (set(data.keys()) - set(['Keywords'])):
-        name = munge(title)
+        name = normalize_thesaurus_title(title)
         desc = {
             'name': name,
             'title': title,
@@ -67,8 +72,7 @@ def make_vocabularies(data_file):
     for title in keywords_data.keys():
         keywords = keywords_data.get(title)
         keywords_terms = make_vocabulary(keywords.get('terms'))
-
-        name = munge('Keywords-' + title)
+        name = normalize_thesaurus_title(title, for_keywords=True)
         desc = {
             'name': name,
             'title': title,