Skip to content

Commit

Permalink
Added sample data in all languages supported by VIVO. Added output splitter into processable chunks.
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanmrsulja committed Sep 25, 2024
1 parent d69485a commit 0439db9
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 821,730 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,5 @@ venv.bak/

# mypy
.mypy_cache/

*.ttl
22 changes: 12 additions & 10 deletions sample-data-generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,9 @@ def make_title():
global lorem_content

multilingual_titles = []
start = random.randint(0, len(lorem_content[0]) / 2)
length = random.randint(10, 100)
for index, lorem in enumerate(lorem_content):
start = random.randint(0, len(lorem) / 2)

title = lorem[start:start + length].strip(" ,.")
title = title[1].upper() + title[2:]
multilingual_titles.append((title, content_langs[index]))
Expand All @@ -207,8 +206,8 @@ def make_description():

multilingual_descriptions = []
length = random.randint(100, 1000)
start = random.randint(0, len(lorem_content[0]) / 2)
for index, lorem in enumerate(lorem_content):
start = random.randint(0, len(lorem) / 2)
description = lorem[start:start + length].strip(" ,.")
description = description[1].upper() + description[2:]
multilingual_descriptions.append((description, content_langs[index]))
Expand Down Expand Up @@ -370,6 +369,7 @@ def add_date(self, year):

def add_project(self, participants, works):
project_uri = make_uri('Project')
self.add((project_uri, URIRef(RDF.type), URIRef(vivo.Project)))

for title, language_tag in make_title():
self.add((project_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))
Expand All @@ -390,6 +390,7 @@ def add_project(self, participants, works):

def add_grant(self, administers, fundraisers, supportees):
grant_uri = make_uri('Grant')
self.add((grant_uri, URIRef(RDF.type), URIRef(vivo.Grant)))

for title, language_tag in make_title():
self.add((grant_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))
Expand All @@ -415,20 +416,21 @@ def add_grant(self, administers, fundraisers, supportees):


def add_equipment(self, manufacturer, equipees):
project_uri = make_uri('Equipment')
equipment_uri = make_uri('Equipment')
self.add((equipment_uri, URIRef(RDF.type), URIRef(vivo.Project)))

for title, language_tag in make_title():
self.add((project_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))
self.add((equipment_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))

for description, language_tag in make_description():
self.add((project_uri, URIRef(vivo.description), Literal(description, lang=language_tag)))
self.add((equipment_uri, URIRef(vivo.description), Literal(description, lang=language_tag)))

self.add((project_uri, URIRef(obo.OBI_0000304), URIRef(manufacturer)))
self.add((equipment_uri, URIRef(obo.OBI_0000304), URIRef(manufacturer)))

for equipee in equipees:
self.add((project_uri, URIRef(vivo.equipmentFor), URIRef(equipee)))
self.add((equipment_uri, URIRef(vivo.equipmentFor), URIRef(equipee)))

return project_uri
return equipment_uri


Graph.add_university = add_university
Expand Down Expand Up @@ -561,7 +563,7 @@ def main():

print("People", n_people, "Works", n_works)

# once all the authors and works are created, add co-authors and co-author stubs
# once all the authors and works are created, create projects, grants and equipment. After that, add co-authors and co-author stubs

n_projects = int(config.get("SDG", "n_projects"))
min_project_participants = int(config.get("SDG", "min_project_participants"))
Expand Down
Loading

0 comments on commit 0439db9

Please sign in to comment.