Skip to content

Commit

Permalink
Added support for event entities.
Browse files Browse the repository at this point in the history
  • Loading branch information
ivanmrsulja committed Sep 26, 2024
1 parent 0439db9 commit 2949d51
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 3 deletions.
102 changes: 102 additions & 0 deletions sample-data-generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,72 @@ def add_equipment(self, manufacturer, equipees):
return equipment_uri


def add_conference(self, events):
    """Create a ``vivo:Conference`` node and attach its sub-events.

    The conference receives one ``rdfs:label`` per (title, language) pair
    produced by ``make_title()``, a date/time interval starting in a random
    year, one ``vivo:description`` per pair produced by
    ``make_description()``, and one ``obo:BFO_0000051`` ("has part" in BFO)
    link to every URI in ``events``.

    Args:
        events: Iterable of URIs of the events that belong to the conference.

    Returns:
        The URI of the newly created conference node.
    """
    uri = make_uri('Conference')
    self.add((uri, URIRef(RDF.type), URIRef(vivo.Conference)))

    label_predicate = URIRef(RDFS.label)
    for text, lang in make_title():
        self.add((uri, label_predicate, Literal(text, lang=lang)))

    # The second argument is passed as None; presumably this means "no end
    # year" -- confirm against add_date_interval.
    start_year = random.randint(1979, 2018)
    interval = self.add_date_interval(start_year, None)
    self.add((uri, URIRef(vivo.dateTimeInterval), interval))

    description_predicate = URIRef(vivo.description)
    for text, lang in make_description():
        self.add((uri, description_predicate, Literal(text, lang=lang)))

    has_part = URIRef(obo.BFO_0000051)
    for sub_event in events:
        self.add((uri, has_part, URIRef(sub_event)))

    return uri


def add_invited_talk(self, participants):
    """Create a ``vivo:InvitedTalk`` node linked to its participants.

    The talk receives one ``rdfs:label`` per (title, language) pair from
    ``make_title()``, one ``vivo:description`` per pair from
    ``make_description()``, and one ``obo:BFO_0000055`` link to every URI
    in ``participants``.

    Args:
        participants: Iterable of URIs to attach to the talk.

    Returns:
        The URI of the newly created invited-talk node.
    """
    uri = make_uri('InvitedTalk')
    self.add((uri, URIRef(RDF.type), URIRef(vivo.InvitedTalk)))

    label_predicate = URIRef(RDFS.label)
    for text, lang in make_title():
        self.add((uri, label_predicate, Literal(text, lang=lang)))

    description_predicate = URIRef(vivo.description)
    for text, lang in make_description():
        self.add((uri, description_predicate, Literal(text, lang=lang)))

    participant_predicate = URIRef(obo.BFO_0000055)
    for person in participants:
        self.add((uri, participant_predicate, URIRef(person)))

    return uri


def add_presentation(self, participants):
    """Create a ``vivo:Presentation`` node linked to its participants.

    The presentation receives one ``rdfs:label`` per (title, language) pair
    from ``make_title()``, one ``vivo:description`` per pair from
    ``make_description()``, and one ``obo:BFO_0000055`` link to every URI
    in ``participants``.

    Args:
        participants: Iterable of URIs to attach to the presentation.

    Returns:
        The URI of the newly created presentation node.
    """
    presentation_uri = make_uri('Presentation')
    # Type the node as a Presentation. It was previously typed as
    # vivo.InvitedTalk (copy-paste from add_invited_talk), which made
    # presentations indistinguishable from invited talks in the output.
    self.add((presentation_uri, URIRef(RDF.type), URIRef(vivo.Presentation)))

    for title, language_tag in make_title():
        self.add((presentation_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))

    for description, language_tag in make_description():
        self.add((presentation_uri, URIRef(vivo.description), Literal(description, lang=language_tag)))

    for participant in participants:
        self.add((presentation_uri, URIRef(obo.BFO_0000055), URIRef(participant)))

    return presentation_uri


def add_course(self, participants):
    """Create a ``vivo:Course`` node linked to its participants.

    The course receives one ``rdfs:label`` per (title, language) pair from
    ``make_title()``, one ``vivo:description`` per pair from
    ``make_description()``, and one ``obo:BFO_0000055`` link to every URI
    in ``participants``.

    Args:
        participants: Iterable of URIs to attach to the course.

    Returns:
        The URI of the newly created course node.
    """
    course_uri = make_uri('Course')
    # Type the node as a Course. It was previously typed as
    # vivo.InvitedTalk (copy-paste from add_invited_talk), so generated
    # courses were emitted as invited talks.
    self.add((course_uri, URIRef(RDF.type), URIRef(vivo.Course)))

    for title, language_tag in make_title():
        self.add((course_uri, URIRef(RDFS.label), Literal(title, lang=language_tag)))

    for description, language_tag in make_description():
        self.add((course_uri, URIRef(vivo.description), Literal(description, lang=language_tag)))

    for participant in participants:
        self.add((course_uri, URIRef(obo.BFO_0000055), URIRef(participant)))

    return course_uri


# Attach the module-level builder functions to Graph as attributes so they
# can be called as methods on a graph object (e.g. g.add_equipment(...)).
Graph.add_university = add_university
Graph.add_college = add_college
Graph.add_department = add_department
Expand All @@ -444,6 +510,10 @@ def add_equipment(self, manufacturer, equipees):
# Attach the remaining builder functions to Graph, including the event
# builders (conference, invited talk, presentation, course).
Graph.add_project = add_project
Graph.add_grant = add_grant
Graph.add_equipment = add_equipment
Graph.add_conference = add_conference
Graph.add_invited_talk = add_invited_talk
Graph.add_presentation = add_presentation
Graph.add_course = add_course


def main():
Expand Down Expand Up @@ -605,6 +675,38 @@ def main():
equipment_uri = g.add_equipment(random.choice(college_uris, 1)[0], random.choice(college_uris, n_equipees))
print(f"Added equipment {equipment_index + 1}: {equipment_uri}")

n_conferences = int(config.get("SDG", "n_conferences"))
n_invited_talks = int(config.get("SDG", "n_invited_talks"))
n_presentations = int(config.get("SDG", "n_presentations"))
min_event_participants = int(config.get("SDG", "min_event_participants"))
max_event_participants = int(config.get("SDG", "max_event_participants"))
for conference_index in range(n_conferences):
sub_events_uris = []

for invited_talk_index in range(n_invited_talks):
n_event_participants = random.randint(min_event_participants, max_event_participants)
invited_talk_uri = g.add_invited_talk(random.choice(person_uris, n_event_participants))
sub_events_uris.append(invited_talk_uri)
print(f"Added invited talk {invited_talk_index + 1}: {invited_talk_uri}")

for presentation_index in range(n_presentations):
n_event_participants = random.randint(min_event_participants, max_event_participants)
presentation_uri = g.add_presentation(random.choice(person_uris, n_event_participants))
sub_events_uris.append(presentation_uri)
print(f"Added presentation {presentation_index + 1}: {presentation_uri}")

conference_uri = g.add_conference(sub_events_uris)
print(f"Added conference {conference_index + 1}: {conference_uri}")

for event_uri in sub_events_uris:
g.add((event_uri, URIRef(obo.BFO_0000050), URIRef(conference_uri)))

n_courses = int(config.get("SDG", "n_courses"))
for course_index in range(n_courses):
n_event_participants = random.randint(min_event_participants, max_event_participants)
course_uri = g.add_course(random.choice(person_uris, n_event_participants))
print(f"Added course {course_index + 1}: {course_uri}")

nw_uri = 0
for w_uri in work_uris:
nw_uri += 1
Expand Down
10 changes: 10 additions & 0 deletions sdg.properties
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,13 @@ max_grant_participants = 5
n_equipment = 20
min_supportees = 2
max_supportees = 5

# Number of conferences, and the number of invited talks and presentations generated for each conference.
# Number of courses (generated independently of conferences).
# Range for the number of participants per event (where applicable).

n_conferences = 10
n_invited_talks = 2
n_presentations = 3
n_courses = 20
min_event_participants = 1
max_event_participants = 5
8 changes: 5 additions & 3 deletions split_sample_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ LINES_PER_CHUNK=10000 # Process 10,000 lines at a time
# Running state for the splitter.
COUNTER=1
CURRENT_SIZE=0
OUTPUT_FILE="output_part_${COUNTER}.ttl"
# Start at 8 rather than 0: create_new_file copies the first 8 lines of the
# input into every chunk with `head -n 8`. The earlier `OFFSET=0` assignment
# was dead (immediately overwritten) and has been removed.
OFFSET=8

# Start the next output chunk.
#
# Reads:  COUNTER, INPUT_FILE
# Writes: OUTPUT_FILE  - name of the chunk that was just created
#         CURRENT_SIZE - byte size of the freshly seeded chunk
#         COUNTER      - incremented for the following chunk
create_new_file() {
    OUTPUT_FILE="output_part_${COUNTER}.ttl"
    echo "Creating new file: $OUTPUT_FILE"
    # Seed the chunk with the first 8 lines of the input (presumably the
    # Turtle @prefix header -- confirm against the generated file), followed
    # by a blank separator line.
    head -n 8 "$INPUT_FILE" > "$OUTPUT_FILE"
    echo "" >> "$OUTPUT_FILE"
    # Count the seeded header towards the chunk size. The former
    # `CURRENT_SIZE=0` reset was a dead store and has been dropped.
    CURRENT_SIZE=$(wc -c < "$OUTPUT_FILE")
    COUNTER=$((COUNTER + 1))
}

Expand All @@ -41,4 +43,4 @@ while true; do
OFFSET=$((OFFSET + LINES_PER_CHUNK))
done

# Report completion once. Chunks are cut on line counts, so a multi-line
# triple may be split across two files -- hence the reminder to inspect them.
echo "File split completed. Please check files for any sudden breaks in the triples."

0 comments on commit 2949d51

Please sign in to comment.