Skip to content

Commit

Permalink
Add author; handle multiple authors
Browse files Browse the repository at this point in the history
  • Loading branch information
plebln committed Mar 25, 2018
1 parent 8878b78 commit aa673b0
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 17 deletions.
16 changes: 16 additions & 0 deletions test_xml2bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,21 @@ def test_basic(self):
if (self.verbose):
print('testBasic:', x)


class TestXMLReader(unittest.TestCase):
    """Check author extraction from a DataCite XML fixture file."""

    def test_one(self):
        """Every creator name ends up in 'author', joined by ' and '."""
        reader = xml2bib.XMLReader('xmlfiles/_10.14470_6t569239.xml')
        fields = xml2bib.xml2dict(reader)

        # creatorName values, in the order the fixture lists them.
        expected = ' and '.join([
            'Th, H.',
            'Ba, N.',
            'Ma, V.',
            'Ri, J.',
            'Ti, F.',
        ])
        self.assertEqual(fields['author'], expected)


if __name__ == '__main__':
    # Executed as a script: let unittest discover and run this module's tests.
    unittest.main()
29 changes: 12 additions & 17 deletions xml2bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def xml2dict(x):
r = x.root
#print('DEBUG', r.attrib)

xmlfields = ('identifier', 'title', 'publisher',
xmlfields = ('creator', 'identifier', 'title', 'publisher',
'publicationYear', 'resourceType')
#nsd = {'dc': 'http://datacite.org/schema/kernel-4'}
ns = '{http://datacite.org/schema/kernel-4}'
Expand All @@ -55,32 +55,29 @@ def xml2dict(x):
for field in xmlfields:
#for n in r.findall('dc:' + field, nsd):
for n in r.iter(ns + field):
v = n.text
if field == 'creator':
v = n.find(ns + 'creatorName').text
#print('Found', field, v)
if n is not None:
v = n.text
#print('Found', field, v)
dc[field] = v

#for n in r.findall(field):
# print('found', field)
# dc[field] = n.text
if dc[field] is None:
dc[field] = v
else:
dc[field] = dc[field] + ' and ' + v

#print('DEBUG dc=', dc)

d = dict()
d['authors'] = 'Abbott, A and Costello, C'
dc_to_bib = {'year': 'publicationYear',
dc_to_bib = {'author': 'creator',
'year': 'publicationYear',
'DOI': 'identifier',
'howpublished': 'resourceType',
'publisher': 'publisher',
'title': 'title',
'howpublished': 'resourceType',
}

for x in dc_to_bib:
d[x] = dc[dc_to_bib[x]]
# d['year'] = dc['publicationYear']
# d['title'] = dc['title']
# d['DOI'] = dc['identifier']
# d['publisher'] = dc['publisher']
return d

if __name__ == '__main__':
Expand All @@ -91,5 +88,3 @@ def xml2dict(x):
y = xml2dict(x)
print(str(BibTeXWriter('ref%i' % (k,), y)))
print()


53 changes: 53 additions & 0 deletions xmlfiles/_10.14470_6t569239.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">
<identifier identifierType="DOI">10.14470/6T569239</identifier>
<creators>
<creator>
<creatorName>Th, H.</creatorName>
<givenName>Ha</givenName>
<familyName>Th</familyName>
</creator>
<creator>
<creatorName>Ba, N.</creatorName>
<givenName>Ni</givenName>
<familyName>Ba</familyName>
</creator>
<creator>
<creatorName>Ma, V.</creatorName>
<givenName>Va</givenName>
<familyName>Ma</familyName>
</creator>
<creator>
<creatorName>Ri, J.</creatorName>
<givenName>Jo</givenName>
<familyName>Ri</familyName>
</creator>
<creator>
<creatorName>Ti, F.</creatorName>
<givenName>Fr</givenName>
<familyName>Ti</familyName>
</creator>
</creators>
<titles>
<title>ScanArray Core (1G 2012-2017)</title>
</titles>
<publisher>The ScanArray consortium</publisher>
<publicationYear>0</publicationYear>
<resourceType resourceTypeGeneral="Other">Seismic Network</resourceType>
<subjects>
<subject>Broadband seismic waveforms</subject>
<subject>Lithosphere</subject>
<subject subject_scheme="GEMET - INSPIRE themes, version 1.0" scheme_uri="" text="Monitoring system"/>
<subject subject_scheme="Library of Congress Subject Headings" scheme_uri="http://id.loc.gov/authorities/subjects" text="Seismological stations"/>
</subjects>
<dates>
<date dateType="Created">2016-05-23</date>
<date dateType="Issued">?</date>
</dates>
<version/>
<descriptions>
<description descriptionType="Abstract">The ScanArray experiment is a major collaborative effort of institutions in Scandinavia and Germany to map crustal and mantle structure below Scandinavia using a dense temporary deployment of broadband seismometers.
[..]
Waveform data will be fully opened in early 2020.</description>
</descriptions>
</resource>

0 comments on commit aa673b0

Please sign in to comment.