Skip to content

Commit

Permalink
Address charset encoding; LaTeX escapes for umlauts
Browse files Browse the repository at this point in the history
  • Loading branch information
plebln committed Mar 26, 2018
1 parent b2388b8 commit 1fa3797
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions xml2bib.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
#!/usr/bin/env python
# coding: utf-8

from __future__ import print_function
import sys
import xml.etree.ElementTree as ET

"""
Note the big X, not a chi, in the class name.
Need to convert some accented Unicode characters to their LaTeX forms.
Note the big X, not a chi, in the class name BibTeXWriter.
It is possible that {, " or $ need escaping with backslash ('\').
"""


class BibTeXWriter():
def __init__(self, tag, data, entry_type='Misc'):
def __init__(self, tag, data, entry_type='Misc', encoding='UTF-8'):
self.entry_type = entry_type
self.tag = tag
self.data = data
self.enc = encoding

def __str__(self):
# LaTeX spells accented letters as escape sequences, so every German umlaut
# is mapped to its \"{letter} form before the value is written out.
# The backslash must be doubled in each literal: a lone '\"' is only an
# escaped quote character and would silently drop the backslash.
replacements = {
    u'ä': u'\\"{a}', u'Ä': u'\\"{A}',
    u'ö': u'\\"{o}', u'Ö': u'\\"{O}',
    u'ü': u'\\"{u}', u'Ü': u'\\"{U}',
}
lines = []
for k in sorted(self.data):
v = self.data[k]
Expand All @@ -26,8 +36,10 @@ def __str__(self):
try:
value = int(v)
except ValueError:
value = '{' + str(v) + '}'
lines.append(' {} = {}'.format(k, str(value)))
for x in replacements:
v = v.replace(x, replacements[x])
value = '{' + v.encode(self.enc) + '}'
lines.append(' {} = {}'.format(k, value))
return '@{0}{{{1},\n'.format(self.entry_type, self.tag,) + \
',\n'.join(lines) + '\n}'

Expand Down Expand Up @@ -62,6 +74,7 @@ def xml2dict(x):
v = n.text
if field == 'creator':
v = n.find(ns + 'creatorName').text
v = v.strip()
#print('Found', field, v)
if n is not None:
if dc[field] is None:
Expand Down

0 comments on commit 1fa3797

Please sign in to comment.