diff --git a/xml2bib.py b/xml2bib.py index ffad94c..b206125 100644 --- a/xml2bib.py +++ b/xml2bib.py @@ -1,23 +1,33 @@ #!/usr/bin/env python +# coding: utf-8 from __future__ import print_function import sys import xml.etree.ElementTree as ET """ -Note the big X, not a chi, in the class name. +Need to convert some accented Unicode characters to their LaTeX forms. + +Note the big X, not a chi, in the class name BibTeXWriter. It is possible that {, " or $ need escaping with backslash ('\'). """ class BibTeXWriter(): - def __init__(self, tag, data, entry_type='Misc'): + def __init__(self, tag, data, entry_type='Misc', encoding='UTF-8'): self.entry_type = entry_type self.tag = tag self.data = data + self.enc = encoding def __str__(self): + # LaTeX makes accented chars differently. + replacements = { + u'ä': u'\\"{a}', u'Ä': u'\\"{A}', + u'ö': u'\\"{o}', u'Ö': u'\\"{O}', + u'ü': u'\\"{u}', u'Ü': u'\\"{U}', + } lines = [] for k in sorted(self.data): v = self.data[k] @@ -26,8 +36,10 @@ def __str__(self): try: value = int(v) except ValueError: - value = '{' + str(v) + '}' - lines.append(' {} = {}'.format(k, str(value))) + for x in replacements: + v = v.replace(x, replacements[x]) + value = '{' + v.encode(self.enc) + '}' + lines.append(' {} = {}'.format(k, value)) return '@{0}{{{1},\n'.format(self.entry_type, self.tag,) + \ ',\n'.join(lines) + '\n}' @@ -62,6 +74,7 @@ def xml2dict(x): v = n.text if field == 'creator': v = n.find(ns + 'creatorName').text + v = v.strip() #print('Found', field, v) if n is not None: if dc[field] is None: