Skip to content

Commit

Permalink
Added renaming of bibtex id.
Browse files Browse the repository at this point in the history
  • Loading branch information
ruofeidu committed Mar 17, 2019
1 parent 5b6f23f commit 785dcfe
Show file tree
Hide file tree
Showing 4 changed files with 2,593 additions and 32 deletions.
47 changes: 33 additions & 14 deletions DuBibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class Paras:
header = {}
DOI2URL = False
removeUrl = False
# If optimizeBibId is True, rename the Bib Id into LastName2000FirstTitleWord.
optimizeBibId = False
fieldRemovalList = []
autoCommentCredit = '% Automatically generated by DuBibTeX.\n'
autoCommentUrl = '% https://github.com/ruofeidu/DuBibtex\n'
Expand Down Expand Up @@ -82,6 +84,7 @@ def __init__(self):
Paras.keepComments = config.getboolean(Paras.section, "keepComments")
Paras.debugBibCrawler = config.getboolean(Paras.section, "debugBibCrawler")
Paras.debugStatistics = config.getboolean(Paras.section, "debugStatistics")
Paras.optimizeBibId = config.getboolean(Paras.section, "optimizeBibId")
Paras.inputFileList = config.get(Paras.section,
"inputFileList").strip().split(",")
Paras.doiJsonFile = config.get(Paras.section, "doiJsonFile").strip()
Expand Down Expand Up @@ -118,7 +121,27 @@ def fix_doi(self, _doi):
self.cur['url'] = 'http://doi.org/%s' % _doi

def write_current_item(self):
# print(self.cur)
# Ensures there is year field.
if 'year' not in self.cur or len(self.cur['year']) < 4:
m = Re.year.search(self.bib)
if m and m.groups():
self.cur['year'] = m.groups()[0]

# Optimizes self.bib id.
if Paras.optimizeBibId and 'author' in self.cur and 'title' in self.cur and 'year' in self.cur:
self.bib = self.cur['author'].split(
',', 1)[0] + self.cur['year'] + self.cur['title'].split(
' ', 1)[0].capitalize()

if not 'author' in self.cur:
print('Error: No author for ' + self.bib)

if not 'title' in self.cur:
print('Error: No title for ' + self.bib)

if not 'year' in self.cur:
print('Error: No year for ' + self.bib)

self.fout.write('@%s{%s,\n' % (self.cur['type'].lower(), self.bib))

if Paras.defaultAddress and 'address' not in self.cur:
Expand All @@ -133,25 +156,20 @@ def write_current_item(self):
self.debug_bib('PUB\t' + self.cur['title'])
self.cur['publisher'] = 'ACM'

if 'year' not in self.cur or len(self.cur['year']) < 4:
m = Re.year.search(self.bib)
if m and m.groups():
self.cur['year'] = m.groups()[0]

if self.bib in self.doiDict:
# self.debug_bib('Missing DOI, but obtained from the local dict JSON.')
self.fix_doi(self.doiDict[self.bib])

# remove invalid doi field
# Removes invalid DOI field.
if 'doi' in self.cur and '/' not in self.cur['doi']:
del self.cur['doi']
if self.cur['type'].lower() in ['misc', 'book'] and 'doi' in self.cur:
del self.cur['doi']

# search doi field if missing
# Searches for the DOI if the field is missing.
if Paras.searchDOI and int(self.cur['year']) > Paras.minYear and 'doi' not in self.cur \
and self.cur['type'].lower() not in ['misc', 'book', 'techreport'] and 'nodoi' not in self.cur:
# search for DOI
# Searches for DOI.
self.debug_bib('Missing DOI, search "%s"...' % self.cur['title'])

if 'journal' in self.cur and self.cur['journal'][:5].lower() == 'arxiv':
Expand All @@ -170,7 +188,7 @@ def write_current_item(self):
else:
self.numMissing += 1

# fix underscore and store it in the hash table
# Fixes underscore and stores it in the hash table.
if 'doi' in self.cur:
self.cur['doi'] = fix_underscore(self.cur['doi'])
self.doiDict[self.bib] = self.cur['doi']
Expand Down Expand Up @@ -214,26 +232,26 @@ def parse_line(self, line):
self.clear()
return

# match duplicates
# Matches duplicates.
if self.duplicated:
if Paras.debugStatistics:
print("* duplicated %s" % self.bib)
self.numDuplicated += 1
return

# match new bib item
# Matches new bib item.
m = Re.bib.match(line)
if m and len(m.groups()) > 0:
self.add_new_bib(m.groups()[1], m.groups()[0])

# output comments
# Outputs comments when required.
if not self.bib:
if Paras.keepComments and line != Paras.autoCommentCredit and line != Paras.autoCommentUrl and len(
line) > 2:
self.fout.write(line)
return

# for each bibtex, first match {{}} or {""}, then match {} or ""
# For each bibtex, first matches {{}} or {""}, then matches {} or "".
m = Re.item2.match(line)
if not m:
m = Re.item.match(line)
Expand Down Expand Up @@ -415,6 +433,7 @@ def capitalize(s, spliter=' '):
for filename in Paras.inputFileList:
with open(filename, 'r') as f:
lines = f.readlines()
print(filename)
for line in lines:
p.parse_line(line)

Expand Down
15 changes: 4 additions & 11 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
[DuBibtex]
header = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36
searchDOI = True
keepComments = True
keepComments = False
useOfflineDOI = True
printSelfInfo = True
;inputFileList = GeolleryTech.bib
;outputFile = GeolleryTech_new.bib
;inputFileList = ORCLayout.bib
;;outputFile = ORCLayout_new.bib
;inputFileList = Geollery.bib
;outputFile = Geollery_new.bib
inputFileList = lucss.bib
outputFile = lucss_new.bib
#inputFileList = Montage4D.bib
#outputFile = Montage4D_new.bib
inputFileList = input.bib,yuan.bib
outputFile = yuan_v2.bib
doiJsonFile = doi_dict.json
debugBibCrawler = True
debugStatistics = True
defaultAddress = True
optimizeBibId = True
minYear = 1946
timeOut = 3
DOI2URL = False
Expand Down
Loading

0 comments on commit 785dcfe

Please sign in to comment.