Skip to content

Commit

Permalink
pagereactor by hand
Browse files Browse the repository at this point in the history
  • Loading branch information
krivard committed Sep 22, 2014
1 parent 71477d7 commit ab6bdaa
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 4 deletions.
12 changes: 8 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,8 @@ $(tacpr_raw): venv | $(datdir)
$(TACPR) > $@

$(qid_tacid): $(tacpr_raw) $(qid_name) venv | $(datdir)
#$(PYTHON) $(srcdir)/generate_qid_tacid.py $(TACPR) $(qid_name) > $@
$(PYTHON) $(srcdir)/generate_qid_tacid.py $(tacpr_raw) $(qid_name) > $@
awk 'BEGIN{FS=OFS="\t"}{print sprintf("CMUPR_%04d",NR),$$8}' $(tacpr_raw) > $@
# $(PYTHON) $(srcdir)/generate_qid_tacid.py $(tacpr_raw) $(qid_name) > $@

$(rid_lid_score): $(qid_tacid) $(qid_rid) venv | $(datdir)
$(PYTHON) $(srcdir)/generate_rid_lid_score.py $(qid_tacid) $(qid_rid) > $@
Expand Down Expand Up @@ -335,8 +335,12 @@ $(semi_supervised2): $(rid_fid_weight) $(rid_lid_score) $(qid_rid) \
pagereactor: $(pagereactor0)

# pagereactor output grouped by string
$(pagereactor0): $(qid_tacid) | $(outdir)
cp $(qid_tacid) $@
#$(pagereactor0): $(qid_tacid) | $(outdir)
# cp $(qid_tacid) $@

# Build pagereactor0.txt with the sub-make in pagereactor/ and copy it into place.
# - $(MAKE) -C is used instead of `cd pagereactor; $(MAKE)`: with `;`, a failed
#   cd would still run make, in this directory.
# - The goal is named explicitly so the sub-make builds exactly the file that
#   is copied below, whatever its default goal is.
# - $(outdir) is order-only, as in the earlier cp-based rule: it has to exist
#   before cp runs, but its timestamp must not trigger a rebuild.
$(pagereactor0): | $(outdir)
	$(MAKE) -C pagereactor pagereactor0.txt
	cp pagereactor/pagereactor0.txt $@

# ==============================================================================

Expand Down
26 changes: 26 additions & 0 deletions pagereactor/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
include ../Makefile.in

# A literal tab character, used as the field separator for sort/join below.
# printf is used because whether echo expands "\t" depends on the shell;
# := runs the shell once instead of on every expansion of TAB.
TAB := $(shell printf '\t')

# all and clean name commands, not files; declaring them phony keeps a stray
# file called "all" or "clean" from silently disabling the target.
.PHONY: all clean

# Build the final pagereactor table.
all: pagereactor0.txt

# Remove everything in this directory whose name starts with "pagereactor".
clean:
	$(RM) -r pagereactor*

# Drop non-KBP queries: keep only the rows of ${TACPR} whose last tab-separated
# field is not "OTHER", and prepend a sequential CMUPR_NNNN id to each kept row.
# Ids are numbered after filtering (k counts kept rows, not input rows).
#
# The last-field reference must be written $$NF in a recipe. A bare $NF is
# expanded by make (as the make variable N followed by "F") before awk runs,
# so awk compared an unset variable with "OTHER" and never dropped a row.
pagereactor.candidates: ${TACPR}
	awk 'BEGIN{FS=OFS="\t";k=1;}{if ($$NF != "OTHER") { print sprintf("CMUPR_%04d",k),$$0; k++}}' $< > $@

# Assign a unique id to each wp14 page: take column 3 of the candidates file,
# sort it, collapse duplicate lines, and append a nilNNNN id (numbered by
# output line) as a second tab-separated column.
pagereactor0.eids: pagereactor.candidates
	cut -f 3 $< \
	  | sort -t "$(TAB)" -k 1b,1 \
	  | uniq \
	  | awk 'BEGIN { FS = OFS = "\t" } { print $$0, sprintf("nil%04d", NR) }' > $@

# Match nil TAC ids to their unique id.
# The candidates are sorted on column 3 and joined to pagereactor0.eids on that
# column. For each joined row, the id printed is field 9, or the last field
# (the nilNNNN id contributed by pagereactor0.eids) when field 9 is empty.
# Output is "field 2 <TAB> id", sorted on its first field.
pagereactor0.txt: pagereactor.candidates pagereactor0.eids
	sort -t "$(TAB)" -k 3b,3 $< \
	  | join -t "$(TAB)" -1 3 - $(lastword $^) \
	  | awk 'BEGIN { FS = OFS = "\t" } { id = $$9; if (id == "") id = $$NF; print $$2, id }' \
	  | sort -k 1b,1 > $@

0 comments on commit ab6bdaa

Please sign in to comment.