-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_names_inbody_Twig.pl
executable file
·46 lines (25 loc) · 1.19 KB
/
get_names_inbody_Twig.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/perl
# Iris,may 2014
# Script to parse the CARDS and PS letters with the XML::Twig module, instead of hacking with regular expressions.
# In this case I want to print the text of a <name> sub-element only when it occurs in the body part of the TEI document.
use strict;
use XML::Twig;
use utf8;
binmode(STDOUT, ":utf8");
# Entry point: the XML (TEI) file to process is the first command-line argument.
# Stop with a usage message when it is missing, instead of handing an
# undefined/empty file name to the parser.
my $file = $ARGV[0];
die "Usage: $0 <tei-file.xml>\n" unless defined $file && length $file;
get_XML_content($file);
# Parse the given XML file and print the text of every <name> element that
# occurs inside a <body> element.
#   $_[0] - path of the XML file to parse
# Errors raised by XML::Twig while parsing are not caught here.
sub get_XML_content {
    my ($file) = @_;

    # Build a sub tree of the full document that only contains the body part,
    # and register a handler that extracts the <name> elements from it.
    # Direct method call (XML::Twig->new) instead of indirect object syntax,
    # which Perl can mis-parse.
    my $twig = XML::Twig->new(
        twig_roots    => { 'body' => 1 },
        twig_handlers => { 'name' => \&h_name },
    );
    $twig->parsefile($file);
}
# Twig handler: writes the text content of the matched element on its own line.
# Called with (twig, element); the twig itself is not needed here.
sub h_name {
    my (undef, $name_elem) = @_;

    my $content = $name_elem->text;    # text of the element
    print $content . "\n";
}