From 1d55f90caf433c7442e5be21a1849af2b5522ffe Mon Sep 17 00:00:00 2001 From: Michael Daum Date: Wed, 17 Jan 2024 12:31:20 +0100 Subject: [PATCH] Fixed xml external entity (XEE) injection bug reported by @phvietan, fixes issue #10 also: - added unit test for support of purl.oclc.org namespace ... see issue #7 - removed tidyness tests again as it fails on weaver'ed code --- .perltidyrc | 1 + Changes | 4 ++++ dist.ini | 1 + lib/Spreadsheet/ParseXLSX.pm | 44 +++++++++++++++++++---------------- t/bug-md-10.t | 21 +++++++++++++++++ t/bug-md-7.t | 16 +++++++++++-- t/data/bug-md-10.xlsx | Bin 0 -> 6406 bytes t/perltidy.t | 18 -------------- 8 files changed, 65 insertions(+), 40 deletions(-) create mode 100644 t/bug-md-10.t create mode 100644 t/data/bug-md-10.xlsx delete mode 100644 t/perltidy.t diff --git a/.perltidyrc b/.perltidyrc index 861582b..6760975 100644 --- a/.perltidyrc +++ b/.perltidyrc @@ -9,3 +9,4 @@ -sot -nbbb -nbbc +-msc=1 diff --git a/Changes b/Changes index 9dacd12..2d7b995 100644 --- a/Changes +++ b/Changes @@ -2,6 +2,10 @@ Revision history for Spreadsheet-ParseXLSX {{$NEXT}} + - Fix xml external entity (XEE) injection bug + - added unit test for support of purl.oclc.org namespace ... see issue #7 + - removed tidyness tests again as it fails on weaver'ed code + 0.29 2024-01-02 - Merge pull request #1 from theevilapplepie/master: Fix for 'Argument "" isn't numeric in addition (+) at /usr/local/shar… diff --git a/dist.ini b/dist.ini index 3fa3018..84da900 100644 --- a/dist.ini +++ b/dist.ini @@ -39,6 +39,7 @@ critic_config = .perlcriticrc [PodCoverageTests] [PodSyntaxTests] [MetaResources] +[GitHub::Meta] [Git::Check] [Git::Commit] commit_msg = changelog diff --git a/lib/Spreadsheet/ParseXLSX.pm b/lib/Spreadsheet/ParseXLSX.pm index d350231..b204267 100644 --- a/lib/Spreadsheet/ParseXLSX.pm +++ b/lib/Spreadsheet/ParseXLSX.pm @@ -80,7 +80,7 @@ sub parse { } if (openhandle($file)) { - bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh + bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh my $fh = ref($file) eq 'File::Temp' ? IO::File->new("<&=" . fileno($file)) @@ -112,7 +112,7 @@ sub _check_signature { my $signature = ''; if (openhandle($file)) { - bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh + bless $file, 'IO::File' if ref($file) eq 'GLOB'; # sigh $file->read($signature, 2); $file->seek(-2, IO::File::SEEK_CUR); } elsif (ref($file) eq 'SCALAR') { @@ -149,7 +149,7 @@ sub _parse_workbook { $workbook->{FmtClass} = $formatter || Spreadsheet::ParseExcel::FmtDefault->new; - my $themes = $self->_parse_themes((values %{$files->{themes}})[0]); # XXX + my $themes = $self->_parse_themes((values %{$files->{themes}})[0]); # XXX $workbook->{Color} = $themes->{Color}; @@ -400,7 +400,7 @@ sub _parse_sheet { } elsif ($type eq 'str' || $type eq 'inlineStr') { $long_type = 'Text'; } else { - die "unimplemented type $type"; # XXX + die "unimplemented type $type"; # XXX } my $format_idx = $cell->att('s') || 0; @@ -423,7 +423,7 @@ sub _parse_sheet { my $cell = Spreadsheet::ParseExcel::Cell->new( Val => $val, Type => $long_type, - Merged => undef, # fix up later + Merged => undef, # fix up later Format => $format, FormatNo => $format_idx, ( @@ -525,26 +525,26 @@ sub _parse_sheet_links { # Add the hyperlink $cell->{Hyperlink} = [ - $hyperlink->att('display') || $cell->{_Value} || undef, # Description - $destination_url, # Target - undef, # Target Frame - $row, # Start Row - $row, # End Row - $col, # Start Column - $col, # End Column + $hyperlink->att('display') || $cell->{_Value} || undef, # Description + $destination_url, # Target + undef, # Target Frame + $row, # Start Row + $row, # End Row + $col, # Start Column + $col, # End Column ]; } else { # This is an internal hyperlink # Add the hyperlink $cell->{Hyperlink} = [ - $hyperlink->att('display') || $cell->{_Value} || undef, # Description - $hyperlink->att('location'), # Target - undef, # Target Frame - $row, # Start Row - $row, # End Row - $col, # Start Column - $col, # End Column + $hyperlink->att('display') || $cell->{_Value} || undef, # Description + $hyperlink->att('location'), # Target + undef, # Target Frame + $row, # Start Row + $row, # End Row + $col, # Start Column + $col, # End Column ]; } @@ -957,7 +957,10 @@ sub _extract_files { my $type_base = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'; my $rels = $self->_parse_xml($zip, $self->_rels_for(''),); - my $wb_name = ($rels->find_nodes(qq))[0]->att('Target'); + my $node = ($rels->find_nodes(qq))[0]; + die "invalid workbook" unless $node; + + my $wb_name = $node->att('Target'); $wb_name =~ s{^/}{}; my $wb_xml = $self->_parse_xml($zip, $wb_name); @@ -1175,6 +1178,7 @@ sub _new_twig { 'http://schemas.openxmlformats.org/officeDocument/2006/relationships' => 'rels', 'http://schemas.openxmlformats.org/drawingml/2006/main' => 'drawmain', }, + no_xxe => 1, keep_original_prefix => 1, %opts, ); diff --git a/t/bug-md-10.t b/t/bug-md-10.t new file mode 100644 index 0000000..1787f1c --- /dev/null +++ b/t/bug-md-10.t @@ -0,0 +1,21 @@ +#!/usr/bin/env perl + +use strict; +use warnings; +use Test::More; + +use Spreadsheet::ParseXLSX; + +my $wb; +eval { + $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-md-10.xlsx'); +}; + +ok(!defined($wb)); +ok($@); +ok($@ =~ /^cannot expand &xxe/); + +done_testing; + + + diff --git a/t/bug-md-7.t b/t/bug-md-7.t index c1173fb..dc229b3 100644 --- a/t/bug-md-7.t +++ b/t/bug-md-7.t @@ -6,8 +6,20 @@ use Test::More; use Spreadsheet::ParseXLSX; -my $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-md-7.xlsx'); -isa_ok($wb, 'Spreadsheet::ParseExcel::Workbook'); +my $wb; + +eval { + $wb = Spreadsheet::ParseXLSX->new->parse('t/data/bug-md-7.xlsx'); +}; + +# activate this when #7 is fixed +if (0) { + isa_ok($wb, 'Spreadsheet::ParseExcel::Workbook'); +} else { + ok(!defined($wb)); + ok($@); + ok($@ =~ /^invalid workbook/); +} done_testing; diff --git a/t/data/bug-md-10.xlsx b/t/data/bug-md-10.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d6710a255c2782d8467f50542326802b6e1911bd GIT binary patch literal 6406 zcmbtZ2T+sS)(u5^RY0Wo8hRH6LX#p*h%}KBLQg^{(vcFS2!bG>NG}2+D4?N9ReBXr zklu@k6j4Hx7xev#SG@PW`7`q+UuN>H*=Mh__daVyUmFjf1aSDm$uI+dyZP@M0e}f$ z=>~Oi7Xcd)1Mn`1m_k}bO(CA%qyQZJ&o}@8J`MJl`q<|WKZyXCY%}miy5{f__M0{? z06_ZVdErBpZy>LvO(Cbc;I&!e+n zg??O(h*ctsb|k!w{IFo(vD|eo`hE1JFHB^>LI#=>9o=Ew^&MXwk(2_Y1Yw+n;;uPl z59aOmFLG)XwAI9z(rUC4_};-}c%ski+!MS_)2`RysLbCpuoQl+{`Tgh;#H5GwxD@b z0lHghxQaV}U#w5c1#XU5y0c=P#b$4|*Uo2qkvSsRXJ9qurty5HfPU|S#pS{3z4Li# z8oikr=oANP!vfvu4zoH>rV$I`g`OyFRqH{aki{bHPf2Ib=xp(z-@74JbJ?)aW?Tz9mI?PP*8|oOQS&Zt+62z%< z7ad*)^RP-tvN}4uC)Nm{Sn_OBBh}?>2uCvMpOI@A)M79<$pM*@5cgQaRig)_rers{ zbff+8HFs!`kXBrUw}P}wr>AYS&WS!Ho~c?83|wQ@+exh1gy{2#vfrUzDP(k^rw4R>yC;lL_|`U1Z^Ct?7OEk3 zj1MdD9svU!8CE5uCIa@~Ll;;STTuu>%}6Fv3t%UbxA^tsp97 zVlO%10EkKKvS<>TYi=l%pt}+~8Bf%g=4PI4L6m6oVVi>Hlc~odA3Vws_^J<90^NH6 z!#VA9)#gMnF&(`Ez)=3wYifQ>)HB6mMw}4HqEavgOMKL1nM{!3SG$+_S7Z~<@~H z8!ubEb+x;>cC{=}%nTA3qBtmmn0Zt%5N3!}(&3h!qc9OIE?9$7Qx1x;zFmLCNIZ=U z--#}t;V@(k=OV~39F5;7@%Duq=ksr5X=tX=LCvYOSorwsC^(<&^eA~UZ6KzJMdxb2 z;&-tgru+Dk!^5WA+vR6>lm5HzcJHAX7*{~?Dj3U^0+uVv?+<$++?;I?21+O-XGGO)IN064|T2QigPfjF&;*}gpcUbTi1dmQ)|bAZ0ko_iECEv#W*WH z9CS2W6*Yy>Wd|`%uELy*usTdt+fan*cYMI!kkx^VzPxTbch_ih^tYZ{X zWOJqu0jc^hzag?c0P$b8-btE2Fuh* z2;cp%)8&Vgc9-7>Le4YI>x;Zo8>x4>yyJtl`w4jr!Ft;p9dtK~Rk_O0sAZs;2*@Sj z6dTtiVfncL%W#@niFf8(i_WUJXlVs%DFd0Lz6B|CyPiwfO~G8xA`V>(?tle-TD$>i zHwHBXwD1|V-;K&$R&w*ivn>f->1LA8P9ul-rkewb8}l~wCuD)AN@P<-?35p_ zTkV4(tCp{noczMb=TRqObd?reoL3-x#oK%&`Fpo&Sm4i*`c?&L3_CWvG5LkbCbKak zT6eS{_I|FQbJ3rufI~MA`X9a~SZ!Uew6oBX*rd_6sf+DMQM9YfognEqu=Z85Mv^Hq zu9x9q=EU~=U-eK{5#|B9zHHm_a8L> zVS?ty8#s@OlC?~1@kdRJj50rd%&EyrLU;p4=6j=N$A;Vf>712d3p72@ErrKRbWtT8 zc)-2SWqxhj19>@xU3I1;Ze%x_CMbH4pjxYz`HR;?PI!*K;4-yui#yUQUL@dBd4_{PUbntejTF0_Q)WH*^-Gq1+l(kwW1MBevx!mL4Ff8B%8 zUKhKI<6K{*gvu17X_LMr)@9@=%WSIAI*upoC8XW8pq`ec*CsgV7nS%7UGdzAJG;?Q zQ_x68Uc>-ornBcZ-sN?WsA%DSUoPjDoPBxsLh`^D>-SdsET0ws-0BZcSJ+nX;Q)m} ze>VKTJ<|QS?c>Q> zuyfVOXOeM?$r|Xs{Ky!*EM$ugGk>!7x=;4f<#jne8_6>wnPuMHoDyMCvej@@Ibb}Q zyB-(n{0`j9>jW-t!YG^$0L(TtMe=Cq7>YM4OjgOVkVH!0c*k3<=$6cNu8j za587jqZhEj@%g;hV%(FSg}s;*{jcqZR|0Sx&Oc=nEF94curc;}xmh-s@pyVVoSnrS!xEHLGTs&l0VCqIZ!X zVpBm%M}km}H*hZmNdo)?9sv+KViHqlbKbwG#$^a1_J+CDWi5lacp@2sWU?x7N9R&J zD@2psIpcF5NN6gTxr)6;?$Wa++j{At&@!6r@dXi384g;hEqYbPFfMiy=oaQ472dn^ zT7rRNxwVsVF*H>d3iA$&mMxX;Avc{$+lrJ!K_ICfLKkTr~K0Jc%gq zs5iMD&FyH0nR`pE4@}f}xG5htK9Rj55bmKtf1xHw!!fH;=pn5q!fH{q`RQt@;}ta$ zwHBUPozBYJqf;|*)XYG>gboMu?j0tTNSkLZ2?oShi(*~Kk@aout`oVcsYQ=hdqu^0 zHLlrnv=>_})rsloIyXlt%Y5yPvR{LFOWxO-wk5rK!;g@0l_HJH58rKHWn#bAYoevL z`cdNG3i=j-^Rhd#TjZ)7R}3G7(5byu=YF{zu@+&#Q0Hm6*Zw?uoE5+(uxn=hw$#Zh z0m@(xswtXRvXvqUUw;2~cIBb^VV|XR$*>IYIMVAq_y_0t?5Psi3=J zZc#U7qou~)`qm6eIxze*<4OBR`&z3qj5bQ{QFw7RHf z12hF!@xByWXJZa*`;p|_^Ctg3D|fTA&l3oYRdbXdF$1$h1nQ*xJmY|yPyFEXVV}h6 zd!yg>XT5g&sv`6uK9f6}di_mgYE$Wp7T(D4P(E2@6=~{S`ps*%K9J<2?9~I%RQj3l z``&NbMG7Zp1(Qq9ifVIS`I3`Jw~L!*+2h%2ob3CBWNf1MZnIqSbWPnu+xw&tXU`}aTqnixQI1xzF< zzlhm(<4qr{p1(J%?P@*9vGFc&oacN{ww>3PB%HO9(>dm+zvBGk0Qq11ki!8or^=Z9 z2J43av5JA=`vCb4aa1?_v_}r-2)oK}r^%u_J(0L#WMQicYHb0$5+ocpEghwkScR>{ z`-#wgxX;!rwY;3KN-*4ejV6$IdjoWNd8W6JHFDG^FSFH6fA;Z{vx5#ptF;Pr875JM z7gw1b=_9NybT6B~kIrF+SJw-Gn4BHd8c*JVX@^dA61q8F+Lfi(6qBZpI)Cq`Yx077 z)WI43aj`cK`>15VI8HJIzBg8utI9T)R<@LMSyJjPSt&+I900N+KO>9{8g>mS?Zh<_fzys*ZTZ0gM+1?&-w5dZt6 zHCI8vJ)m$8OCujwsJq3FgP1s0qa9;q6vcPIU$SAXjD3<)XB<#GnfH?B4YFhx zQcyA3+j_bpzyRGpv!@$XYF?baV;@^{ARP8#d>zB6uZ@Gtf_J<~CctW-?=LFA3h40e z|7zgGcTeyy%I7=QpB%yOSXiC`Il)Rlu>M;BSinPn@L0g(gCqVWI5-yfh=umg@%?U~ z9gVN#DBKS_4U2lr{yVzr-(8)fsBTz|@gvBe9?##w9=~?HU)DZ_t($LN#y_w6cPHa$ zd@R`GE*9u$e*zm}K@WY6|A85bU!r?K@B{RR9RKG)e;4CN1H~u&E9OyYjzvF4mj9Ke zUxnXM^e;r;(2vSMEaV@VKc1prw`V8zM@KZsKd=9H1#&b(O41{6NA<}6H%@ZuZ@~Y) z%zqzrN8{8#3U_o{{jB;wruBOp`F%bd#rHW00316luz5I~7Qb!U6WGmo=-*KSK46RH J+n*PJ{{YgOtE>P3 literal 0 HcmV?d00001 diff --git a/t/perltidy.t b/t/perltidy.t deleted file mode 100644 index 7d69cd5..0000000 --- a/t/perltidy.t +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env perl - -use strict; -use warnings; - -use Test::More; - -use Test::PerlTidy qw( run_tests ); - -run_tests( - path => 'lib', - perltidyrc => '.perltidyrc', - exclude => [ qr{\.t$}, 'inc/'], - mute => 1 -); - - -done_testing;