From 3051663273d815fdea9d61313858e07a2cb3cc38 Mon Sep 17 00:00:00 2001 From: oleibman <10341515+oleibman@users.noreply.github.com> Date: Thu, 23 Jan 2025 23:21:28 -0800 Subject: [PATCH] Security Patch Control Characters in Protocol --- CHANGELOG.md | 7 + composer.json | 1 + composer.lock | 160 +++---- phpstan-baseline.neon | 20 - phpstan.neon.dist | 1 + src/PhpSpreadsheet/Reader/Html.php | 5 +- src/PhpSpreadsheet/Worksheet/Drawing.php | 2 +- src/PhpSpreadsheet/Writer/Html.php | 19 +- src/PhpSpreadsheet/Writer/Xls/Parser.php | 389 ++++++++++++------ .../Reader/Html/HtmlImage2Test.php | 81 ++++ .../Writer/Html/BadHyperlinkTest.php | 14 +- tests/data/Reader/Xml/sec-w24f.dontuse | 65 +++ 12 files changed, 523 insertions(+), 241 deletions(-) create mode 100644 tests/PhpSpreadsheetTests/Reader/Html/HtmlImage2Test.php create mode 100644 tests/data/Reader/Xml/sec-w24f.dontuse diff --git a/CHANGELOG.md b/CHANGELOG.md index 93b1f9557d..3a117176da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com) and this project adheres to [Semantic Versioning](https://semver.org). +# TBD - 1.29.9 + +### Fixed + +- Backported security patch for control characters in protocol. +- Use Composer\Pcre in Xls/Parser. Partial backport of [PR #4203](https://github.com/PHPOffice/PhpSpreadsheet/pull/4203) + # 2025-01-11 - 1.29.8 ### Deprecated diff --git a/composer.json b/composer.json index e160834a7b..876e0351c0 100644 --- a/composer.json +++ b/composer.json @@ -75,6 +75,7 @@ "ext-xmlwriter": "*", "ext-zip": "*", "ext-zlib": "*", + "composer/pcre": "^3.3", "ezyang/htmlpurifier": "^4.15", "maennchen/zipstream-php": "^2.1 || ^3.0", "markbaker/complex": "^3.0", diff --git a/composer.lock b/composer.lock index e66f3597aa..c1c9e27c42 100644 --- a/composer.lock +++ b/composer.lock @@ -4,8 +4,87 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "df1c3f6b3012c28bbde38eb2f4f5a0b5", + "content-hash": "0248c8ddf08bb5669fb4c5caef56a6ab", "packages": [ + { + "name": "composer/pcre", + "version": "3.3.2", + "source": { + "type": "git", + "url": "https://github.com/composer/pcre.git", + "reference": "b2bed4734f0cc156ee1fe9c0da2550420d99a21e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/composer/pcre/zipball/b2bed4734f0cc156ee1fe9c0da2550420d99a21e", + "reference": "b2bed4734f0cc156ee1fe9c0da2550420d99a21e", + "shasum": "" + }, + "require": { + "php": "^7.4 || ^8.0" + }, + "conflict": { + "phpstan/phpstan": "<1.11.10" + }, + "require-dev": { + "phpstan/phpstan": "^1.12 || ^2", + "phpstan/phpstan-strict-rules": "^1 || ^2", + "phpunit/phpunit": "^8 || ^9" + }, + "type": "library", + "extra": { + "phpstan": { + "includes": [ + "extension.neon" + ] + }, + "branch-alias": { + "dev-main": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Composer\\Pcre\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jordi Boggiano", + "email": "j.boggiano@seld.be", + "homepage": "http://seld.be" + } + ], + "description": "PCRE wrapping library that offers type-safe preg_* replacements.", + "keywords": [ + "PCRE", + "preg", + "regex", + "regular expression" + ], + "support": { + "issues": "https://github.com/composer/pcre/issues", + "source": "https://github.com/composer/pcre/tree/3.3.2" + }, + "funding": [ + { + "url": "https://packagist.com", + "type": "custom" + }, + { + "url": "https://github.com/composer", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/composer/composer", + "type": "tidelift" + } + ], + "time": "2024-11-12T16:29:46+00:00" + }, { "name": "ezyang/htmlpurifier", "version": "v4.17.0", @@ -672,85 +751,6 @@ ], "time": "2022-12-23T10:58:28+00:00" }, - { - "name": "composer/pcre", - "version": "3.3.1", - "source": { - "type": "git", - "url": "https://github.com/composer/pcre.git", - "reference": "63aaeac21d7e775ff9bc9d45021e1745c97521c4" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/composer/pcre/zipball/63aaeac21d7e775ff9bc9d45021e1745c97521c4", - "reference": "63aaeac21d7e775ff9bc9d45021e1745c97521c4", - "shasum": "" - }, - "require": { - "php": "^7.4 || ^8.0" - }, - "conflict": { - "phpstan/phpstan": "<1.11.10" - }, - "require-dev": { - "phpstan/phpstan": "^1.11.10", - "phpstan/phpstan-strict-rules": "^1.1", - "phpunit/phpunit": "^8 || ^9" - }, - "type": "library", - "extra": { - "branch-alias": { - "dev-main": "3.x-dev" - }, - "phpstan": { - "includes": [ - "extension.neon" - ] - } - }, - "autoload": { - "psr-4": { - "Composer\\Pcre\\": "src" - } - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Jordi Boggiano", - "email": "j.boggiano@seld.be", - "homepage": "http://seld.be" - } - ], - "description": "PCRE wrapping library that offers type-safe preg_* replacements.", - "keywords": [ - "PCRE", - "preg", - "regex", - "regular expression" - ], - "support": { - "issues": "https://github.com/composer/pcre/issues", - "source": "https://github.com/composer/pcre/tree/3.3.1" - }, - "funding": [ - { - "url": "https://packagist.com", - "type": "custom" - }, - { - "url": "https://github.com/composer", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/composer/composer", - "type": "tidelift" - } - ], - "time": "2024-08-27T18:44:43+00:00" - }, { "name": "composer/semver", "version": "3.4.2", diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index e107b26679..2a60d86ab5 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -315,26 +315,6 @@ parameters: count: 1 path: src/PhpSpreadsheet/Writer/Xls.php - - - message: "#^Offset 2 does not exist on array\\{0\\?\\: string, 1\\?\\: ''\\|'\\$', 2\\?\\: non\\-falsy\\-string, 3\\?\\: ''\\|'\\$', 4\\?\\: numeric\\-string\\}\\.$#" - count: 1 - path: src/PhpSpreadsheet/Writer/Xls/Parser.php - - - - message: "#^Offset 2 does not exist on array\\{0\\?\\: string, 1\\?\\: ''\\|'\\$', 2\\?\\: numeric\\-string, 3\\?\\: ''\\|'\\$', 4\\?\\: numeric\\-string\\}\\.$#" - count: 1 - path: src/PhpSpreadsheet/Writer/Xls/Parser.php - - - - message: "#^Offset 4 does not exist on array\\{0\\?\\: string, 1\\?\\: ''\\|'\\$', 2\\?\\: non\\-falsy\\-string, 3\\?\\: ''\\|'\\$', 4\\?\\: numeric\\-string\\}\\.$#" - count: 1 - path: src/PhpSpreadsheet/Writer/Xls/Parser.php - - - - message: "#^Offset 4 does not exist on array\\{0\\?\\: string, 1\\?\\: ''\\|'\\$', 2\\?\\: numeric\\-string, 3\\?\\: ''\\|'\\$', 4\\?\\: numeric\\-string\\}\\.$#" - count: 1 - path: src/PhpSpreadsheet/Writer/Xls/Parser.php - - message: "#^Parameter \\#2 \\$length of function fread expects int\\<1, max\\>, int\\<0, max\\> given\\.$#" count: 1 diff --git a/phpstan.neon.dist b/phpstan.neon.dist index f49ab2eb6e..91329377fe 100644 --- a/phpstan.neon.dist +++ b/phpstan.neon.dist @@ -3,6 +3,7 @@ includes: - phpstan-conditional.php - vendor/phpstan/phpstan-phpunit/extension.neon - vendor/phpstan/phpstan-phpunit/rules.neon + - vendor/composer/pcre/extension.neon parameters: level: 8 diff --git a/src/PhpSpreadsheet/Reader/Html.php b/src/PhpSpreadsheet/Reader/Html.php index bfb52401aa..d3168467d7 100644 --- a/src/PhpSpreadsheet/Reader/Html.php +++ b/src/PhpSpreadsheet/Reader/Html.php @@ -1084,7 +1084,10 @@ private function insertImage(Worksheet $sheet, $column, $row, array $attributes) $name = $attributes['alt'] ?? null; $drawing = new Drawing(); - $drawing->setPath($src); + $drawing->setPath($src, false); + if ($drawing->getPath() === '') { + return; + } $drawing->setWorksheet($sheet); $drawing->setCoordinates($column . $row); $drawing->setOffsetX(0); diff --git a/src/PhpSpreadsheet/Worksheet/Drawing.php b/src/PhpSpreadsheet/Worksheet/Drawing.php index 6cf382b2f0..13012f947b 100644 --- a/src/PhpSpreadsheet/Worksheet/Drawing.php +++ b/src/PhpSpreadsheet/Worksheet/Drawing.php @@ -115,7 +115,7 @@ public function setPath($path, $verifyFile = true, $zip = null) $this->path = ''; // Check if a URL has been passed. https://stackoverflow.com/a/2058596/1252979 - if (filter_var($path, FILTER_VALIDATE_URL)) { + if (filter_var($path, FILTER_VALIDATE_URL) || (preg_match('/^([\\w\\s\\x00-\\x1f]+):/u', $path) && !preg_match('/^([\\w]+):/u', $path))) { if (!preg_match('/^(http|https|file|ftp|s3):/', $path)) { throw new PhpSpreadsheetException('Invalid protocol for linked drawing'); } diff --git a/src/PhpSpreadsheet/Writer/Html.php b/src/PhpSpreadsheet/Writer/Html.php index 3d507a5e2e..cbfa6852df 100644 --- a/src/PhpSpreadsheet/Writer/Html.php +++ b/src/PhpSpreadsheet/Writer/Html.php @@ -1521,9 +1521,10 @@ private function generateRow(Worksheet $worksheet, array $values, $row, $cellTyp $url = $worksheet->getHyperlink($coordinate)->getUrl(); $urlDecode1 = html_entity_decode($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); $urlTrim = preg_replace('/^\\s+/u', '', $urlDecode1) ?? $urlDecode1; - $parseScheme = preg_match('/^([\\w\\s]+):/u', strtolower($urlTrim), $matches); - if ($parseScheme === 1 && !in_array($matches[1], ['http', 'https', 'file', 'ftp', 's3'], true)) { + $parseScheme = preg_match('/^([\\w\\s\\x00-\\x1f]+):/u', strtolower($urlTrim), $matches); + if ($parseScheme === 1 && !in_array($matches[1], ['http', 'https', 'file', 'ftp', 'mailto', 's3'], true)) { $cellData = htmlspecialchars($url, Settings::htmlEntityFlags()); + $cellData = self::replaceControlChars($cellData); } else { $cellData = '' . $cellData . ''; } @@ -1568,6 +1569,20 @@ private function generateRow(Worksheet $worksheet, array $values, $row, $cellTyp return $html; } + private static function replaceNonAscii(array $matches): string + { + return '&#' . mb_ord($matches[0], 'UTF-8') . ';'; + } + + private static function replaceControlChars(string $convert): string + { + return (string) preg_replace_callback( + '/[\\x00-\\x1f]/', + [self::class, 'replaceNonAscii'], + $convert + ); + } + /** * Takes array where of CSS properties / values and converts to CSS string. * diff --git a/src/PhpSpreadsheet/Writer/Xls/Parser.php b/src/PhpSpreadsheet/Writer/Xls/Parser.php index f195ac782b..30c4e917a3 100644 --- a/src/PhpSpreadsheet/Writer/Xls/Parser.php +++ b/src/PhpSpreadsheet/Writer/Xls/Parser.php @@ -2,6 +2,7 @@ namespace PhpOffice\PhpSpreadsheet\Writer\Xls; +use Composer\Pcre\Preg; use PhpOffice\PhpSpreadsheet\Calculation\Calculation; use PhpOffice\PhpSpreadsheet\Shared\StringHelper; use PhpOffice\PhpSpreadsheet\Spreadsheet; @@ -45,35 +46,46 @@ class Parser // Invalid sheet title characters cannot occur in the sheet title: // *:/\?[] (usual invalid sheet title characters) // Single quote is represented as a pair '' - const REGEX_SHEET_TITLE_QUOTED = '(([^\*\:\/\\\\\?\[\]\\\'])+|(\\\'\\\')+)+'; + // Former value for this constant led to "catastrophic backtracking", + // unable to handle double apostrophes. + // (*COMMIT) should prevent this. + const REGEX_SHEET_TITLE_QUOTED = "([^*:/\\\\?\[\]']|'')+"; + + const REGEX_CELL_TITLE_QUOTED = "~^'" + . self::REGEX_SHEET_TITLE_QUOTED + . '(:' . self::REGEX_SHEET_TITLE_QUOTED . ')?' + . "'!(*COMMIT)" + . '[$]?[A-Ia-i]?[A-Za-z][$]?(\\d+)' + . '$~u'; + + const REGEX_RANGE_TITLE_QUOTED = "~^'" + . self::REGEX_SHEET_TITLE_QUOTED + . '(:' . self::REGEX_SHEET_TITLE_QUOTED . ')?' + . "'!(*COMMIT)" + . '[$]?[A-Ia-i]?[A-Za-z][$]?(\\d+)' + . ':' + . '[$]?[A-Ia-i]?[A-Za-z][$]?(\\d+)' + . '$~u'; /** * The index of the character we are currently looking at. - * - * @var int */ - public $currentCharacter; + public int $currentCharacter; /** * The token we are working on. - * - * @var string */ - public $currentToken; + public string $currentToken; /** * The formula to parse. - * - * @var string */ - private $formula; + private string $formula; /** * The character ahead of the current char. - * - * @var string */ - public $lookAhead; + public string $lookAhead; /** * The parse tree to be generated. @@ -84,24 +96,18 @@ class Parser /** * Array of external sheets. - * - * @var array */ - private $externalSheets; + private array $externalSheets; /** * Array of sheet references in the form of REF structures. - * - * @var array */ - public $references; + public array $references; /** * The Excel ptg indices. - * - * @var array */ - private $ptg = [ + private array $ptg = [ 'ptgExp' => 0x01, 'ptgTbl' => 0x02, 'ptgAdd' => 0x03, @@ -212,10 +218,8 @@ class Parser * -1 is a variable number of arguments. * class: The reference, value or array class of the function args. * vol: The function is volatile. - * - * @var array */ - private $functions = [ + private array $functions = [ // function ptg args class vol 'COUNT' => [0, -1, 0, 0], 'IF' => [1, -1, 1, 0], @@ -469,8 +473,7 @@ class Parser 'BAHTTEXT' => [368, 1, 0, 0], ]; - /** @var Spreadsheet */ - private $spreadsheet; + private Spreadsheet $spreadsheet; /** * The class constructor. @@ -491,40 +494,40 @@ public function __construct(Spreadsheet $spreadsheet) /** * Convert a token to the proper ptg value. * - * @param mixed $token the token to convert + * @param string $token the token to convert * - * @return mixed the converted token on success + * @return string the converted token on success */ - private function convert($token) + private function convert(string $token): string { - if (preg_match('/"([^"]|""){0,255}"/', $token)) { + if (Preg::isMatch('/"([^"]|""){0,255}"/', $token)) { return $this->convertString($token); } if (is_numeric($token)) { return $this->convertNumber($token); } // match references like A1 or $A$1 - if (preg_match('/^\$?([A-Ia-i]?[A-Za-z])\$?(\d+)$/', $token)) { + if (Preg::isMatch('/^\$?([A-Ia-i]?[A-Za-z])\$?(\d+)$/', $token)) { return $this->convertRef2d($token); } // match external references like Sheet1!A1 or Sheet1:Sheet2!A1 or Sheet1!$A$1 or Sheet1:Sheet2!$A$1 - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?(\\d+)$/u', $token)) { + if (Preg::isMatch('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?(\\d+)$/u', $token)) { return $this->convertRef3d($token); } // match external references like 'Sheet1'!A1 or 'Sheet1:Sheet2'!A1 or 'Sheet1'!$A$1 or 'Sheet1:Sheet2'!$A$1 - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?[A-Ia-i]?[A-Za-z]\\$?(\\d+)$/u", $token)) { + if (self::matchCellSheetnameQuoted($token)) { return $this->convertRef3d($token); } // match ranges like A1:B2 or $A$1:$B$2 - if (preg_match('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?(\d+)\:(\$)?[A-Ia-i]?[A-Za-z](\$)?(\d+)$/', $token)) { + if (Preg::isMatch('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?(\d+)\:(\$)?[A-Ia-i]?[A-Za-z](\$)?(\d+)$/', $token)) { return $this->convertRange2d($token); } // match external ranges like Sheet1!A1:B2 or Sheet1:Sheet2!A1:B2 or Sheet1!$A$1:$B$2 or Sheet1:Sheet2!$A$1:$B$2 - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?(\\d+)\\:\$?([A-Ia-i]?[A-Za-z])?\$?(\\d+)$/u', $token)) { + if (Preg::isMatch('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?(\\d+)\\:\$?([A-Ia-i]?[A-Za-z])?\$?(\\d+)$/u', $token)) { return $this->convertRange3d($token); } // match external ranges like 'Sheet1'!A1:B2 or 'Sheet1:Sheet2'!A1:B2 or 'Sheet1'!$A$1:$B$2 or 'Sheet1:Sheet2'!$A$1:$B$2 - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?([A-Ia-i]?[A-Za-z])?\\$?(\\d+)\\:\\$?([A-Ia-i]?[A-Za-z])?\\$?(\\d+)$/u", $token)) { + if (self::matchRangeSheetnameQuoted($token)) { return $this->convertRange3d($token); } // operators (including parentheses) @@ -532,14 +535,14 @@ private function convert($token) return pack('C', $this->ptg[$token]); } // match error codes - if (preg_match('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $token) || $token == '#N/A') { + if (Preg::isMatch('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $token) || $token == '#N/A') { return $this->convertError($token); } - if (preg_match('/^' . Calculation::CALCULATION_REGEXP_DEFINEDNAME . '$/mui', $token) && $this->spreadsheet->getDefinedName($token) !== null) { + if (Preg::isMatch('/^' . Calculation::CALCULATION_REGEXP_DEFINEDNAME . '$/mui', $token) && $this->spreadsheet->getDefinedName($token) !== null) { return $this->convertDefinedName($token); } // commented so argument number can be processed correctly. See toReversePolish(). - /*if (preg_match("/[A-Z0-9\xc0-\xdc\.]+/", $token)) + /*if (Preg::isMatch("/[A-Z0-9\xc0-\xdc\.]+/", $token)) { return($this->convertFunction($token, $this->_func_args)); }*/ @@ -547,10 +550,10 @@ private function convert($token) if ($token == 'arg') { return ''; } - if (preg_match('/^true$/i', $token)) { + if (Preg::isMatch('/^true$/i', $token)) { return $this->convertBool(1); } - if (preg_match('/^false$/i', $token)) { + if (Preg::isMatch('/^false$/i', $token)) { return $this->convertBool(0); } @@ -561,20 +564,18 @@ private function convert($token) /** * Convert a number token to ptgInt or ptgNum. * - * @param mixed $num an integer or double for conversion to its ptg value - * - * @return string + * @param float|int|string $num an integer or double for conversion to its ptg value */ - private function convertNumber($num) + private function convertNumber($num): string { // Integer in the range 0..2**16-1 - if ((preg_match('/^\\d+$/', $num)) && ($num <= 65535)) { + if ((Preg::isMatch('/^\\d+$/', (string) $num)) && ($num <= 65535)) { return pack('Cv', $this->ptg['ptgInt'], $num); } // A float if (BIFFwriter::getByteOrder()) { // if it's Big Endian - $num = strrev($num); + $num = strrev((string) $num); } return pack('Cd', $this->ptg['ptgNum'], $num); @@ -590,9 +591,9 @@ private function convertBool(int $num): string * * @param string $string a string for conversion to its ptg value * - * @return mixed the converted token on success + * @return string the converted token */ - private function convertString($string) + private function convertString(string $string): string { // chop away beggining and ending quotes $string = substr($string, 1, -1); @@ -612,7 +613,7 @@ private function convertString($string) * * @return string The packed ptg for the function */ - private function convertFunction($token, $num_args) + private function convertFunction(string $token, int $num_args): string { $args = $this->functions[$token][1]; @@ -629,15 +630,12 @@ private function convertFunction($token, $num_args) * Convert an Excel range such as A1:D4 to a ptgRefV. * * @param string $range An Excel range in the A1:A2 - * @param int $class - * - * @return string */ - private function convertRange2d($range, $class = 0) + private function convertRange2d(string $range, int $class = 0): string { // TODO: possible class value 0,1,2 check Formula.pm // Split the range into 2 cell refs - if (preg_match('/^(\$)?([A-Ia-i]?[A-Za-z])(\$)?(\d+)\:(\$)?([A-Ia-i]?[A-Za-z])(\$)?(\d+)$/', $range)) { + if (Preg::isMatch('/^(\$)?([A-Ia-i]?[A-Za-z])(\$)?(\d+)\:(\$)?([A-Ia-i]?[A-Za-z])(\$)?(\d+)$/', $range)) { [$cell1, $cell2] = explode(':', $range); } else { // TODO: use real error codes @@ -668,21 +666,21 @@ private function convertRange2d($range, $class = 0) * * @param string $token an Excel range in the Sheet1!A1:A2 format * - * @return mixed the packed ptgArea3d token on success + * @return string the packed ptgArea3d token on success */ - private function convertRange3d($token) + private function convertRange3d(string $token): string { // Split the ref at the ! symbol [$ext_ref, $range] = PhpspreadsheetWorksheet::extractSheetTitle($token, true); // Convert the external reference part (different for BIFF8) - $ext_ref = $this->getRefIndex($ext_ref); + $ext_ref = $this->getRefIndex($ext_ref ?? ''); // Split the range into 2 cell refs - [$cell1, $cell2] = explode(':', $range); + [$cell1, $cell2] = explode(':', $range ?? ''); // Convert the cell references - if (preg_match('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?(\\d+)$/', $cell1)) { + if (Preg::isMatch('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?(\\d+)$/', $cell1)) { [$row1, $col1] = $this->cellToPackedRowcol($cell1); [$row2, $col2] = $this->cellToPackedRowcol($cell2); } else { // It's a rows range (like 26:27) @@ -702,7 +700,7 @@ private function convertRange3d($token) * * @return string The cell in packed() format with the corresponding ptg */ - private function convertRef2d($cell) + private function convertRef2d(string $cell): string { // Convert the cell reference $cell_array = $this->cellToPackedRowcol($cell); @@ -720,18 +718,18 @@ private function convertRef2d($cell) * * @param string $cell An Excel cell reference * - * @return mixed the packed ptgRef3d token on success + * @return string the packed ptgRef3d token on success */ - private function convertRef3d($cell) + private function convertRef3d(string $cell): string { // Split the ref at the ! symbol [$ext_ref, $cell] = PhpspreadsheetWorksheet::extractSheetTitle($cell, true); // Convert the external reference part (different for BIFF8) - $ext_ref = $this->getRefIndex($ext_ref); + $ext_ref = $this->getRefIndex($ext_ref ?? ''); // Convert the cell reference part - [$row, $col] = $this->cellToPackedRowcol($cell); + [$row, $col] = $this->cellToPackedRowcol($cell ?? ''); // The ptg value depends on the class of the ptg. $ptgRef = pack('C', $this->ptg['ptgRef3dA']); @@ -768,8 +766,7 @@ private function convertError($errorCode) return pack('C', 0xFF); } - /** @var bool */ - private $tryDefinedName = false; + private bool $tryDefinedName = false; private function convertDefinedName(string $name): string { @@ -803,15 +800,15 @@ private function convertDefinedName(string $name): string * * @param string $ext_ref The name of the external reference * - * @return mixed The reference index in packed() format on success + * @return string The reference index in packed() format on success */ - private function getRefIndex($ext_ref) + private function getRefIndex(string $ext_ref): string { - $ext_ref = (string) preg_replace(["/^'/", "/'$/"], ['', ''], $ext_ref); // Remove leading and trailing ' if any. + $ext_ref = Preg::replace(["/^'/", "/'$/"], ['', ''], $ext_ref); // Remove leading and trailing ' if any. $ext_ref = str_replace('\'\'', '\'', $ext_ref); // Replace escaped '' with ' // Check if there is a sheet range eg., Sheet1:Sheet2. - if (preg_match('/:/', $ext_ref)) { + if (Preg::isMatch('/:/', $ext_ref)) { [$sheet_name1, $sheet_name2] = explode(':', $ext_ref); $sheet1 = $this->getSheetIndex($sheet_name1); @@ -865,7 +862,7 @@ private function getRefIndex($ext_ref) * * @return int The sheet index, -1 if the sheet was not found */ - private function getSheetIndex($sheet_name) + private function getSheetIndex(string $sheet_name): int { if (!isset($this->externalSheets[$sheet_name])) { return -1; @@ -882,9 +879,9 @@ private function getSheetIndex($sheet_name) * @param string $name The name of the worksheet being added * @param int $index The index of the worksheet being added * - * @see \PhpOffice\PhpSpreadsheet\Writer\Xls\Workbook::addWorksheet() + * @see Workbook::addWorksheet */ - public function setExtSheet($name, $index): void + public function setExtSheet(string $name, int $index): void { $this->externalSheets[$name] = $index; } @@ -896,7 +893,7 @@ public function setExtSheet($name, $index): void * * @return array Array containing the row and column in packed() format */ - private function cellToPackedRowcol($cell) + private function cellToPackedRowcol(string $cell): array { $cell = strtoupper($cell); [$row, $col, $row_rel, $col_rel] = $this->cellToRowcol($cell); @@ -925,9 +922,13 @@ private function cellToPackedRowcol($cell) * * @return array Array containing (row1,col1,row2,col2) in packed() format */ - private function rangeToPackedRange($range) + private function rangeToPackedRange(string $range): array { - preg_match('/(\$)?(\d+)\:(\$)?(\d+)/', $range, $match); + if (!Preg::isMatch('/(\$)?(\d+)\:(\$)?(\d+)/', $range, $match)) { + // @codeCoverageIgnoreStart + throw new WriterException('Regexp failure in rangeToPackedRange'); + // @codeCoverageIgnoreEnd + } // return absolute rows if there is a $ in the ref $row1_rel = empty($match[1]) ? 1 : 0; $row1 = $match[2]; @@ -963,12 +964,14 @@ private function rangeToPackedRange($range) * whether the row or column are relative references. * * @param string $cell the Excel cell reference in A1 format - * - * @return array */ - private function cellToRowcol($cell) + private function cellToRowcol(string $cell): array { - preg_match('/(\$)?([A-I]?[A-Z])(\$)?(\d+)/', $cell, $match); + if (!Preg::isMatch('/(\$)?([A-I]?[A-Z])(\$)?(\d+)/', $cell, $match)) { + // @codeCoverageIgnoreStart + throw new WriterException('Regexp failure in cellToRowcol'); + // @codeCoverageIgnoreEnd + } // return absolute column if there is a $ in the ref $col_rel = empty($match[1]) ? 1 : 0; $col_ref = $match[2]; @@ -1034,17 +1037,16 @@ private function advance(): void } ++$i; } - //die("Lexical error ".$this->currentCharacter); } /** * Checks if it's a valid token. * - * @param mixed $token the token to check + * @param string $token the token to check * - * @return mixed The checked token or false on failure + * @return string The checked token or empty string on failure */ - private function match($token) + private function match(string $token): string { switch ($token) { case '+': @@ -1081,52 +1083,109 @@ private function match($token) } // if it's a reference A1 or $A$1 or $A1 or A$1 - if (preg_match('/^\$?[A-Ia-i]?[A-Za-z]\$?\d+$/', $token) && !preg_match('/\d/', $this->lookAhead) && ($this->lookAhead !== ':') && ($this->lookAhead !== '.') && ($this->lookAhead !== '!')) { + if ( + Preg::isMatch('/^\$?[A-Ia-i]?[A-Za-z]\$?\d+$/', $token) + && !Preg::isMatch('/\d/', $this->lookAhead) + && ($this->lookAhead !== ':') + && ($this->lookAhead !== '.') + && ($this->lookAhead !== '!') + ) { return $token; } // If it's an external reference (Sheet1!A1 or Sheet1:Sheet2!A1 or Sheet1!$A$1 or Sheet1:Sheet2!$A$1) - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?\\d+$/u', $token) && !preg_match('/\d/', $this->lookAhead) && ($this->lookAhead !== ':') && ($this->lookAhead !== '.')) { + if ( + Preg::isMatch('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?\\d+$/u', $token) + && !Preg::isMatch('/\d/', $this->lookAhead) + && ($this->lookAhead !== ':') + && ($this->lookAhead !== '.') + ) { return $token; } // If it's an external reference ('Sheet1'!A1 or 'Sheet1:Sheet2'!A1 or 'Sheet1'!$A$1 or 'Sheet1:Sheet2'!$A$1) - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?[A-Ia-i]?[A-Za-z]\\$?\\d+$/u", $token) && !preg_match('/\d/', $this->lookAhead) && ($this->lookAhead !== ':') && ($this->lookAhead !== '.')) { + if ( + self::matchCellSheetnameQuoted($token) + && !Preg::isMatch('/\\d/', $this->lookAhead) + && ($this->lookAhead !== ':') && ($this->lookAhead !== '.') + ) { return $token; } // if it's a range A1:A2 or $A$1:$A$2 - if (preg_match('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+:(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', $token) && !preg_match('/\d/', $this->lookAhead)) { + if ( + Preg::isMatch( + '/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+:(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', + $token + ) + && !Preg::isMatch('/\d/', $this->lookAhead) + ) { return $token; } // If it's an external range like Sheet1!A1:B2 or Sheet1:Sheet2!A1:B2 or Sheet1!$A$1:$B$2 or Sheet1:Sheet2!$A$1:$B$2 - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?\\d+:\$?([A-Ia-i]?[A-Za-z])?\$?\\d+$/u', $token) && !preg_match('/\d/', $this->lookAhead)) { + if ( + Preg::isMatch( + '/^' + . self::REGEX_SHEET_TITLE_UNQUOTED + . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED + . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?\\d+:\$?([A-Ia-i]?[A-Za-z])?\$?\\d+$/u', + $token + ) + && !Preg::isMatch('/\d/', $this->lookAhead) + ) { return $token; } // If it's an external range like 'Sheet1'!A1:B2 or 'Sheet1:Sheet2'!A1:B2 or 'Sheet1'!$A$1:$B$2 or 'Sheet1:Sheet2'!$A$1:$B$2 - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?([A-Ia-i]?[A-Za-z])?\\$?\\d+:\\$?([A-Ia-i]?[A-Za-z])?\\$?\\d+$/u", $token) && !preg_match('/\d/', $this->lookAhead)) { + if ( + self::matchRangeSheetnameQuoted($token) + && !Preg::isMatch('/\\d/', $this->lookAhead) + ) { return $token; } // If it's a number (check that it's not a sheet name or range) if (is_numeric($token) && (!is_numeric($token . $this->lookAhead) || ($this->lookAhead == '')) && ($this->lookAhead !== '!') && ($this->lookAhead !== ':')) { return $token; } - if (preg_match('/"([^"]|""){0,255}"/', $token) && $this->lookAhead !== '"' && (substr_count($token, '"') % 2 == 0)) { + if ( + Preg::isMatch('/"([^"]|""){0,255}"/', $token) + && $this->lookAhead !== '"' + && (substr_count($token, '"') % 2 == 0) + ) { // If it's a string (of maximum 255 characters) return $token; } // If it's an error code - if (preg_match('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $token) || $token === '#N/A') { + if ( + Preg::isMatch('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $token) + || $token === '#N/A' + ) { return $token; } // if it's a function call - if (preg_match("/^[A-Z0-9\xc0-\xdc\\.]+$/i", $token) && ($this->lookAhead === '(')) { + if ( + Preg::isMatch("/^[A-Z0-9\xc0-\xdc\\.]+$/i", $token) + && ($this->lookAhead === '(') + ) { return $token; } - if (preg_match('/^' . Calculation::CALCULATION_REGEXP_DEFINEDNAME . '$/miu', $token) && $this->spreadsheet->getDefinedName($token) !== null) { + if ( + Preg::isMatch( + '/^' + . Calculation::CALCULATION_REGEXP_DEFINEDNAME + . '$/miu', + $token + ) + && $this->spreadsheet->getDefinedName($token) !== null + ) { return $token; } - if (preg_match('/^true$/i', $token) && ($this->lookAhead === ')' || $this->lookAhead === ',')) { + if ( + Preg::isMatch('/^true$/i', $token) + && ($this->lookAhead === ')' || $this->lookAhead === ',') + ) { return $token; } - if (preg_match('/^false$/i', $token) && ($this->lookAhead === ')' || $this->lookAhead === ',')) { + if ( + Preg::isMatch('/^false$/i', $token) + && ($this->lookAhead === ')' || $this->lookAhead === ',') + ) { return $token; } if (substr($token, -1) === ')') { @@ -1144,9 +1203,9 @@ private function match($token) * @param string $formula the formula to parse, without the initial equal * sign (=) * - * @return mixed true on success + * @return bool true on success */ - public function parse($formula) + public function parse(string $formula): bool { $this->currentCharacter = 0; $this->formula = (string) $formula; @@ -1161,9 +1220,9 @@ public function parse($formula) * It parses a condition. It assumes the following rule: * Cond -> Expr [(">" | "<") Expr]. * - * @return mixed The parsed ptg'd tree on success + * @return array The parsed ptg'd tree on success */ - private function condition() + private function condition(): array { $result = $this->expression(); if ($this->currentToken == '<') { @@ -1203,12 +1262,12 @@ private function condition() * -> "+" Term : Positive value * -> Error code. * - * @return mixed The parsed ptg'd tree on success + * @return array The parsed ptg'd tree on success */ - private function expression() + private function expression(): array { // If it's a string return a string node - if (preg_match('/"([^"]|""){0,255}"/', $this->currentToken)) { + if (Preg::isMatch('/"([^"]|""){0,255}"/', $this->currentToken)) { $tmp = str_replace('""', '"', $this->currentToken); if (($tmp == '"') || ($tmp == '')) { // Trap for "" that has been used for an empty string @@ -1218,12 +1277,17 @@ private function expression() $this->advance(); return $result; - } elseif (preg_match('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $this->currentToken) || $this->currentToken == '#N/A') { // error code + } + if ( + Preg::isMatch('/^#[A-Z0\\/]{3,5}[!?]{1}$/', $this->currentToken) + || $this->currentToken == '#N/A' + ) { // error code $result = $this->createTree($this->currentToken, 'ptgErr', ''); $this->advance(); return $result; - } elseif ($this->currentToken == '-') { // negative value + } + if ($this->currentToken == '-') { // negative value // catch "-" Term $this->advance(); $result2 = $this->expression(); @@ -1243,9 +1307,9 @@ private function expression() $result = $this->createTree('ptgConcat', $result, $result2); } while ( - ($this->currentToken == '+') || - ($this->currentToken == '-') || - ($this->currentToken == '^') + ($this->currentToken == '+') + || ($this->currentToken == '-') + || ($this->currentToken == '^') ) { if ($this->currentToken == '+') { $this->advance(); @@ -1273,7 +1337,7 @@ private function expression() * * @see fact() */ - private function parenthesizedExpression() + private function parenthesizedExpression(): array { return $this->createTree('ptgParen', $this->expression(), ''); } @@ -1282,14 +1346,14 @@ private function parenthesizedExpression() * It parses a term. It assumes the following rule: * Term -> Fact [("*" | "/") Fact]. * - * @return mixed The parsed ptg'd tree on success + * @return array The parsed ptg'd tree on success */ - private function term() + private function term(): array { $result = $this->fact(); while ( - ($this->currentToken == '*') || - ($this->currentToken == '/') + ($this->currentToken == '*') + || ($this->currentToken == '/') ) { if ($this->currentToken == '*') { $this->advance(); @@ -1313,9 +1377,9 @@ private function term() * | Number * | Function. * - * @return mixed The parsed ptg'd tree on success + * @return array The parsed ptg'd tree on success */ - private function fact() + private function fact(): array { $currentToken = $this->currentToken; if ($currentToken === '(') { @@ -1329,20 +1393,28 @@ private function fact() return $result; } // if it's a reference - if (preg_match('/^\$?[A-Ia-i]?[A-Za-z]\$?\d+$/', $this->currentToken)) { + if (Preg::isMatch('/^\$?[A-Ia-i]?[A-Za-z]\$?\d+$/', $this->currentToken)) { $result = $this->createTree($this->currentToken, '', ''); $this->advance(); return $result; } - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?\\d+$/u', $this->currentToken)) { + if ( + Preg::isMatch( + '/^' + . self::REGEX_SHEET_TITLE_UNQUOTED + . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED + . ')?\\!\$?[A-Ia-i]?[A-Za-z]\$?\\d+$/u', + $this->currentToken + ) + ) { // If it's an external reference (Sheet1!A1 or Sheet1:Sheet2!A1 or Sheet1!$A$1 or Sheet1:Sheet2!$A$1) $result = $this->createTree($this->currentToken, '', ''); $this->advance(); return $result; } - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?[A-Ia-i]?[A-Za-z]\\$?\\d+$/u", $this->currentToken)) { + if (self::matchCellSheetnameQuoted($this->currentToken)) { // If it's an external reference ('Sheet1'!A1 or 'Sheet1:Sheet2'!A1 or 'Sheet1'!$A$1 or 'Sheet1:Sheet2'!$A$1) $result = $this->createTree($this->currentToken, '', ''); $this->advance(); @@ -1350,8 +1422,14 @@ private function fact() return $result; } if ( - preg_match('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+:(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', $this->currentToken) || - preg_match('/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+\.\.(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', $this->currentToken) + Preg::isMatch( + '/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+:(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', + $this->currentToken + ) + || Preg::isMatch( + '/^(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+\.\.(\$)?[A-Ia-i]?[A-Za-z](\$)?\d+$/', + $this->currentToken + ) ) { // if it's a range A1:B2 or $A$1:$B$2 // must be an error? @@ -1360,7 +1438,16 @@ private function fact() return $result; } - if (preg_match('/^' . self::REGEX_SHEET_TITLE_UNQUOTED . '(\\:' . self::REGEX_SHEET_TITLE_UNQUOTED . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?\\d+:\$?([A-Ia-i]?[A-Za-z])?\$?\\d+$/u', $this->currentToken)) { + if ( + Preg::isMatch( + '/^' + . self::REGEX_SHEET_TITLE_UNQUOTED + . '(\\:' + . self::REGEX_SHEET_TITLE_UNQUOTED + . ')?\\!\$?([A-Ia-i]?[A-Za-z])?\$?\\d+:\$?([A-Ia-i]?[A-Za-z])?\$?\\d+$/u', + $this->currentToken + ) + ) { // If it's an external range (Sheet1!A1:B2 or Sheet1:Sheet2!A1:B2 or Sheet1!$A$1:$B$2 or Sheet1:Sheet2!$A$1:$B$2) // must be an error? $result = $this->createTree($this->currentToken, '', ''); @@ -1368,7 +1455,7 @@ private function fact() return $result; } - if (preg_match("/^'" . self::REGEX_SHEET_TITLE_QUOTED . '(\\:' . self::REGEX_SHEET_TITLE_QUOTED . ")?'\\!\\$?([A-Ia-i]?[A-Za-z])?\\$?\\d+:\\$?([A-Ia-i]?[A-Za-z])?\\$?\\d+$/u", $this->currentToken)) { + if (self::matchRangeSheetnameQuoted($this->currentToken)) { // If it's an external range ('Sheet1'!A1:B2 or 'Sheet1'!A1:B2 or 'Sheet1'!$A$1:$B$2 or 'Sheet1'!$A$1:$B$2) // must be an error? $result = $this->createTree($this->currentToken, '', ''); @@ -1388,17 +1475,28 @@ private function fact() return $result; } - if (preg_match("/^[A-Z0-9\xc0-\xdc\\.]+$/i", $this->currentToken) && ($this->lookAhead === '(')) { + if ( + Preg::isMatch("/^[A-Z0-9\xc0-\xdc\\.]+$/i", $this->currentToken) + && ($this->lookAhead === '(') + ) { // if it's a function call return $this->func(); } - if (preg_match('/^' . Calculation::CALCULATION_REGEXP_DEFINEDNAME . '$/miu', $this->currentToken) && $this->spreadsheet->getDefinedName($this->currentToken) !== null) { + if ( + Preg::isMatch( + '/^' + . Calculation::CALCULATION_REGEXP_DEFINEDNAME + . '$/miu', + $this->currentToken + ) + && $this->spreadsheet->getDefinedName($this->currentToken) !== null + ) { $result = $this->createTree('ptgName', $this->currentToken, ''); $this->advance(); return $result; } - if (preg_match('/^true|false$/i', $this->currentToken)) { + if (Preg::isMatch('/^true|false$/i', $this->currentToken)) { $result = $this->createTree($this->currentToken, '', ''); $this->advance(); @@ -1412,9 +1510,9 @@ private function fact() * It parses a function call. It assumes the following rule: * Func -> ( Expr [,Expr]* ). * - * @return mixed The parsed ptg'd tree on success + * @return array The parsed ptg'd tree on success */ - private function func() + private function func(): array { $num_args = 0; // number of arguments received $function = strtoupper($this->currentToken); @@ -1461,7 +1559,7 @@ private function func() * * @return array A tree */ - private function createTree($value, $left, $right) + private function createTree($value, $left, $right): array { return ['value' => $value, 'left' => $left, 'right' => $right]; } @@ -1493,7 +1591,7 @@ private function createTree($value, $left, $right) * * @return string The tree in reverse polish notation */ - public function toReversePolish($tree = []) + public function toReversePolish(array $tree = []): string { $polish = ''; // the string we are going to return if (empty($tree)) { // If it's the first call use parseTree @@ -1519,11 +1617,14 @@ public function toReversePolish($tree = []) } // if it's a function convert it here (so we can set it's arguments) if ( - preg_match("/^[A-Z0-9\xc0-\xdc\\.]+$/", $tree['value']) && - !preg_match('/^([A-Ia-i]?[A-Za-z])(\d+)$/', $tree['value']) && - !preg_match('/^[A-Ia-i]?[A-Za-z](\\d+)\\.\\.[A-Ia-i]?[A-Za-z](\\d+)$/', $tree['value']) && - !is_numeric($tree['value']) && - !isset($this->ptg[$tree['value']]) + Preg::isMatch("/^[A-Z0-9\xc0-\xdc\\.]+$/", $tree['value']) + && !Preg::isMatch('/^([A-Ia-i]?[A-Za-z])(\d+)$/', $tree['value']) + && !Preg::isMatch( + '/^[A-Ia-i]?[A-Za-z](\\d+)\\.\\.[A-Ia-i]?[A-Za-z](\\d+)$/', + $tree['value'] + ) + && !is_numeric($tree['value']) + && !isset($this->ptg[$tree['value']]) ) { // left subtree for a function is always an array. if ($tree['left'] != '') { @@ -1539,4 +1640,20 @@ public function toReversePolish($tree = []) return $polish . $converted_tree; } + + public static function matchCellSheetnameQuoted(string $token): bool + { + return Preg::isMatch( + self::REGEX_CELL_TITLE_QUOTED, + $token + ); + } + + public static function matchRangeSheetnameQuoted(string $token): bool + { + return Preg::isMatch( + self::REGEX_RANGE_TITLE_QUOTED, + $token + ); + } } diff --git a/tests/PhpSpreadsheetTests/Reader/Html/HtmlImage2Test.php b/tests/PhpSpreadsheetTests/Reader/Html/HtmlImage2Test.php new file mode 100644 index 0000000000..c2d610ff6b --- /dev/null +++ b/tests/PhpSpreadsheetTests/Reader/Html/HtmlImage2Test.php @@ -0,0 +1,81 @@ + + + test image voilà + + '; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + + /** @var Drawing $drawing */ + $drawing = $firstSheet->getDrawingCollection()[0]; + self::assertEquals($imagePath, $drawing->getPath()); + self::assertEquals('A1', $drawing->getCoordinates()); + } + + public function testCantInsertImageNotFound(): void + { + if (getenv('SKIP_URL_IMAGE_TEST') === '1') { + self::markTestSkipped('Skipped due to setting of environment variable'); + } + $imagePath = 'https://phpspreadsheet.readthedocs.io/en/latest/topics/images/xxx01-03-filter-icon-1.png'; + $html = ' + + + +
test image voilà
'; + $filename = HtmlHelper::createHtml($html); + $spreadsheet = HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + $firstSheet = $spreadsheet->getSheet(0); + $drawingCollection = $firstSheet->getDrawingCollection(); + self::assertCount(0, $drawingCollection); + } + + /** + * @dataProvider providerBadProtocol + */ + public function testCannotInsertImageBadProtocol(string $imagePath): void + { + $this->expectException(SpreadsheetException::class); + $this->expectExceptionMessage('Invalid protocol for linked drawing'); + $html = ' + + + +
test image voilà
'; + $filename = HtmlHelper::createHtml($html); + HtmlHelper::loadHtmlIntoSpreadsheet($filename, true); + } + + public static function providerBadProtocol(): array + { + return [ + 'unknown protocol' => ['httpx://example.com/image.png'], + 'embedded whitespace' => ['ht tp://example.com/image.png'], + 'control character' => ["\x14http://example.com/image.png"], + 'mailto' => ['mailto:xyz@example.com'], + 'mailto whitespace' => ['mail to:xyz@example.com'], + 'phar' => ['phar://example.com/image.phar'], + 'phar control' => ["\x14phar://example.com/image.phar"], + ]; + } +} diff --git a/tests/PhpSpreadsheetTests/Writer/Html/BadHyperlinkTest.php b/tests/PhpSpreadsheetTests/Writer/Html/BadHyperlinkTest.php index 669594bb1c..6e2634c6cf 100644 --- a/tests/PhpSpreadsheetTests/Writer/Html/BadHyperlinkTest.php +++ b/tests/PhpSpreadsheetTests/Writer/Html/BadHyperlinkTest.php @@ -5,6 +5,7 @@ namespace PhpOffice\PhpSpreadsheetTests\Writer\Html; use PhpOffice\PhpSpreadsheet\Reader\Xlsx as XlsxReader; +use PhpOffice\PhpSpreadsheet\Reader\Xml as XmlReader; use PhpOffice\PhpSpreadsheet\Writer\Html as HtmlWriter; use PHPUnit\Framework\TestCase; @@ -17,7 +18,18 @@ public function testBadHyperlink(): void $spreadsheet = $reader->load($infile); $writer = new HtmlWriter($spreadsheet); $html = $writer->generateHtmlAll(); - self::assertStringContainsString("jav\tascript:alert()", $html); + self::assertStringContainsString('jav ascript:alert()', $html); + $spreadsheet->disconnectWorksheets(); + } + + public function testControlCharacter(): void + { + $reader = new XmlReader(); + $infile = 'tests/data/Reader/Xml/sec-w24f.dontuse'; + $spreadsheet = $reader->load($infile); + $writer = new HtmlWriter($spreadsheet); + $html = $writer->generateHtmlAll(); + self::assertStringContainsString('j avascript:alert(1)', $html); $spreadsheet->disconnectWorksheets(); } } diff --git a/tests/data/Reader/Xml/sec-w24f.dontuse b/tests/data/Reader/Xml/sec-w24f.dontuse new file mode 100644 index 0000000000..f39faa4fc8 --- /dev/null +++ b/tests/data/Reader/Xml/sec-w24f.dontuse @@ -0,0 +1,65 @@ + + + + + author + author + 2015-06-05T18:19:34Z + 2024-12-25T10:16:07Z + 16.00 + + + + + + 11020 + 19420 + 32767 + 32767 + False + False + + + + + + + + + + + +
+ + +
+