From a0ef6022973d04056a1a7480a63c8888d16af7b5 Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 16 Nov 2022 11:12:34 +0100 Subject: [PATCH 01/12] Fix ODT generation : htmlspecialchars() was run too many times on some strings. --- htdocs/includes/odtphp/odf.php | 57 ++++-- test/phpunit/ODFTest.php | 363 +++++++++++++++++++++++++++++++++ 2 files changed, 406 insertions(+), 14 deletions(-) create mode 100644 test/phpunit/ODFTest.php diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index b780b550b6d..a07f5346943 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -15,6 +15,7 @@ class OdfException extends Exception * @copyright 2010-2015 - Laurent Destailleur - eldy@users.sourceforge.net * @copyright 2010 - Vikas Mahajan - http://vikasmahajan.wordpress.com * @copyright 2012 - Stephen Larroque - lrq3000@gmail.com + * @copyright 2020 - Open-DSI - contact@open-dsi.fr * @license https://www.gnu.org/copyleft/gpl.html GPL License * @version 1.5.0 */ @@ -43,6 +44,8 @@ class Odf public $userdefined=array(); const PIXEL_TO_CM = 0.026458333; + const FIND_ENCODED_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; + /** * Class constructor @@ -156,12 +159,16 @@ class Odf */ public function convertVarToOdf($value, $encode = true, $charset = 'ISO-8859') { - $value = $encode ? htmlspecialchars($value) : $value; - $value = ($charset == 'ISO-8859') ? utf8_encode($value) : $value; + $value = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401); + + // fix breaklines... + $value = str_replace("
", "
", $value); $convertedValue = $value; // Check if the value includes html tags if ($this->_hasHtmlTag($value) === true) { + $value = strip_tags($value, ['
', '', '', '', '', '', '', '', '', '']); + // Default styles for strong/b, i/em, u, s, sub & sup $automaticStyles = array( '', @@ -172,7 +179,7 @@ class Odf '' ); - $convertedValue = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations); + $convertedValue = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations, $encode, $charset); foreach ($customStyles as $key => $val) { array_push($automaticStyles, '' . $val . ''); @@ -195,20 +202,26 @@ class Odf } } $this->contentXml = str_replace('', $fonts . '', $this->contentXml); + } + else { + $convertedValue = $this->encode_chars($convertedValue, $encode, $charset); } - else $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $value); + + $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $convertedValue); return $convertedValue; } /** - * Replaces html tags in with odt tags and returns an odt string + * Replaces html tags in with odt tags and returns an odt string. Encodes and converts inner text. * @param array $tags An array with html tags generated by the getDataFromHtml() function * @param array $customStyles An array of style defenitions that should be included inside the odt file * @param array $fontDeclarations An array of font declarations that should be included inside the odt file + * @param bool $encode If true, special XML characters are encoded + * @param string $charset Charset * @return string */ - private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) + private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations, $encode = false, $charset = '') { if ($customStyles == null) $customStyles = array(); if ($fontDeclarations == null) $fontDeclarations = array(); @@ -218,7 +231,8 @@ class Odf foreach ((array) $tags as $tag) { // Check if the current item is a tag or just plain text if (isset($tag['text'])) { - $odtResult .= $tag['text']; + $text = $this->encode_chars($tag['text'], $encode, $charset); + $odtResult .= $text; } elseif (isset($tag['name'])) { switch ($tag['name']) { case 'br': @@ -226,23 +240,23 @@ class Odf break; case 'strong': case 'b': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'i': case 'em': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'u': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 's': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'sub': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'sup': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'span': if (isset($tag['attributes']['style'])) { @@ -279,7 +293,7 @@ class Odf // Generate a unique id for the style (using microtime and random because some CPUs are really fast...) $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); $customStyles[$key] = $odtStyles; - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; } } break; @@ -292,6 +306,21 @@ class Odf return $odtResult; } + /** + * Correctly encode chars + * @param string $text The text to encode or not + * @param bool $encode If true, special XML characters are encoded + * @param string $charset Charset + * @return string The converted text + * @see self::convertVarToOdf() + */ + private function encode_chars($text, $encode = false, $charset = '') + { + $newtext = $encode ? htmlspecialchars($text, ENT_QUOTES | ENT_XML1) : $text; + $newtext = ($charset == 'ISO-8859') ? utf8_encode($newtext) : $newtext; + return $newtext; + } + /** * Checks if the given text is a html string * @param string $text The text to check diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php new file mode 100644 index 00000000000..989a42c93bf --- /dev/null +++ b/test/phpunit/ODFTest.php @@ -0,0 +1,363 @@ + + * Copyright (C) ---Put here your own copyright and developer email--- + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * \file test/unit/BillOfMaterialsTest.php + * \ingroup billofmaterials + * \brief PHPUnit test for BillOfMaterials class. + */ + +global $conf,$user,$langs,$db; +//define('TEST_DB_FORCE_TYPE','mysql'); // This is to force using mysql driver +//require_once 'PHPUnit/Autoload.php'; +require_once dirname(__FILE__).'/../../htdocs/master.inc.php'; +require_once dirname(__FILE__).'/../../htdocs/includes/odtphp/odf.php'; + +if (empty($user->id)) { + print "Load permissions for admin user nb 1\n"; + $user->fetch(1); + $user->getrights(); +} +$conf->global->MAIN_DISABLE_ALL_MAILS=1; + +$langs->load("main"); + + +/** + * Class for PHPUnit tests + * + * @backupGlobals disabled + * @backupStaticAttributes enabled + * @remarks backupGlobals must be disabled to have db,conf,user and lang not erased. + */ +class ODFTest extends PHPUnit\Framework\TestCase +{ + protected $savconf; + protected $savuser; + protected $savlangs; + protected $savdb; + + /** + * Constructor + * We save global variables into local variables + * + * @return BOMTest + */ + public function __construct() + { + parent::__construct(); + + //$this->sharedFixture + global $conf,$user,$langs,$db; + $this->savconf=$conf; + $this->savuser=$user; + $this->savlangs=$langs; + $this->savdb=$db; + + print __METHOD__." db->type=".$db->type." user->id=".$user->id; + //print " - db ".$db->db; + print "\n"; + } + + /** + * setUpBeforeClass + * + * @return void + */ + public static function setUpBeforeClass() + { + global $conf,$user,$langs,$db; + $db->begin(); // This is to have all actions inside a transaction even if test launched without suite. + + print __METHOD__."\n"; + } + + /** + * tearDownAfterClass + * + * @return void + */ + public static function tearDownAfterClass() + { + global $conf,$user,$langs,$db; + $db->rollback(); + + print __METHOD__."\n"; + } + + /** + * Init phpunit tests + * + * @return void + */ + protected function setUp() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + print __METHOD__."\n"; + } + + /** + * End phpunit tests + * + * @return void + */ + protected function tearDown() + { + print __METHOD__."\n"; + } + + /** + * test ODF convertVarToOdf + * + * @return int + */ + public function testODFconvertVarToOdf() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + // we test using template_invoice, it does not matter, we just need a valid odt. + $filename = '../../htdocs/install/doctemplates/invoices/template_invoice.odt'; + $config = [ + 'PATH_TO_TMP' => "/tmp", + 'ZIP_PROXY' => "PclZipProxy", + 'DELIMITER_LEFT' => "{", + 'DELIMITER_RIGHT' => "}", + ]; + + $to_test = [ + /** No HTML **/ + // Simple strings + 1 => [ + 'to_convert' => 'Simple string', + 'encode' => true, + 'charset' => null, + 'expected' => 'Simple string' + ], + 2 => [ + 'to_convert' => 'Simple string', + 'encode' => false, + 'charset' => null, + 'expected' => 'Simple string' + ], + 3 => [ + 'to_convert' => "Simple string\nwith line break", + 'encode' => true, + 'charset' => null, + 'expected' => "Simple stringwith line break" + ], + 4 => [ + 'to_convert' => "Simple string\nwith line break", + 'encode' => false, + 'charset' => null, + 'expected' => "Simple stringwith line break" + ], + // Special chars + 5 => [ + 'to_convert' => 'One&two', + 'encode' => true, + 'charset' => null, + 'expected' => 'One&two' + ], + 6 => [ + 'to_convert' => 'One&two', + 'encode' => false, + 'charset' => null, + 'expected' => 'One&two' + ], + 7 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + ], + 8 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + ], + // special chars with non-default charset + 9 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => true, + 'charset' => 'UTF-16', + 'expected' => "/a&él'èàüöç€Ğ~<>", + ], + 10 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => false, + 'charset' => 'UTF-16', // When the charset differs from ISO-8859 string is not converted. + 'expected' => "/a&él'èàüöç€Ğ~<>", + ], + 11 => [ + 'to_convert' => "Greater > than", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Greater > than"), + ], + 12 => [ + 'to_convert' => "Greater > than", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Greater > than"), + ], + 13 => [ + 'to_convert' => "Smaller < than", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Smaller < than"), + ], + 14 => [ + 'to_convert' => "Smaller < than", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Smaller < than"), + ], + /** HTML **/ + // break lines + 15 => [ + 'to_convert' => "Break
line", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 16 => [ + 'to_convert' => "Break
line", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 17 => [ + 'to_convert' => "Break
line", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 18 => [ + 'to_convert' => "Break
line", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + // HTML tags + 19 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => false, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + ], + 20 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => true, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + ], + 21 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + ], + 22 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + ], + 23 => [ + 'to_convert' => "text with intricatedtags", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with intricatedtags'), + ], + + // One can also pass html-encoded string to the method + 24 => [ + 'to_convert' => 'One&two', + 'encode' => true, + 'charset' => null, + 'expected' => 'One&two' + ], + 25 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => false, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + ], + 26 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => true, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + ], + 27 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + ], + 28 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + ], + + // // TODO custom styles are not tested for now : the custom style have a custom ID based on time. Not random, but hard to mock or predict. generated in _replaceHtmlWithOdtTag() case 'span'. + // [ + // 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', + // 'encode' => true, + // 'charset' => 'UTF-8', + // 'expected' => "123 trucmachin > truc < troc > tracbla bla'", + // ], + + // The method removes hyperlinks and tags that are not dealt with. + 29 => [ + 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', + 'encode' => true, + 'charset' => null, + 'expected' => "123 trucmachin > truc < troc > tracbla bla", + ], + ]; + + $odf=new Odf($filename, array()); + if (is_object($odf)) $result = 1; // Just to test + + foreach ($to_test as $case) { + if ($case['charset'] !== null) { + $res = $odf->convertVarToOdf($case['to_convert'], $case['encode'], $case['charset']); + } else { + $res = $odf->convertVarToOdf($case['to_convert'], $case['encode']); + } + $this->assertEquals($res, $case['expected']); + } + + print __METHOD__." result=".$result."\n"; + + return $result; + } +} From b0548213b94aab7fceccbdd572a8589631b33952 Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 16 Nov 2022 11:44:46 +0100 Subject: [PATCH 02/12] copyright and doc --- htdocs/includes/odtphp/odf.php | 2 +- test/phpunit/ODFTest.php | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index a07f5346943..9612a4c41a5 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -15,7 +15,7 @@ class OdfException extends Exception * @copyright 2010-2015 - Laurent Destailleur - eldy@users.sourceforge.net * @copyright 2010 - Vikas Mahajan - http://vikasmahajan.wordpress.com * @copyright 2012 - Stephen Larroque - lrq3000@gmail.com - * @copyright 2020 - Open-DSI - contact@open-dsi.fr + * @copyright 2022 - Open-DSI - contact@open-dsi.fr * @license https://www.gnu.org/copyleft/gpl.html GPL License * @version 1.5.0 */ diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 989a42c93bf..7c859173326 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -1,6 +1,6 @@ - * Copyright (C) ---Put here your own copyright and developer email--- + * Copyright (C) 2022 - Open-DSI * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,9 +17,9 @@ */ /** - * \file test/unit/BillOfMaterialsTest.php - * \ingroup billofmaterials - * \brief PHPUnit test for BillOfMaterials class. + * \file test/unit/ODFTest.php + * \ingroup odf + * \brief PHPUnit test for odf class. */ global $conf,$user,$langs,$db; From a00331c4789ed0d5c2039f0b87ff3e0e5b335266 Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 16 Nov 2022 12:07:07 +0100 Subject: [PATCH 03/12] Inline comment --- htdocs/includes/odtphp/odf.php | 1 + 1 file changed, 1 insertion(+) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 9612a4c41a5..a5fb62e7218 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -167,6 +167,7 @@ class Odf // Check if the value includes html tags if ($this->_hasHtmlTag($value) === true) { + // Note: allowing many tags is supported on PHP >7.4 . For older versions, this will strip all HTML tags. $value = strip_tags($value, ['
', '', '', '', '', '', '', '', '', '']); // Default styles for strong/b, i/em, u, s, sub & sup From 00c62bf150b83cd668b42c10d33b501d4a543f2a Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 16 Nov 2022 14:08:41 +0100 Subject: [PATCH 04/12] Consistent management of carriage returns --- htdocs/includes/odtphp/odf.php | 2 +- test/phpunit/ODFTest.php | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index a5fb62e7218..9254ed709df 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -206,9 +206,9 @@ class Odf } else { $convertedValue = $this->encode_chars($convertedValue, $encode, $charset); + $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $convertedValue); } - $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $convertedValue); return $convertedValue; } diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 7c859173326..5bcd9a72290 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -342,6 +342,14 @@ class ODFTest extends PHPUnit\Framework\TestCase 'charset' => null, 'expected' => "123 trucmachin > truc < troc > tracbla bla", ], + + // HTML should not take \n into account, but only
. + 30 => [ + 'to_convert' => "text with strong text , a line\nbreak and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => false, + 'charset' => 'UTF-8', + 'expected' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', + ], ]; $odf=new Odf($filename, array()); From df761ceef7fd1ec1f0d68131b2a690ffb9cc9edf Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 23 Nov 2022 11:56:01 +0100 Subject: [PATCH 05/12] Use a constant for HTML tags regex --- htdocs/includes/odtphp/odf.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 9254ed709df..4a24afafffd 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -44,6 +44,7 @@ class Odf public $userdefined=array(); const PIXEL_TO_CM = 0.026458333; + const FIND_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; const FIND_ENCODED_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; @@ -329,7 +330,7 @@ class Odf */ private function _isHtmlTag($text) { - return preg_match('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + return preg_match(self::FIND_TAGS_REGEX, $text); } /** @@ -339,7 +340,7 @@ class Odf */ private function _hasHtmlTag($text) { - $result = preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + $result = preg_match_all(self::FIND_TAGS_REGEX, $text); return is_numeric($result) && $result > 0; } @@ -355,7 +356,7 @@ class Odf while (strlen($tempHtml) > 0) { // Check if the string includes a html tag - if (preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $tempHtml, $matches)) { + if (preg_match_all(self::FIND_TAGS_REGEX, $tempHtml, $matches)) { $tagOffset = strpos($tempHtml, $matches[0][0]); // Check if the string starts with the html tag if ($tagOffset > 0) { From 95781e39c2ea6bee1d905872d42085373ab194b8 Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 23 Nov 2022 11:56:51 +0100 Subject: [PATCH 06/12] Fix HTML tags regex: headings such as

are also HTML tags --- htdocs/includes/odtphp/odf.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 4a24afafffd..4e4287ae3b6 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -44,7 +44,7 @@ class Odf public $userdefined=array(); const PIXEL_TO_CM = 0.026458333; - const FIND_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; + const FIND_TAGS_REGEX = '/<([A-Za-z0-9]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; const FIND_ENCODED_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; From e10ba35387c00ccffdb734e922cddfe4622e4614 Mon Sep 17 00:00:00 2001 From: Thomas Negre Date: Wed, 23 Nov 2022 12:01:21 +0100 Subject: [PATCH 07/12] Add test case to lock/explain behavior on

heading tags. --- test/phpunit/ODFTest.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 5bcd9a72290..90b505e0848 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -335,6 +335,9 @@ class ODFTest extends PHPUnit\Framework\TestCase // 'expected' => "123 trucmachin > truc < troc > tracbla bla'", // ], + /* Tests that can evolve */ + // Following tests reflect the current behavior. They may evolve if the method behavior changes. + // The method removes hyperlinks and tags that are not dealt with. 29 => [ 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', @@ -342,9 +345,14 @@ class ODFTest extends PHPUnit\Framework\TestCase 'charset' => null, 'expected' => "123 trucmachin > truc < troc > tracbla bla", ], - - // HTML should not take \n into account, but only
. 30 => [ + 'to_convert' => '123

Title

bla', + 'encode' => true, + 'charset' => null, + 'expected' => "123 Title bla", + ], + // HTML should not take \n into account, but only
. + 31 => [ 'to_convert' => "text with strong text , a line\nbreak and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => 'UTF-8', From 32520599e450c4621ac03f9fdb37ad24513e7707 Mon Sep 17 00:00:00 2001 From: tnegre Date: Fri, 3 Feb 2023 15:12:05 +0100 Subject: [PATCH 08/12] ODT generation : tests cases for PHP < 7.4 --- test/phpunit/ODFTest.php | 101 ++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 90b505e0848..02c80dc9689 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -155,87 +155,101 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => 'Simple string', 'encode' => true, 'charset' => null, - 'expected' => 'Simple string' + 'expected7.4' => 'Simple string', + 'expected7.3' => 'Simple string' ], 2 => [ 'to_convert' => 'Simple string', 'encode' => false, 'charset' => null, - 'expected' => 'Simple string' + 'expected7.4' => 'Simple string', + 'expected7.3' => 'Simple string' ], 3 => [ 'to_convert' => "Simple string\nwith line break", 'encode' => true, 'charset' => null, - 'expected' => "Simple stringwith line break" + 'expected7.4' => "Simple stringwith line break", + 'expected7.3' => "Simple stringwith line break" ], 4 => [ 'to_convert' => "Simple string\nwith line break", 'encode' => false, 'charset' => null, - 'expected' => "Simple stringwith line break" + 'expected7.4' => "Simple stringwith line break", + 'expected7.3' => "Simple stringwith line break" ], // Special chars 5 => [ 'to_convert' => 'One&two', 'encode' => true, 'charset' => null, - 'expected' => 'One&two' + 'expected7.4' => 'One&two', + 'expected7.3' => 'One&two' ], 6 => [ 'to_convert' => 'One&two', 'encode' => false, 'charset' => null, - 'expected' => 'One&two' + 'expected7.4' => 'One&two', + 'expected7.3' => 'One&two' ], 7 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + 'expected7.4' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + 'expected7.3' => utf8_encode("/a&él'èàüöç€Ğ~<>") ], 8 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + 'expected7.4' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + 'expected7.3' => utf8_encode("/a&él'èàüöç€Ğ~<>") ], // special chars with non-default charset 9 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => true, 'charset' => 'UTF-16', - 'expected' => "/a&él'èàüöç€Ğ~<>", + 'expected7.4' => "/a&él'èàüöç€Ğ~<>", + 'expected7.3' => "/a&él'èàüöç€Ğ~<>", ], 10 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => false, 'charset' => 'UTF-16', // When the charset differs from ISO-8859 string is not converted. - 'expected' => "/a&él'èàüöç€Ğ~<>", + 'expected7.4' => "/a&él'èàüöç€Ğ~<>", + 'expected7.3' => "/a&él'èàüöç€Ğ~<>", ], 11 => [ 'to_convert' => "Greater > than", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode("Greater > than"), + 'expected7.4' => utf8_encode("Greater > than"), + 'expected7.3' => utf8_encode("Greater > than"), ], 12 => [ 'to_convert' => "Greater > than", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode("Greater > than"), + 'expected7.4' => utf8_encode("Greater > than"), + 'expected7.3' => utf8_encode("Greater > than"), ], 13 => [ 'to_convert' => "Smaller < than", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode("Smaller < than"), + 'expected7.4' => utf8_encode("Smaller < than"), + 'expected7.3' => utf8_encode("Smaller < than"), ], 14 => [ 'to_convert' => "Smaller < than", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode("Smaller < than"), + 'expected7.4' => utf8_encode("Smaller < than"), + 'expected7.3' => utf8_encode("Smaller < than"), ], /** HTML **/ // break lines @@ -243,56 +257,65 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => "Break
line", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode("Breakline"), + 'expected7.4' => utf8_encode("Breakline"), + 'expected7.3' => utf8_encode("Breakline"), ], 16 => [ 'to_convert' => "Break
line", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode("Breakline"), + 'expected7.4' => utf8_encode("Breakline"), + 'expected7.3' => utf8_encode("Breakline"), ], 17 => [ 'to_convert' => "Break
line", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode("Breakline"), + 'expected7.4' => utf8_encode("Breakline"), + 'expected7.3' => utf8_encode("Breakline"), ], 18 => [ 'to_convert' => "Break
line", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode("Breakline"), + 'expected7.4' => utf8_encode("Breakline"), + 'expected7.3' => utf8_encode("Breakline"), ], // HTML tags 19 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => 'UTF-8', - 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', ], 20 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => true, 'charset' => 'UTF-8', - 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', ], 21 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), ], 22 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), ], 23 => [ 'to_convert' => "text with intricatedtags", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode('text with intricatedtags'), + 'expected7.4' => utf8_encode('text with intricatedtags'), + 'expected7.3' => utf8_encode('text with intricatedtags'), ], // One can also pass html-encoded string to the method @@ -300,31 +323,36 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => 'One&two', 'encode' => true, 'charset' => null, - 'expected' => 'One&two' + 'expected7.4' => 'One&two', + 'expected7.3' => 'One&two' ], 25 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => false, 'charset' => 'UTF-8', - 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', ], 26 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => true, 'charset' => 'UTF-8', - 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', ], 27 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => false, 'charset' => null, - 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), ], 28 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => true, 'charset' => null, - 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), ], // // TODO custom styles are not tested for now : the custom style have a custom ID based on time. Not random, but hard to mock or predict. generated in _replaceHtmlWithOdtTag() case 'span'. @@ -332,7 +360,7 @@ class ODFTest extends PHPUnit\Framework\TestCase // 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', // 'encode' => true, // 'charset' => 'UTF-8', - // 'expected' => "123 trucmachin > truc < troc > tracbla bla'", + // 'expected7.4' => "123 trucmachin > truc < troc > tracbla bla'", // ], /* Tests that can evolve */ @@ -343,20 +371,23 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', 'encode' => true, 'charset' => null, - 'expected' => "123 trucmachin > truc < troc > tracbla bla", + 'expected7.4' => "123 trucmachin > truc < troc > tracbla bla", + 'expected7.3' => "123 trucmachin > truc < troc > tracbla bla", ], 30 => [ 'to_convert' => '123

Title

bla', 'encode' => true, 'charset' => null, - 'expected' => "123 Title bla", + 'expected7.4' => "123 Title bla", + 'expected7.3' => "123 Title bla", ], // HTML should not take \n into account, but only
. 31 => [ 'to_convert' => "text with strong text , a line\nbreak and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => 'UTF-8', - 'expected' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.4' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected7.3' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', ], ]; @@ -369,7 +400,11 @@ class ODFTest extends PHPUnit\Framework\TestCase } else { $res = $odf->convertVarToOdf($case['to_convert'], $case['encode']); } - $this->assertEquals($res, $case['expected']); + if (version_compare(phpversion(), '7.4.0', '>=' )) { + $this->assertEquals($res, $case['expected7.4']); + } else { + $this->assertEquals($res, $case['expected7.3']); + } } print __METHOD__." result=".$result."\n"; From 565e9f0c3d482ed601defcc42e4025231e97e8dd Mon Sep 17 00:00:00 2001 From: tnegre Date: Fri, 3 Feb 2023 15:33:50 +0100 Subject: [PATCH 09/12] Stickler fix --- test/phpunit/ODFTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 02c80dc9689..2bcc7bb0cb7 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -400,7 +400,7 @@ class ODFTest extends PHPUnit\Framework\TestCase } else { $res = $odf->convertVarToOdf($case['to_convert'], $case['encode']); } - if (version_compare(phpversion(), '7.4.0', '>=' )) { + if (version_compare(phpversion(), '7.4.0', '>=')) { $this->assertEquals($res, $case['expected7.4']); } else { $this->assertEquals($res, $case['expected7.3']); From 06c64eea98b5c2e5b9abad9a26ce74fb1a179336 Mon Sep 17 00:00:00 2001 From: tnegre Date: Thu, 23 Mar 2023 11:10:47 +0100 Subject: [PATCH 10/12] Update copyright --- htdocs/includes/odtphp/odf.php | 2 +- test/phpunit/ODFTest.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index e3b0b8a48f2..abf12c66e0c 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -18,7 +18,7 @@ class OdfException extends Exception * @copyright 2010-2015 - Laurent Destailleur - eldy@users.sourceforge.net * @copyright 2010 - Vikas Mahajan - http://vikasmahajan.wordpress.com * @copyright 2012 - Stephen Larroque - lrq3000@gmail.com - * @copyright 2022 - Open-DSI - contact@open-dsi.fr + * @copyright 2023 - Thomas Negre - contact@open-dsi.fr * @license https://www.gnu.org/copyleft/gpl.html GPL License * @version 1.5.0 */ diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index 2bcc7bb0cb7..c4eeee3f8bd 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -1,6 +1,6 @@ - * Copyright (C) 2022 - Open-DSI + * Copyright (C) 2023 - Thomas Negre - contact@open-dsi.fr * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by From e1c56650eba43e809e5c32a40cb5c2fca6c6acae Mon Sep 17 00:00:00 2001 From: tnegre Date: Fri, 24 Mar 2023 12:11:39 +0100 Subject: [PATCH 11/12] ODF line generation : compat php < 7.4 --- htdocs/includes/odtphp/odf.php | 7 +-- test/phpunit/ODFTest.php | 101 +++++++++++---------------------- 2 files changed, 36 insertions(+), 72 deletions(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index abf12c66e0c..1d097e95975 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -167,14 +167,13 @@ class Odf { $value = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401); - // fix breaklines... - $value = str_replace("
", "
", $value); + // fix breaklines. + $value = preg_replace('/<[ ]*br[ ]*\/?>/', "
", $value); $convertedValue = $value; // Check if the value includes html tags if ($this->_hasHtmlTag($value) === true) { - // Note: allowing many tags is supported on PHP >7.4 . For older versions, this will strip all HTML tags. - $value = strip_tags($value, ['
', '', '', '', '', '', '', '', '', '']); + $value = strip_tags($value, '
'); // Default styles for strong/b, i/em, u, s, sub & sup $automaticStyles = array( diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php index c4eeee3f8bd..fdf31140b11 100644 --- a/test/phpunit/ODFTest.php +++ b/test/phpunit/ODFTest.php @@ -155,101 +155,87 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => 'Simple string', 'encode' => true, 'charset' => null, - 'expected7.4' => 'Simple string', - 'expected7.3' => 'Simple string' + 'expected' => 'Simple string', ], 2 => [ 'to_convert' => 'Simple string', 'encode' => false, 'charset' => null, - 'expected7.4' => 'Simple string', - 'expected7.3' => 'Simple string' + 'expected' => 'Simple string', ], 3 => [ 'to_convert' => "Simple string\nwith line break", 'encode' => true, 'charset' => null, - 'expected7.4' => "Simple stringwith line break", - 'expected7.3' => "Simple stringwith line break" + 'expected' => "Simple stringwith line break", ], 4 => [ 'to_convert' => "Simple string\nwith line break", 'encode' => false, 'charset' => null, - 'expected7.4' => "Simple stringwith line break", - 'expected7.3' => "Simple stringwith line break" + 'expected' => "Simple stringwith line break", ], // Special chars 5 => [ 'to_convert' => 'One&two', 'encode' => true, 'charset' => null, - 'expected7.4' => 'One&two', - 'expected7.3' => 'One&two' + 'expected' => 'One&two', ], 6 => [ 'to_convert' => 'One&two', 'encode' => false, 'charset' => null, - 'expected7.4' => 'One&two', - 'expected7.3' => 'One&two' + 'expected' => 'One&two', ], 7 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode("/a&él'èàüöç€Ğ~<>"), - 'expected7.3' => utf8_encode("/a&él'èàüöç€Ğ~<>") + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), ], 8 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode("/a&él'èàüöç€Ğ~<>"), - 'expected7.3' => utf8_encode("/a&él'èàüöç€Ğ~<>") + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), ], // special chars with non-default charset 9 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => true, 'charset' => 'UTF-16', - 'expected7.4' => "/a&él'èàüöç€Ğ~<>", - 'expected7.3' => "/a&él'èàüöç€Ğ~<>", + 'expected' => "/a&él'èàüöç€Ğ~<>", ], 10 => [ 'to_convert' => "/a&él'èàüöç€Ğ~<>", 'encode' => false, 'charset' => 'UTF-16', // When the charset differs from ISO-8859 string is not converted. - 'expected7.4' => "/a&él'èàüöç€Ğ~<>", - 'expected7.3' => "/a&él'èàüöç€Ğ~<>", + 'expected' => "/a&él'èàüöç€Ğ~<>", ], 11 => [ 'to_convert' => "Greater > than", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode("Greater > than"), - 'expected7.3' => utf8_encode("Greater > than"), + 'expected' => utf8_encode("Greater > than"), ], 12 => [ 'to_convert' => "Greater > than", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode("Greater > than"), - 'expected7.3' => utf8_encode("Greater > than"), + 'expected' => utf8_encode("Greater > than"), ], 13 => [ 'to_convert' => "Smaller < than", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode("Smaller < than"), - 'expected7.3' => utf8_encode("Smaller < than"), + 'expected' => utf8_encode("Smaller < than"), ], 14 => [ 'to_convert' => "Smaller < than", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode("Smaller < than"), - 'expected7.3' => utf8_encode("Smaller < than"), + 'expected' => utf8_encode("Smaller < than"), ], /** HTML **/ // break lines @@ -257,65 +243,56 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => "Break
line", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode("Breakline"), - 'expected7.3' => utf8_encode("Breakline"), + 'expected' => utf8_encode("Breakline"), ], 16 => [ 'to_convert' => "Break
line", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode("Breakline"), - 'expected7.3' => utf8_encode("Breakline"), + 'expected' => utf8_encode("Breakline"), ], 17 => [ 'to_convert' => "Break
line", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode("Breakline"), - 'expected7.3' => utf8_encode("Breakline"), + 'expected' => utf8_encode("Breakline"), ], 18 => [ 'to_convert' => "Break
line", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode("Breakline"), - 'expected7.3' => utf8_encode("Breakline"), + 'expected' => utf8_encode("Breakline"), ], // HTML tags 19 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => 'UTF-8', - 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', - 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', ], 20 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => true, 'charset' => 'UTF-8', - 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', - 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', ], 21 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), - 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), ], 22 => [ 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), - 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), ], 23 => [ 'to_convert' => "text with intricatedtags", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode('text with intricatedtags'), - 'expected7.3' => utf8_encode('text with intricatedtags'), + 'expected' => utf8_encode('text with intricatedtags'), ], // One can also pass html-encoded string to the method @@ -323,36 +300,31 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => 'One&two', 'encode' => true, 'charset' => null, - 'expected7.4' => 'One&two', - 'expected7.3' => 'One&two' + 'expected' => 'One&two', ], 25 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => false, 'charset' => 'UTF-8', - 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', - 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', ], 26 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => true, 'charset' => 'UTF-8', - 'expected7.4' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', - 'expected7.3' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', ], 27 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => false, 'charset' => null, - 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), - 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), ], 28 => [ 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", 'encode' => true, 'charset' => null, - 'expected7.4' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), - 'expected7.3' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), ], // // TODO custom styles are not tested for now : the custom style have a custom ID based on time. Not random, but hard to mock or predict. generated in _replaceHtmlWithOdtTag() case 'span'. @@ -360,7 +332,7 @@ class ODFTest extends PHPUnit\Framework\TestCase // 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', // 'encode' => true, // 'charset' => 'UTF-8', - // 'expected7.4' => "123 trucmachin > truc < troc > tracbla bla'", + // 'expected' => "123 trucmachin > truc < troc > tracbla bla'", // ], /* Tests that can evolve */ @@ -371,23 +343,20 @@ class ODFTest extends PHPUnit\Framework\TestCase 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', 'encode' => true, 'charset' => null, - 'expected7.4' => "123 trucmachin > truc < troc > tracbla bla", - 'expected7.3' => "123 trucmachin > truc < troc > tracbla bla", + 'expected' => "123 trucmachin > truc < troc > tracbla bla", ], 30 => [ 'to_convert' => '123

Title

bla', 'encode' => true, 'charset' => null, - 'expected7.4' => "123 Title bla", - 'expected7.3' => "123 Title bla", + 'expected' => "123 Title bla", ], // HTML should not take \n into account, but only
. 31 => [ 'to_convert' => "text with strong text , a line\nbreak and underlined words with it@lic sp&ciàlchärs éè l'", 'encode' => false, 'charset' => 'UTF-8', - 'expected7.4' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', - 'expected7.3' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', + 'expected' => 'text with strong text , a line'."\n".'break and underlined words with it@lic sp&ciàlchärs éè l\'', ], ]; @@ -400,11 +369,7 @@ class ODFTest extends PHPUnit\Framework\TestCase } else { $res = $odf->convertVarToOdf($case['to_convert'], $case['encode']); } - if (version_compare(phpversion(), '7.4.0', '>=')) { - $this->assertEquals($res, $case['expected7.4']); - } else { - $this->assertEquals($res, $case['expected7.3']); - } + $this->assertEquals($res, $case['expected']); } print __METHOD__." result=".$result."\n"; From 46a8b5d9cb7c4d813fc4cc7e94aaa22e0bd2d842 Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Sun, 26 Mar 2023 15:22:12 +0200 Subject: [PATCH 12/12] Update odf.php --- htdocs/includes/odtphp/odf.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 1d097e95975..e3160b880f9 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -168,7 +168,7 @@ class Odf $value = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401); // fix breaklines. - $value = preg_replace('/<[ ]*br[ ]*\/?>/', "
", $value); + $value = preg_replace('//', "
", $value); $convertedValue = $value; // Check if the value includes html tags