diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index b780b550b6d..a07f5346943 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -15,6 +15,7 @@ class OdfException extends Exception * @copyright 2010-2015 - Laurent Destailleur - eldy@users.sourceforge.net * @copyright 2010 - Vikas Mahajan - http://vikasmahajan.wordpress.com * @copyright 2012 - Stephen Larroque - lrq3000@gmail.com + * @copyright 2020 - Open-DSI - contact@open-dsi.fr * @license https://www.gnu.org/copyleft/gpl.html GPL License * @version 1.5.0 */ @@ -43,6 +44,8 @@ class Odf public $userdefined=array(); const PIXEL_TO_CM = 0.026458333; + const FIND_ENCODED_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'; + /** * Class constructor @@ -156,12 +159,16 @@ class Odf */ public function convertVarToOdf($value, $encode = true, $charset = 'ISO-8859') { - $value = $encode ? htmlspecialchars($value) : $value; - $value = ($charset == 'ISO-8859') ? utf8_encode($value) : $value; + $value = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401); + + // fix breaklines... + $value = str_replace("
", "
", $value); $convertedValue = $value; // Check if the value includes html tags if ($this->_hasHtmlTag($value) === true) { + $value = strip_tags($value, ['
', '', '', '', '', '', '', '', '', '']); + // Default styles for strong/b, i/em, u, s, sub & sup $automaticStyles = array( '', @@ -172,7 +179,7 @@ class Odf '' ); - $convertedValue = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations); + $convertedValue = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations, $encode, $charset); foreach ($customStyles as $key => $val) { array_push($automaticStyles, '' . $val . ''); @@ -195,20 +202,26 @@ class Odf } } $this->contentXml = str_replace('', $fonts . '', $this->contentXml); + } + else { + $convertedValue = $this->encode_chars($convertedValue, $encode, $charset); } - else $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $value); + + $convertedValue = preg_replace('/(\r\n|\r|\n)/i', "", $convertedValue); return $convertedValue; } /** - * Replaces html tags in with odt tags and returns an odt string + * Replaces html tags in with odt tags and returns an odt string. Encodes and converts inner text. * @param array $tags An array with html tags generated by the getDataFromHtml() function * @param array $customStyles An array of style defenitions that should be included inside the odt file * @param array $fontDeclarations An array of font declarations that should be included inside the odt file + * @param bool $encode If true, special XML characters are encoded + * @param string $charset Charset * @return string */ - private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) + private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations, $encode = false, $charset = '') { if ($customStyles == null) $customStyles = array(); if ($fontDeclarations == null) $fontDeclarations = array(); @@ -218,7 +231,8 @@ class Odf foreach ((array) $tags as $tag) { // Check if the current item is a tag or just plain text if (isset($tag['text'])) { - $odtResult .= $tag['text']; + $text = $this->encode_chars($tag['text'], $encode, $charset); + $odtResult .= $text; } elseif (isset($tag['name'])) { switch ($tag['name']) { case 'br': @@ -226,23 +240,23 @@ class Odf break; case 'strong': case 'b': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'i': case 'em': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'u': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 's': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'sub': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'sup': - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; break; case 'span': if (isset($tag['attributes']['style'])) { @@ -279,7 +293,7 @@ class Odf // Generate a unique id for the style (using microtime and random because some CPUs are really fast...) $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); $customStyles[$key] = $odtStyles; - $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $this->encode_chars($tag['innerText'], $encode, $charset)) . ''; } } break; @@ -292,6 +306,21 @@ class Odf return $odtResult; } + /** + * Correctly encode chars + * @param string $text The text to encode or not + * @param bool $encode If true, special XML characters are encoded + * @param string $charset Charset + * @return string The converted text + * @see self::convertVarToOdf() + */ + private function encode_chars($text, $encode = false, $charset = '') + { + $newtext = $encode ? htmlspecialchars($text, ENT_QUOTES | ENT_XML1) : $text; + $newtext = ($charset == 'ISO-8859') ? utf8_encode($newtext) : $newtext; + return $newtext; + } + /** * Checks if the given text is a html string * @param string $text The text to check diff --git a/test/phpunit/ODFTest.php b/test/phpunit/ODFTest.php new file mode 100644 index 00000000000..989a42c93bf --- /dev/null +++ b/test/phpunit/ODFTest.php @@ -0,0 +1,363 @@ + + * Copyright (C) ---Put here your own copyright and developer email--- + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * \file test/unit/BillOfMaterialsTest.php + * \ingroup billofmaterials + * \brief PHPUnit test for BillOfMaterials class. + */ + +global $conf,$user,$langs,$db; +//define('TEST_DB_FORCE_TYPE','mysql'); // This is to force using mysql driver +//require_once 'PHPUnit/Autoload.php'; +require_once dirname(__FILE__).'/../../htdocs/master.inc.php'; +require_once dirname(__FILE__).'/../../htdocs/includes/odtphp/odf.php'; + +if (empty($user->id)) { + print "Load permissions for admin user nb 1\n"; + $user->fetch(1); + $user->getrights(); +} +$conf->global->MAIN_DISABLE_ALL_MAILS=1; + +$langs->load("main"); + + +/** + * Class for PHPUnit tests + * + * @backupGlobals disabled + * @backupStaticAttributes enabled + * @remarks backupGlobals must be disabled to have db,conf,user and lang not erased. + */ +class ODFTest extends PHPUnit\Framework\TestCase +{ + protected $savconf; + protected $savuser; + protected $savlangs; + protected $savdb; + + /** + * Constructor + * We save global variables into local variables + * + * @return BOMTest + */ + public function __construct() + { + parent::__construct(); + + //$this->sharedFixture + global $conf,$user,$langs,$db; + $this->savconf=$conf; + $this->savuser=$user; + $this->savlangs=$langs; + $this->savdb=$db; + + print __METHOD__." db->type=".$db->type." user->id=".$user->id; + //print " - db ".$db->db; + print "\n"; + } + + /** + * setUpBeforeClass + * + * @return void + */ + public static function setUpBeforeClass() + { + global $conf,$user,$langs,$db; + $db->begin(); // This is to have all actions inside a transaction even if test launched without suite. + + print __METHOD__."\n"; + } + + /** + * tearDownAfterClass + * + * @return void + */ + public static function tearDownAfterClass() + { + global $conf,$user,$langs,$db; + $db->rollback(); + + print __METHOD__."\n"; + } + + /** + * Init phpunit tests + * + * @return void + */ + protected function setUp() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + print __METHOD__."\n"; + } + + /** + * End phpunit tests + * + * @return void + */ + protected function tearDown() + { + print __METHOD__."\n"; + } + + /** + * test ODF convertVarToOdf + * + * @return int + */ + public function testODFconvertVarToOdf() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + // we test using template_invoice, it does not matter, we just need a valid odt. + $filename = '../../htdocs/install/doctemplates/invoices/template_invoice.odt'; + $config = [ + 'PATH_TO_TMP' => "/tmp", + 'ZIP_PROXY' => "PclZipProxy", + 'DELIMITER_LEFT' => "{", + 'DELIMITER_RIGHT' => "}", + ]; + + $to_test = [ + /** No HTML **/ + // Simple strings + 1 => [ + 'to_convert' => 'Simple string', + 'encode' => true, + 'charset' => null, + 'expected' => 'Simple string' + ], + 2 => [ + 'to_convert' => 'Simple string', + 'encode' => false, + 'charset' => null, + 'expected' => 'Simple string' + ], + 3 => [ + 'to_convert' => "Simple string\nwith line break", + 'encode' => true, + 'charset' => null, + 'expected' => "Simple stringwith line break" + ], + 4 => [ + 'to_convert' => "Simple string\nwith line break", + 'encode' => false, + 'charset' => null, + 'expected' => "Simple stringwith line break" + ], + // Special chars + 5 => [ + 'to_convert' => 'One&two', + 'encode' => true, + 'charset' => null, + 'expected' => 'One&two' + ], + 6 => [ + 'to_convert' => 'One&two', + 'encode' => false, + 'charset' => null, + 'expected' => 'One&two' + ], + 7 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + ], + 8 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("/a&él'èàüöç€Ğ~<>"), + ], + // special chars with non-default charset + 9 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => true, + 'charset' => 'UTF-16', + 'expected' => "/a&él'èàüöç€Ğ~<>", + ], + 10 => [ + 'to_convert' => "/a&él'èàüöç€Ğ~<>", + 'encode' => false, + 'charset' => 'UTF-16', // When the charset differs from ISO-8859 string is not converted. + 'expected' => "/a&él'èàüöç€Ğ~<>", + ], + 11 => [ + 'to_convert' => "Greater > than", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Greater > than"), + ], + 12 => [ + 'to_convert' => "Greater > than", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Greater > than"), + ], + 13 => [ + 'to_convert' => "Smaller < than", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Smaller < than"), + ], + 14 => [ + 'to_convert' => "Smaller < than", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Smaller < than"), + ], + /** HTML **/ + // break lines + 15 => [ + 'to_convert' => "Break
line", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 16 => [ + 'to_convert' => "Break
line", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 17 => [ + 'to_convert' => "Break
line", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + 18 => [ + 'to_convert' => "Break
line", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode("Breakline"), + ], + // HTML tags + 19 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => false, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + ], + 20 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => true, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + ], + 21 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + ], + 22 => [ + 'to_convert' => "text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + ], + 23 => [ + 'to_convert' => "text with intricatedtags", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with intricatedtags'), + ], + + // One can also pass html-encoded string to the method + 24 => [ + 'to_convert' => 'One&two', + 'encode' => true, + 'charset' => null, + 'expected' => 'One&two' + ], + 25 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => false, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\'', + ], + 26 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => true, + 'charset' => 'UTF-8', + 'expected' => 'text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l'', + ], + 27 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => false, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l\''), + ], + 28 => [ + 'to_convert' => "text with <strong>strong, </strong><em>emphasis</em> and <u>underlined</u> words with <i>it@lic sp&ciàlchärs éè l'</i>", + 'encode' => true, + 'charset' => null, + 'expected' => utf8_encode('text with strong, emphasis and underlined words with it@lic sp&ciàlchärs éè l''), + ], + + // // TODO custom styles are not tested for now : the custom style have a custom ID based on time. Not random, but hard to mock or predict. generated in _replaceHtmlWithOdtTag() case 'span'. + // [ + // 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', + // 'encode' => true, + // 'charset' => 'UTF-8', + // 'expected' => "123 trucmachin > truc < troc > tracbla bla'", + // ], + + // The method removes hyperlinks and tags that are not dealt with. + 29 => [ + 'to_convert' => '123 trucmachin > truc < troc > tracbla bla', + 'encode' => true, + 'charset' => null, + 'expected' => "123 trucmachin > truc < troc > tracbla bla", + ], + ]; + + $odf=new Odf($filename, array()); + if (is_object($odf)) $result = 1; // Just to test + + foreach ($to_test as $case) { + if ($case['charset'] !== null) { + $res = $odf->convertVarToOdf($case['to_convert'], $case['encode'], $case['charset']); + } else { + $res = $odf->convertVarToOdf($case['to_convert'], $case['encode']); + } + $this->assertEquals($res, $case['expected']); + } + + print __METHOD__." result=".$result."\n"; + + return $result; + } +}