From 598d6d63b09f1497b2668a472c6f800595cf1b1f Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Mon, 2 Mar 2020 10:18:38 +0100 Subject: [PATCH 1/8] Added HtmlToOdtConverter class Implemented the converter in the odf setVars() function --- .../core/class/HtmlToOdtConverter.class.php | 222 ++++++++++++++++++ htdocs/includes/odtphp/odf.php | 24 +- 2 files changed, 240 insertions(+), 6 deletions(-) create mode 100644 htdocs/core/class/HtmlToOdtConverter.class.php diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php new file mode 100644 index 00000000000..d074c7f7a9b --- /dev/null +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -0,0 +1,222 @@ +)|(?:>(.*)<\/\1>))/'); + +class HtmlToOdtConverter { + + /** + * Converts a string with html inside into an odt compatible string + * @param string The text to convert + * @return array + */ + public static function htmlToOdt($htmlText) { + /* + Default styles: + + = + = + = + = + = + = + + Custom styles: + + [Content] + + font-size = + font-family = + Additionally, a font face has to be added to the font-face-decls. An example for a font face declaration: + color = + */ + + //TODO: Add font names to odt header + $automaticStyles = array( + '', + '', + '', + '', + '', + '' + ); + + $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles); + + foreach ($customStyles as $key => $value) { + array_push($automaticStyles, '' . $value . ''); + } + + return array( + 'automaticStyles' => $automaticStyles, + 'content' => $odtText + ); + } + + /** + * Replaces html tags in with odt tags and returns an odt string + * @param array $tags An array with html tags generated by the getDataFromHtml() function + * @param array $customStyles An array of style defenitions that should be included inside the odt file + */ + private static function replaceHtmlWithOdtTag($tags, &$customStyles) { + if ($customStyles == null) $customStyles = array(); + + $odtResult = ''; + + foreach ((array) $tags as $tag) { + // Check if the current item is a tag or just plain text + if (isset($tag['text'])) { + $odtResult .= $tag['text']; + } else if (isset($tag['name'])) { + switch ($tag['name']) { + case 'br': + $odtResult .= ''; + break; + case 'strong': + case 'b': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'i': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'u': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 's': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'sub': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'sup': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'span': + if (isset($tag['attributes']['style'])) { + $odtStyles = ''; + foreach ($tag['attributes']['style'] as $styleName => $styleValue) { + switch ($styleName) { + case 'font-family': + $odtStyles .= ''; + break; + case 'font-size': + if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { + $fontSize = intval($matches[1]); + if ($matches[2] == 'px') { + $fontSize = round($fontSize * 0.75); + } + $odtStyles .= ''; + } + break; + case 'color': + if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { + $odtStyles .= ''; + } + break; + } + } + if (strlen($odtStyles) > 0) { + $key = microtime(); + $customStyles[$key] = $odtStyles; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + } + } + break; + default: + $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles); + break; + } + } + } + return $odtResult; + } + + /** + * Checks if the given text is a html string + * @param string $text The text to check + */ + public static function isHtmlTag($text) { + return preg_match(HTML_REGEX_PATTERN, $text); + } + + /** + * Checks if the given text includes a html string + * @param string $text The text to check + */ + public static function hasHtmlTag($text) { + return preg_match_all(HTML_REGEX_PATTERN, $text); + } + + /** + * Returns an array of html elements + * @param string $html A string with html tags + */ + private static function getDataFromHtml($html) { + $tags = array(); + $tempHtml = $html; + + while (strlen($tempHtml) > 0) { + // Check if the string includes a html tag + if (preg_match_all(HTML_REGEX_PATTERN, $tempHtml, $matches)) { + $tagOffset = strpos($tempHtml, $matches[0][0]); + // Check if the string starts with the html tag + if ($tagOffset > 0) { + // Push the text infront of the html tag to the result array + array_push($tags, array( + 'text' => substr($tempHtml, 0, $tagOffset) + )); + // Remove the text from the string + $tempHtml = substr($tempHtml, $tagOffset); + } + // Extract the attribute data from the html tag + $explodedAttributes = strlen($matches[2][0]) > 0 ? explode(' ', $matches[2][0]) : array(); + $attributes = array(); + // Store each attribute with its name in the $attributes array + for ($i=0; $i $matches[1][0], + 'attributes' => $attributes, + 'innerText' => strip_tags($matches[3][0]), + 'children' => self::hasHtmlTag($matches[3][0]) ? self::getDataFromHtml($matches[3][0]) : null + )); + // Remove the processed html tag from the html string + $tempHtml = substr($tempHtml, strlen($matches[0][0])); + } else { + array_push($tags, array( + 'text' => $tempHtml + )); + $tempHtml = ''; + } + } + return $tags; + } +} \ No newline at end of file diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 01c3310a3f8..d42e4133b00 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -1,6 +1,7 @@ htmlToUTFAndPreOdf($value); - - $value = $encode ? htmlspecialchars($value) : $value; $value = ($charset == 'ISO-8859') ? utf8_encode($value) : $value; - $value=$this->preOdfToOdf($value); - - $this->vars[$tag] = $value; + // Check if the value includes html tags + if (HtmlToOdtConverter::hasHtmlTag($value) === true) { + // Convert the value to an odt compatible value + $result = HtmlToOdtConverter::htmlToOdt($value); + // Join the styles and add them to the content xml + $styles = ''; + foreach ($result['automaticStyles'] as $style) { + if (strpos($this->contentXml, $style) === false) { + $styles .= $style; + } + } + $this->contentXml = str_replace('', $styles . '', $this->contentXml); + // Set the var to the converted odt value + $this->vars[$tag] = $result['content']; + } + else $this->vars[$tag] = $value; + return $this; } From d5671f1c42a5dec895d71e83f3354f5f440209c5 Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Mon, 2 Mar 2020 16:08:06 +0100 Subject: [PATCH 2/8] Fixed a few bugs --- htdocs/core/class/HtmlToOdtConverter.class.php | 4 +++- htdocs/includes/odtphp/odf.php | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php index d074c7f7a9b..3df9d3cb3af 100644 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -77,6 +77,7 @@ class HtmlToOdtConverter { $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; break; case 'i': + case 'em': $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; break; case 'u': @@ -144,7 +145,8 @@ class HtmlToOdtConverter { * @param string $text The text to check */ public static function hasHtmlTag($text) { - return preg_match_all(HTML_REGEX_PATTERN, $text); + $result = preg_match_all(HTML_REGEX_PATTERN, $text); + return is_numeric($result) && $result > 0; } /** diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index d42e4133b00..e81e670c253 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -1,7 +1,7 @@ Date: Tue, 3 Mar 2020 10:19:05 +0100 Subject: [PATCH 3/8] Fixed a few bugs Added custom font-families --- .../core/class/HtmlToOdtConverter.class.php | 42 ++++++++++++------- htdocs/includes/odtphp/odf.php | 8 ++++ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php index 3df9d3cb3af..ff8e90ccfb0 100644 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -1,7 +1,7 @@ )|(?:>(.*)<\/\1>))/'); +// Learn more about this regex pattern: https://regexr.com/4vi60 +define('HTML_REGEX_PATTERN', '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'); class HtmlToOdtConverter { @@ -41,7 +41,7 @@ class HtmlToOdtConverter { '' ); - $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles); + $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles, $fontDeclarations); foreach ($customStyles as $key => $value) { array_push($automaticStyles, '' . $value . ''); @@ -49,7 +49,8 @@ class HtmlToOdtConverter { return array( 'automaticStyles' => $automaticStyles, - 'content' => $odtText + 'content' => $odtText, + 'fonts' => $fontDeclarations ); } @@ -58,8 +59,9 @@ class HtmlToOdtConverter { * @param array $tags An array with html tags generated by the getDataFromHtml() function * @param array $customStyles An array of style defenitions that should be included inside the odt file */ - private static function replaceHtmlWithOdtTag($tags, &$customStyles) { + private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) { if ($customStyles == null) $customStyles = array(); + if ($fontDeclarations == null) $fontDeclarations = array(); $odtResult = ''; @@ -74,23 +76,23 @@ class HtmlToOdtConverter { break; case 'strong': case 'b': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 'i': case 'em': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 'u': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 's': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 'sub': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 'sup': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; break; case 'span': if (isset($tag['attributes']['style'])) { @@ -98,7 +100,14 @@ class HtmlToOdtConverter { foreach ($tag['attributes']['style'] as $styleName => $styleValue) { switch ($styleName) { case 'font-family': - $odtStyles .= ''; + $fontName = $styleValue; + if (strpos($fontName, ',') !== false) { + $fontName = explode(',', $fontName)[0]; + } + if (!in_array($fontName, $fontDeclarations)) { + array_push($fontDeclarations, $fontName); + } + $odtStyles .= ''; break; case 'font-size': if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { @@ -117,14 +126,14 @@ class HtmlToOdtConverter { } } if (strlen($odtStyles) > 0) { - $key = microtime(); + $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); $customStyles[$key] = $odtStyles; - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; } } break; default: - $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles); + $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations); break; } } @@ -171,7 +180,8 @@ class HtmlToOdtConverter { $tempHtml = substr($tempHtml, $tagOffset); } // Extract the attribute data from the html tag - $explodedAttributes = strlen($matches[2][0]) > 0 ? explode(' ', $matches[2][0]) : array(); + preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); + $explodedAttributes = array_filter($explodedAttributes[0]); $attributes = array(); // Store each attribute with its name in the $attributes array for ($i=0; $icontentXml = str_replace('', $styles . '', $this->contentXml); + // Join the font declarations and add them to the content xml + $fonts = ''; + foreach ($result['fonts'] as $font) { + if (strpos($this->contentXml, 'style:name="' . $font . '"') === false) { + $fonts .= ''; + } + } + $this->contentXml = str_replace('', $fonts . '', $this->contentXml); // Set the var to the converted odt value $this->vars[$tag] = $result['content']; } From f026b959a2b14d75a4e6d51dfb09ce585b27a65a Mon Sep 17 00:00:00 2001 From: stickler-ci Date: Tue, 3 Mar 2020 09:28:00 +0000 Subject: [PATCH 4/8] Fixing style errors. --- .../core/class/HtmlToOdtConverter.class.php | 24 ++++++++++++------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php index ff8e90ccfb0..e9f210eb889 100644 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -3,14 +3,16 @@ // Learn more about this regex pattern: https://regexr.com/4vi60 define('HTML_REGEX_PATTERN', '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/'); -class HtmlToOdtConverter { +class HtmlToOdtConverter +{ /** * Converts a string with html inside into an odt compatible string * @param string The text to convert * @return array */ - public static function htmlToOdt($htmlText) { + public static function htmlToOdt($htmlText) + { /* Default styles: @@ -40,7 +42,7 @@ class HtmlToOdtConverter { '', '' ); - + $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles, $fontDeclarations); foreach ($customStyles as $key => $value) { @@ -59,7 +61,8 @@ class HtmlToOdtConverter { * @param array $tags An array with html tags generated by the getDataFromHtml() function * @param array $customStyles An array of style defenitions that should be included inside the odt file */ - private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) { + private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) + { if ($customStyles == null) $customStyles = array(); if ($fontDeclarations == null) $fontDeclarations = array(); @@ -69,7 +72,7 @@ class HtmlToOdtConverter { // Check if the current item is a tag or just plain text if (isset($tag['text'])) { $odtResult .= $tag['text']; - } else if (isset($tag['name'])) { + } elseif (isset($tag['name'])) { switch ($tag['name']) { case 'br': $odtResult .= ''; @@ -145,7 +148,8 @@ class HtmlToOdtConverter { * Checks if the given text is a html string * @param string $text The text to check */ - public static function isHtmlTag($text) { + public static function isHtmlTag($text) + { return preg_match(HTML_REGEX_PATTERN, $text); } @@ -153,7 +157,8 @@ class HtmlToOdtConverter { * Checks if the given text includes a html string * @param string $text The text to check */ - public static function hasHtmlTag($text) { + public static function hasHtmlTag($text) + { $result = preg_match_all(HTML_REGEX_PATTERN, $text); return is_numeric($result) && $result > 0; } @@ -162,7 +167,8 @@ class HtmlToOdtConverter { * Returns an array of html elements * @param string $html A string with html tags */ - private static function getDataFromHtml($html) { + private static function getDataFromHtml($html) + { $tags = array(); $tempHtml = $html; @@ -231,4 +237,4 @@ class HtmlToOdtConverter { } return $tags; } -} \ No newline at end of file +} From 667f00872cb695a66caa1b7c1cd950f74499873d Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Tue, 3 Mar 2020 10:33:26 +0100 Subject: [PATCH 5/8] Fixed lint errors --- htdocs/core/class/HtmlToOdtConverter.class.php | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php index e9f210eb889..4617a94ea1d 100644 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -8,7 +8,7 @@ class HtmlToOdtConverter /** * Converts a string with html inside into an odt compatible string - * @param string The text to convert + * @param string $htmlText The text to convert * @return array */ public static function htmlToOdt($htmlText) @@ -60,6 +60,8 @@ class HtmlToOdtConverter * Replaces html tags in with odt tags and returns an odt string * @param array $tags An array with html tags generated by the getDataFromHtml() function * @param array $customStyles An array of style defenitions that should be included inside the odt file + * @param array $fontDeclarations An array of font declarations that should be included inside the odt file + * @return string */ private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) { @@ -147,6 +149,7 @@ class HtmlToOdtConverter /** * Checks if the given text is a html string * @param string $text The text to check + * @return bool */ public static function isHtmlTag($text) { @@ -156,6 +159,7 @@ class HtmlToOdtConverter /** * Checks if the given text includes a html string * @param string $text The text to check + * @return bool */ public static function hasHtmlTag($text) { @@ -166,6 +170,7 @@ class HtmlToOdtConverter /** * Returns an array of html elements * @param string $html A string with html tags + * @return array */ private static function getDataFromHtml($html) { @@ -190,7 +195,8 @@ class HtmlToOdtConverter $explodedAttributes = array_filter($explodedAttributes[0]); $attributes = array(); // Store each attribute with its name in the $attributes array - for ($i=0; $i Date: Wed, 4 Mar 2020 08:54:49 +0100 Subject: [PATCH 6/8] Moved the HtmlToOdtConverter class into the odf class --- .../core/class/HtmlToOdtConverter.class.php | 247 ------------------ htdocs/includes/odtphp/odf.php | 218 +++++++++++++++- 2 files changed, 210 insertions(+), 255 deletions(-) delete mode 100644 htdocs/core/class/HtmlToOdtConverter.class.php diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php deleted file mode 100644 index 4617a94ea1d..00000000000 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ /dev/null @@ -1,247 +0,0 @@ -)|(?:>(.*)<\/\1>))/'); - -class HtmlToOdtConverter -{ - - /** - * Converts a string with html inside into an odt compatible string - * @param string $htmlText The text to convert - * @return array - */ - public static function htmlToOdt($htmlText) - { - /* - Default styles: - - = - = - = - = - = - = - - Custom styles: - - [Content] - - font-size = - font-family = - Additionally, a font face has to be added to the font-face-decls. An example for a font face declaration: - color = - */ - - //TODO: Add font names to odt header - $automaticStyles = array( - '', - '', - '', - '', - '', - '' - ); - - $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles, $fontDeclarations); - - foreach ($customStyles as $key => $value) { - array_push($automaticStyles, '' . $value . ''); - } - - return array( - 'automaticStyles' => $automaticStyles, - 'content' => $odtText, - 'fonts' => $fontDeclarations - ); - } - - /** - * Replaces html tags in with odt tags and returns an odt string - * @param array $tags An array with html tags generated by the getDataFromHtml() function - * @param array $customStyles An array of style defenitions that should be included inside the odt file - * @param array $fontDeclarations An array of font declarations that should be included inside the odt file - * @return string - */ - private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) - { - if ($customStyles == null) $customStyles = array(); - if ($fontDeclarations == null) $fontDeclarations = array(); - - $odtResult = ''; - - foreach ((array) $tags as $tag) { - // Check if the current item is a tag or just plain text - if (isset($tag['text'])) { - $odtResult .= $tag['text']; - } elseif (isset($tag['name'])) { - switch ($tag['name']) { - case 'br': - $odtResult .= ''; - break; - case 'strong': - case 'b': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'i': - case 'em': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'u': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 's': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'sub': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'sup': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'span': - if (isset($tag['attributes']['style'])) { - $odtStyles = ''; - foreach ($tag['attributes']['style'] as $styleName => $styleValue) { - switch ($styleName) { - case 'font-family': - $fontName = $styleValue; - if (strpos($fontName, ',') !== false) { - $fontName = explode(',', $fontName)[0]; - } - if (!in_array($fontName, $fontDeclarations)) { - array_push($fontDeclarations, $fontName); - } - $odtStyles .= ''; - break; - case 'font-size': - if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { - $fontSize = intval($matches[1]); - if ($matches[2] == 'px') { - $fontSize = round($fontSize * 0.75); - } - $odtStyles .= ''; - } - break; - case 'color': - if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { - $odtStyles .= ''; - } - break; - } - } - if (strlen($odtStyles) > 0) { - $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); - $customStyles[$key] = $odtStyles; - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - } - } - break; - default: - $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations); - break; - } - } - } - return $odtResult; - } - - /** - * Checks if the given text is a html string - * @param string $text The text to check - * @return bool - */ - public static function isHtmlTag($text) - { - return preg_match(HTML_REGEX_PATTERN, $text); - } - - /** - * Checks if the given text includes a html string - * @param string $text The text to check - * @return bool - */ - public static function hasHtmlTag($text) - { - $result = preg_match_all(HTML_REGEX_PATTERN, $text); - return is_numeric($result) && $result > 0; - } - - /** - * Returns an array of html elements - * @param string $html A string with html tags - * @return array - */ - private static function getDataFromHtml($html) - { - $tags = array(); - $tempHtml = $html; - - while (strlen($tempHtml) > 0) { - // Check if the string includes a html tag - if (preg_match_all(HTML_REGEX_PATTERN, $tempHtml, $matches)) { - $tagOffset = strpos($tempHtml, $matches[0][0]); - // Check if the string starts with the html tag - if ($tagOffset > 0) { - // Push the text infront of the html tag to the result array - array_push($tags, array( - 'text' => substr($tempHtml, 0, $tagOffset) - )); - // Remove the text from the string - $tempHtml = substr($tempHtml, $tagOffset); - } - // Extract the attribute data from the html tag - preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); - $explodedAttributes = array_filter($explodedAttributes[0]); - $attributes = array(); - // Store each attribute with its name in the $attributes array - $explodedAttributesCount = count($explodedAttributes); - for ($i=0; $i<$explodedAttributesCount; $i++) { - $attribute = trim($explodedAttributes[$i]); - // Check if the attribute has a value (like style="") or has no value (like required) - if (strpos($attribute, '=') !== false) { - $splitAttribute = explode('=', $attribute); - $attrName = trim($splitAttribute[0]); - $attrValue = trim(str_replace('"', '', $splitAttribute[1])); - // check if the current attribute is a style attribute - if (strtolower($attrName) == 'style') { - $attributes[$attrName] = array(); - if (strpos($attrValue, ';') !== false) { - // Split the style properties and store them in an array - $explodedStyles = explode(';', $attrValue); - $explodedStylesCount = count($explodedStyles); - for ($n=0; $n<$explodedStylesCount; $n++) { - $splitStyle = explode(':', $explodedStyles[$n]); - $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); - } - } else { - $splitStyle = explode(':', $attrValue); - $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); - } - } else { - // Store the value directly in the $attributes array if this is not the style attribute - $attributes[$attrName] = $attrValue; - } - } else { - $attributes[trim($attribute)] = true; - } - } - // Push the html tag data to the result array - array_push($tags, array( - 'name' => $matches[1][0], - 'attributes' => $attributes, - 'innerText' => strip_tags($matches[3][0]), - 'children' => self::hasHtmlTag($matches[3][0]) ? self::getDataFromHtml($matches[3][0]) : null - )); - // Remove the processed html tag from the html string - $tempHtml = substr($tempHtml, strlen($matches[0][0])); - } else { - array_push($tags, array( - 'text' => $tempHtml - )); - $tempHtml = ''; - } - } - return $tags; - } -} diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 244119a282f..b2a93305430 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -1,7 +1,6 @@ _hasHtmlTag($value) === true) { + // Default styles for strong/b, i/em, u, s, sub & sup + $automaticStyles = array( + '', + '', + '', + '', + '', + '' + ); + + $this->vars[$tag] = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations); + + foreach ($customStyles as $key => $val) { + array_push($automaticStyles, '' . $val . ''); + } + // Join the styles and add them to the content xml $styles = ''; - foreach ($result['automaticStyles'] as $style) { + foreach ($automaticStyles as $style) { if (strpos($this->contentXml, $style) === false) { $styles .= $style; } } $this->contentXml = str_replace('', $styles . '', $this->contentXml); + // Join the font declarations and add them to the content xml $fonts = ''; - foreach ($result['fonts'] as $font) { + foreach ($fontDeclarations as $font) { if (strpos($this->contentXml, 'style:name="' . $font . '"') === false) { $fonts .= ''; } } $this->contentXml = str_replace('', $fonts . '', $this->contentXml); - // Set the var to the converted odt value - $this->vars[$tag] = $result['content']; } else $this->vars[$tag] = $value; return $this; } + /** + * Replaces html tags in with odt tags and returns an odt string + * @param array $tags An array with html tags generated by the getDataFromHtml() function + * @param array $customStyles An array of style defenitions that should be included inside the odt file + * @param array $fontDeclarations An array of font declarations that should be included inside the odt file + * @return string + */ + private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) + { + if ($customStyles == null) $customStyles = array(); + if ($fontDeclarations == null) $fontDeclarations = array(); + + $odtResult = ''; + + foreach ((array) $tags as $tag) { + // Check if the current item is a tag or just plain text + if (isset($tag['text'])) { + $odtResult .= $tag['text']; + } elseif (isset($tag['name'])) { + switch ($tag['name']) { + case 'br': + $odtResult .= ''; + break; + case 'strong': + case 'b': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'i': + case 'em': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'u': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 's': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'sub': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'sup': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'span': + if (isset($tag['attributes']['style'])) { + $odtStyles = ''; + foreach ($tag['attributes']['style'] as $styleName => $styleValue) { + switch ($styleName) { + case 'font-family': + $fontName = $styleValue; + if (strpos($fontName, ',') !== false) { + $fontName = explode(',', $fontName)[0]; + } + if (!in_array($fontName, $fontDeclarations)) { + array_push($fontDeclarations, $fontName); + } + $odtStyles .= ''; + break; + case 'font-size': + if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { + $fontSize = intval($matches[1]); + if ($matches[2] == 'px') { + $fontSize = round($fontSize * 0.75); + } + $odtStyles .= ''; + } + break; + case 'color': + if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { + $odtStyles .= ''; + } + break; + } + } + if (strlen($odtStyles) > 0) { + // Generate a unique id for the style (using microtime and random because some CPUs are really fast...) + $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); + $customStyles[$key] = $odtStyles; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + } + } + break; + default: + $odtResult .= $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations); + break; + } + } + } + return $odtResult; + } + + /** + * Checks if the given text is a html string + * @param string $text The text to check + * @return bool + */ + private function _isHtmlTag($text) + { + return preg_match('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + } + + /** + * Checks if the given text includes a html string + * @param string $text The text to check + * @return bool + */ + private function _hasHtmlTag($text) + { + $result = preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + return is_numeric($result) && $result > 0; + } + + /** + * Returns an array of html elements + * @param string $html A string with html tags + * @return array + */ + private function _getDataFromHtml($html) + { + $tags = array(); + $tempHtml = $html; + + while (strlen($tempHtml) > 0) { + // Check if the string includes a html tag + if (preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $tempHtml, $matches)) { + $tagOffset = strpos($tempHtml, $matches[0][0]); + // Check if the string starts with the html tag + if ($tagOffset > 0) { + // Push the text infront of the html tag to the result array + array_push($tags, array( + 'text' => substr($tempHtml, 0, $tagOffset) + )); + // Remove the text from the string + $tempHtml = substr($tempHtml, $tagOffset); + } + // Extract the attribute data from the html tag + preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); + $explodedAttributes = array_filter($explodedAttributes[0]); + $attributes = array(); + // Store each attribute with its name in the $attributes array + $explodedAttributesCount = count($explodedAttributes); + for ($i=0; $i<$explodedAttributesCount; $i++) { + $attribute = trim($explodedAttributes[$i]); + // Check if the attribute has a value (like style="") or has no value (like required) + if (strpos($attribute, '=') !== false) { + $splitAttribute = explode('=', $attribute); + $attrName = trim($splitAttribute[0]); + $attrValue = trim(str_replace('"', '', $splitAttribute[1])); + // check if the current attribute is a style attribute + if (strtolower($attrName) == 'style') { + $attributes[$attrName] = array(); + if (strpos($attrValue, ';') !== false) { + // Split the style properties and store them in an array + $explodedStyles = explode(';', $attrValue); + $explodedStylesCount = count($explodedStyles); + for ($n=0; $n<$explodedStylesCount; $n++) { + $splitStyle = explode(':', $explodedStyles[$n]); + $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); + } + } else { + $splitStyle = explode(':', $attrValue); + $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); + } + } else { + // Store the value directly in the $attributes array if this is not the style attribute + $attributes[$attrName] = $attrValue; + } + } else { + $attributes[trim($attribute)] = true; + } + } + // Push the html tag data to the result array + array_push($tags, array( + 'name' => $matches[1][0], + 'attributes' => $attributes, + 'innerText' => strip_tags($matches[3][0]), + 'children' => $this->_hasHtmlTag($matches[3][0]) ? $this->_getDataFromHtml($matches[3][0]) : null + )); + // Remove the processed html tag from the html string + $tempHtml = substr($tempHtml, strlen($matches[0][0])); + } else { + array_push($tags, array( + 'text' => $tempHtml + )); + $tempHtml = ''; + } + } + return $tags; + } + /** * Function to convert a HTML string into an ODT string From 1472a2437163726eb27e61879218149abe1c9e0b Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Thu, 5 Mar 2020 09:41:22 +0100 Subject: [PATCH 7/8] Added missing line break replacement when the values do not contain html tags --- htdocs/includes/odtphp/odf.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index b2a93305430..79e2646ad32 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -179,7 +179,7 @@ class Odf } $this->contentXml = str_replace('', $fonts . '', $this->contentXml); } - else $this->vars[$tag] = $value; + else $this->vars[$tag] = preg_replace('/(\r\n|\r|\n)/i', "", $value); return $this; } From 26deae9dbc3145b7ba148578cdf449cd7f1cce50 Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Mon, 9 Mar 2020 15:37:23 +0100 Subject: [PATCH 8/8] Fixed the regex string used to extract the attributes from the html tag --- htdocs/includes/odtphp/odf.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 79e2646ad32..96e571c08b2 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -320,7 +320,7 @@ class Odf $tempHtml = substr($tempHtml, $tagOffset); } // Extract the attribute data from the html tag - preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); + preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s\,\;]*")?)+/', $matches[2][0], $explodedAttributes); $explodedAttributes = array_filter($explodedAttributes[0]); $attributes = array(); // Store each attribute with its name in the $attributes array