From 6b3fdfbcd86299d64ca5bd0f79368193c5e63a6a Mon Sep 17 00:00:00 2001 From: Tim Otte Date: Wed, 4 Mar 2020 08:54:49 +0100 Subject: [PATCH] Moved the HtmlToOdtConverter class into the odf class --- .../core/class/HtmlToOdtConverter.class.php | 247 ------------------ htdocs/includes/odtphp/odf.php | 218 +++++++++++++++- 2 files changed, 210 insertions(+), 255 deletions(-) delete mode 100644 htdocs/core/class/HtmlToOdtConverter.class.php diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php deleted file mode 100644 index 4617a94ea1d..00000000000 --- a/htdocs/core/class/HtmlToOdtConverter.class.php +++ /dev/null @@ -1,247 +0,0 @@ -)|(?:>(.*)<\/\1>))/'); - -class HtmlToOdtConverter -{ - - /** - * Converts a string with html inside into an odt compatible string - * @param string $htmlText The text to convert - * @return array - */ - public static function htmlToOdt($htmlText) - { - /* - Default styles: - - = - = - = - = - = - = - - Custom styles: - - [Content] - - font-size = - font-family = - Additionally, a font face has to be added to the font-face-decls. An example for a font face declaration: - color = - */ - - //TODO: Add font names to odt header - $automaticStyles = array( - '', - '', - '', - '', - '', - '' - ); - - $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles, $fontDeclarations); - - foreach ($customStyles as $key => $value) { - array_push($automaticStyles, '' . $value . ''); - } - - return array( - 'automaticStyles' => $automaticStyles, - 'content' => $odtText, - 'fonts' => $fontDeclarations - ); - } - - /** - * Replaces html tags in with odt tags and returns an odt string - * @param array $tags An array with html tags generated by the getDataFromHtml() function - * @param array $customStyles An array of style defenitions that should be included inside the odt file - * @param array $fontDeclarations An array of font declarations that should be included inside the odt file - * @return string - */ - private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) - { - if ($customStyles == null) $customStyles = array(); - if ($fontDeclarations == null) $fontDeclarations = array(); - - $odtResult = ''; - - foreach ((array) $tags as $tag) { - // Check if the current item is a tag or just plain text - if (isset($tag['text'])) { - $odtResult .= $tag['text']; - } elseif (isset($tag['name'])) { - switch ($tag['name']) { - case 'br': - $odtResult .= ''; - break; - case 'strong': - case 'b': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'i': - case 'em': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'u': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 's': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'sub': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'sup': - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - break; - case 'span': - if (isset($tag['attributes']['style'])) { - $odtStyles = ''; - foreach ($tag['attributes']['style'] as $styleName => $styleValue) { - switch ($styleName) { - case 'font-family': - $fontName = $styleValue; - if (strpos($fontName, ',') !== false) { - $fontName = explode(',', $fontName)[0]; - } - if (!in_array($fontName, $fontDeclarations)) { - array_push($fontDeclarations, $fontName); - } - $odtStyles .= ''; - break; - case 'font-size': - if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { - $fontSize = intval($matches[1]); - if ($matches[2] == 'px') { - $fontSize = round($fontSize * 0.75); - } - $odtStyles .= ''; - } - break; - case 'color': - if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { - $odtStyles .= ''; - } - break; - } - } - if (strlen($odtStyles) > 0) { - $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); - $customStyles[$key] = $odtStyles; - $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; - } - } - break; - default: - $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations); - break; - } - } - } - return $odtResult; - } - - /** - * Checks if the given text is a html string - * @param string $text The text to check - * @return bool - */ - public static function isHtmlTag($text) - { - return preg_match(HTML_REGEX_PATTERN, $text); - } - - /** - * Checks if the given text includes a html string - * @param string $text The text to check - * @return bool - */ - public static function hasHtmlTag($text) - { - $result = preg_match_all(HTML_REGEX_PATTERN, $text); - return is_numeric($result) && $result > 0; - } - - /** - * Returns an array of html elements - * @param string $html A string with html tags - * @return array - */ - private static function getDataFromHtml($html) - { - $tags = array(); - $tempHtml = $html; - - while (strlen($tempHtml) > 0) { - // Check if the string includes a html tag - if (preg_match_all(HTML_REGEX_PATTERN, $tempHtml, $matches)) { - $tagOffset = strpos($tempHtml, $matches[0][0]); - // Check if the string starts with the html tag - if ($tagOffset > 0) { - // Push the text infront of the html tag to the result array - array_push($tags, array( - 'text' => substr($tempHtml, 0, $tagOffset) - )); - // Remove the text from the string - $tempHtml = substr($tempHtml, $tagOffset); - } - // Extract the attribute data from the html tag - preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); - $explodedAttributes = array_filter($explodedAttributes[0]); - $attributes = array(); - // Store each attribute with its name in the $attributes array - $explodedAttributesCount = count($explodedAttributes); - for ($i=0; $i<$explodedAttributesCount; $i++) { - $attribute = trim($explodedAttributes[$i]); - // Check if the attribute has a value (like style="") or has no value (like required) - if (strpos($attribute, '=') !== false) { - $splitAttribute = explode('=', $attribute); - $attrName = trim($splitAttribute[0]); - $attrValue = trim(str_replace('"', '', $splitAttribute[1])); - // check if the current attribute is a style attribute - if (strtolower($attrName) == 'style') { - $attributes[$attrName] = array(); - if (strpos($attrValue, ';') !== false) { - // Split the style properties and store them in an array - $explodedStyles = explode(';', $attrValue); - $explodedStylesCount = count($explodedStyles); - for ($n=0; $n<$explodedStylesCount; $n++) { - $splitStyle = explode(':', $explodedStyles[$n]); - $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); - } - } else { - $splitStyle = explode(':', $attrValue); - $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); - } - } else { - // Store the value directly in the $attributes array if this is not the style attribute - $attributes[$attrName] = $attrValue; - } - } else { - $attributes[trim($attribute)] = true; - } - } - // Push the html tag data to the result array - array_push($tags, array( - 'name' => $matches[1][0], - 'attributes' => $attributes, - 'innerText' => strip_tags($matches[3][0]), - 'children' => self::hasHtmlTag($matches[3][0]) ? self::getDataFromHtml($matches[3][0]) : null - )); - // Remove the processed html tag from the html string - $tempHtml = substr($tempHtml, strlen($matches[0][0])); - } else { - array_push($tags, array( - 'text' => $tempHtml - )); - $tempHtml = ''; - } - } - return $tags; - } -} diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 244119a282f..b2a93305430 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -1,7 +1,6 @@ _hasHtmlTag($value) === true) { + // Default styles for strong/b, i/em, u, s, sub & sup + $automaticStyles = array( + '', + '', + '', + '', + '', + '' + ); + + $this->vars[$tag] = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations); + + foreach ($customStyles as $key => $val) { + array_push($automaticStyles, '' . $val . ''); + } + // Join the styles and add them to the content xml $styles = ''; - foreach ($result['automaticStyles'] as $style) { + foreach ($automaticStyles as $style) { if (strpos($this->contentXml, $style) === false) { $styles .= $style; } } $this->contentXml = str_replace('', $styles . '', $this->contentXml); + // Join the font declarations and add them to the content xml $fonts = ''; - foreach ($result['fonts'] as $font) { + foreach ($fontDeclarations as $font) { if (strpos($this->contentXml, 'style:name="' . $font . '"') === false) { $fonts .= ''; } } $this->contentXml = str_replace('', $fonts . '', $this->contentXml); - // Set the var to the converted odt value - $this->vars[$tag] = $result['content']; } else $this->vars[$tag] = $value; return $this; } + /** + * Replaces html tags in with odt tags and returns an odt string + * @param array $tags An array with html tags generated by the getDataFromHtml() function + * @param array $customStyles An array of style defenitions that should be included inside the odt file + * @param array $fontDeclarations An array of font declarations that should be included inside the odt file + * @return string + */ + private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) + { + if ($customStyles == null) $customStyles = array(); + if ($fontDeclarations == null) $fontDeclarations = array(); + + $odtResult = ''; + + foreach ((array) $tags as $tag) { + // Check if the current item is a tag or just plain text + if (isset($tag['text'])) { + $odtResult .= $tag['text']; + } elseif (isset($tag['name'])) { + switch ($tag['name']) { + case 'br': + $odtResult .= ''; + break; + case 'strong': + case 'b': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'i': + case 'em': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'u': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 's': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'sub': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'sup': + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + break; + case 'span': + if (isset($tag['attributes']['style'])) { + $odtStyles = ''; + foreach ($tag['attributes']['style'] as $styleName => $styleValue) { + switch ($styleName) { + case 'font-family': + $fontName = $styleValue; + if (strpos($fontName, ',') !== false) { + $fontName = explode(',', $fontName)[0]; + } + if (!in_array($fontName, $fontDeclarations)) { + array_push($fontDeclarations, $fontName); + } + $odtStyles .= ''; + break; + case 'font-size': + if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { + $fontSize = intval($matches[1]); + if ($matches[2] == 'px') { + $fontSize = round($fontSize * 0.75); + } + $odtStyles .= ''; + } + break; + case 'color': + if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { + $odtStyles .= ''; + } + break; + } + } + if (strlen($odtStyles) > 0) { + // Generate a unique id for the style (using microtime and random because some CPUs are really fast...) + $key = floatval(str_replace('.', '', microtime(true)))+rand(0, 10); + $customStyles[$key] = $odtStyles; + $odtResult .= '' . ($tag['children'] != null ? $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations) : $tag['innerText']) . ''; + } + } + break; + default: + $odtResult .= $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations); + break; + } + } + } + return $odtResult; + } + + /** + * Checks if the given text is a html string + * @param string $text The text to check + * @return bool + */ + private function _isHtmlTag($text) + { + return preg_match('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + } + + /** + * Checks if the given text includes a html string + * @param string $text The text to check + * @return bool + */ + private function _hasHtmlTag($text) + { + $result = preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text); + return is_numeric($result) && $result > 0; + } + + /** + * Returns an array of html elements + * @param string $html A string with html tags + * @return array + */ + private function _getDataFromHtml($html) + { + $tags = array(); + $tempHtml = $html; + + while (strlen($tempHtml) > 0) { + // Check if the string includes a html tag + if (preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $tempHtml, $matches)) { + $tagOffset = strpos($tempHtml, $matches[0][0]); + // Check if the string starts with the html tag + if ($tagOffset > 0) { + // Push the text infront of the html tag to the result array + array_push($tags, array( + 'text' => substr($tempHtml, 0, $tagOffset) + )); + // Remove the text from the string + $tempHtml = substr($tempHtml, $tagOffset); + } + // Extract the attribute data from the html tag + preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s]*")?)+/', $matches[2][0], $explodedAttributes); + $explodedAttributes = array_filter($explodedAttributes[0]); + $attributes = array(); + // Store each attribute with its name in the $attributes array + $explodedAttributesCount = count($explodedAttributes); + for ($i=0; $i<$explodedAttributesCount; $i++) { + $attribute = trim($explodedAttributes[$i]); + // Check if the attribute has a value (like style="") or has no value (like required) + if (strpos($attribute, '=') !== false) { + $splitAttribute = explode('=', $attribute); + $attrName = trim($splitAttribute[0]); + $attrValue = trim(str_replace('"', '', $splitAttribute[1])); + // check if the current attribute is a style attribute + if (strtolower($attrName) == 'style') { + $attributes[$attrName] = array(); + if (strpos($attrValue, ';') !== false) { + // Split the style properties and store them in an array + $explodedStyles = explode(';', $attrValue); + $explodedStylesCount = count($explodedStyles); + for ($n=0; $n<$explodedStylesCount; $n++) { + $splitStyle = explode(':', $explodedStyles[$n]); + $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); + } + } else { + $splitStyle = explode(':', $attrValue); + $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]); + } + } else { + // Store the value directly in the $attributes array if this is not the style attribute + $attributes[$attrName] = $attrValue; + } + } else { + $attributes[trim($attribute)] = true; + } + } + // Push the html tag data to the result array + array_push($tags, array( + 'name' => $matches[1][0], + 'attributes' => $attributes, + 'innerText' => strip_tags($matches[3][0]), + 'children' => $this->_hasHtmlTag($matches[3][0]) ? $this->_getDataFromHtml($matches[3][0]) : null + )); + // Remove the processed html tag from the html string + $tempHtml = substr($tempHtml, strlen($matches[0][0])); + } else { + array_push($tags, array( + 'text' => $tempHtml + )); + $tempHtml = ''; + } + } + return $tags; + } + /** * Function to convert a HTML string into an ODT string