diff --git a/htdocs/core/class/HtmlToOdtConverter.class.php b/htdocs/core/class/HtmlToOdtConverter.class.php new file mode 100644 index 00000000000..d074c7f7a9b --- /dev/null +++ b/htdocs/core/class/HtmlToOdtConverter.class.php @@ -0,0 +1,222 @@ +)|(?:>(.*)<\/\1>))/'); + +class HtmlToOdtConverter { + + /** + * Converts a string with html inside into an odt compatible string + * @param string The text to convert + * @return array + */ + public static function htmlToOdt($htmlText) { + /* + Default styles: + + = + = + = + = + = + = + + Custom styles: + + [Content] + + font-size = + font-family = + Additionally, a font face has to be added to the font-face-decls. An example for a font face declaration: + color = + */ + + //TODO: Add font names to odt header + $automaticStyles = array( + '', + '', + '', + '', + '', + '' + ); + + $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($htmlText), $customStyles); + + foreach ($customStyles as $key => $value) { + array_push($automaticStyles, '' . $value . ''); + } + + return array( + 'automaticStyles' => $automaticStyles, + 'content' => $odtText + ); + } + + /** + * Replaces html tags in with odt tags and returns an odt string + * @param array $tags An array with html tags generated by the getDataFromHtml() function + * @param array $customStyles An array of style defenitions that should be included inside the odt file + */ + private static function replaceHtmlWithOdtTag($tags, &$customStyles) { + if ($customStyles == null) $customStyles = array(); + + $odtResult = ''; + + foreach ((array) $tags as $tag) { + // Check if the current item is a tag or just plain text + if (isset($tag['text'])) { + $odtResult .= $tag['text']; + } else if (isset($tag['name'])) { + switch ($tag['name']) { + case 'br': + $odtResult .= ''; + break; + case 'strong': + case 'b': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'i': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'u': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 's': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'sub': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'sup': + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + break; + case 'span': + if (isset($tag['attributes']['style'])) { + $odtStyles = ''; + foreach ($tag['attributes']['style'] as $styleName => $styleValue) { + switch ($styleName) { + case 'font-family': + $odtStyles .= ''; + break; + case 'font-size': + if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) { + $fontSize = intval($matches[1]); + if ($matches[2] == 'px') { + $fontSize = round($fontSize * 0.75); + } + $odtStyles .= ''; + } + break; + case 'color': + if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) { + $odtStyles .= ''; + } + break; + } + } + if (strlen($odtStyles) > 0) { + $key = microtime(); + $customStyles[$key] = $odtStyles; + $odtResult .= '' . ($tag['children'] != null ? self::replaceHtmlWithOdtTag($tag['children'], $customStyles) : $tag['innerText']) . ''; + } + } + break; + default: + $odtResult .= self::replaceHtmlWithOdtTag($tag['children'], $customStyles); + break; + } + } + } + return $odtResult; + } + + /** + * Checks if the given text is a html string + * @param string $text The text to check + */ + public static function isHtmlTag($text) { + return preg_match(HTML_REGEX_PATTERN, $text); + } + + /** + * Checks if the given text includes a html string + * @param string $text The text to check + */ + public static function hasHtmlTag($text) { + return preg_match_all(HTML_REGEX_PATTERN, $text); + } + + /** + * Returns an array of html elements + * @param string $html A string with html tags + */ + private static function getDataFromHtml($html) { + $tags = array(); + $tempHtml = $html; + + while (strlen($tempHtml) > 0) { + // Check if the string includes a html tag + if (preg_match_all(HTML_REGEX_PATTERN, $tempHtml, $matches)) { + $tagOffset = strpos($tempHtml, $matches[0][0]); + // Check if the string starts with the html tag + if ($tagOffset > 0) { + // Push the text infront of the html tag to the result array + array_push($tags, array( + 'text' => substr($tempHtml, 0, $tagOffset) + )); + // Remove the text from the string + $tempHtml = substr($tempHtml, $tagOffset); + } + // Extract the attribute data from the html tag + $explodedAttributes = strlen($matches[2][0]) > 0 ? explode(' ', $matches[2][0]) : array(); + $attributes = array(); + // Store each attribute with its name in the $attributes array + for ($i=0; $i $matches[1][0], + 'attributes' => $attributes, + 'innerText' => strip_tags($matches[3][0]), + 'children' => self::hasHtmlTag($matches[3][0]) ? self::getDataFromHtml($matches[3][0]) : null + )); + // Remove the processed html tag from the html string + $tempHtml = substr($tempHtml, strlen($matches[0][0])); + } else { + array_push($tags, array( + 'text' => $tempHtml + )); + $tempHtml = ''; + } + } + return $tags; + } +} \ No newline at end of file diff --git a/htdocs/includes/odtphp/odf.php b/htdocs/includes/odtphp/odf.php index 01c3310a3f8..d42e4133b00 100644 --- a/htdocs/includes/odtphp/odf.php +++ b/htdocs/includes/odtphp/odf.php @@ -1,6 +1,7 @@ htmlToUTFAndPreOdf($value); - - $value = $encode ? htmlspecialchars($value) : $value; $value = ($charset == 'ISO-8859') ? utf8_encode($value) : $value; - $value=$this->preOdfToOdf($value); - - $this->vars[$tag] = $value; + // Check if the value includes html tags + if (HtmlToOdtConverter::hasHtmlTag($value) === true) { + // Convert the value to an odt compatible value + $result = HtmlToOdtConverter::htmlToOdt($value); + // Join the styles and add them to the content xml + $styles = ''; + foreach ($result['automaticStyles'] as $style) { + if (strpos($this->contentXml, $style) === false) { + $styles .= $style; + } + } + $this->contentXml = str_replace('', $styles . '', $this->contentXml); + // Set the var to the converted odt value + $this->vars[$tag] = $result['content']; + } + else $this->vars[$tag] = $value; + return $this; }