Merge pull request #13254 from Tim-Otte/add-odt-converter
NEW Support some HTML contents into ODT documents
This commit is contained in:
commit
36e6dd9d8e
@ -141,17 +141,239 @@ class Odf
|
||||
//}
|
||||
}
|
||||
|
||||
$value=$this->htmlToUTFAndPreOdf($value);
|
||||
|
||||
$value = $encode ? htmlspecialchars($value) : $value;
|
||||
$value = ($charset == 'ISO-8859') ? utf8_encode($value) : $value;
|
||||
|
||||
$value=$this->preOdfToOdf($value);
|
||||
// Check if the value includes html tags
|
||||
if ($this->_hasHtmlTag($value) === true) {
|
||||
// Default styles for strong/b, i/em, u, s, sub & sup
|
||||
$automaticStyles = array(
|
||||
'<style:style style:name="boldText" style:family="text"><style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold" /></style:style>',
|
||||
'<style:style style:name="italicText" style:family="text"><style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic" /></style:style>',
|
||||
'<style:style style:name="underlineText" style:family="text"><style:text-properties style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color" /></style:style>',
|
||||
'<style:style style:name="strikethroughText" style:family="text"><style:text-properties style:text-line-through-style="solid" style:text-line-through-type="single" /></style:style>',
|
||||
'<style:style style:name="subText" style:family="text"><style:text-properties style:text-position="sub 58%" /></style:style>',
|
||||
'<style:style style:name="supText" style:family="text"><style:text-properties style:text-position="super 58%" /></style:style>'
|
||||
);
|
||||
|
||||
$this->vars[$tag] = $this->_replaceHtmlWithOdtTag($this->_getDataFromHtml($value), $customStyles, $fontDeclarations);
|
||||
|
||||
foreach ($customStyles as $key => $val) {
|
||||
array_push($automaticStyles, '<style:style style:name="customStyle' . $key . '" style:family="text">' . $val . '</style:style>');
|
||||
}
|
||||
|
||||
$this->vars[$tag] = $value;
|
||||
// Join the styles and add them to the content xml
|
||||
$styles = '';
|
||||
foreach ($automaticStyles as $style) {
|
||||
if (strpos($this->contentXml, $style) === false) {
|
||||
$styles .= $style;
|
||||
}
|
||||
}
|
||||
$this->contentXml = str_replace('</office:automatic-styles>', $styles . '</office:automatic-styles>', $this->contentXml);
|
||||
|
||||
// Join the font declarations and add them to the content xml
|
||||
$fonts = '';
|
||||
foreach ($fontDeclarations as $font) {
|
||||
if (strpos($this->contentXml, 'style:name="' . $font . '"') === false) {
|
||||
$fonts .= '<style:font-face style:name="' . $font . '" svg:font-family="\'' . $font . '\'" />';
|
||||
}
|
||||
}
|
||||
$this->contentXml = str_replace('</office:font-face-decls>', $fonts . '</office:font-face-decls>', $this->contentXml);
|
||||
}
|
||||
else $this->vars[$tag] = preg_replace('/(\r\n|\r|\n)/i', "<text:line-break/>", $value);
|
||||
|
||||
return $this;
|
||||
}
|
||||
|
||||
/**
 * Replaces html tags with odt tags and returns an odt string
 *
 * Plain text items are copied as they are. `br` becomes a line break, strong/b, i/em, u, s, sub and sup
 * are wrapped in a span using the matching predefined text style, and a `span` carrying a parsed style
 * attribute (font-family, font-size, color) is wrapped in a span using a generated custom style.
 * Any other tag, and any span without a usable style, contributes only its content.
 *
 * @param	array	$tags				An array with html tags generated by the getDataFromHtml() function
 * @param	array	$customStyles		An array of style definitions that should be included inside the odt file (filled by this method, keyed by the suffix used in the generated style name)
 * @param	array	$fontDeclarations	An array of font declarations that should be included inside the odt file (filled by this method with font names)
 * @return	string						The odt string
 */
private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations)
{
    if ($customStyles == null) $customStyles = array();
    if ($fontDeclarations == null) $fontDeclarations = array();

    // Tags that map one-to-one onto a predefined text style
    $simpleStyles = array(
        'strong' => 'boldText',
        'b'      => 'boldText',
        'i'      => 'italicText',
        'em'     => 'italicText',
        'u'      => 'underlineText',
        's'      => 'strikethroughText',
        'sub'    => 'subText',
        'sup'    => 'supText'
    );

    $odtResult = '';

    foreach ((array) $tags as $tag) {
        // Check if the current item is a tag or just plain text
        if (isset($tag['text'])) {
            $odtResult .= $tag['text'];
            continue;
        }
        if (!isset($tag['name'])) {
            continue;
        }

        $name = strtolower($tag['name']);
        if ($name === 'br') {
            $odtResult .= '<text:line-break/>';
            continue;
        }

        // Name of the text style to wrap the content with (null = no wrapper)
        $styleName = null;
        if (isset($simpleStyles[$name])) {
            $styleName = $simpleStyles[$name];
        } elseif ($name === 'span' && isset($tag['attributes']['style']) && is_array($tag['attributes']['style'])) {
            // Collect all properties first so that the style holds a single text-properties element
            $properties = array();
            foreach ($tag['attributes']['style'] as $cssName => $cssValue) {
                switch (strtolower($cssName)) {
                    case 'font-family':
                        // Only the first font of a font list is used
                        $fontName = $cssValue;
                        if (strpos($fontName, ',') !== false) {
                            $fontName = explode(',', $fontName)[0];
                        }
                        $fontName = trim($fontName, " \t\n\r'\"");
                        if ($fontName === '') {
                            break;
                        }
                        if (!in_array($fontName, $fontDeclarations, true)) {
                            array_push($fontDeclarations, $fontName);
                        }
                        $properties['style:font-name'] = $fontName;
                        break;
                    case 'font-size':
                        if (preg_match('/([0-9]+)\s?(px|pt)/', $cssValue, $matches)) {
                            $fontSize = intval($matches[1]);
                            if ($matches[2] == 'px') {
                                // Pixels are converted to points with a factor of 0.75
                                $fontSize = round($fontSize * 0.75);
                            }
                            $properties['fo:font-size'] = $fontSize . 'pt';
                            $properties['style:font-size-asian'] = $fontSize . 'pt';
                            $properties['style:font-size-complex'] = $fontSize . 'pt';
                        }
                        break;
                    case 'color':
                        // Only a pure hex colour is accepted; the short #rgb form is expanded to #rrggbb
                        if (preg_match('/^#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})$/', $cssValue, $matches)) {
                            $hex = $matches[1];
                            if (strlen($hex) === 3) {
                                $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
                            }
                            $properties['fo:color'] = '#' . $hex;
                        }
                        break;
                }
            }

            if (count($properties) > 0) {
                $odtStyles = '<style:text-properties';
                foreach ($properties as $propertyName => $propertyValue) {
                    $odtStyles .= ' ' . $propertyName . '="' . $propertyValue . '"';
                }
                $odtStyles .= ' />';

                // The key is derived from the style content: identical styles share one definition
                // and different styles can never overwrite each other
                $key = md5($odtStyles);
                $customStyles[$key] = $odtStyles;
                $styleName = 'customStyle' . $key;
            }
        }

        // Nested tags are converted recursively, a tag without children only carries its text
        if (!empty($tag['children'])) {
            $content = $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations);
        } else {
            $content = isset($tag['innerText']) ? $tag['innerText'] : '';
        }

        if ($styleName === null) {
            // Unknown tag or span without usable style: keep the content, drop the markup
            $odtResult .= $content;
        } else {
            $odtResult .= '<text:span text:style-name="' . $styleName . '">' . $content . '</text:span>';
        }
    }

    return $odtResult;
}
|
||||
|
||||
/**
 * Checks if the given text is a html string
 *
 * The text is considered html when it contains either a self closing tag written with a
 * whitespace before the slash (like "<br />") or an opening tag followed by its matching
 * closing tag on the same line (like "<b>text</b>").
 *
 * @param	string	$text	The text to check
 * @return	bool			True if a html tag was found, false if not (or if the regex engine reported an error)
 */
private function _isHtmlTag($text)
{
    // preg_match() returns 1, 0 or false: normalize it to the documented boolean
    return preg_match('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $text) === 1;
}
|
||||
|
||||
/**
 * Tells whether the given text contains at least one html tag
 *
 * A tag is either self closing with a whitespace before the slash (like "<br />") or an
 * opening tag followed by its matching closing tag on the same line (like "<b>text</b>").
 *
 * @param	string	$text	The text to inspect
 * @return	bool			True when one or more tags were found, false otherwise
 */
private function _hasHtmlTag($text)
{
    $pattern = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/';

    // The match count is not an integer when the regex engine reports an error
    $matchCount = preg_match_all($pattern, $text);
    if (!is_int($matchCount)) {
        return false;
    }

    return $matchCount > 0;
}
|
||||
|
||||
/**
 * Returns an array of html elements
 *
 * The string is consumed from left to right. Text in front of (or without) a tag is returned as
 * array('text' => string). Every tag found is returned as an array with the keys:
 *  - 'name'       : the tag name as written in the html
 *  - 'attributes' : attribute name => value (true for an attribute without value); the value of a
 *                   style attribute is itself an array css property => css value
 *  - 'innerText'  : the content of the tag with all nested tags stripped
 *  - 'children'   : the parsed content (same structure) when the content contains tags, else null
 *
 * NOTE(review): the attribute group of the tag regex sits inside a repeated group, so PCRE only
 * keeps the last attribute of a tag; earlier attributes are not reported.
 *
 * @param	string	$html	A string with html tags
 * @return	array			The list of text chunks and tags found in the string
 */
private function _getDataFromHtml($html)
{
    $tags = array();
    $tempHtml = $html;

    while (strlen($tempHtml) > 0) {
        // Check if the string includes a html tag
        if (preg_match_all('/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/', $tempHtml, $matches)) {
            $tagOffset = strpos($tempHtml, $matches[0][0]);
            // Check if the string starts with the html tag
            if ($tagOffset > 0) {
                // Push the text in front of the html tag to the result array
                array_push($tags, array(
                    'text' => substr($tempHtml, 0, $tagOffset)
                ));
                // Remove the text from the string
                $tempHtml = substr($tempHtml, $tagOffset);
            }

            // Extract the attribute data from the html tag ('#' and '.' are needed for values like "#ff0000" or "12.5pt")
            preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s\,\;\#\.]*")?)+/', $matches[2][0], $explodedAttributes);
            $attributes = array();

            // array_filter() keeps the original keys, so the result is iterated instead of being indexed
            foreach (array_filter($explodedAttributes[0]) as $rawAttribute) {
                $attribute = trim($rawAttribute);

                // Attribute without a value (like required)
                if (strpos($attribute, '=') === false) {
                    $attributes[$attribute] = true;
                    continue;
                }

                // Attribute with a value (like style=""): only split on the first '='
                $splitAttribute = explode('=', $attribute, 2);
                $attrName = trim($splitAttribute[0]);
                $attrValue = trim(str_replace('"', '', $splitAttribute[1]));

                if (strtolower($attrName) != 'style') {
                    // Store the value directly in the $attributes array if this is not the style attribute
                    $attributes[$attrName] = $attrValue;
                    continue;
                }

                // Split the style properties and store them in an array
                $attributes[$attrName] = array();
                foreach (explode(';', $attrValue) as $declaration) {
                    $splitStyle = explode(':', $declaration, 2);
                    // Skip empty chunks (trailing ';') and declarations without a property name or value
                    if (count($splitStyle) < 2 || trim($splitStyle[0]) === '') {
                        continue;
                    }
                    $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]);
                }
            }

            // Push the html tag data to the result array
            array_push($tags, array(
                'name' => $matches[1][0],
                'attributes' => $attributes,
                'innerText' => strip_tags($matches[3][0]),
                'children' => $this->_hasHtmlTag($matches[3][0]) ? $this->_getDataFromHtml($matches[3][0]) : null
            ));

            // Remove the processed html tag from the html string
            $tempHtml = substr($tempHtml, strlen($matches[0][0]));
        } else {
            // No tag left: the rest of the string is plain text
            array_push($tags, array(
                'text' => $tempHtml
            ));
            $tempHtml = '';
        }
    }

    return $tags;
}
|
||||
|
||||
|
||||
/**
|
||||
* Function to convert a HTML string into an ODT string
|
||||
|
||||
Loading…
Reference in New Issue
Block a user