234 lines
14 KiB
PHP
234 lines
14 KiB
PHP
<?php
|
|
|
|
// Learn more about this regex pattern: https://regexr.com/4vi60
|
|
// Pattern used by HtmlToOdtConverter to recognise a single HTML element.
//
// Capture groups:
//   1 - the tag name (ASCII letters only, so names containing digits such as "h1" never match)
//   2 - one attribute; the group sits inside a `*` repetition, so after a match it only
//       holds the LAST attribute of the opening tag
//   3 - everything between the opening tag and the closing tag of the same name (`\1`);
//       unset for the self-closing form
//
// An element is accepted either in the self-closing form "<name />" (whitespace is required
// before "/>") or as "<name>...</name>". The body is matched greedily and without the `s`
// modifier, so it extends to the last matching closing tag on the same line.
//
// NOTE(review): in `(?:=(?:".*?")|(?:[0-9]+))` the `=` belongs to the quoted alternative only,
// so the second alternative is "digits directly after the attribute name" rather than
// `name=123`; attributes without a value are not accepted either - confirm whether that is intended.
define('HTML_REGEX_PATTERN', '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(.*)<\/\1>))/');
|
|
|
|
class HtmlToOdtConverter {

    /**
     * Inline HTML tags that map directly onto one of the predefined ODT text styles.
     *
     * @var array tag name (lower case) => ODT style name
     */
    private static $defaultStyleByTag = array(
        'strong' => 'boldText',
        'b' => 'boldText',
        'i' => 'italicText',
        'em' => 'italicText',
        'u' => 'underlineText',
        's' => 'strikethroughText',
        'sub' => 'subText',
        'sup' => 'supText'
    );

    /**
     * Converts a string with html inside into an odt compatible string
     *
     * Default styles (always returned): boldText (<strong>, <b>), italicText (<i>, <em>),
     * underlineText (<u>), strikethroughText (<s>), subText (<sub>) and supText (<sup>).
     *
     * Custom styles are generated for <span style="..."> elements and support:
     *   font-size   => fo:font-size / style:font-size-asian / style:font-size-complex (px is converted to pt)
     *   font-family => style:font-name; the font name is also returned in 'fonts' because a matching
     *                  font face has to be added to the font-face-decls of the document, e.g.
     *                  <style:font-face style:name="Courier New" svg:font-family="'Courier New'" />
     *   color       => fo:color
     *
     * @param string $htmlText The text to convert
     * @return array An array with the keys 'automaticStyles' (list of <style:style> elements),
     *               'content' (the odt text) and 'fonts' (list of font names used by the content)
     */
    public static function htmlToOdt($htmlText) {
        //TODO: Add font names to odt header
        $automaticStyles = array(
            '<style:style style:name="boldText" style:family="text"><style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold" /></style:style>',
            '<style:style style:name="italicText" style:family="text"><style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic" /></style:style>',
            '<style:style style:name="underlineText" style:family="text"><style:text-properties style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color" /></style:style>',
            '<style:style style:name="strikethroughText" style:family="text"><style:text-properties style:text-line-through-style="solid" style:text-line-through-type="single" /></style:style>',
            '<style:style style:name="subText" style:family="text"><style:text-properties style:text-position="sub 58%" /></style:style>',
            '<style:style style:name="supText" style:family="text"><style:text-properties style:text-position="super 58%" /></style:style>'
        );

        $customStyles = array();
        $fontDeclarations = array();

        // HTML_REGEX_PATTERN only recognises "<br />"; bring "<br>", "<br/>" and "<br></br>"
        // into that form so they do not end up as raw markup inside the odt text.
        $normalizedHtml = preg_replace('/<br\s*\/?>(?:<\/br>)?/i', '<br />', (string) $htmlText);
        if ($normalizedHtml === null) {
            $normalizedHtml = (string) $htmlText;
        }

        $odtText = self::replaceHtmlWithOdtTag(self::getDataFromHtml($normalizedHtml), $customStyles, $fontDeclarations);

        foreach ($customStyles as $styleName => $definition) {
            $automaticStyles[] = '<style:style style:name="' . $styleName . '" style:family="text">' . $definition . '</style:style>';
        }

        return array(
            'automaticStyles' => $automaticStyles,
            'content' => $odtText,
            'fonts' => $fontDeclarations
        );
    }

    /**
     * Replaces html tags with odt tags and returns an odt string
     *
     * Tags without an odt counterpart are dropped, but their content is kept.
     *
     * @param array $tags An array with html tags generated by the getDataFromHtml() function
     * @param array $customStyles Collects the style definitions that should be included inside the odt file (style name => definition)
     * @param array $fontDeclarations Collects the names of all fonts used by the custom styles
     * @return string
     */
    private static function replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations) {
        if (!is_array($customStyles)) $customStyles = array();
        if (!is_array($fontDeclarations)) $fontDeclarations = array();

        $odtResult = '';

        foreach ((array) $tags as $tag) {
            // Plain text between tags is passed through unchanged
            if (isset($tag['text'])) {
                $odtResult .= $tag['text'];
                continue;
            }
            if (!isset($tag['name'])) {
                continue;
            }

            $tagName = strtolower($tag['name']);
            if ($tagName === 'br') {
                $odtResult .= '<text:line-break/>';
                continue;
            }

            // Resolve the style before rendering the content so that the style of an
            // outer span is registered before the styles of the spans nested inside it.
            $styleName = null;
            if (isset(self::$defaultStyleByTag[$tagName])) {
                $styleName = self::$defaultStyleByTag[$tagName];
            } elseif ($tagName === 'span') {
                $styleName = self::registerCustomStyle($tag, $customStyles, $fontDeclarations);
            }

            $content = self::renderChildrenOrText($tag, $customStyles, $fontDeclarations);

            // Unknown tags and spans without a usable style keep their content but get no wrapper
            $odtResult .= $styleName === null
                ? $content
                : '<text:span text:style-name="' . $styleName . '">' . $content . '</text:span>';
        }

        return $odtResult;
    }

    /**
     * Returns the odt representation of the content of a tag: the converted child tags
     * if there are any, otherwise the plain inner text.
     */
    private static function renderChildrenOrText($tag, &$customStyles, &$fontDeclarations) {
        if (!empty($tag['children'])) {
            return self::replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations);
        }
        return isset($tag['innerText']) ? $tag['innerText'] : '';
    }

    /**
     * Builds the odt style for the style attribute of a span and stores it in $customStyles.
     *
     * @return string|null The name of the style or null if the span has no supported style property
     */
    private static function registerCustomStyle($tag, &$customStyles, &$fontDeclarations) {
        if (!isset($tag['attributes']['style']) || !is_array($tag['attributes']['style'])) {
            return null;
        }

        $properties = array();
        foreach ($tag['attributes']['style'] as $styleName => $styleValue) {
            switch ($styleName) {
                case 'font-family':
                    // Only the first font of a font list is used; css quotes are not part of the name
                    $fontList = explode(',', $styleValue);
                    $fontName = trim($fontList[0], " \t\n\r\0\x0B'\"");
                    if ($fontName === '') {
                        break;
                    }
                    if (!in_array($fontName, $fontDeclarations, true)) {
                        $fontDeclarations[] = $fontName;
                    }
                    $properties[] = 'style:font-name="' . htmlspecialchars($fontName, ENT_QUOTES | ENT_XML1, 'UTF-8') . '"';
                    break;
                case 'font-size':
                    // Anchored at the start so that "12.5pt" is not read as "5pt"
                    if (preg_match('/^([0-9]+(?:\.[0-9]+)?)\s?(px|pt)\b/i', $styleValue, $matches)) {
                        $points = (float) $matches[1];
                        if (strtolower($matches[2]) === 'px') {
                            $points = round($points * 0.75);
                        }
                        $fontSize = self::formatPoints($points) . 'pt';
                        $properties[] = 'fo:font-size="' . $fontSize . '" style:font-size-asian="' . $fontSize . '" style:font-size-complex="' . $fontSize . '"';
                    }
                    break;
                case 'color':
                    if (preg_match('/^#([0-9A-Fa-f]{3}|[0-9A-Fa-f]{6})$/', $styleValue, $matches)) {
                        $hex = $matches[1];
                        // odt colors always have six digits, so "#f00" is expanded to "#ff0000"
                        if (strlen($hex) === 3) {
                            $hex = $hex[0] . $hex[0] . $hex[1] . $hex[1] . $hex[2] . $hex[2];
                        }
                        $properties[] = 'fo:color="#' . $hex . '"';
                    }
                    break;
            }
        }

        if (count($properties) === 0) {
            return null;
        }

        // All properties go into a single text-properties element of the style
        $definition = '<style:text-properties ' . implode(' ', $properties) . ' />';

        // The name is derived from the definition: it is reproducible, identical styles share
        // one entry and names from separate htmlToOdt() calls can only be equal if the styles are equal.
        $name = 'customStyle' . substr(md5($definition), 0, 12);
        $customStyles[$name] = $definition;

        return $name;
    }

    /**
     * Formats a point value with a dot as decimal separator and without trailing zeros
     * (12.0 => "12", 12.5 => "12.5"), independent of the current locale.
     */
    private static function formatPoints($points) {
        return rtrim(rtrim(number_format($points, 2, '.', ''), '0'), '.');
    }

    /**
     * Checks if the given text is a html string
     *
     * @param string $text The text to check
     * @return int|false 1 if the text contains a html tag, 0 if not, false if the regex engine failed
     */
    public static function isHtmlTag($text) {
        return preg_match(HTML_REGEX_PATTERN, $text);
    }

    /**
     * Checks if the given text includes a html string
     *
     * @param string $text The text to check
     * @return bool
     */
    public static function hasHtmlTag($text) {
        $result = preg_match_all(HTML_REGEX_PATTERN, $text);
        return is_numeric($result) && $result > 0;
    }

    /**
     * Returns an array of html elements
     *
     * Every entry is either array('text' => ...) for plain text or an array with the keys
     * 'name', 'attributes', 'innerText' and 'children' for a html tag.
     *
     * @param string $html A string with html tags
     * @return array
     */
    private static function getDataFromHtml($html) {
        $tags = array();
        $remaining = (string) $html;

        while (strlen($remaining) > 0) {
            // Without a further html tag the rest of the string is plain text
            if (preg_match(HTML_REGEX_PATTERN, $remaining, $match, PREG_OFFSET_CAPTURE) !== 1) {
                $tags[] = array('text' => $remaining);
                break;
            }

            $fullMatch = $match[0][0];
            $tagOffset = $match[0][1];
            $tagName = $match[1][0];

            // Push the text in front of the html tag to the result array
            if ($tagOffset > 0) {
                $tags[] = array('text' => substr($remaining, 0, $tagOffset));
            }

            // Group 3 (the body) is missing for the self-closing form of a tag
            if (isset($match[3]) && $match[3][1] >= 0) {
                $closingTag = '</' . $tagName . '>';
                $openingTag = substr($remaining, $tagOffset, $match[3][1] - $tagOffset);
                $innerHtml = $match[3][0];

                // The pattern is greedy and runs to the LAST closing tag, which merges siblings
                // with the same name ("<b>a</b> x <b>c</b>"). Cut the body at the closing tag that
                // really belongs to this opening tag; keep the greedy result for unbalanced markup.
                $closeOffset = self::findBalancedCloseOffset($innerHtml . $closingTag, $tagName);
                if ($closeOffset !== null) {
                    $innerHtml = (string) substr($innerHtml, 0, $closeOffset);
                }
                $elementLength = strlen($openingTag) + strlen($innerHtml) + strlen($closingTag);
            } else {
                $openingTag = $fullMatch;
                $innerHtml = '';
                $elementLength = strlen($fullMatch);
            }

            // Push the html tag data to the result array
            $tags[] = array(
                'name' => $tagName,
                'attributes' => self::parseAttributes($openingTag),
                'innerText' => strip_tags($innerHtml),
                'children' => self::hasHtmlTag($innerHtml) ? self::getDataFromHtml($innerHtml) : null
            );

            // Remove the processed text and html tag from the html string
            $remaining = (string) substr($remaining, $tagOffset + $elementLength);
        }

        return $tags;
    }

    /**
     * Finds the closing tag that belongs to an already consumed opening tag.
     *
     * @param string $body The html directly after the opening tag
     * @param string $tagName The name of the opening tag
     * @return int|null The offset of the matching closing tag inside $body or null if the tags are not balanced
     */
    private static function findBalancedCloseOffset($body, $tagName) {
        $name = preg_quote($tagName, '/');
        $pattern = '/<\/' . $name . '>|<' . $name . '(?=[\s\/>])(?:"[^"]*"|[^">])*>/';
        if (!preg_match_all($pattern, $body, $tokens, PREG_OFFSET_CAPTURE)) {
            return null;
        }

        $depth = 1;
        foreach ($tokens[0] as $token) {
            list($tokenText, $tokenOffset) = $token;
            if (substr($tokenText, 0, 2) === '</') {
                $depth--;
                if ($depth === 0) {
                    return $tokenOffset;
                }
            } elseif (substr($tokenText, -2) !== '/>') {
                // A nested opening tag with the same name needs its own closing tag
                $depth++;
            }
        }

        return null;
    }

    /**
     * Extracts the attributes of an opening tag.
     *
     * @param string $openingTag The opening tag including the angle brackets
     * @return array attribute name (lower case) => value; attributes without a value (like required)
     *               are stored as true and the style attribute as array of css property => value
     */
    private static function parseAttributes($openingTag) {
        $attributes = array();

        // Remove "<name" so that the tag name is not mistaken for an attribute
        $attributeSource = (string) preg_replace('/^<[A-Za-z]+/', '', $openingTag);
        $pattern = '/([A-Za-z][A-Za-z0-9\-]*)(?:=(?:"([^"]*)"|([0-9]+)))?/';
        if (!preg_match_all($pattern, $attributeSource, $found, PREG_SET_ORDER)) {
            return $attributes;
        }

        foreach ($found as $attribute) {
            $attrName = strtolower($attribute[1]);

            // Unmatched trailing groups are not part of the match: 2 entries = no value,
            // 3 entries = quoted value, 4 entries = unquoted numeric value
            if (count($attribute) === 2) {
                $attributes[$attrName] = true;
                continue;
            }

            $attrValue = trim(count($attribute) === 4 ? $attribute[3] : $attribute[2]);
            $attributes[$attrName] = $attrName === 'style'
                ? self::parseStyleDeclarations($attrValue)
                : $attrValue;
        }

        return $attributes;
    }

    /**
     * Splits the value of a style attribute into its css declarations.
     *
     * @param string $styleValue e.g. "color: #ff0000; font-size: 12pt"
     * @return array css property (lower case) => value
     */
    private static function parseStyleDeclarations($styleValue) {
        $declarations = array();

        // Decode entities first, otherwise the ";" of "&quot;" would be treated as a separator
        $decoded = html_entity_decode($styleValue, ENT_QUOTES, 'UTF-8');

        foreach (explode(';', $decoded) as $declaration) {
            $parts = explode(':', $declaration, 2);
            if (count($parts) !== 2) {
                // Empty segment (e.g. after a trailing ";") or a property without a value
                continue;
            }
            $property = strtolower(trim($parts[0]));
            if ($property === '') {
                continue;
            }
            $declarations[$property] = trim($parts[1]);
        }

        return $declarations;
    }
}