From 35869f1449dc13cbbd9949a8b7cfe25233a339b9 Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Wed, 17 Mar 2021 21:36:20 +0100 Subject: [PATCH] Add function dol_string_onlythesehtmlattributes() and option MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to enable it. --- htdocs/core/lib/functions.lib.php | 49 +++++++++++++++++++++++++++++-- test/phpunit/SecurityTest.php | 15 ++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index 1ff47bd2de3..d37fc0dd60e 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -670,6 +670,8 @@ function GETPOSTINT($paramname, $method = 0, $filter = null, $options = null, $n */ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = null) { + global $conf; + // Check is done after replacement switch ($check) { case 'none': @@ -738,6 +740,12 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = break; case 'restricthtml': // Recommended for most html textarea $out = dol_string_onlythesehtmltags($out, 0, 1, 1); + + // We should also exclude non expected attributes + if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) { + $out = dol_string_onlythesehtmlattributes($out); + } + break; case 'custom': if (empty($filter)) { @@ -6232,7 +6240,7 @@ function dol_string_nohtmltag($stringtoclean, $removelinefeed = 1, $pagecodeto = * * @param string $stringtoclean String to clean * @param int $cleanalsosomestyles Remove absolute/fixed positioning from inline styles - * @param int $removeclassattribute Remove the class attribute from tags + * @param int $removeclassattribute 1=Remove the class attribute from tags * @param int $cleanalsojavascript Remove also occurence of 'javascript:'. * @return string String cleaned * @@ -6245,6 +6253,10 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, "ol", "p", "q", "s", "section", "span", "strike", "strong", "title", "table", "tr", "th", "td", "u", "ul", "sup", "sub", "blockquote", "pre", "h1", "h2", "h3", "h4", "h5", "h6" ); + $allowed_attributes = array( + "class", "href", "src", "style", "id", "name", "data-html" + ); + $allowed_tags_string = join("><", $allowed_tags); $allowed_tags_string = '<'.$allowed_tags_string.'>'; @@ -6278,6 +6290,39 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, return $temp; } + +/** + * Clean a string from some undesirable HTML tags. + * Note. Not as secured as dol_string_onlythesehtmltags(). + * + * @param string $stringtoclean String to clean + * @param array $allowed_attributes Array of tags not allowed + * @return string String cleaned + * + * @see dol_escape_htmltag() strip_tags() dol_string_nohtmltag() dol_string_onlythesehtmltags() dol_string_neverthesehtmltags() + */ +function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes = array("class", "href", "src", "style", "id", "name", "data-html")) +{ + if (class_exists('DOMDocument')) { + $dom = new DOMDocument(); + $dom->loadHTML($stringtoclean, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL); + if (is_object($dom)) { + for ($els = $dom->getElementsByTagname('*'), $i = $els->length - 1; $i >= 0; $i--) { + for ($attrs = $els->item($i)->attributes, $ii = $attrs->length - 1; $ii >= 0; $ii--) { + // Delete attribute if not into allowed_attributes + if (! empty($attrs->item($ii)->name) && ! in_array($attrs->item($ii)->name, $allowed_attributes)) { + $els->item($i)->removeAttribute($attrs->item($ii)->name); + } + } + } + } + + return $dom->saveHTML(); + } else { + return $stringtoclean; + } +} + /** * Clean a string from some undesirable HTML tags. * Note. Not as secured as dol_string_onlythesehtmltags(). @@ -6287,7 +6332,7 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1, * @param string $cleanalsosomestyles Clean also some tags * @return string String cleaned * - * @see dol_escape_htmltag() strip_tags() dol_string_nohtmltag() dol_string_onlythesehtmltags() + * @see dol_escape_htmltag() strip_tags() dol_string_nohtmltag() dol_string_onlythesehtmltags() dol_string_onlythesehtmlattributes() */ function dol_string_neverthesehtmltags($stringtoclean, $disallowed_tags = array('textarea'), $cleanalsosomestyles = 0) { diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index b3a95d5f816..1d3b76cd4f2 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -571,6 +571,21 @@ class SecurityTest extends PHPUnit\Framework\TestCase return 0; } + /** + * testDolStringOnlyTheseHtmlAttributes + * + * @return number + */ + public function testDolStringOnlyTheseHtmlAttributes() + { + $stringtotest = '
abc
'; + $decodedstring = dol_string_onlythesehtmlattributes($stringtotest); + $decodedstring = preg_replace("/\n$/", "", $decodedstring); + $this->assertEquals('
abc
', $decodedstring, 'Function did not sanitize correclty with test 1'); + + return 0; + } + /** * testGetRandomPassword *