Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML

This commit is contained in:
Laurent Destailleur 2021-07-06 00:47:43 +02:00
parent df1d1209f4
commit 0dfa7bdbcc
2 changed files with 39 additions and 4 deletions

View File

@ -778,6 +778,16 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
do {
$oldstringtoclean = $out;
if (!empty($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML)) {
$dom = new DOMDocument;
try {
$dom->loadHTML($out, LIBXML_ERR_NONE|LIBXML_HTML_NOIMPLIED|LIBXML_HTML_NODEFDTD|LIBXML_NONET|LIBXML_NOWARNING|LIBXML_NOXMLDECL);
} catch(Exception $e) {
return 'InvalidHTMLString';
}
$out = $dom->saveHTML();
}
// Ckeditor use the numeric entitic for apostrophe so we force it to text entity (all other special chars are correctly
// encoded using text entities). This is a fix for CKeditor.
$out = preg_replace('/'/i', ''', $out);
@ -794,7 +804,8 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
// We should also exclude non expected attributes
if (!empty($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES)) {
$out = dol_string_onlythesehtmlattributes($out);
// Warning, the function may add a LF so we are forced to trim to compare with old $out without having always a difference and an infinit loop.
$out = trim(dol_string_onlythesehtmlattributes($out));
}
} while ($oldstringtoclean != $out);
break;
@ -6311,7 +6322,7 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1,
$stringtoclean = preg_replace('/:|&#0+58|&#x3A/i', '', $stringtoclean); // refused string ':' encoded (no reason to have a : encoded like this) to disable 'javascript:...'
$stringtoclean = preg_replace('/javascript\s*:/i', '', $stringtoclean);
$temp = strip_tags($stringtoclean, $allowed_tags_string);
$temp = strip_tags($stringtoclean, $allowed_tags_string); // Warning: This remove also undesired </> changing string obfuscated with </> that pass injection detection into harmfull string
if ($cleanalsosomestyles) { // Clean for remaining html tags
$temp = preg_replace('/position\s*:\s*(absolute|fixed)\s*!\s*important/i', '', $temp); // Note: If hacker try to introduce css comment into string to bypass this regex, the string must also be encoded by the dol_htmlentitiesbr during output so it become harmless
@ -6361,8 +6372,8 @@ function dol_string_onlythesehtmlattributes($stringtoclean, $allowed_attributes
}
$return = $dom->saveHTML();
//$return = '<html><body>aaaa</p>bb<p>ssdd</p>'."\n<p>aaa</p>aa<p>bb</p>";
$return = preg_replace('/^<html><body>/', '', $return);
$return = preg_replace('/<\/body><\/html>$/', '', $return);
return $return;

View File

@ -363,6 +363,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$_POST["param13"]='&#110; &#x6E; &gt; &lt; &quot; <a href=\"j&#x61;vascript:alert(document.domain)\">XSS</a>';
$_POST["param13b"]='&#110; &#x6E; &gt; &lt; &quot; <a href=\"j&#x61vascript:alert(document.domain)\">XSS</a>';
$_POST["param14"]="Text with ' encoded with the numeric html entity converted into text entity &#39; (like when submited by CKEditor)";
$_POST["param15"]="<img onerror<=alert(document.domain)> src=>0xbeefed";
//$_POST["param13"]='javascript%26colon%26%23x3B%3Balert(1)';
//$_POST["param14"]='javascripT&javascript#x3a alert(1)';
@ -480,7 +481,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
// Test with restricthtml we must remove html open/close tag and content but not htmlentities (we can decode html entities for ascii chars like &#110;)
$result=GETPOST("param6", 'restricthtml');
print __METHOD__." result=".$result."\n";
print __METHOD__." result param6=".$result."\n";
$this->assertEquals('&quot;&gt;', $result);
$result=GETPOST("param7", 'restricthtml');
@ -503,6 +504,29 @@ class SecurityTest extends PHPUnit\Framework\TestCase
print __METHOD__." result=".$result."\n";
$this->assertEquals("Text with ' encoded with the numeric html entity converted into text entity &apos; (like when submited by CKEditor)", $result, 'Test 14');
$result=GETPOST("param15", 'restricthtml'); // <img onerror<=alert(document.domain)> src=>0xbeefed
print __METHOD__." result=".$result."\n";
$this->assertEquals("<img onerror=alert(document.domain) src=>0xbeefed", $result, 'Test 15a'); // The GETPOST return a harmull string
// Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
$result=GETPOST("param15", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('InvalidHTMLString', $result, 'Test 15b');
unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);
// Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad atrributes
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
$result=GETPOST("param15", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('<img src="">0xbeefed', $result, 'Test 15b');
unset($conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES);
// Special test for GETPOST of backtopage, backtolist or backtourl parameter
$_POST["backtopage"]='//www.google.com';