From 796b2d201acb9938b903fb2afa297db289ecc93e Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Tue, 29 Jun 2021 18:17:27 +0200 Subject: [PATCH] Enhance the sanitizing. --- htdocs/core/lib/functions.lib.php | 8 ++++++-- htdocs/main.inc.php | 9 +++++---- test/phpunit/SecurityTest.php | 10 +++++----- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index e3b288aff8e..87a4b966056 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -778,8 +778,12 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options = do { $oldstringtoclean = $out; - // We replace chars encoded with numeric HTML entities with real char (to avoid to have numeric entities used for obfuscation of injections) - $out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+);/i', 'realCharForNumericEntities', $out); + // We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step. + // No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are + // using a non coventionnel way to be encoded, to not have them sanitized just after) + $out = preg_replace_callback('/&#(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out); + + // Now we remove all remaining HTML entities staring with a number. We don't want such entities. $out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'. $out = dol_string_onlythesehtmltags($out, 0, 1, 1); diff --git a/htdocs/main.inc.php b/htdocs/main.inc.php index 93388190275..b66b5f5b211 100644 --- a/htdocs/main.inc.php +++ b/htdocs/main.inc.php @@ -53,7 +53,7 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) { /** * Return the real char for a numeric entities. - * This function is required by testSqlAndScriptInject(). + * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar. * * @param string $matches String of numeric entity * @return string New value @@ -61,17 +61,18 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) { function realCharForNumericEntities($matches) { $newstringnumentity = $matches[1]; + //print '$newstringnumentity='.$newstringnumentity; if (preg_match('/^x/i', $newstringnumentity)) { - $newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity)); + $newstringnumentity = hexdec(preg_replace('/;$/', '', preg_replace('/^x/i', '', $newstringnumentity))); } - // The numeric value we don't want as entities + // The numeric value we don't want as entities because they encode ascii char, and why using html entities on ascii except for haking ? if (($newstringnumentity >= 65 && $newstringnumentity <= 90) || ($newstringnumentity >= 97 && $newstringnumentity <= 122)) { return chr((int) $newstringnumentity); } - return '&#'.$matches[1]; + return '&#'.$matches[1]; // Value will be unchanged because regex was /&#( )/ } /** diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 7e93f7673f2..08d4ec88703 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -345,7 +345,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase $_GET["param5"]="a_1-b"; $_POST["param6"]="">assertEquals('">', $result); $result=GETPOST("param7", 'restricthtml'); - print __METHOD__." result=".$result."\n"; - $this->assertEquals('"c:\this is a path~1\aaan" abcdef', $result); + print __METHOD__." result param7 = ".$result."\n"; + $this->assertEquals('"c:\this is a path~1\aaan &#x;;;;" abcdef', $result); $result=GETPOST("param12", 'restricthtml'); print __METHOD__." result=".$result."\n"; @@ -488,11 +488,11 @@ class SecurityTest extends PHPUnit\Framework\TestCase $result=GETPOST("param13", 'restricthtml'); print __METHOD__." result=".$result."\n"; - $this->assertEquals('n n > < " XSS', $result, 'Test that HTML entities are decoded with restricthtml, but only for common alpha chars'); + $this->assertEquals('n n > < " XSS', $result, 'Test 13 that HTML entities are decoded with restricthtml, but only for common alpha chars'); $result=GETPOST("param13b", 'restricthtml'); print __METHOD__." result=".$result."\n"; - $this->assertEquals('n n > < " XSS', $result, 'Test that HTML entities are decoded with restricthtml, but only for common alpha chars'); + $this->assertEquals('n n > < " XSS', $result, 'Test 13b that HTML entities are decoded with restricthtml, but only for common alpha chars'); // Special test for GETPOST of backtopage, backtolist or backtourl parameter