diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php
index e3b288aff8e..87a4b966056 100644
--- a/htdocs/core/lib/functions.lib.php
+++ b/htdocs/core/lib/functions.lib.php
@@ -778,8 +778,12 @@ function checkVal($out = '', $check = 'alphanohtml', $filter = null, $options =
do {
$oldstringtoclean = $out;
- // We replace chars encoded with numeric HTML entities with real char (to avoid to have numeric entities used for obfuscation of injections)
- $out = preg_replace_callback('/(x?[0-9][0-9a-f]+);/i', 'realCharForNumericEntities', $out);
+ // We replace chars from a/A to z/Z encoded with numeric HTML entities with the real char so we won't loose the chars at the next step.
+ // No need to use a loop here, this step is not to sanitize (this is done at next step, this is to try to save chars, even if they are
+ // using a non coventionnel way to be encoded, to not have them sanitized just after)
+ $out = preg_replace_callback('/(x?[0-9][0-9a-f]+;?)/i', 'realCharForNumericEntities', $out);
+
+ // Now we remove all remaining HTML entities staring with a number. We don't want such entities.
$out = preg_replace('/?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'.
$out = dol_string_onlythesehtmltags($out, 0, 1, 1);
diff --git a/htdocs/main.inc.php b/htdocs/main.inc.php
index 93388190275..b66b5f5b211 100644
--- a/htdocs/main.inc.php
+++ b/htdocs/main.inc.php
@@ -53,7 +53,7 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) {
/**
* Return the real char for a numeric entities.
- * This function is required by testSqlAndScriptInject().
+ * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
*
* @param string $matches String of numeric entity
* @return string New value
@@ -61,17 +61,18 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) {
function realCharForNumericEntities($matches)
{
$newstringnumentity = $matches[1];
+ //print '$newstringnumentity='.$newstringnumentity;
if (preg_match('/^x/i', $newstringnumentity)) {
- $newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity));
+ $newstringnumentity = hexdec(preg_replace('/;$/', '', preg_replace('/^x/i', '', $newstringnumentity)));
}
- // The numeric value we don't want as entities
+ // The numeric value we don't want as entities because they encode ascii char, and why using html entities on ascii except for haking ?
if (($newstringnumentity >= 65 && $newstringnumentity <= 90) || ($newstringnumentity >= 97 && $newstringnumentity <= 122)) {
return chr((int) $newstringnumentity);
}
- return ''.$matches[1];
+ return ''.$matches[1]; // Value will be unchanged because regex was /( )/
}
/**
diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php
index 7e93f7673f2..08d4ec88703 100644
--- a/test/phpunit/SecurityTest.php
+++ b/test/phpunit/SecurityTest.php
@@ -345,7 +345,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$_GET["param5"]="a_1-b";
$_POST["param6"]="">../>../svg><<<../>../>../animate =alert(1)>abc';
- $_GET["param7"]='"c:\this is a path~1\aaan" abcdef';
+ $_GET["param7"]='"c:\this is a path~1\aaan 110;" abcdef';
$_POST["param8a"]="Hacker