From aa3fd067fcef69eedc0907802dc50180ec56a33f Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Fri, 6 Oct 2017 23:54:45 +0200 Subject: [PATCH] Fix regex UNGREEDY --- htdocs/core/lib/website.lib.php | 62 +++++++++++++++++++++++++++++++-- htdocs/websites/index.php | 37 ++------------------ 2 files changed, 61 insertions(+), 38 deletions(-) diff --git a/htdocs/core/lib/website.lib.php b/htdocs/core/lib/website.lib.php index e8c37f12815..bab4f6c5487 100644 --- a/htdocs/core/lib/website.lib.php +++ b/htdocs/core/lib/website.lib.php @@ -54,12 +54,12 @@ function dolWebsiteOutput($content) $nbrep=0; if (! $symlinktomediaexists) { - $content=preg_replace('/()/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep); + $content=preg_replace('/(]*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep); $content=preg_replace('/(url\(["\']?)[^\)]*viewimage\.php([^\)]*)modulepart=medias([^\)]*)file=([^\)]*)(["\']?\))/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep); } else { - $content=preg_replace('/()/', '\1medias/\4\5', $content, -1, $nbrep); + $content=preg_replace('/(]*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1medias/\4\5', $content, -1, $nbrep); $content=preg_replace('/(url\(["\']?)[^\)]*viewimage\.php([^\)]*)modulepart=medias([^\)]*)file=([^\)]*)(["\']?\))/', '\1medias/\4\5', $content, -1, $nbrep); } } @@ -108,7 +108,7 @@ function dolWebsiteReplacementOfLinks($website, $content) $content = preg_replace('/url\((["\']?)medias\//', 'url(\1'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file=', $content, -1, $nbrep); // ]*src=")(?!(http|'.preg_quote(DOL_URL_ROOT,'/').'\/viewimage))/', '\1'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file=', $content, -1, $nbrep); return $content; } @@ -277,3 +277,59 @@ function 
exportWebSite($website)
     return $filename;
 }
+
+/**
+ * Download all images referenced in $objectpage->content and save them under the medias output directory (image/<website ref>/<page url>/...).
+ * If $modifylinks is set, links to images will be replaced with a link to the viewimage wrapper.
+ *
+ * @param   Website     $object         Object website (its ref is used to build the target directory)
+ * @param   WebsitePage $objectpage     Object website page (its content is scanned, its pageurl is used to build the target directory)
+ * @param   string      $urltograb      Base URL prepended to image links that do not start with "http"
+ * @param   string      $tmp            Content to update, passed by reference (only modified when $modifylinks is set)
+ * @param   string      $action         Action code, passed by reference (set to 'create' when a download fails)
+ * @param   int         $modifylinks    0=Do not modify content, 1=Replace links with a link to the viewimage wrapper
+ * @return  void
+ */
+function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0)
+{
+    global $conf;
+    $error = 0;    // Local failure counter (not returned): callers see failures through $action and the event message
+    preg_match_all('/]*)>/i', $objectpage->content, $regs);    // NOTE(review): this pattern and the replacement string below look truncated (HTML tags stripped?) - confirm against upstream
+    foreach ($regs[0] as $key => $val)
+    {
+        $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
+        $linkwithoutdomain = $regs[2][$key];
+        $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
+        if (preg_match('/^http/', $regs[2][$key]))    // Absolute link: grab it as is and drop scheme + host for the local path
+        {
+            $urltograbbis = $regs[2][$key];
+            $linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
+            $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
+        }
+
+        $tmpgeturl = getURLContent($urltograbbis);
+        if ($tmpgeturl['curl_error_no'])
+        {
+            $error++;
+            setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
+            $action='create';
+        }
+        else
+        {
+            dol_mkdir(dirname($filetosave));
+
+            $fp = fopen($filetosave, "w");
+            fputs($fp, $tmpgeturl['content']);
+            fclose($fp);
+            if (! empty($conf->global->MAIN_UMASK))
+                @chmod($filetosave, octdec($conf->global->MAIN_UMASK));    // Apply the configured mask to the file just written
+        }
+
+        if ($modifylinks)
+        {
+            $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
+            $tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
+        }
+    }
+}
+
diff --git a/htdocs/websites/index.php b/htdocs/websites/index.php
index 1f3afb76fba..3094ef3b5b4 100644
--- a/htdocs/websites/index.php
+++ b/htdocs/websites/index.php
@@ -329,7 +329,7 @@ if ($action == 'add')
             */
             $tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
         }
-        $objectpage->htmlheader = $tmp;
+        $objectpage->htmlheader = trim($tmp);
 
         // Now loop to fetch CSS
@@ -382,40 +382,7 @@ if ($action == 'add')
         // Now loop to fetch all images
         $tmp = $objectpage->content;
-        preg_match_all('/]*)>/i', $objectpage->content, $regs);
-        foreach ($regs[0] as $key => $val)
-        {
-            $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
-            $linkwithoutdomain = $regs[2][$key];
-            $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
-            if (preg_match('/^http/', $regs[2][$key]))
-            {
-                $urltograbbis = $regs[2][$key];
-                $linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
-                $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
-            }
-
-            $tmpgeturl = getURLContent($urltograbbis);
-            if ($tmpgeturl['curl_error_no'])
-            {
-                $error++;
-                setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
-                $action='create';
-            }
-            else
-            {
-                dol_mkdir(dirname($filetosave));
-
-                $fp = fopen($filetosave, "w");
-                fputs($fp, $tmpgeturl['content']);
-                fclose($fp);
-                if (!
empty($conf->global->MAIN_UMASK)) - @chmod($file, octdec($conf->global->MAIN_UMASK)); - } - - $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain; - $tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp); - } + getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1); //print dol_escape_htmltag($tmp);exit; $objectpage->content = $tmp;