Fix regex UNGREEDY

This commit is contained in:
Laurent Destailleur 2017-10-06 23:54:45 +02:00
parent 8e187c2759
commit aa3fd067fc
2 changed files with 61 additions and 38 deletions

View File

@ -54,12 +54,12 @@ function dolWebsiteOutput($content)
$nbrep=0;
if (! $symlinktomediaexists)
{
$content=preg_replace('/(<img.*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep);
$content=preg_replace('/(<img[^>]*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep);
$content=preg_replace('/(url\(["\']?)[^\)]*viewimage\.php([^\)]*)modulepart=medias([^\)]*)file=([^\)]*)(["\']?\))/', '\1'.$urlwithroot.'/viewimage.php\2modulepart=medias\3file=\4\5', $content, -1, $nbrep);
}
else
{
$content=preg_replace('/(<img.*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1medias/\4\5', $content, -1, $nbrep);
$content=preg_replace('/(<img[^>]*src=")[^\"]*viewimage\.php([^\"]*)modulepart=medias([^\"]*)file=([^\"]*)("[^\/]*\/?>)/', '\1medias/\4\5', $content, -1, $nbrep);
$content=preg_replace('/(url\(["\']?)[^\)]*viewimage\.php([^\)]*)modulepart=medias([^\)]*)file=([^\)]*)(["\']?\))/', '\1medias/\4\5', $content, -1, $nbrep);
}
}
@ -108,7 +108,7 @@ function dolWebsiteReplacementOfLinks($website, $content)
$content = preg_replace('/url\((["\']?)medias\//', 'url(\1'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file=', $content, -1, $nbrep);
// <img src="image.png... => <img src="dolibarr/viewimage.php/modulepart=medias&file=image.png...
$content = preg_replace('/(<img.*src=")(?!(http|'.preg_quote(DOL_URL_ROOT,'/').'\/viewimage))/', '\1'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file=', $content, -1, $nbrep);
$content = preg_replace('/(<img[^>]*src=")(?!(http|'.preg_quote(DOL_URL_ROOT,'/').'\/viewimage))/', '\1'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file=', $content, -1, $nbrep);
return $content;
}
@ -277,3 +277,59 @@ function exportWebSite($website)
return $filename;
}
/**
* Download all images found into page content $tmp.
* If $modifylinks is set, links to images will be replace with a link to viewimage wrapper.
*
* @param Website $object Object website
* @param WebsitePage $objectpage Object website page
* @param string $urltograb URL to grab
* @param string $tmp Content to parse
* @param string $action Var $action
* @param string $modifylinks 0=Do not modify content, 1=Replace links with a link to
* @return void
*/
function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0)
{
global $conf;
preg_match_all('/<img([^\.\/]+)src="([^>"]+)"([^>]*)>/i', $objectpage->content, $regs);
foreach ($regs[0] as $key => $val)
{
$urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
$linkwithoutdomain = $regs[2][$key];
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
else
{
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($file, octdec($conf->global->MAIN_UMASK));
}
if ($modifylinks)
{
$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '<img'.$regs[1][$key].'src="'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file='.$filename.'"'.$regs[3][$key].'>', $tmp);
}
}
}

View File

@ -329,7 +329,7 @@ if ($action == 'add')
*/
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
}
$objectpage->htmlheader = $tmp;
$objectpage->htmlheader = trim($tmp);
// Now loop to fetch CSS
@ -382,40 +382,7 @@ if ($action == 'add')
// Now loop to fetch all images
$tmp = $objectpage->content;
preg_match_all('/<img([^\.\/]+)src="([^>"]+)"([^>]*)>/i', $objectpage->content, $regs);
foreach ($regs[0] as $key => $val)
{
$urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
$linkwithoutdomain = $regs[2][$key];
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
else
{
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($file, octdec($conf->global->MAIN_UMASK));
}
$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '<img'.$regs[1][$key].'src="'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file='.$filename.'"'.$regs[3][$key].'>', $tmp);
}
getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1);
//print dol_escape_htmltag($tmp);exit;
$objectpage->content = $tmp;