Add option to grab images into same dir or subdir

This commit is contained in:
Laurent Destailleur 2018-02-19 14:56:37 +01:00
parent 60fe1fa503
commit 60a77a6d85
4 changed files with 315 additions and 276 deletions

View File

@ -222,9 +222,11 @@ function includeContainer($containeralias)
* @param string $tmp Content to parse
* @param string $action Var $action
* @param string $modifylinks 0=Do not modify content, 1=Replace links with a link to viewimage
* @param int $grabimages 0=Do not grab images, 1=Grab images
* @param string $grabimagesinto 'root' or 'subpage'
* @return void
*/
function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0)
function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0, $grabimages=1, $grabimagesinto='subpage')
{
global $conf;
@ -252,15 +254,18 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
}
$linkwithoutdomain = $regs[2][$key];
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
$dirforimages = '/'.$objectpage->pageurl;
if ($grabimagesinto == 'root') $dirforimages='';
// Define $filetosave and $filename
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.$dirforimages.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.$dirforimages.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$filename = 'image/'.$object->ref.$dirforimages.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
// Clean the aa/bb/../cc into aa/cc
$filetosave = preg_replace('/\/[^\/]+\/\.\./', '', $filetosave);
@ -272,30 +277,33 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
if (empty($alreadygrabbed[$urltograbbis]))
{
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
if ($grabimages)
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
dol_mkdir(dirname($filetosave));
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
}
}
}
@ -344,30 +352,33 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
if (empty($alreadygrabbed[$urltograbbis]))
{
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
if ($grabimages)
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
dol_mkdir(dirname($filetosave));
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
}
}
}

View File

@ -69,3 +69,8 @@ WEBSITE_USE_WEBSITE_ACCOUNTSTooltip=Enable the table to store web site accounts
YouMustDefineTheHomePage=You must first define the default Home page
OnlyEditionOfSourceForGrabbedContentFuture=Note: only edition of HTML source will be possible when a page content is initialized by grabbing it from an external page (WYSIWYG editor will not be available)
OnlyEditionOfSourceForGrabbedContent=Only edition of HTML source is possible when content was grabbed from an external site
GrabImagesInto=Also grab images found in CSS and page.
ImagesShouldBeSavedInto=Images should be saved into directory
WebsiteRootOfImages=Root directory for website images
SubdirOfPage=Sub-directory dedicated to page
AliasPageAlreadyExists=Alias page <strong>%s</strong> already exists

View File

@ -113,13 +113,16 @@ class WebsitePage extends CommonObject
*/
public function create(User $user, $notrigger = false)
{
// Pass both meta fields through dol_trunc (size argument 255) before the record is created.
foreach (array('description', 'keywords') as $metafield)
{
$this->{$metafield} = dol_trunc($this->{$metafield}, 255, 'right', 'utf-8', 1);
}
// Delegate the actual creation to createCommon() and hand back its result unchanged.
return $this->createCommon($user, $notrigger);
}
/**
* Load object in memory from the database
*
* @param int $id Id object. If this is 0, the default page of website_id will be used, if not defined, the first one found.
* @param int $id Id object. If this is 0, the value in $page will be used. If not found or $page is not defined, the default page of website_id will be used, or the first page found if no default is set.
* @param string $website_id Web site id (page name must also be filled if this parameter is used)
* @param string $page Page name (website id must also be filled if this parameter is used)
*

View File

@ -261,6 +261,9 @@ if ($action == 'addcontainer')
if (GETPOST('fetchexternalurl','alpha'))
{
$urltograb=GETPOST('externalurl','alpha');
$grabimages=GETPOST('grabimages','alpha');
$grabimagesinto=GETPOST('grabimagesinto','alpha');
//var_dump($grabimages);exit;
}
if ($urltograb)
@ -275,234 +278,248 @@ if ($action == 'addcontainer')
{
$urltograb.='/';
}
$pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam)));
$urltograbdirwithoutslash = dirname($urltograb.'.');
$urltograbdirrootwithoutslash = getRootURLFromURL($urltograbdirwithoutslash);
// Example: $urltograbdirwithoutslash is now https://www.dolimed.com/screenshots
// and $urltograbdirrootwithoutslash is https://www.dolimed.com
$tmp = getURLContent($urltograb);
if ($tmp['curl_error_no'])
// Check pageurl is not already used
$tmpwebsitepage = new WebsitePage($db);
$result = $tmpwebsitepage->fetch(0, $object->id, $pageurl);
if ($result > 0)
{
setEventMessages($langs->trans("AliasPageAlreadyExists", $pageurl), null, 'errors');
$error++;
setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
elseif ($tmp['http_code'] != '200')
if (! $error)
{
$error++;
setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors');
$action='createcontainer';
}
else
{
// Remove comments
$tmp['content'] = removeHtmlComment($tmp['content']);
preg_match('/<head>(.*)<\/head>/is', $tmp['content'], $reg);
$head = $reg[1];
$objectpage->type_container = 'page';
$objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam)));
if (empty($objectpage->pageurl))
{
$tmpdomain = getDomainFromURL($urltograb);
$objectpage->pageurl=$tmpdomain.'-home';
}
if (preg_match('/<title>(.*)<\/title>/ims', $head, $regtmp))
$tmp = getURLContent($urltograb);
if ($tmp['curl_error_no'])
{
$objectpage->title = $regtmp[1];
$error++;
setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
if (preg_match('/<meta name="description"[^"]+content="([^"]+)"/ims', $head, $regtmp))
elseif ($tmp['http_code'] != '200')
{
$objectpage->description = $regtmp[1];
$error++;
setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors');
$action='createcontainer';
}
if (preg_match('/<meta name="keywords"[^"]+content="([^"]+)"/ims', $head, $regtmp))
else
{
$objectpage->keywords = $regtmp[1];
// Remove comments
$tmp['content'] = removeHtmlComment($tmp['content']);
preg_match('/<head>(.*)<\/head>/is', $tmp['content'], $reg);
$head = $reg[1];
$objectpage->type_container = 'page';
$objectpage->pageurl = $pageurl;
if (empty($objectpage->pageurl))
{
$tmpdomain = getDomainFromURL($urltograb);
$objectpage->pageurl=$tmpdomain.'-home';
}
if (preg_match('/<title>(.*)<\/title>/ims', $head, $regtmp))
{
$objectpage->title = $regtmp[1];
}
if (preg_match('/<meta name="description"[^"]+content="([^"]+)"/ims', $head, $regtmp))
{
$objectpage->description = $regtmp[1];
}
if (preg_match('/<meta name="keywords"[^"]+content="([^"]+)"/ims', $head, $regtmp))
{
$objectpage->keywords = $regtmp[1];
}
if (preg_match('/<html\s+lang="([^"]+)"/ims', $tmp['content'], $regtmp))
{
$tmplang=explode('-', $regtmp[1]);
$objectpage->lang = $tmplang[0].($tmplang[1] ? '_'.strtoupper($tmplang[1]) : '');
}
$objectpage->content = $tmp['content'];
$objectpage->content = preg_replace('/^.*<body(\s[^>]*)*>/ims', '', $objectpage->content);
$objectpage->content = preg_replace('/<\/body(\s[^>]*)*>.*$/ims', '', $objectpage->content);
$absoluteurlinaction=$urltograbdirwithoutslash;
// TODO Replace 'action="$urltograbdirwithoutslash' into action="/"
// TODO Replace 'action="$urltograbdirwithoutslash..."' into action="..."
// TODO Replace 'a href="$urltograbdirwithoutslash' into a href="/"
// TODO Replace 'a href="$urltograbdirwithoutslash..."' into a href="..."
// Now loop to fetch all css files. Include them inline into header of page
$objectpage->htmlheader = $tmp['content'];
$objectpage->htmlheader = preg_replace('/^.*<head(\s[^>]*)*>/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<\/head(\s[^>]*)*>.*$/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<base(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="robot(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="keywords(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="title(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="description(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="generator(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
//$objectpage->htmlheader = preg_replace('/<meta name="verify-v1[^>]*>\n*/ims', '', $objectpage->htmlheader);
//$objectpage->htmlheader = preg_replace('/<meta name="msvalidate.01[^>]*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<title>[^<]*<\/title>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<link[^>]*rel="shortcut[^>]*>\n/ims', '', $objectpage->htmlheader);
// Now loop to fetch JS
$tmp = $objectpage->htmlheader;
preg_match_all('/<script([^\.>]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
$domaintograbbis = getDomainFromURL($urltograbbis);
if ($domaintograb != $domaintograbbis) continue;
/*
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='createcontainer';
}
else
{
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($file, octdec($conf->global->MAIN_UMASK));
}
*/
//$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
}
$objectpage->htmlheader = trim($tmp);
// Now loop to fetch CSS
$pagecsscontent = "\n".'<style>'."\n";
preg_match_all('/<link([^\.>]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
$domaintograbbis = getDomainFromURL($urltograbbis);
if ($domaintograb != $domaintograbbis) continue;
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='createcontainer';
}
else
{
//dol_mkdir(dirname($filetosave));
//$fp = fopen($filetosave, "w");
//fputs($fp, $tmpgeturl['content']);
//fclose($fp);
//if (! empty($conf->global->MAIN_UMASK))
// @chmod($file, octdec($conf->global->MAIN_UMASK));
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1, $grabimages, $grabimagesinto);
$pagecsscontent.=$tmpgeturl['content']."\n";
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
}
}
$pagecsscontent.='</style>'."\n";
//var_dump($pagecsscontent);
//print dol_escape_htmltag($tmp);exit;
$objectpage->htmlheader .= $pagecsscontent;
// Now loop to fetch all images into page
$tmp = $objectpage->content;
getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1, $grabimages, $grabimagesinto);
//print dol_escape_htmltag($tmp);exit;
$objectpage->content = $tmp;
$objectpage->grabbed_from = $urltograb;
}
if (preg_match('/<html\s+lang="([^"]+)"/ims', $tmp['content'], $regtmp))
{
$tmplang=explode('-', $regtmp[1]);
$objectpage->lang = $tmplang[0].($tmplang[1] ? '_'.strtoupper($tmplang[1]) : '');
}
$objectpage->content = $tmp['content'];
$objectpage->content = preg_replace('/^.*<body(\s[^>]*)*>/ims', '', $objectpage->content);
$objectpage->content = preg_replace('/<\/body(\s[^>]*)*>.*$/ims', '', $objectpage->content);
$absoluteurlinaction=$urltograbdirwithoutslash;
// TODO Replace 'action="$urltograbdirwithoutslash' into action="/"
// TODO Replace 'action="$urltograbdirwithoutslash..."' into action="..."
// TODO Replace 'a href="$urltograbdirwithoutslash' into a href="/"
// TODO Replace 'a href="$urltograbdirwithoutslash..."' into a href="..."
// Now loop to fetch all css files. Include them inline into header of page
$objectpage->htmlheader = $tmp['content'];
$objectpage->htmlheader = preg_replace('/^.*<head(\s[^>]*)*>/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<\/head(\s[^>]*)*>.*$/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<base(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="robot(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="keywords(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="title(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="description(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<meta name="generator(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
//$objectpage->htmlheader = preg_replace('/<meta name="verify-v1[^>]*>\n*/ims', '', $objectpage->htmlheader);
//$objectpage->htmlheader = preg_replace('/<meta name="msvalidate.01[^>]*>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<title>[^<]*<\/title>\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/<link[^>]*rel="shortcut[^>]*>\n/ims', '', $objectpage->htmlheader);
// Now loop to fetch JS
$tmp = $objectpage->htmlheader;
preg_match_all('/<script([^\.>]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
$domaintograbbis = getDomainFromURL($urltograbbis);
if ($domaintograb != $domaintograbbis) continue;
/*
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='createcontainer';
}
else
{
dol_mkdir(dirname($filetosave));
$fp = fopen($filetosave, "w");
fputs($fp, $tmpgeturl['content']);
fclose($fp);
if (! empty($conf->global->MAIN_UMASK))
@chmod($file, octdec($conf->global->MAIN_UMASK));
}
*/
//$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
}
$objectpage->htmlheader = trim($tmp);
// Now loop to fetch CSS
$pagecsscontent = "\n".'<style>'."\n";
preg_match_all('/<link([^\.>]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
{
$urltograbbis = $regs[2][$key];
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
}
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
$domaintograbbis = getDomainFromURL($urltograbbis);
if ($domaintograb != $domaintograbbis) continue;
$tmpgeturl = getURLContent($urltograbbis);
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='createcontainer';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='createcontainer';
}
else
{
//dol_mkdir(dirname($filetosave));
//$fp = fopen($filetosave, "w");
//fputs($fp, $tmpgeturl['content']);
//fclose($fp);
//if (! empty($conf->global->MAIN_UMASK))
// @chmod($file, octdec($conf->global->MAIN_UMASK));
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1);
$pagecsscontent.=$tmpgeturl['content']."\n";
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
}
}
$pagecsscontent.='</style>'."\n";
//var_dump($pagecsscontent);
//print dol_escape_htmltag($tmp);exit;
$objectpage->htmlheader .= $pagecsscontent;
// Now loop to fetch all images
$tmp = $objectpage->content;
getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1);
//print dol_escape_htmltag($tmp);exit;
$objectpage->content = $tmp;
$objectpage->grabbed_from = $urltograb;
}
}
else
@ -1895,27 +1912,30 @@ if ($action == 'editmeta' || $action == 'createcontainer')
print '<!-- Edit or create page/container -->'."\n";
//print '<div class="fichecenter">';
if ($conf->global->MAIN_FEATURES_LEVEL >= 1)
if ($action == 'createcontainer')
{
if ($action == 'createcontainer')
{
print '<br>';
print '<br>';
print ' * '.$langs->trans("CreateByFetchingExternalPage").'<br><hr>';
print '<table class="border" width="100%">';
print '<tr><td class="titlefield">';
print $langs->trans("URL");
print '</td><td>';
print '<input class="flat minwidth300" type="text" name="externalurl" value="'.dol_escape_htmltag(GETPOST('externalurl','alpha')).'" placeholder="https://externalsite/pagetofetch"> ';
print '<input class="button" type="submit" name="fetchexternalurl" value="'.dol_escape_htmltag($langs->trans("FetchAndCreate")).'">';
print '<br><br>'.info_admin($langs->trans("OnlyEditionOfSourceForGrabbedContentFuture"), 0, 0, '1');
print '</td></tr>';
print '</table>';
print ' * '.$langs->trans("CreateByFetchingExternalPage").'<br><hr>';
print '<table class="border" width="100%">';
print '<tr><td class="titlefield">';
print $langs->trans("URL");
print '</td><td>';
print '<input class="flat minwidth300" type="text" name="externalurl" value="'.dol_escape_htmltag(GETPOST('externalurl','alpha')).'" placeholder="https://externalsite/pagetofetch"> ';
print '<input class="flat paddingtop" type="checkbox" name="grabimages" value="1" checked="checked"> '.$langs->trans("GrabImagesInto");
print ' ';
print $langs->trans("ImagesShouldBeSavedInto").' ';
$arraygrabimagesinto=array('root'=>$langs->trans("WebsiteRootOfImages"), 'subpage'=>$langs->trans("SubdirOfPage"));
print $form->selectarray('grabimagesinto', $arraygrabimagesinto, GETPOSTISSET('grabimagesinto')?GETPOST('grabimagesinto'):'root');
print '<br>';
print '<input class="button" style="margin-top: 5px" type="submit" name="fetchexternalurl" value="'.dol_escape_htmltag($langs->trans("FetchAndCreate")).'">';
print '<br>'.info_admin($langs->trans("OnlyEditionOfSourceForGrabbedContentFuture"), 0, 0, '1');
print '</td></tr>';
print '</table>';
print '<br>';
print '<br>';
print ' * '.$langs->trans("OrEnterPageInfoManually").'<br><hr>';
}
print ' * '.$langs->trans("OrEnterPageInfoManually").'<br><hr>';
}
print '<table class="border" width="100%">';