Add option to grab images into same dir or subdir
This commit is contained in:
parent
60fe1fa503
commit
60a77a6d85
@ -222,9 +222,11 @@ function includeContainer($containeralias)
|
||||
* @param string $tmp Content to parse
|
||||
* @param string $action Var $action
|
||||
* @param string $modifylinks 0=Do not modify content, 1=Replace links with a link to viewimage
|
||||
* @param int $grabimages 0=Do not grab images, 1=Grab images
|
||||
* @param string $grabimagesinto 'root' or 'subpage'
|
||||
* @return void
|
||||
*/
|
||||
function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0)
|
||||
function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modifylinks=0, $grabimages=1, $grabimagesinto='subpage')
|
||||
{
|
||||
global $conf;
|
||||
|
||||
@ -252,15 +254,18 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
|
||||
}
|
||||
|
||||
$linkwithoutdomain = $regs[2][$key];
|
||||
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
$dirforimages = '/'.$objectpage->pageurl;
|
||||
if ($grabimagesinto == 'root') $dirforimages='';
|
||||
|
||||
// Define $filetosave and $filename
|
||||
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.$dirforimages.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
if (preg_match('/^http/', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $regs[2][$key];
|
||||
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
|
||||
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.$dirforimages.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
}
|
||||
|
||||
$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$filename = 'image/'.$object->ref.$dirforimages.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
|
||||
// Clean the aa/bb/../cc into aa/cc
|
||||
$filetosave = preg_replace('/\/[^\/]+\/\.\./', '', $filetosave);
|
||||
@ -272,30 +277,33 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
|
||||
|
||||
if (empty($alreadygrabbed[$urltograbbis]))
|
||||
{
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
if ($grabimages)
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
else
|
||||
{
|
||||
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
else
|
||||
{
|
||||
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
|
||||
|
||||
dol_mkdir(dirname($filetosave));
|
||||
dol_mkdir(dirname($filetosave));
|
||||
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -344,30 +352,33 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
|
||||
|
||||
if (empty($alreadygrabbed[$urltograbbis]))
|
||||
{
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
if ($grabimages)
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
else
|
||||
{
|
||||
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='create';
|
||||
}
|
||||
else
|
||||
{
|
||||
$alreadygrabbed[$urltograbbis]=1; // Track that file was alreay grabbed.
|
||||
|
||||
dol_mkdir(dirname($filetosave));
|
||||
dol_mkdir(dirname($filetosave));
|
||||
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($filetosave, octdec($conf->global->MAIN_UMASK));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -69,3 +69,8 @@ WEBSITE_USE_WEBSITE_ACCOUNTSTooltip=Enable the table to store web site accounts
|
||||
YouMustDefineTheHomePage=You must first define the default Home page
|
||||
OnlyEditionOfSourceForGrabbedContentFuture=Note: only edition of HTML source will be possible when a page content is initialized by grabbing it from an external page (WYSIWYG editor will not be available)
|
||||
OnlyEditionOfSourceForGrabbedContent=Only edition of HTML source is possible when content was grabbed from an external site
|
||||
GrabImagesInto=Grab also images found into css and page.
|
||||
ImagesShouldBeSavedInto=Images should be saved into directory
|
||||
WebsiteRootOfImages=Root directory for website images
|
||||
SubdirOfPage=Sub-directory dedicated to page
|
||||
AliasPageAlreadyExists=Alias page <strong>%s</strong> already exists
|
||||
@ -113,13 +113,16 @@ class WebsitePage extends CommonObject
|
||||
*/
|
||||
public function create(User $user, $notrigger = false)
|
||||
{
|
||||
$this->description = dol_trunc($this->description, 255, 'right', 'utf-8', 1);
|
||||
$this->keywords = dol_trunc($this->keywords, 255, 'right', 'utf-8', 1);
|
||||
|
||||
return $this->createCommon($user, $notrigger);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load object in memory from the database
|
||||
*
|
||||
* @param int $id Id object. If this is 0, the default page of website_id will be used, if not defined, the first one found.
|
||||
* @param int $id Id object. If this is 0, the value into $page will be used. If not found or $page not defined, the default page of website_id will be used or the first page found if not set.
|
||||
* @param string $website_id Web site id (page name must also be filled if this parameter is used)
|
||||
* @param string $page Page name (website id must also be filled if this parameter is used)
|
||||
*
|
||||
|
||||
@ -261,6 +261,9 @@ if ($action == 'addcontainer')
|
||||
if (GETPOST('fetchexternalurl','alpha'))
|
||||
{
|
||||
$urltograb=GETPOST('externalurl','alpha');
|
||||
$grabimages=GETPOST('grabimages','alpha');
|
||||
$grabimagesinto=GETPOST('grabimagesinto','alpha');
|
||||
//var_dump($grabimages);exit;
|
||||
}
|
||||
|
||||
if ($urltograb)
|
||||
@ -275,234 +278,248 @@ if ($action == 'addcontainer')
|
||||
{
|
||||
$urltograb.='/';
|
||||
}
|
||||
$pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam)));
|
||||
|
||||
$urltograbdirwithoutslash = dirname($urltograb.'.');
|
||||
$urltograbdirrootwithoutslash = getRootURLFromURL($urltograbdirwithoutslash);
|
||||
// Example, now $urltograbdirwithoutslash is https://www.dolimed.com/screenshots
|
||||
// and $urltograbdirrootwithoutslash is https://www.dolimed.com
|
||||
|
||||
$tmp = getURLContent($urltograb);
|
||||
if ($tmp['curl_error_no'])
|
||||
// Check pageurl is not already used
|
||||
$tmpwebsitepage = new WebsitePage($db);
|
||||
$result = $tmpwebsitepage->fetch(0, $object->id, $pageurl);
|
||||
if ($result > 0)
|
||||
{
|
||||
setEventMessages($langs->trans("AliasPageAlreadyExists", $pageurl), null, 'errors');
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
elseif ($tmp['http_code'] != '200')
|
||||
|
||||
if (! $error)
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
else
|
||||
{
|
||||
// Remove comments
|
||||
$tmp['content'] = removeHtmlComment($tmp['content']);
|
||||
|
||||
preg_match('/<head>(.*)<\/head>/is', $tmp['content'], $reg);
|
||||
$head = $reg[1];
|
||||
|
||||
$objectpage->type_container = 'page';
|
||||
$objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam)));
|
||||
if (empty($objectpage->pageurl))
|
||||
{
|
||||
$tmpdomain = getDomainFromURL($urltograb);
|
||||
$objectpage->pageurl=$tmpdomain.'-home';
|
||||
}
|
||||
|
||||
if (preg_match('/<title>(.*)<\/title>/ims', $head, $regtmp))
|
||||
$tmp = getURLContent($urltograb);
|
||||
if ($tmp['curl_error_no'])
|
||||
{
|
||||
$objectpage->title = $regtmp[1];
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
if (preg_match('/<meta name="description"[^"]+content="([^"]+)"/ims', $head, $regtmp))
|
||||
elseif ($tmp['http_code'] != '200')
|
||||
{
|
||||
$objectpage->description = $regtmp[1];
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
if (preg_match('/<meta name="keywords"[^"]+content="([^"]+)"/ims', $head, $regtmp))
|
||||
else
|
||||
{
|
||||
$objectpage->keywords = $regtmp[1];
|
||||
// Remove comments
|
||||
$tmp['content'] = removeHtmlComment($tmp['content']);
|
||||
|
||||
preg_match('/<head>(.*)<\/head>/is', $tmp['content'], $reg);
|
||||
$head = $reg[1];
|
||||
|
||||
$objectpage->type_container = 'page';
|
||||
$objectpage->pageurl = $pageurl;
|
||||
if (empty($objectpage->pageurl))
|
||||
{
|
||||
$tmpdomain = getDomainFromURL($urltograb);
|
||||
$objectpage->pageurl=$tmpdomain.'-home';
|
||||
}
|
||||
|
||||
if (preg_match('/<title>(.*)<\/title>/ims', $head, $regtmp))
|
||||
{
|
||||
$objectpage->title = $regtmp[1];
|
||||
}
|
||||
if (preg_match('/<meta name="description"[^"]+content="([^"]+)"/ims', $head, $regtmp))
|
||||
{
|
||||
$objectpage->description = $regtmp[1];
|
||||
}
|
||||
if (preg_match('/<meta name="keywords"[^"]+content="([^"]+)"/ims', $head, $regtmp))
|
||||
{
|
||||
$objectpage->keywords = $regtmp[1];
|
||||
}
|
||||
if (preg_match('/<html\s+lang="([^"]+)"/ims', $tmp['content'], $regtmp))
|
||||
{
|
||||
$tmplang=explode('-', $regtmp[1]);
|
||||
$objectpage->lang = $tmplang[0].($tmplang[1] ? '_'.strtoupper($tmplang[1]) : '');
|
||||
}
|
||||
|
||||
$objectpage->content = $tmp['content'];
|
||||
$objectpage->content = preg_replace('/^.*<body(\s[^>]*)*>/ims', '', $objectpage->content);
|
||||
$objectpage->content = preg_replace('/<\/body(\s[^>]*)*>.*$/ims', '', $objectpage->content);
|
||||
|
||||
$absoluteurlinaction=$urltograbdirwithoutslash;
|
||||
// TODO Replace 'action="$urltograbdirwithoutslash' into action="/"
|
||||
// TODO Replace 'action="$urltograbdirwithoutslash..."' into action="..."
|
||||
// TODO Replace 'a href="$urltograbdirwithoutslash' into a href="/"
|
||||
// TODO Replace 'a href="$urltograbdirwithoutslash..."' into a href="..."
|
||||
|
||||
// Now loop to fetch all css files. Include them inline into header of page
|
||||
$objectpage->htmlheader = $tmp['content'];
|
||||
$objectpage->htmlheader = preg_replace('/^.*<head(\s[^>]*)*>/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<\/head(\s[^>]*)*>.*$/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<base(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="robot(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="keywords(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="title(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="description(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="generator(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
//$objectpage->htmlheader = preg_replace('/<meta name="verify-v1[^>]*>\n*/ims', '', $objectpage->htmlheader);
|
||||
//$objectpage->htmlheader = preg_replace('/<meta name="msvalidate.01[^>]*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<title>[^<]*<\/title>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<link[^>]*rel="shortcut[^>]*>\n/ims', '', $objectpage->htmlheader);
|
||||
|
||||
// Now loop to fetch JS
|
||||
$tmp = $objectpage->htmlheader;
|
||||
|
||||
preg_match_all('/<script([^\.>]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs);
|
||||
foreach ($regs[0] as $key => $val)
|
||||
{
|
||||
dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]);
|
||||
|
||||
$linkwithoutdomain = $regs[2][$key];
|
||||
if (preg_match('/^\//', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
|
||||
}
|
||||
else
|
||||
{
|
||||
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
|
||||
}
|
||||
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
if (preg_match('/^http/', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $regs[2][$key];
|
||||
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
}
|
||||
|
||||
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
|
||||
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
|
||||
|
||||
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
|
||||
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
|
||||
$domaintograbbis = getDomainFromURL($urltograbbis);
|
||||
if ($domaintograb != $domaintograbbis) continue;
|
||||
|
||||
/*
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
else
|
||||
{
|
||||
dol_mkdir(dirname($filetosave));
|
||||
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($file, octdec($conf->global->MAIN_UMASK));
|
||||
}
|
||||
*/
|
||||
|
||||
//$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
|
||||
}
|
||||
$objectpage->htmlheader = trim($tmp);
|
||||
|
||||
|
||||
// Now loop to fetch CSS
|
||||
$pagecsscontent = "\n".'<style>'."\n";
|
||||
|
||||
preg_match_all('/<link([^\.>]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs);
|
||||
foreach ($regs[0] as $key => $val)
|
||||
{
|
||||
dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]);
|
||||
|
||||
$linkwithoutdomain = $regs[2][$key];
|
||||
if (preg_match('/^\//', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
|
||||
}
|
||||
else
|
||||
{
|
||||
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
|
||||
}
|
||||
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
if (preg_match('/^http/', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $regs[2][$key];
|
||||
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
}
|
||||
|
||||
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
|
||||
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
|
||||
|
||||
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
|
||||
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
|
||||
$domaintograbbis = getDomainFromURL($urltograbbis);
|
||||
if ($domaintograb != $domaintograbbis) continue;
|
||||
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
else
|
||||
{
|
||||
//dol_mkdir(dirname($filetosave));
|
||||
|
||||
//$fp = fopen($filetosave, "w");
|
||||
//fputs($fp, $tmpgeturl['content']);
|
||||
//fclose($fp);
|
||||
//if (! empty($conf->global->MAIN_UMASK))
|
||||
// @chmod($file, octdec($conf->global->MAIN_UMASK));
|
||||
|
||||
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
|
||||
|
||||
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1, $grabimages, $grabimagesinto);
|
||||
|
||||
$pagecsscontent.=$tmpgeturl['content']."\n";
|
||||
|
||||
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
|
||||
}
|
||||
}
|
||||
|
||||
$pagecsscontent.='</style>'."\n";
|
||||
//var_dump($pagecsscontent);
|
||||
|
||||
//print dol_escape_htmltag($tmp);exit;
|
||||
$objectpage->htmlheader .= $pagecsscontent;
|
||||
|
||||
|
||||
// Now loop to fetch all images into page
|
||||
$tmp = $objectpage->content;
|
||||
|
||||
getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1, $grabimages, $grabimagesinto);
|
||||
|
||||
//print dol_escape_htmltag($tmp);exit;
|
||||
$objectpage->content = $tmp;
|
||||
|
||||
$objectpage->grabbed_from = $urltograb;
|
||||
}
|
||||
if (preg_match('/<html\s+lang="([^"]+)"/ims', $tmp['content'], $regtmp))
|
||||
{
|
||||
$tmplang=explode('-', $regtmp[1]);
|
||||
$objectpage->lang = $tmplang[0].($tmplang[1] ? '_'.strtoupper($tmplang[1]) : '');
|
||||
}
|
||||
|
||||
$objectpage->content = $tmp['content'];
|
||||
$objectpage->content = preg_replace('/^.*<body(\s[^>]*)*>/ims', '', $objectpage->content);
|
||||
$objectpage->content = preg_replace('/<\/body(\s[^>]*)*>.*$/ims', '', $objectpage->content);
|
||||
|
||||
$absoluteurlinaction=$urltograbdirwithoutslash;
|
||||
// TODO Replace 'action="$urltograbdirwithoutslash' into action="/"
|
||||
// TODO Replace 'action="$urltograbdirwithoutslash..."' into action="..."
|
||||
// TODO Replace 'a href="$urltograbdirwithoutslash' into a href="/"
|
||||
// TODO Replace 'a href="$urltograbdirwithoutslash..."' into a href="..."
|
||||
|
||||
// Now loop to fetch all css files. Include them inline into header of page
|
||||
$objectpage->htmlheader = $tmp['content'];
|
||||
$objectpage->htmlheader = preg_replace('/^.*<head(\s[^>]*)*>/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<\/head(\s[^>]*)*>.*$/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<base(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="robot(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="keywords(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="title(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="description(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<meta name="generator(\s[^>]*)*>\n*/ims', '', $objectpage->htmlheader);
|
||||
//$objectpage->htmlheader = preg_replace('/<meta name="verify-v1[^>]*>\n*/ims', '', $objectpage->htmlheader);
|
||||
//$objectpage->htmlheader = preg_replace('/<meta name="msvalidate.01[^>]*>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<title>[^<]*<\/title>\n*/ims', '', $objectpage->htmlheader);
|
||||
$objectpage->htmlheader = preg_replace('/<link[^>]*rel="shortcut[^>]*>\n/ims', '', $objectpage->htmlheader);
|
||||
|
||||
// Now loop to fetch JS
|
||||
$tmp = $objectpage->htmlheader;
|
||||
|
||||
preg_match_all('/<script([^\.>]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs);
|
||||
foreach ($regs[0] as $key => $val)
|
||||
{
|
||||
dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]);
|
||||
|
||||
$linkwithoutdomain = $regs[2][$key];
|
||||
if (preg_match('/^\//', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
|
||||
}
|
||||
else
|
||||
{
|
||||
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
|
||||
}
|
||||
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
if (preg_match('/^http/', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $regs[2][$key];
|
||||
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
}
|
||||
|
||||
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
|
||||
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
|
||||
|
||||
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
|
||||
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
|
||||
$domaintograbbis = getDomainFromURL($urltograbbis);
|
||||
if ($domaintograb != $domaintograbbis) continue;
|
||||
|
||||
/*
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
else
|
||||
{
|
||||
dol_mkdir(dirname($filetosave));
|
||||
|
||||
$fp = fopen($filetosave, "w");
|
||||
fputs($fp, $tmpgeturl['content']);
|
||||
fclose($fp);
|
||||
if (! empty($conf->global->MAIN_UMASK))
|
||||
@chmod($file, octdec($conf->global->MAIN_UMASK));
|
||||
}
|
||||
*/
|
||||
|
||||
//$filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '', $tmp);
|
||||
}
|
||||
$objectpage->htmlheader = trim($tmp);
|
||||
|
||||
|
||||
// Now loop to fetch CSS
|
||||
$pagecsscontent = "\n".'<style>'."\n";
|
||||
|
||||
preg_match_all('/<link([^\.>]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs);
|
||||
foreach ($regs[0] as $key => $val)
|
||||
{
|
||||
dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]);
|
||||
|
||||
$linkwithoutdomain = $regs[2][$key];
|
||||
if (preg_match('/^\//', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
|
||||
}
|
||||
else
|
||||
{
|
||||
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
|
||||
}
|
||||
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
|
||||
if (preg_match('/^http/', $regs[2][$key]))
|
||||
{
|
||||
$urltograbbis = $regs[2][$key];
|
||||
$linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]);
|
||||
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
}
|
||||
|
||||
//print $domaintograb.' - '.$domaintograbbis.' - '.$urltograbdirwithoutslash.' - ';
|
||||
//print $linkwithoutdomain.' - '.$urltograbbis."<br>\n";
|
||||
|
||||
// Test if this is an external URL of grabbed web site. If yes, we do not load resource
|
||||
$domaintograb = getDomainFromURL($urltograbdirwithoutslash);
|
||||
$domaintograbbis = getDomainFromURL($urltograbbis);
|
||||
if ($domaintograb != $domaintograbbis) continue;
|
||||
|
||||
$tmpgeturl = getURLContent($urltograbbis);
|
||||
if ($tmpgeturl['curl_error_no'])
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
elseif ($tmpgeturl['http_code'] != '200')
|
||||
{
|
||||
$error++;
|
||||
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
|
||||
$action='createcontainer';
|
||||
}
|
||||
else
|
||||
{
|
||||
//dol_mkdir(dirname($filetosave));
|
||||
|
||||
//$fp = fopen($filetosave, "w");
|
||||
//fputs($fp, $tmpgeturl['content']);
|
||||
//fclose($fp);
|
||||
//if (! empty($conf->global->MAIN_UMASK))
|
||||
// @chmod($file, octdec($conf->global->MAIN_UMASK));
|
||||
|
||||
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
|
||||
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
|
||||
|
||||
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1);
|
||||
|
||||
$pagecsscontent.=$tmpgeturl['content']."\n";
|
||||
|
||||
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
|
||||
}
|
||||
}
|
||||
|
||||
$pagecsscontent.='</style>'."\n";
|
||||
//var_dump($pagecsscontent);
|
||||
|
||||
//print dol_escape_htmltag($tmp);exit;
|
||||
$objectpage->htmlheader .= $pagecsscontent;
|
||||
|
||||
|
||||
// Now loop to fetch all images
|
||||
$tmp = $objectpage->content;
|
||||
|
||||
getAllImages($object, $objectpage, $urltograb, $tmp, $action, 1);
|
||||
|
||||
//print dol_escape_htmltag($tmp);exit;
|
||||
$objectpage->content = $tmp;
|
||||
|
||||
$objectpage->grabbed_from = $urltograb;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -1895,27 +1912,30 @@ if ($action == 'editmeta' || $action == 'createcontainer')
|
||||
print '<!-- Edit or create page/container -->'."\n";
|
||||
//print '<div class="fichecenter">';
|
||||
|
||||
if ($conf->global->MAIN_FEATURES_LEVEL >= 1)
|
||||
if ($action == 'createcontainer')
|
||||
{
|
||||
if ($action == 'createcontainer')
|
||||
{
|
||||
print '<br>';
|
||||
print '<br>';
|
||||
|
||||
print ' * '.$langs->trans("CreateByFetchingExternalPage").'<br><hr>';
|
||||
print '<table class="border" width="100%">';
|
||||
print '<tr><td class="titlefield">';
|
||||
print $langs->trans("URL");
|
||||
print '</td><td>';
|
||||
print '<input class="flat minwidth300" type="text" name="externalurl" value="'.dol_escape_htmltag(GETPOST('externalurl','alpha')).'" placeholder="https://externalsite/pagetofetch"> ';
|
||||
print '<input class="button" type="submit" name="fetchexternalurl" value="'.dol_escape_htmltag($langs->trans("FetchAndCreate")).'">';
|
||||
print '<br><br>'.info_admin($langs->trans("OnlyEditionOfSourceForGrabbedContentFuture"), 0, 0, '1');
|
||||
print '</td></tr>';
|
||||
print '</table>';
|
||||
print ' * '.$langs->trans("CreateByFetchingExternalPage").'<br><hr>';
|
||||
print '<table class="border" width="100%">';
|
||||
print '<tr><td class="titlefield">';
|
||||
print $langs->trans("URL");
|
||||
print '</td><td>';
|
||||
print '<input class="flat minwidth300" type="text" name="externalurl" value="'.dol_escape_htmltag(GETPOST('externalurl','alpha')).'" placeholder="https://externalsite/pagetofetch"> ';
|
||||
print '<input class="flat paddingtop" type="checkbox" name="grabimages" value="1" checked="checked"> '.$langs->trans("GrabImagesInto");
|
||||
print ' ';
|
||||
print $langs->trans("ImagesShouldBeSavedInto").' ';
|
||||
$arraygrabimagesinto=array('root'=>$langs->trans("WebsiteRootOfImages"), 'subpage'=>$langs->trans("SubdirOfPage"));
|
||||
print $form->selectarray('grabimagesinto', $arraygrabimagesinto, GETPOSTISSET('grabimagesinto')?GETPOST('grabimagesinto'):'root');
|
||||
print '<br>';
|
||||
print '<input class="button" style="margin-top: 5px" type="submit" name="fetchexternalurl" value="'.dol_escape_htmltag($langs->trans("FetchAndCreate")).'">';
|
||||
print '<br>'.info_admin($langs->trans("OnlyEditionOfSourceForGrabbedContentFuture"), 0, 0, '1');
|
||||
print '</td></tr>';
|
||||
print '</table>';
|
||||
|
||||
print '<br>';
|
||||
print '<br>';
|
||||
|
||||
print ' * '.$langs->trans("OrEnterPageInfoManually").'<br><hr>';
|
||||
}
|
||||
print ' * '.$langs->trans("OrEnterPageInfoManually").'<br><hr>';
|
||||
}
|
||||
|
||||
print '<table class="border" width="100%">';
|
||||
|
||||
Loading…
Reference in New Issue
Block a user