diff --git a/htdocs/core/lib/geturl.lib.php b/htdocs/core/lib/geturl.lib.php index cb594259c9f..55cf8290a89 100644 --- a/htdocs/core/lib/geturl.lib.php +++ b/htdocs/core/lib/geturl.lib.php @@ -123,7 +123,8 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request dol_syslog("getURLContent request=".$request); - dol_syslog("getURLContent response=".$response); + //dol_syslog("getURLContent response =".$response); // This may contain binary data, so we don't output it + dol_syslog("getURLContent response size =".strlen($response)); // This may contain binary data, so we don't output it $rep=array(); if (curl_errno($ch)) @@ -173,5 +174,38 @@ function getDomainFromURL($url) $tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after domain $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...) $tmpdomain = preg_replace('/^[^\.]+\./', '', $tmpdomain); // Remove part www. before domain name + return $tmpdomain; } + +/** + * Function to get root url from a long url + * For example: https://www.abc.mydomain.com/dir/page.html return 'https://www.abc.mydomain.com' + * For example: http://www.abc.mydomain.com/ return 'http://www.abc.mydomain.com' + * + * @param string $url Full URL. 
+ * @return string Returns root url + */ +function getRootURLFromURL($url) +{ + $prefix=''; + $tmpurl = $url; + if (preg_match('/^(https?:\/\/)/i', $tmpurl, $reg)) $prefix = $reg[1]; + $tmpurl = preg_replace('/^https?:\/\//i', '', $tmpurl); // Remove http(s):// + $tmpurl = preg_replace('/\/.*$/i', '', $tmpurl); // Remove part after domain + + return $prefix.$tmpurl; +} + +/** + * Function to remove comments from HTML content + * + * @param string $content Text content + * @return string Returns text without HTML comments + */ +function removeHtmlComment($content) +{ + $content = preg_replace('/<!--[^\-]+-->/', '', $content); + return $content; +} + diff --git a/htdocs/core/lib/website.lib.php b/htdocs/core/lib/website.lib.php index 9a344ef0355..57ec80e0b71 100644 --- a/htdocs/core/lib/website.lib.php +++ b/htdocs/core/lib/website.lib.php @@ -225,7 +225,16 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify { if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images - $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograb); + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file + } + $linkwithoutdomain = $regs[2][$key]; $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; if (preg_match('/^http/', $regs[2][$key])) @@ -251,7 +260,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); + 
$action='create'; + } + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); $action='create'; } else @@ -281,7 +296,15 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify { if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images - $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograb); + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file + } $linkwithoutdomain = $regs[2][$key]; $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; @@ -309,7 +332,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); $action='create'; } else diff --git a/htdocs/langs/en_US/website.lang b/htdocs/langs/en_US/website.lang index f5c2aeaf466..a978fdc4498 100644 --- a/htdocs/langs/en_US/website.lang +++ b/htdocs/langs/en_US/website.lang @@ -54,6 +54,8 @@ OrEnterPageInfoManually=Or create empty page from scratch... 
FetchAndCreate=Fetch and Create ExportSite=Export site IDOfPage=Id of page +Banner=Banner +BlogPost=Blog post WebsiteAccount=Web site account WebsiteAccounts=Web site accounts AddWebsiteAccount=Create web site account diff --git a/htdocs/website/index.php b/htdocs/website/index.php index 79468ed85cf..4f8c314b25e 100644 --- a/htdocs/website/index.php +++ b/htdocs/website/index.php @@ -255,6 +255,8 @@ if ($action == 'add') if ($urltograb) { + include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php'; + // Clean url to grab, so url can be // http://www.example.com/ or http://www.example.com/dir1/ or http://www.example.com/dir1/aaa $urltograbwithoutdomainandparam = preg_replace('/^https?:\/\/[^\/]+\/?/i', '', $urltograb); @@ -263,24 +265,35 @@ if ($action == 'add') { $urltograb.='/'; } - $urltograbdirwithoutslash = dirname($urltograb.'.'); - include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php'; + $urltograbdirwithoutslash = dirname($urltograb.'.'); + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograbdirwithoutslash); + // Example, now $urltograbdirwithoutslash is https://www.dolimed.com/screenshots + // and $urltograbdirrootwithoutslash is https://www.dolimed.com $tmp = getURLContent($urltograb); if ($tmp['curl_error_no']) { $error++; - setEventMessages($tmp['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmp['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors'); $action='create'; } else { + // Remove comments + $tmp['content'] = removeHtmlComment($tmp['content']); + preg_match('/<head>
(.*)<\/head>/is', $tmp['content'], $reg); $head = $reg[1]; $objectpage->type_container = 'page'; - $objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-',$urltograbwithoutdomainandparam)); + $objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam))); if (empty($objectpage->pageurl)) { $tmpdomain = getDomainFromURL($urltograb); @@ -336,10 +349,17 @@ if ($action == 'add') preg_match_all('/