diff --git a/htdocs/core/lib/geturl.lib.php b/htdocs/core/lib/geturl.lib.php index 6f268fda3a7..cb594259c9f 100644 --- a/htdocs/core/lib/geturl.lib.php +++ b/htdocs/core/lib/geturl.lib.php @@ -55,7 +55,7 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea curl_setopt($ch, CURLOPT_USERAGENT, 'Dolibarr geturl function'); @curl_setopt($ch, CURLOPT_FOLLOWLOCATION, ($followlocation?true:false)); // We use @ here because this may return warning if safe mode is on or open_basedir is on - + if (count($addheaders)) curl_setopt($ch, CURLOPT_HTTPHEADER, $addheaders); curl_setopt($ch, CURLINFO_HEADER_OUT, true); // To be able to retrieve request header and log it @@ -63,7 +63,7 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea // You can force, if supported a version like TLSv1 or TLSv1.2 if (! empty($conf->global->MAIN_CURL_SSLVERSION)) curl_setopt($ch, CURLOPT_SSLVERSION, $conf->global->MAIN_CURL_SSLVERSION); //curl_setopt($ch, CURLOPT_SSLVERSION, 6); for tls 1.2 - + //turning off the server and peer verification(TrustManager Concept). curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE); curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE); @@ -82,12 +82,12 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea { curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'PUT'); // HTTP request is 'PUT' if (! 
is_array($param)) parse_str($param, $array_param); - else + else { dol_syslog("parameter param must be a string", LOG_WARNING); $array_param=$param; } - curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields + curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($array_param)); // Setting param x=a&y=z as PUT fields } else if ($postorget == 'PUTALREADYFORMATED') { @@ -121,7 +121,7 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea $response = curl_exec($ch); $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request - + dol_syslog("getURLContent request=".$request); dol_syslog("getURLContent response=".$response); @@ -130,7 +130,7 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea { // Ad keys to $rep $rep['content']=$response; - + // moving to display page to display curl errors $rep['curl_error_no']=curl_errno($ch); $rep['curl_error_msg']=curl_error($ch); @@ -146,12 +146,12 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea //$rep['header_size']=$info['header_size']; //$rep['http_code']=$info['http_code']; dol_syslog("getURLContent http_code=".$rep['http_code']); - + // Add more keys to $rep $rep['content']=$response; $rep['curl_error_no']=''; $rep['curl_error_msg']=''; - + //closing the curl curl_close($ch); } @@ -159,3 +159,19 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea return $rep; } + +/** + * Function get second level domain name. + * For example: https://www.abc.mydomain.com/dir/page.html return 'mydomain' + * + * @param string $url Full URL. 
+ * @return string Returns second level domain name, without any subdomain and without first level domain + */ +function getDomainFromURL($url) +{ + $tmpdomain = preg_replace('/^https?:\/\//i', '', $url); // Remove http(s):// + $tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after domain + $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...) + $tmpdomain = preg_replace('/^.*\./', '', $tmpdomain); // Remove every subdomain label (www., www.abc., ...) before domain name, so www.abc.mydomain gives mydomain + return $tmpdomain; +} diff --git a/htdocs/websites/index.php b/htdocs/websites/index.php index 96947035113..39d41307f20 100644 --- a/htdocs/websites/index.php +++ b/htdocs/websites/index.php @@ -94,6 +94,7 @@ $pageid=GETPOST('pageid', 'int'); $pageref=GETPOST('pageref', 'aZ09'); $action=GETPOST('action','alpha'); + if (GETPOST('delete')) { $action='delete'; } if (GETPOST('preview')) $action='preview'; if (GETPOST('createsite')) { $action='createsite'; } @@ -246,7 +247,11 @@ if ($action == 'add') $urltograbwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\/?/i', '', $urltograbwithoutdomain); $objectpage->pageurl = basename($urltograbwithoutdomain); - if (empty($objectpage->pageurl)) $objectpage->pageurl='home'; + if (empty($objectpage->pageurl)) + { + $tmpdomain = getDomainFromURL($urltograb); + $objectpage->pageurl='home'.$tmpdomain; + } if (preg_match('/(.*)<\/title>/ims', $head, $regtmp)) { @@ -270,6 +275,46 @@ if ($action == 'add') $objectpage->content = preg_replace('/^.*<body[^>]*>/ims', '', $objectpage->content); $objectpage->content = preg_replace('/<\/body[^>]*>.*$/ims', '', $objectpage->content); + $tmp = $objectpage->content; + + // Now loop to fetch all images + preg_match_all('/<img([^\.\/]+)src="([^>"]+)"([^>]*)>/i', $objectpage->content, $regs); + foreach ($regs[0] as $key => $val) + { + $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + $linkwithoutdomain = $regs[2][$key]; + $filetosave = 
$conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^http/', $regs[2][$key])) + { + $urltograbbis = $regs[2][$key]; + $linkwithoutdomain = preg_replace('/^https?:\/\/[^\/]+\//i', '', $regs[2][$key]); + $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain; + } + + $tmpgeturl = getURLContent($urltograbbis); + if ($tmpgeturl['curl_error_no']) + { + $error++; + setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + $action='create'; + } + else + { + dol_mkdir(dirname($filetosave)); + + $fp = fopen($filetosave, "w"); + fputs($fp, $tmpgeturl['content']); + fclose($fp); + if (! empty($conf->global->MAIN_UMASK)) + @chmod($filetosave, octdec($conf->global->MAIN_UMASK)); // Apply the configured mask to the image file just written + } + + $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain; + $tmp = preg_replace('/'.preg_quote($regs[0][$key],'/').'/i', '<img'.$regs[1][$key].'src="'.DOL_URL_ROOT.'/viewimage.php?modulepart=medias&file='.$filename.'"'.$regs[3][$key].'>', $tmp); + } +//print dol_escape_htmltag($tmp);exit; + $objectpage->content = $tmp; + $objectpage->grabbed_from = $urltograb; } }