diff --git a/htdocs/core/lib/geturl.lib.php b/htdocs/core/lib/geturl.lib.php index cb594259c9f..55cf8290a89 100644 --- a/htdocs/core/lib/geturl.lib.php +++ b/htdocs/core/lib/geturl.lib.php @@ -123,7 +123,8 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea $request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request dol_syslog("getURLContent request=".$request); - dol_syslog("getURLContent response=".$response); + //dol_syslog("getURLContent response =".response); // This may contains binary data, so we dont output it + dol_syslog("getURLContent response size =".strlen($response)); // This may contains binary data, so we dont output it $rep=array(); if (curl_errno($ch)) @@ -173,5 +174,38 @@ function getDomainFromURL($url) $tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after domain $tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...) $tmpdomain = preg_replace('/^[^\.]+\./', '', $tmpdomain); // Remove part www. before domain name + return $tmpdomain; } + +/** + * Function root url from a long url + * For example: https://www.abc.mydomain.com/dir/page.html return 'https://www.abc.mydomain.com' + * For example: http://www.abc.mydomain.com/ return 'https://www.abc.mydomain.com' + * + * @param string $url Full URL. + * @return string Returns root url + */ +function getRootURLFromURL($url) +{ + $prefix=''; + $tmpurl = $url; + if (preg_match('/^(https?:\/\/)/i', $tmpurl, $reg)) $prefix = $reg[1]; + $tmpurl = preg_replace('/^https?:\/\//i', '', $tmpurl); // Remove http(s):// + $tmpurl = preg_replace('/\/.*$/i', '', $tmpurl); // Remove part after domain + + return $prefix.$tmpurl; +} + +/** + * Function to remove comments into HTML content + * + * @param string $content Text content + * @return string Returns text without HTML comments + */ +function removeHtmlComment($content) +{ + $content = preg_replace('//', '', $content); + return $content; +} + diff --git a/htdocs/core/lib/website.lib.php b/htdocs/core/lib/website.lib.php index 9a344ef0355..57ec80e0b71 100644 --- a/htdocs/core/lib/website.lib.php +++ b/htdocs/core/lib/website.lib.php @@ -225,7 +225,16 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify { if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images - $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograb); + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file + } + $linkwithoutdomain = $regs[2][$key]; $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; if (preg_match('/^http/', $regs[2][$key])) @@ -251,7 +260,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); $action='create'; } else @@ -281,7 +296,15 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify { if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images - $urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograb); + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file + } $linkwithoutdomain = $regs[2][$key]; $filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; @@ -309,7 +332,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); $action='create'; } else diff --git a/htdocs/langs/en_US/website.lang b/htdocs/langs/en_US/website.lang index f5c2aeaf466..a978fdc4498 100644 --- a/htdocs/langs/en_US/website.lang +++ b/htdocs/langs/en_US/website.lang @@ -54,6 +54,8 @@ OrEnterPageInfoManually=Or create empty page from scratch... FetchAndCreate=Fetch and Create ExportSite=Export site IDOfPage=Id of page +Banner=Bandeau +BlogPost=Blog post WebsiteAccount=Web site account WebsiteAccounts=Web site accounts AddWebsiteAccount=Create web site account diff --git a/htdocs/website/index.php b/htdocs/website/index.php index 79468ed85cf..4f8c314b25e 100644 --- a/htdocs/website/index.php +++ b/htdocs/website/index.php @@ -255,6 +255,8 @@ if ($action == 'add') if ($urltograb) { + include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php'; + // Clean url to grab, so url can be // http://www.example.com/ or http://www.example.com/dir1/ or http://www.example.com/dir1/aaa $urltograbwithoutdomainandparam = preg_replace('/^https?:\/\/[^\/]+\/?/i', '', $urltograb); @@ -263,24 +265,35 @@ if ($action == 'add') { $urltograb.='/'; } - $urltograbdirwithoutslash = dirname($urltograb.'.'); - include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php'; + $urltograbdirwithoutslash = dirname($urltograb.'.'); + $urltograbdirrootwithoutslash = getRootURLFromURL($urltograbdirwithoutslash); + // Exemple, now $urltograbdirwithoutslash is https://www.dolimed.com/screenshots + // and $urltograbdirrootwithoutslash is https://www.dolimed.com $tmp = getURLContent($urltograb); if ($tmp['curl_error_no']) { $error++; - setEventMessages($tmp['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmp['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors'); $action='create'; } else { + // Remove comments + $tmp['content'] = removeHtmlComment($tmp['content']); + preg_match('/(.*)<\/head>/is', $tmp['content'], $reg); $head = $reg[1]; $objectpage->type_container = 'page'; - $objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-',$urltograbwithoutdomainandparam)); + $objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam))); if (empty($objectpage->pageurl)) { $tmpdomain = getDomainFromURL($urltograb); @@ -336,10 +349,17 @@ if ($action == 'add') preg_match_all('/]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs); foreach ($regs[0] as $key => $val) { - dol_syslog("We will grab the resource ".$regs[2][$key]); + dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]); $linkwithoutdomain = $regs[2][$key]; - $urltograbbis = $urltograbdirwithoutslash.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file + } //$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; if (preg_match('/^http/', $regs[2][$key])) @@ -362,10 +382,16 @@ if ($action == 'add') if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); $action='create'; } - else + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); + $action='create'; + } + else { dol_mkdir(dirname($filetosave)); @@ -389,10 +415,17 @@ if ($action == 'add') preg_match_all('/]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs); foreach ($regs[0] as $key => $val) { - dol_syslog("We will grab the resource ".$regs[2][$key]); + dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]); $linkwithoutdomain = $regs[2][$key]; - $urltograbbis = $urltograbdirwithoutslash.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; + if (preg_match('/^\//', $regs[2][$key])) + { + $urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot + } + else + { + $urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file + } //$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key]; if (preg_match('/^http/', $regs[2][$key])) @@ -414,28 +447,34 @@ if ($action == 'add') if ($tmpgeturl['curl_error_no']) { $error++; - setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors'); + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors'); + $action='create'; + } + elseif ($tmpgeturl['http_code'] != '200') + { + $error++; + setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors'); $action='create'; } else { - //dol_mkdir(dirname($filetosave)); + //dol_mkdir(dirname($filetosave)); - //$fp = fopen($filetosave, "w"); - //fputs($fp, $tmpgeturl['content']); - //fclose($fp); - //if (! empty($conf->global->MAIN_UMASK)) - // @chmod($file, octdec($conf->global->MAIN_UMASK)); - } + //$fp = fopen($filetosave, "w"); + //fputs($fp, $tmpgeturl['content']); + //fclose($fp); + //if (! empty($conf->global->MAIN_UMASK)) + // @chmod($file, octdec($conf->global->MAIN_UMASK)); - // $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain; - $pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n"; + // $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain; + $pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n"; - getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1); + getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1); - $pagecsscontent.=$tmpgeturl['content']."\n"; + $pagecsscontent.=$tmpgeturl['content']."\n"; - $objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader); + $objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader); + } } $pagecsscontent.=''."\n"; @@ -1790,7 +1829,7 @@ if ($action == 'editmeta' || $action == 'create') if ($action != 'create') { - print ''; + print ''; print $langs->trans('IDOfPage'); print ''; print $pageid; @@ -1828,7 +1867,7 @@ if ($action == 'editmeta' || $action == 'create') print ''; print $langs->trans('WEBSITE_TYPE_CONTAINER'); print ''; - $arrayoftype=array('page'=>$langs->trans("Page"), 'banner'=>$langs->trans("Banner"), 'blogpost'=>$langs->trans("BlogPost")); + $arrayoftype=array('page'=>$langs->trans("Page"), 'banner'=>$langs->trans("Banner"), 'blogpost'=>$langs->trans("BlogPost"), 'other'=>$langs->trans("Other")); print $form->selectarray('WEBSITE_TYPE_CONTAINER', $arrayoftype, $type_container); print ''; diff --git a/test/phpunit/AllTests.php b/test/phpunit/AllTests.php index 71b57fb82c7..d1f0e8b5158 100644 --- a/test/phpunit/AllTests.php +++ b/test/phpunit/AllTests.php @@ -93,6 +93,8 @@ class AllTests $suite->addTestSuite('MarginsLibTest'); require_once dirname(__FILE__).'/FilesLibTest.php'; $suite->addTestSuite('FilesLibTest'); + require_once dirname(__FILE__).'/GetUrlLibTest.php'; + $suite->addTestSuite('GetUrlLibTest'); require_once dirname(__FILE__).'/JsonLibTest.php'; $suite->addTestSuite('JsonLibTest'); require_once dirname(__FILE__).'/ImagesLibTest.php'; diff --git a/test/phpunit/GetUrlLibTest.php b/test/phpunit/GetUrlLibTest.php new file mode 100644 index 00000000000..21dacd5a0f1 --- /dev/null +++ b/test/phpunit/GetUrlLibTest.php @@ -0,0 +1,192 @@ + + * Copyright (C) 2012 Regis Houssin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * or see http://www.gnu.org/ + */ + +/** + * \file test/phpunit/GetUrlLibTest.php + * \ingroup test + * \brief PHPUnit test + * \remarks To run this script as CLI: phpunit filename.php + */ + +global $conf,$user,$langs,$db; +//define('TEST_DB_FORCE_TYPE','mysql'); // This is to force using mysql driver +//require_once 'PHPUnit/Autoload.php'; +require_once dirname(__FILE__).'/../../htdocs/master.inc.php'; +require_once dirname(__FILE__).'/../../htdocs/core/lib/geturl.lib.php'; + +if (empty($user->id)) +{ + print "Load permissions for admin user nb 1\n"; + $user->fetch(1); + $user->getrights(); +} +$conf->global->MAIN_DISABLE_ALL_MAILS=1; + + +/** + * Class for PHPUnit tests + * + * @backupGlobals disabled + * @backupStaticAttributes enabled + * @remarks backupGlobals must be disabled to have db,conf,user and lang not erased. + */ +class GetUrlLibTest extends PHPUnit_Framework_TestCase +{ + protected $savconf; + protected $savuser; + protected $savlangs; + protected $savdb; + + /** + * Constructor + * We save global variables into local variables + * + * @return FilesLibTest + */ + function __construct() + { + //$this->sharedFixture + global $conf,$user,$langs,$db; + $this->savconf=$conf; + $this->savuser=$user; + $this->savlangs=$langs; + $this->savdb=$db; + + print __METHOD__." db->type=".$db->type." user->id=".$user->id; + //print " - db ".$db->db; + print "\n"; + } + + // Static methods + public static function setUpBeforeClass() + { + global $conf,$user,$langs,$db; + $db->begin(); // This is to have all actions inside a transaction even if test launched without suite. + + print __METHOD__."\n"; + } + + // tear down after class + public static function tearDownAfterClass() + { + global $conf,$user,$langs,$db; + $db->rollback(); + + print __METHOD__."\n"; + } + + /** + * Init phpunit tests + * + * @return void + */ + protected function setUp() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + print __METHOD__."\n"; + } + /** + * End phpunit tests + * + * @return void + */ + protected function tearDown() + { + print __METHOD__."\n"; + } + + + /** + * testGetRootURLFromURL + * + * @return int + */ + public function testGetRootURLFromURL() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + $result=getRootURLFromURL('http://www.dolimed.com/screenshots/afile'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('http://www.dolimed.com',$result,'Test 1'); + + $result=getRootURLFromURL('https://www.dolimed.com/screenshots/afile'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('https://www.dolimed.com',$result,'Test 2'); + + $result=getRootURLFromURL('http://www.dolimed.com/screenshots'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('http://www.dolimed.com',$result); + + $result=getRootURLFromURL('https://www.dolimed.com/screenshots'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('https://www.dolimed.com',$result); + + $result=getRootURLFromURL('http://www.dolimed.com/'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('http://www.dolimed.com',$result); + + $result=getRootURLFromURL('https://www.dolimed.com/'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('https://www.dolimed.com',$result); + + $result=getRootURLFromURL('http://www.dolimed.com'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('http://www.dolimed.com',$result); + + $result=getRootURLFromURL('https://www.dolimed.com'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('https://www.dolimed.com',$result); + + return 1; + } + + + /** + * testRemoveHtmlComment + * + * @return int + */ + public function testRemoveHtmlComment() + { + global $conf,$user,$langs,$db; + $conf=$this->savconf; + $user=$this->savuser; + $langs=$this->savlangs; + $db=$this->savdb; + + $result=removeHtmlComment('abcdef'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('abcdef',$result,'Test 1'); + + $result=removeHtmlComment('abcbbdef'); + print __METHOD__." result=".$result."\n"; + $this->assertEquals('abcbbdef',$result,'Test 1'); + + return 1; + } +}