Fix grabber of url when url is on second level dir. More phpunits

This commit is contained in:
Laurent Destailleur 2017-12-10 17:59:19 +01:00
parent 02d9e93ed4
commit e1f0483b23
6 changed files with 328 additions and 30 deletions

View File

@ -123,7 +123,8 @@ function getURLContent($url,$postorget='GET',$param='',$followlocation=1,$addhea
$request = curl_getinfo($ch, CURLINFO_HEADER_OUT); // Reading of request must be done after sending request
dol_syslog("getURLContent request=".$request);
dol_syslog("getURLContent response=".$response);
//dol_syslog("getURLContent response =".response); // This may contains binary data, so we dont output it
dol_syslog("getURLContent response size =".strlen($response)); // This may contains binary data, so we dont output it
$rep=array();
if (curl_errno($ch))
@ -173,5 +174,38 @@ function getDomainFromURL($url)
$tmpdomain = preg_replace('/\/.*$/i', '', $tmpdomain); // Remove part after domain
$tmpdomain = preg_replace('/\.[^\.]+$/', '', $tmpdomain); // Remove first level domain (.com, .net, ...)
$tmpdomain = preg_replace('/^[^\.]+\./', '', $tmpdomain); // Remove part www. before domain name
return $tmpdomain;
}
/**
 * Return the root URL (scheme + host, without trailing slash) of a longer URL.
 * For example: https://www.abc.mydomain.com/dir/page.html returns 'https://www.abc.mydomain.com'
 * For example: http://www.abc.mydomain.com/ returns 'http://www.abc.mydomain.com'
 * For example: http://www.abc.mydomain.com?param=1 returns 'http://www.abc.mydomain.com'
 *
 * The scheme is kept exactly as it was given (it is never switched between http and https).
 * If the URL has no http(s):// prefix, only the part before the first '/', '?' or '#' is returned.
 *
 * @param	string	$url		Full URL.
 * @return	string				Root URL (scheme and host, no path, no query string, no fragment)
 */
function getRootURLFromURL($url)
{
	$prefix = '';
	$tmpurl = $url;

	// Remember the scheme so it can be put back in front of the host once the path is removed
	if (preg_match('/^(https?:\/\/)/i', $tmpurl, $reg)) $prefix = $reg[1];

	$tmpurl = preg_replace('/^https?:\/\//i', '', $tmpurl);		// Remove http(s)://
	// Remove everything after the host: a path, but also a query string or a fragment
	// that directly follows the host (http://www.example.com?a=b has no '/' to cut on).
	$tmpurl = preg_replace('/[\/?#].*$/', '', $tmpurl);

	return $prefix.$tmpurl;
}
/**
 * Remove HTML comments (<!-- ... -->) from a content.
 *
 * Each comment ends at the first '-->' that follows its '<!--', so two separate
 * comments on the same line are removed independently and the text between them is kept.
 * Comments may contain hyphens, span several lines or be empty.
 *
 * @param	string	$content	Text content
 * @return	string				Text without HTML comments (the content unchanged if the regex engine fails)
 */
function removeHtmlComment($content)
{
	// Lazy match + 's' modifier: stop at the first closing '-->' and let '.' match new lines.
	// The former pattern [^\-]+ missed any comment that contained a '-' (for example "<!-- my-comment -->").
	$cleaned = preg_replace('/<!--.*?-->/s', '', $content);

	// preg_replace returns null on a PCRE error (e.g. backtrack limit); keep the original content in that case
	return ($cleaned === null) ? $content : $cleaned;
}

View File

@ -225,7 +225,16 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
{
if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images
$urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbdirrootwithoutslash = getRootURLFromURL($urltograb);
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file
}
$linkwithoutdomain = $regs[2][$key];
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
@ -251,7 +260,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
@ -281,7 +296,15 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
{
if (preg_match('/^data:image/i', $regs[2][$key])) continue; // We do nothing for such images
$urltograbbis = $urltograb.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbdirrootwithoutslash = getRootURLFromURL($urltograb);
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograb.'/'.$regs[2][$key]; // We use dir of grabbed file
}
$linkwithoutdomain = $regs[2][$key];
$filetosave = $conf->medias->multidir_output[$conf->entity].'/image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
@ -309,7 +332,13 @@ function getAllImages($object, $objectpage, $urltograb, &$tmp, &$action, $modify
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else

View File

@ -54,6 +54,8 @@ OrEnterPageInfoManually=Or create empty page from scratch...
FetchAndCreate=Fetch and Create
ExportSite=Export site
IDOfPage=Id of page
Banner=Banner
BlogPost=Blog post
WebsiteAccount=Web site account
WebsiteAccounts=Web site accounts
AddWebsiteAccount=Create web site account

View File

@ -255,6 +255,8 @@ if ($action == 'add')
if ($urltograb)
{
include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php';
// Clean url to grab, so url can be
// http://www.example.com/ or http://www.example.com/dir1/ or http://www.example.com/dir1/aaa
$urltograbwithoutdomainandparam = preg_replace('/^https?:\/\/[^\/]+\/?/i', '', $urltograb);
@ -263,24 +265,35 @@ if ($action == 'add')
{
$urltograb.='/';
}
$urltograbdirwithoutslash = dirname($urltograb.'.');
include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php';
$urltograbdirwithoutslash = dirname($urltograb.'.');
$urltograbdirrootwithoutslash = getRootURLFromURL($urltograbdirwithoutslash);
// Example: now $urltograbdirwithoutslash is https://www.dolimed.com/screenshots
// and $urltograbdirrootwithoutslash is https://www.dolimed.com
$tmp = getURLContent($urltograb);
if ($tmp['curl_error_no'])
{
$error++;
setEventMessages($tmp['curl_error_msg'], null, 'errors');
setEventMessages('Error getting '.$urltograb.': '.$tmp['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmp['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograb.': '.$tmp['http_code'], null, 'errors');
$action='create';
}
else
{
// Remove comments
$tmp['content'] = removeHtmlComment($tmp['content']);
preg_match('/<head>(.*)<\/head>/is', $tmp['content'], $reg);
$head = $reg[1];
$objectpage->type_container = 'page';
$objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-',$urltograbwithoutdomainandparam));
$objectpage->pageurl = dol_sanitizeFileName(preg_replace('/[\/\.]/','-', preg_replace('/\/+$/', '', $urltograbwithoutdomainandparam)));
if (empty($objectpage->pageurl))
{
$tmpdomain = getDomainFromURL($urltograb);
@ -336,10 +349,17 @@ if ($action == 'add')
preg_match_all('/<script([^\.>]+)src=["\']([^"\'>]+)["\']([^>]*)><\/script>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource ".$regs[2][$key]);
dol_syslog("We will grab the resource found into script tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
$urltograbbis = $urltograbdirwithoutslash.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
@ -362,10 +382,16 @@ if ($action == 'add')
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
else
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
dol_mkdir(dirname($filetosave));
@ -389,10 +415,17 @@ if ($action == 'add')
preg_match_all('/<link([^\.>]+)href=["\']([^"\'>]+\.css[^"\'>]*)["\']([^>]*)>/i', $objectpage->htmlheader, $regs);
foreach ($regs[0] as $key => $val)
{
dol_syslog("We will grab the resource ".$regs[2][$key]);
dol_syslog("We will grab the resource found into link tag ".$regs[2][$key]);
$linkwithoutdomain = $regs[2][$key];
$urltograbbis = $urltograbdirwithoutslash.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^\//', $regs[2][$key]))
{
$urltograbbis = $urltograbdirrootwithoutslash.$regs[2][$key]; // We use dirroot
}
else
{
$urltograbbis = $urltograbdirwithoutslash.'/'.$regs[2][$key]; // We use dir of grabbed file
}
//$filetosave = $conf->medias->multidir_output[$conf->entity].'/css/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $regs[2][$key])?'':'/').$regs[2][$key];
if (preg_match('/^http/', $regs[2][$key]))
@ -414,28 +447,34 @@ if ($action == 'add')
if ($tmpgeturl['curl_error_no'])
{
$error++;
setEventMessages($tmpgeturl['curl_error_msg'], null, 'errors');
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['curl_error_msg'], null, 'errors');
$action='create';
}
elseif ($tmpgeturl['http_code'] != '200')
{
$error++;
setEventMessages('Error getting '.$urltograbbis.': '.$tmpgeturl['http_code'], null, 'errors');
$action='create';
}
else
{
//dol_mkdir(dirname($filetosave));
//dol_mkdir(dirname($filetosave));
//$fp = fopen($filetosave, "w");
//fputs($fp, $tmpgeturl['content']);
//fclose($fp);
//if (! empty($conf->global->MAIN_UMASK))
// @chmod($file, octdec($conf->global->MAIN_UMASK));
}
//$fp = fopen($filetosave, "w");
//fputs($fp, $tmpgeturl['content']);
//fclose($fp);
//if (! empty($conf->global->MAIN_UMASK))
// @chmod($file, octdec($conf->global->MAIN_UMASK));
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
// $filename = 'image/'.$object->ref.'/'.$objectpage->pageurl.(preg_match('/^\//', $linkwithoutdomain)?'':'/').$linkwithoutdomain;
$pagecsscontent.='/* Content of file '.$urltograbbis.' */'."\n";
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1);
getAllImages($object, $objectpage, $urltograbbis, $tmpgeturl['content'], $action, 1);
$pagecsscontent.=$tmpgeturl['content']."\n";
$pagecsscontent.=$tmpgeturl['content']."\n";
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
$objectpage->htmlheader = preg_replace('/'.preg_quote($regs[0][$key],'/').'\n*/ims', '', $objectpage->htmlheader);
}
}
$pagecsscontent.='</style>'."\n";
@ -1790,7 +1829,7 @@ if ($action == 'editmeta' || $action == 'create')
if ($action != 'create')
{
print '<tr><td class="titlefield">';
print '<tr><td class="titlefield fieldrequired">';
print $langs->trans('IDOfPage');
print '</td><td>';
print $pageid;
@ -1828,7 +1867,7 @@ if ($action == 'editmeta' || $action == 'create')
print '<tr><td class="titlefield fieldrequired">';
print $langs->trans('WEBSITE_TYPE_CONTAINER');
print '</td><td>';
$arrayoftype=array('page'=>$langs->trans("Page"), 'banner'=>$langs->trans("Banner"), 'blogpost'=>$langs->trans("BlogPost"));
$arrayoftype=array('page'=>$langs->trans("Page"), 'banner'=>$langs->trans("Banner"), 'blogpost'=>$langs->trans("BlogPost"), 'other'=>$langs->trans("Other"));
print $form->selectarray('WEBSITE_TYPE_CONTAINER', $arrayoftype, $type_container);
print '</td></tr>';

View File

@ -93,6 +93,8 @@ class AllTests
$suite->addTestSuite('MarginsLibTest');
require_once dirname(__FILE__).'/FilesLibTest.php';
$suite->addTestSuite('FilesLibTest');
require_once dirname(__FILE__).'/GetUrlLibTest.php';
$suite->addTestSuite('GetUrlLibTest');
require_once dirname(__FILE__).'/JsonLibTest.php';
$suite->addTestSuite('JsonLibTest');
require_once dirname(__FILE__).'/ImagesLibTest.php';

View File

@ -0,0 +1,192 @@
<?php
/* Copyright (C) 2010-2012 Laurent Destailleur <eldy@users.sourceforge.net>
* Copyright (C) 2012 Regis Houssin <regis.houssin@capnetworks.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* or see http://www.gnu.org/
*/
/**
* \file test/phpunit/GetUrlLibTest.php
* \ingroup test
* \brief PHPUnit test
* \remarks To run this script as CLI: phpunit filename.php
*/
global $conf,$user,$langs,$db;
//define('TEST_DB_FORCE_TYPE','mysql'); // This is to force using mysql driver
//require_once 'PHPUnit/Autoload.php';
require_once dirname(__FILE__).'/../../htdocs/master.inc.php';
require_once dirname(__FILE__).'/../../htdocs/core/lib/geturl.lib.php';
// Test bootstrap: when no user is loaded yet, load the user with id 1 and its
// permissions so the tests run with a defined user context.
if (empty($user->id))
{
print "Load permissions for admin user nb 1\n";
$user->fetch(1);
$user->getrights();
}
// NOTE(review): presumably prevents any email from being sent while tests run - confirm against the mail code.
$conf->global->MAIN_DISABLE_ALL_MAILS=1;
/**
 * Class for PHPUnit tests of the functions getRootURLFromURL() and removeHtmlComment()
 * of core/lib/geturl.lib.php
 *
 * @backupGlobals disabled
 * @backupStaticAttributes enabled
 * @remarks	backupGlobals must be disabled to have db,conf,user and lang not erased.
 */
class GetUrlLibTest extends PHPUnit_Framework_TestCase
{
	protected $savconf;
	protected $savuser;
	protected $savlangs;
	protected $savdb;

	/**
	 * Constructor
	 * We save global variables into local variables
	 *
	 * @return GetUrlLibTest
	 */
	public function __construct()
	{
		// Let PHPUnit initialise its own state before we add ours
		parent::__construct();

		//$this->sharedFixture
		global $conf,$user,$langs,$db;
		$this->savconf=$conf;
		$this->savuser=$user;
		$this->savlangs=$langs;
		$this->savdb=$db;

		print __METHOD__." db->type=".$db->type." user->id=".$user->id;
		//print " - db ".$db->db;
		print "\n";
	}

	/**
	 * Open a database transaction before the first test of the class
	 *
	 * @return void
	 */
	public static function setUpBeforeClass()
	{
		global $db;
		$db->begin();	// This is to have all actions inside a transaction even if test launched without suite.

		print __METHOD__."\n";
	}

	/**
	 * Roll back the database transaction after the last test of the class
	 *
	 * @return void
	 */
	public static function tearDownAfterClass()
	{
		global $db;
		$db->rollback();

		print __METHOD__."\n";
	}

	/**
	 * Init phpunit tests: restore the globals saved by the constructor
	 *
	 * @return void
	 */
	protected function setUp()
	{
		global $conf,$user,$langs,$db;
		$conf=$this->savconf;
		$user=$this->savuser;
		$langs=$this->savlangs;
		$db=$this->savdb;

		print __METHOD__."\n";
	}

	/**
	 * End phpunit tests
	 *
	 * @return void
	 */
	protected function tearDown()
	{
		print __METHOD__."\n";
	}

	/**
	 * testGetRootURLFromURL
	 *
	 * Checks that the scheme and host are kept and that any path is removed,
	 * for URLs with a file, with a directory, with a trailing slash and with no path at all.
	 *
	 * @return int
	 */
	public function testGetRootURLFromURL()
	{
		global $conf,$user,$langs,$db;
		$conf=$this->savconf;
		$user=$this->savuser;
		$langs=$this->savlangs;
		$db=$this->savdb;

		$result=getRootURLFromURL('http://www.dolimed.com/screenshots/afile');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('http://www.dolimed.com', $result, 'Test 1');

		$result=getRootURLFromURL('https://www.dolimed.com/screenshots/afile');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('https://www.dolimed.com', $result, 'Test 2');

		$result=getRootURLFromURL('http://www.dolimed.com/screenshots');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('http://www.dolimed.com', $result, 'Test 3');

		$result=getRootURLFromURL('https://www.dolimed.com/screenshots');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('https://www.dolimed.com', $result, 'Test 4');

		$result=getRootURLFromURL('http://www.dolimed.com/');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('http://www.dolimed.com', $result, 'Test 5');

		$result=getRootURLFromURL('https://www.dolimed.com/');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('https://www.dolimed.com', $result, 'Test 6');

		$result=getRootURLFromURL('http://www.dolimed.com');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('http://www.dolimed.com', $result, 'Test 7');

		$result=getRootURLFromURL('https://www.dolimed.com');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('https://www.dolimed.com', $result, 'Test 8');

		return 1;
	}

	/**
	 * testRemoveHtmlComment
	 *
	 * Checks that a conditional comment is removed, and that two comments on the
	 * same line are removed separately while the text between them is kept.
	 *
	 * @return int
	 */
	public function testRemoveHtmlComment()
	{
		global $conf,$user,$langs,$db;
		$conf=$this->savconf;
		$user=$this->savuser;
		$langs=$this->savlangs;
		$db=$this->savdb;

		$result=removeHtmlComment('abc<!--[if lt IE 8]>aaaa<![endif]-->def');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('abcdef', $result, 'Test 1');

		$result=removeHtmlComment('abc<!--[if lt IE 8]>aa-->bb<!--aa<![endif]-->def');
		print __METHOD__." result=".$result."\n";
		$this->assertEquals('abcbbdef', $result, 'Test 2');

		return 1;
	}
}