From f84817f0f6e1dd382970eac111803e247a2510c2 Mon Sep 17 00:00:00 2001 From: Laurent Destailleur Date: Tue, 11 May 2021 23:49:33 +0200 Subject: [PATCH] FIX rss module on servers with allow_url_fopen=0 --- htdocs/core/class/rssparser.class.php | 350 +++++++++++++------------- 1 file changed, 176 insertions(+), 174 deletions(-) diff --git a/htdocs/core/class/rssparser.class.php b/htdocs/core/class/rssparser.class.php index 578e180edff..5f5676601a0 100644 --- a/htdocs/core/class/rssparser.class.php +++ b/htdocs/core/class/rssparser.class.php @@ -1,19 +1,19 @@ * -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 3 of the License, or -* (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program. If not, see . -*/ + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ /** * \file htdocs/core/class/rssparser.class.php @@ -181,7 +181,7 @@ class RssParser * @param string $urlRSS Url to parse * @param int $maxNb Max nb of records to get (0 for no limit) * @param int $cachedelay 0=No cache, nb of seconds we accept cache files (cachedir must also be defined) - * @param string $cachedir Directory where to save cache file + * @param string $cachedir Directory where to save cache file (For example $conf->externalrss->dir_temp) * @return int <0 if KO, >0 if OK */ public function parser($urlRSS, $maxNb = 0, $cachedelay = 60, $cachedir = '') @@ -189,13 +189,13 @@ class RssParser global $conf; include_once DOL_DOCUMENT_ROOT.'/core/lib/files.lib.php'; + include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php'; $rss = ''; $str = ''; // This will contain content of feed // Check parameters - if (!dol_is_url($urlRSS)) - { + if (!dol_is_url($urlRSS)) { $this->error = "ErrorBadUrl"; return -1; } @@ -209,11 +209,9 @@ class RssParser // Search into cache $foundintocache = 0; - if ($cachedelay > 0 && $cachedir) - { + if ($cachedelay > 0 && $cachedir) { $filedate = dol_filemtime($newpathofdestfile); - if ($filedate >= ($nowgmt - $cachedelay)) - { + if ($filedate >= ($nowgmt - $cachedelay)) { //dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we use it."); $foundintocache = 1; @@ -224,32 +222,22 @@ class RssParser } // Load file into $str - if ($foundintocache) // Cache file found and is not too old - { + if ($foundintocache) { // Cache file found and is not too old $str = file_get_contents($newpathofdestfile); } else { try { - ini_set("user_agent", "Dolibarr ERP-CRM RSS reader"); - ini_set("max_execution_time", $conf->global->MAIN_USE_RESPONSE_TIMEOUT); - ini_set("default_socket_timeout", $conf->global->MAIN_USE_RESPONSE_TIMEOUT); - - $opts = array('http'=>array('method'=>"GET")); - if (!empty($conf->global->MAIN_USE_CONNECT_TIMEOUT)) $opts['http']['timeout'] = $conf->global->MAIN_USE_CONNECT_TIMEOUT; - if (!empty($conf->global->MAIN_PROXY_USE)) $opts['http']['proxy'] = 'tcp://'.$conf->global->MAIN_PROXY_HOST.':'.$conf->global->MAIN_PROXY_PORT; - //var_dump($opts);exit; - $context = stream_context_create($opts); - - $str = file_get_contents($this->_urlRSS, false, $context); + $result = getURLContent($this->_urlRSS, 'GET', '', 1, array(), array('http', 'https'), 0); + if (!empty($result['content'])) { + $str = $result['content']; + } } catch (Exception $e) { print 'Error retrieving URL '.$this->_urlRSS.' - '.$e->getMessage(); } } - if ($str !== false) - { + if ($str !== false) { // Convert $str into xml - if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { + if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { //print 'xx'.LIBXML_NOCDATA; libxml_use_internal_errors(false); $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA); @@ -276,19 +264,20 @@ class RssParser } // If $rss loaded - if ($rss) - { + if ($rss) { // Save file into cache - if (empty($foundintocache) && $cachedir) - { + if (empty($foundintocache) && $cachedir) { dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is saved onto disk."); - if (!dol_is_dir($cachedir)) dol_mkdir($cachedir); + if (!dol_is_dir($cachedir)) { + dol_mkdir($cachedir); + } $fp = fopen($newpathofdestfile, 'w'); - if ($fp) - { + if ($fp) { fwrite($fp, $str); fclose($fp); - if (!empty($conf->global->MAIN_UMASK)) $newmask = $conf->global->MAIN_UMASK; + if (!empty($conf->global->MAIN_UMASK)) { + $newmask = $conf->global->MAIN_UMASK; + } @chmod($newpathofdestfile, octdec($newmask)); $this->_lastfetchdate = $nowgmt; @@ -299,10 +288,11 @@ class RssParser unset($str); // Free memory - if (empty($rss->_format)) // If format not detected automatically - { + if (empty($rss->_format)) { // If format not detected automatically $rss->_format = 'rss'; - if (empty($rss->channel)) $rss->_format = 'atom'; + if (empty($rss->channel)) { + $rss->_format = 'atom'; + } } $items = array(); @@ -311,46 +301,98 @@ class RssParser if ($rss->_format == 'rss') { //var_dump($rss); if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { - if (!empty($rss->channel->language)) $this->_language = (string) $rss->channel->language; - if (!empty($rss->channel->generator)) $this->_generator = (string) $rss->channel->generator; - if (!empty($rss->channel->copyright)) $this->_copyright = (string) $rss->channel->copyright; - if (!empty($rss->channel->lastbuilddate)) $this->_lastbuilddate = (string) $rss->channel->lastbuilddate; - if (!empty($rss->channel->image->url[0])) $this->_imageurl = (string) $rss->channel->image->url[0]; - if (!empty($rss->channel->link)) $this->_link = (string) $rss->channel->link; - if (!empty($rss->channel->title)) $this->_title = (string) $rss->channel->title; - if (!empty($rss->channel->description)) $this->_description = (string) $rss->channel->description; + if (!empty($rss->channel->language)) { + $this->_language = (string) $rss->channel->language; + } + if (!empty($rss->channel->generator)) { + $this->_generator = (string) $rss->channel->generator; + } + if (!empty($rss->channel->copyright)) { + $this->_copyright = (string) $rss->channel->copyright; + } + if (!empty($rss->channel->lastbuilddate)) { + $this->_lastbuilddate = (string) $rss->channel->lastbuilddate; + } + if (!empty($rss->channel->image->url[0])) { + $this->_imageurl = (string) $rss->channel->image->url[0]; + } + if (!empty($rss->channel->link)) { + $this->_link = (string) $rss->channel->link; + } + if (!empty($rss->channel->title)) { + $this->_title = (string) $rss->channel->title; + } + if (!empty($rss->channel->description)) { + $this->_description = (string) $rss->channel->description; + } } else { //var_dump($rss->channel); - if (!empty($rss->channel['language'])) $this->_language = (string) $rss->channel['language']; - if (!empty($rss->channel['generator'])) $this->_generator = (string) $rss->channel['generator']; - if (!empty($rss->channel['copyright'])) $this->_copyright = (string) $rss->channel['copyright']; - if (!empty($rss->channel['lastbuilddate'])) $this->_lastbuilddate = (string) $rss->channel['lastbuilddate']; - if (!empty($rss->image['url'])) $this->_imageurl = (string) $rss->image['url']; - if (!empty($rss->channel['link'])) $this->_link = (string) $rss->channel['link']; - if (!empty($rss->channel['title'])) $this->_title = (string) $rss->channel['title']; - if (!empty($rss->channel['description'])) $this->_description = (string) $rss->channel['description']; + if (!empty($rss->channel['language'])) { + $this->_language = (string) $rss->channel['language']; + } + if (!empty($rss->channel['generator'])) { + $this->_generator = (string) $rss->channel['generator']; + } + if (!empty($rss->channel['copyright'])) { + $this->_copyright = (string) $rss->channel['copyright']; + } + if (!empty($rss->channel['lastbuilddate'])) { + $this->_lastbuilddate = (string) $rss->channel['lastbuilddate']; + } + if (!empty($rss->image['url'])) { + $this->_imageurl = (string) $rss->image['url']; + } + if (!empty($rss->channel['link'])) { + $this->_link = (string) $rss->channel['link']; + } + if (!empty($rss->channel['title'])) { + $this->_title = (string) $rss->channel['title']; + } + if (!empty($rss->channel['description'])) { + $this->_description = (string) $rss->channel['description']; + } } - if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) $items = $rss->channel->item; // With simplexml - else $items = $rss->items; // With xmlparse + if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { + $items = $rss->channel->item; // With simplexml + } else { + $items = $rss->items; // With xmlparse + } //var_dump($items);exit; } elseif ($rss->_format == 'atom') { //var_dump($rss); - if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - if (!empty($rss->generator)) $this->_generator = (string) $rss->generator; - if (!empty($rss->lastbuilddate)) $this->_lastbuilddate = (string) $rss->modified; - if (!empty($rss->link->href)) $this->_link = (string) $rss->link->href; - if (!empty($rss->title)) $this->_title = (string) $rss->title; - if (!empty($rss->description)) $this->_description = (string) $rss->description; + if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { + if (!empty($rss->generator)) { + $this->_generator = (string) $rss->generator; + } + if (!empty($rss->lastbuilddate)) { + $this->_lastbuilddate = (string) $rss->modified; + } + if (!empty($rss->link->href)) { + $this->_link = (string) $rss->link->href; + } + if (!empty($rss->title)) { + $this->_title = (string) $rss->title; + } + if (!empty($rss->description)) { + $this->_description = (string) $rss->description; + } } else { //if (!empty($rss->channel['rss_language'])) $this->_language = (string) $rss->channel['rss_language']; - if (!empty($rss->channel['generator'])) $this->_generator = (string) $rss->channel['generator']; + if (!empty($rss->channel['generator'])) { + $this->_generator = (string) $rss->channel['generator']; + } //if (!empty($rss->channel['rss_copyright'])) $this->_copyright = (string) $rss->channel['rss_copyright']; - if (!empty($rss->channel['modified'])) $this->_lastbuilddate = (string) $rss->channel['modified']; + if (!empty($rss->channel['modified'])) { + $this->_lastbuilddate = (string) $rss->channel['modified']; + } //if (!empty($rss->image['rss_url'])) $this->_imageurl = (string) $rss->image['rss_url']; - if (!empty($rss->channel['link'])) $this->_link = (string) $rss->channel['link']; - if (!empty($rss->channel['title'])) $this->_title = (string) $rss->channel['title']; + if (!empty($rss->channel['link'])) { + $this->_link = (string) $rss->channel['link']; + } + if (!empty($rss->channel['title'])) { + $this->_title = (string) $rss->channel['title']; + } //if (!empty($rss->channel['rss_description'])) $this->_description = (string) $rss->channel['rss_description']; if (!empty($rss->channel)) { @@ -358,9 +400,12 @@ class RssParser } } if (!empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { - $tmprss = xml2php($rss); $items = $tmprss['entry']; - } // With simplexml - else $items = $rss->items; // With xmlparse + $tmprss = xml2php($rss); + $items = $tmprss['entry']; + } else { + // With simplexml + $items = $rss->items; // With xmlparse + } //var_dump($items);exit; } @@ -436,7 +481,9 @@ class RssParser $i++; - if ($i > $maxNb) break; // We get all records we want + if ($i > $maxNb) { + break; // We get all records we want + } } } @@ -466,18 +513,15 @@ class RssParser // check for a namespace, and split if found $ns = false; - if (strpos($element, ':')) - { + if (strpos($element, ':')) { list($ns, $el) = explode(':', $element, 2); } - if ($ns and $ns != 'rdf') - { + if ($ns and $ns != 'rdf') { $this->current_namespace = $ns; } // if feed type isn't set, then this is first element of feed identify feed from root element - if (empty($this->_format)) - { + if (empty($this->_format)) { if ($el == 'rdf') { $this->_format = 'rss'; $this->feed_version = '1.0'; @@ -492,75 +536,54 @@ class RssParser return; } - if ($el == 'channel') - { + if ($el == 'channel') { $this->inchannel = true; - } elseif ($el == 'item' or $el == 'entry') - { + } elseif ($el == 'item' or $el == 'entry') { $this->initem = true; if (isset($attrs['rdf:about'])) { $this->current_item['about'] = $attrs['rdf:about']; } - } - - // if we're in the default namespace of an RSS feed, - // record textinput or image fields - elseif ( - $this->_format == 'rss' and - $this->current_namespace == '' and - $el == 'textinput' ) - { - $this->intextinput = true; - } elseif ( - $this->_format == 'rss' and - $this->current_namespace == '' and - $el == 'image' ) - { - $this->inimage = true; - } - - // handle atom content constructs - elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) - { + } elseif ($this->_format == 'rss' and + $this->current_namespace == '' and + $el == 'textinput') { + // if we're in the default namespace of an RSS feed, + // record textinput or image fields + $this->intextinput = true; + } elseif ($this->_format == 'rss' and + $this->current_namespace == '' and + $el == 'image') { + $this->inimage = true; + } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) { + // handle atom content constructs // avoid clashing w/ RSS mod_content if ($el == 'content') { $el = 'atom_content'; } $this->incontent = $el; - } - - // if inside an Atom content construct (e.g. content or summary) field treat tags as text - elseif ($this->_format == 'atom' and $this->incontent) - { + } elseif ($this->_format == 'atom' and $this->incontent) { + // if inside an Atom content construct (e.g. content or summary) field treat tags as text // if tags are inlined, then flatten $attrs_str = join(' ', array_map('map_attrs', array_keys($attrs), array_values($attrs))); $this->append_content("<$element $attrs_str>"); array_unshift($this->stack, $el); - } - - // Atom support many links per containging element. - // Magpie treats link elements of type rel='alternate' - // as being equivalent to RSS's simple link element. - // - elseif ($this->_format == 'atom' and $el == 'link') - { - if (isset($attrs['rel']) && $attrs['rel'] == 'alternate') - { + } elseif ($this->_format == 'atom' and $el == 'link') { + // Atom support many links per containging element. + // Magpie treats link elements of type rel='alternate' + // as being equivalent to RSS's simple link element. + if (isset($attrs['rel']) && $attrs['rel'] == 'alternate') { $link_el = 'link'; - } elseif (!isset($attrs['rel'])) - { + } elseif (!isset($attrs['rel'])) { $link_el = 'link'; } else { $link_el = 'link_'.$attrs['rel']; } $this->append($link_el, $attrs['href']); - } - // set stack[0] to current element - else { + } else { + // set stack[0] to current element array_unshift($this->stack, $el); } } @@ -577,8 +600,7 @@ class RssParser public function feed_cdata($p, $text) { // phpcs:enable - if ($this->_format == 'atom' and $this->incontent) - { + if ($this->_format == 'atom' and $this->incontent) { $this->append_content($text); } else { $current_el = join('_', array_reverse($this->stack)); @@ -599,28 +621,22 @@ class RssParser // phpcs:enable $el = strtolower($el); - if ($el == 'item' or $el == 'entry') - { + if ($el == 'item' or $el == 'entry') { $this->items[] = $this->current_item; $this->current_item = array(); $this->initem = false; - } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput') - { + } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput') { $this->intextinput = false; - } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image') - { + } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image') { $this->inimage = false; - } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) - { + } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) { $this->incontent = false; - } elseif ($el == 'channel' or $el == 'feed') - { + } elseif ($el == 'channel' or $el == 'feed') { $this->inchannel = false; } elseif ($this->_format == 'atom' and $this->incontent) { // balance tags properly // note: i don't think this is actually neccessary - if ($this->stack[0] == $el) - { + if ($this->stack[0] == $el) { $this->append_content(""); } else { $this->append_content("<$el />"); @@ -679,8 +695,7 @@ class RssParser if (!$el) { return; } - if ($this->current_namespace) - { + if ($this->current_namespace) { if ($this->initem) { $this->concat($this->current_item[$this->current_namespace][$el], $text); } elseif ($this->inchannel) { @@ -714,11 +729,9 @@ class RssParser { $result = ""; - if (isset($item['summary'])) - { + if (isset($item['summary'])) { $result = $item['summary']; - } elseif (isset($item['atom_content'])) - { + } elseif (isset($item['atom_content'])) { $result = $item['atom_content']; } @@ -728,8 +741,7 @@ class RssParser $result = str_replace("\n", "", $result); - if (strlen($result) > $maxlength) - { + if (strlen($result) > $maxlength) { $result = substr($result, 0, $maxlength); $result .= "..."; } @@ -745,28 +757,23 @@ class RssParser */ private function getAtomImageUrl(array $feed) { - if (isset($feed['icon'])) - { + if (isset($feed['icon'])) { return $feed['logo']; } - if (isset($feed['icon'])) - { + if (isset($feed['icon'])) { return $feed['logo']; } - if (isset($feed['webfeeds:logo'])) - { + if (isset($feed['webfeeds:logo'])) { return $feed['webfeeds:logo']; } - if (isset($feed['webfeeds:icon'])) - { + if (isset($feed['webfeeds:icon'])) { return $feed['webfeeds:icon']; } - if (isset($feed['webfeeds:wordmark'])) - { + if (isset($feed['webfeeds:wordmark'])) { return $feed['webfeeds:wordmark']; } @@ -786,27 +793,23 @@ function xml2php($xml) $fils = 0; $tab = false; $array = array(); - foreach ($xml->children() as $key => $value) - { + foreach ($xml->children() as $key => $value) { $child = xml2php($value); //To deal with the attributes - foreach ($value->attributes() as $ak=>$av) - { + foreach ($value->attributes() as $ak => $av) { $child[$ak] = (string) $av; } //Let see if the new child is not in the array - if ($tab === false && in_array($key, array_keys($array))) - { + if ($tab === false && in_array($key, array_keys($array))) { //If this element is already in the array we will create an indexed array $tmp = $array[$key]; $array[$key] = null; $array[$key][] = $tmp; $array[$key][] = $child; $tab = true; - } elseif ($tab === true) - { + } elseif ($tab === true) { //Add an element in an existing array $array[$key][] = $child; } else { @@ -818,8 +821,7 @@ function xml2php($xml) } - if ($fils == 0) - { + if ($fils == 0) { return (string) $xml; }