diff --git a/htdocs/core/class/rssparser.class.php b/htdocs/core/class/rssparser.class.php index 90a2f70550a..054f2724eec 100755 --- a/htdocs/core/class/rssparser.class.php +++ b/htdocs/core/class/rssparser.class.php @@ -1,642 +1,642 @@ - - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/** - * \file htdocs/core/class/rssparser.class.php - * \ingroup core - * \brief File of class to parse rss feeds - */ -class RssParser -{ - var $db; - var $error; - - protected $_format=''; - protected $_urlRSS; - protected $_language; - protected $_generator; - protected $_copyright; - protected $_lastbuilddate; - protected $_imageurl; - protected $_link; - protected $_title; - protected $_description; - protected $_lastfetchdate; // Last successful fetch - protected $_rssarray=array(); - - // Accessors - public function getFormat() { return $this->_format; } - public function getUrlRss() { return $this->_urlRSS; } - public function getLanguage() { return $this->_language; } - public function getGenerator() { return $this->_generator; } - public function getCopyright() { return $this->_copyright; } - public function getLastBuildDate() { return $this->_lastbuilddate; } - public function getImageUrl() { return $this->_imageurl; } - public function getLink() { return $this->_link; } - public function getTitle() { return $this->_title; } - public function getDescription() { return $this->_description; } - public function getLastFetchDate() { return $this->_lastfetchdate; } - public function getItems() { return $this->_rssarray; } - - - // For parsing with xmlparser - var $stack = array(); // parser stack - var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); - - - /** - * Constructor - */ - public function RssParser($db) - { - $this->db=$db; - } - - - /** - * Parse rss URL - * - * @param urlRSS Url to parse - * @param maxNb Max nb of records to get (0 for no limit) - * @param cachedelay 0=No cache, nb of seconds we accept cache files (cachedir must also be defined) - * @param cachedir Directory where to save cache file - * @return int <0 if KO, >0 if OK - */ - public function parser($urlRSS, $maxNb=0, $cachedelay=60, $cachedir='') - { - include_once(DOL_DOCUMENT_ROOT.'/lib/files.lib.php'); - - $str=''; // This will contain content of feed - - // Check parameters - if (! dol_is_url($urlRSS)) - { - $this->error="ErrorBadUrl"; - return -1; - } - - $this->_urlRSS = $urlRSS; - $newpathofdestfile=$cachedir.'/'.md5($this->_urlRSS); - $newmask='0644'; - - //dol_syslog("RssPArser::parser parse url=".$urlRSS." => cache file=".$newpathofdestfile); - $nowgmt = dol_now(); - - // Search into cache - $foundintocache=0; - if ($cachedelay > 0 && $cachedir) - { - $filedate=dol_filemtime($newpathofdestfile); - if ($filedate >= ($nowgmt - $cachedelay)) - { - //dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we use it."); - $foundintocache=1; - - $this->_lastfetchdate=$filedate; - } - else - { - dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not found or older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we can't use it."); - } - } - - // Load file into $str - if ($foundintocache) // Cache file found and is not too old - { - $str = file_get_contents($newpathofdestfile); - } - else - { - try { - ini_set("user_agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"); - ini_set("max_execution_time", 10); - $str = file_get_contents($this->_urlRSS); - } - catch (Exception $e) { - print 'Error retrieving URL '.$this->urlRSS.' - '.$e->getMessage(); - } - } - - // Convert $str into xml - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - //print 'xx'.LIBXML_NOCDATA; - libxml_use_internal_errors(false); - $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA); - } - else - { - $xmlparser=xml_parser_create(''); - if (!is_resource($xmlparser)) { $this->error="ErrorFailedToCreateParser"; return -1; } - - xml_set_object( $xmlparser, $this ); - xml_set_element_handler($xmlparser, 'feed_start_element', 'feed_end_element' ); - xml_set_character_data_handler( $xmlparser, 'feed_cdata' ); - $status = xml_parse( $xmlparser, $str ); - xml_parser_free( $xmlparser ); - $rss=$this; - //var_dump($this);exit; - } - - // If $rss loaded - if ($rss) - { - // Save file into cache - if (empty($foundintocache) && $cachedir) - { - dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is saved onto disk."); - if (! dol_is_dir($cachedir)) dol_mkdir($cachedir); - $fp = fopen($newpathofdestfile, 'w'); - fwrite($fp, $str); - fclose($fp); - if (! empty($conf->global->MAIN_UMASK)) $newmask=$conf->global->MAIN_UMASK; - @chmod($newpathofdestfile, octdec($newmask)); - - $this->_lastfetchdate=$nowgmt; - } - - unset($str); // Free memory - - if (empty($rss->_format)) // If format not detected automatically - { - $rss->_format='rss'; - if (empty($rss->channel)) $rss->_format='atom'; - } - - $items=array(); - - // Save description entries - if ($rss->_format == 'rss') - { - //var_dump($rss); - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - if (!empty($rss->channel->language)) $this->_language = (string) $rss->channel->language; - if (!empty($rss->channel->generator)) $this->_generator = (string) $rss->channel->generator; - if (!empty($rss->channel->copyright)) $this->_copyright = (string) $rss->channel->copyright; - if (!empty($rss->channel->lastbuilddate)) $this->_lastbuilddate = (string) $rss->channel->lastbuilddate; - if (!empty($rss->channel->image->url[0])) $this->_imageurl = (string) $rss->channel->image->url[0]; - if (!empty($rss->channel->link)) $this->_link = (string) $rss->channel->link; - if (!empty($rss->channel->title)) $this->_title = (string) $rss->channel->title; - if (!empty($rss->channel->description)) $this->_description = (string) $rss->channel->description; - } - else - { - if (!empty($rss->channel['rss_language'])) $this->_language = (string) $rss->channel['rss_language']; - if (!empty($rss->channel['rss_generator'])) $this->_generator = (string) $rss->channel['rss_generator']; - if (!empty($rss->channel['rss_copyright'])) $this->_copyright = (string) $rss->channel['rss_copyright']; - if (!empty($rss->channel['rss_lastbuilddate'])) $this->_lastbuilddate = (string) $rss->channel['rss_lastbuilddate']; - if (!empty($rss->image['rss_url'])) $this->_imageurl = (string) $rss->image['rss_url']; - if (!empty($rss->channel['rss_link'])) $this->_link = (string) $rss->channel['rss_link']; - if (!empty($rss->channel['rss_title'])) $this->_title = (string) $rss->channel['rss_title']; - if (!empty($rss->channel['rss_description'])) $this->_description = (string) $rss->channel['rss_description']; - } - - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) $items=$rss->channel->item; // With simplexml - else $items=$rss->items; // With xmlparse - //var_dump($items);exit; - } - else if ($rss->_format == 'atom') - { - //var_dump($rss); - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - if (!empty($rss->generator)) $this->_generator = (string) $rss->generator; - if (!empty($rss->lastbuilddate)) $this->_lastbuilddate = (string) $rss->modified; - if (!empty($rss->link->href)) $this->_link = (string) $rss->link->href; - if (!empty($rss->title)) $this->_title = (string) $rss->title; - if (!empty($rss->description)) $this->_description = (string) $rss->description; - } - else - { - //if (!empty($rss->channel['rss_language'])) $this->_language = (string) $rss->channel['rss_language']; - if (!empty($rss->channel['generator'])) $this->_generator = (string) $rss->channel['generator']; - //if (!empty($rss->channel['rss_copyright'])) $this->_copyright = (string) $rss->channel['rss_copyright']; - if (!empty($rss->channel['modified'])) $this->_lastbuilddate = (string) $rss->channel['modified']; - //if (!empty($rss->image['rss_url'])) $this->_imageurl = (string) $rss->image['rss_url']; - if (!empty($rss->channel['link'])) $this->_link = (string) $rss->channel['link']; - if (!empty($rss->channel['title'])) $this->_title = (string) $rss->channel['title']; - //if (!empty($rss->channel['rss_description'])) $this->_description = (string) $rss->channel['rss_description']; - } - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { $tmprss=xml2php($rss); $items=$tmprss['entry'];} // With simplexml - else $items=$rss->items; // With xmlparse - //var_dump($items);exit; - } - - $i = 0; - // Loop on each record - foreach($items as $item) - { - //var_dump($item);exit; - if ($rss->_format == 'rss') - { - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - $itemLink = (string) $item->link; - $itemTitle = (string) $item->title; - $itemDescription = (string) $item->description; - $itemPubDate = (string) $item->pubDate; - $itemId = ''; - $itemAuthor = ''; - } - else - { - $itemLink = (string) $item['rss_link']; - $itemTitle = (string) $item['rss_title']; - $itemDescription = (string) $item['rss_description']; - $itemPubDate = (string) $item['rss_pubdate']; - $itemId = (string) $item['rss_guid']; - $itemAuthor = (string) $item['rss_author']; - } - - // Loop on each category - $itemCategory=array(); - if (is_array($item->category)) - { - foreach ($item->category as $cat) - { - $itemCategory[] = (string) $cat; - } - } - } - else if ($rss->_format == 'atom') - { - if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) - { - $itemLink = (string) $item['link']['href']; - $itemTitle = (string) $item['title']; - $itemDescription = (string) $item['summary']; - $itemPubDate = (string) $item['created']; - $itemId = (string) $item['id']; - $itemAuthor = (string) ($item['author']?$item['author']:$item['author_name']); - } - else - { - $itemLink = (string) $item['link']['href']; - $itemTitle = (string) $item['title']; - $itemDescription = (string) $item['summary']; - $itemPubDate = (string) $item['created']; - $itemId = (string) $item['id']; - $itemAuthor = (string) ($item['author']?$item['author']:$item['author_name']); - } - } - else print 'ErrorBadFeedFormat'; - - // Add record to result array - $this->_rssarray[$i] = array( - 'link'=>$itemLink, - 'title'=>$itemTitle, - 'description'=>$itemDescription, - 'pubDate'=>$itemPubDate, - 'category'=>$itemCategory, - 'id'=>$itemId, - 'author'=>$itemAuthor); - - $i++; - - if ($i > $maxNb) break; // We get all records we want - } - - return 1; - } - else - { - $this->error='ErrorFailedToLoadRSSFile'; - return -1; - } - } - - - - /** - * Triggered when opened tag is found - * - * @param $p - * @param $element Tag - * @param $attrs Attributes of tags - */ - function feed_start_element($p, $element, &$attrs) - { - $el = $element = strtolower($element); - $attrs = array_change_key_case($attrs, CASE_LOWER); - - // check for a namespace, and split if found - $ns = false; - if ( strpos( $element, ':' ) ) { - list($ns, $el) = explode( ':', $element, 2); - } - if ( $ns and $ns != 'rdf' ) { - $this->current_namespace = $ns; - } - - // if feed type isn't set, then this is first element of feed identify feed from root element - if (empty($this->_format)) - { - if ( $el == 'rdf' ) { - $this->_format = 'rss'; - $this->feed_version = '1.0'; - } - elseif ( $el == 'rss' ) { - $this->_format = 'rss'; - $this->feed_version = $attrs['version']; - } - elseif ( $el == 'feed' ) { - $this->_format = 'atom'; - $this->feed_version = $attrs['version']; - $this->inchannel = true; - } - return; - } - - if ( $el == 'channel' ) - { - $this->inchannel = true; - } - elseif ($el == 'item' or $el == 'entry' ) - { - $this->initem = true; - if ( isset($attrs['rdf:about']) ) { - $this->current_item['about'] = $attrs['rdf:about']; - } - } - - // if we're in the default namespace of an RSS feed, - // record textinput or image fields - elseif ( - $this->_format == 'rss' and - $this->current_namespace == '' and - $el == 'textinput' ) - { - $this->intextinput = true; - } - - elseif ( - $this->_format == 'rss' and - $this->current_namespace == '' and - $el == 'image' ) - { - $this->inimage = true; - } - - # handle atom content constructs - elseif ( $this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) ) - { - // avoid clashing w/ RSS mod_content - if ($el == 'content' ) { - $el = 'atom_content'; - } - - $this->incontent = $el; - - - } - - // if inside an Atom content construct (e.g. content or summary) field treat tags as text - elseif ($this->_format == 'atom' and $this->incontent ) - { - // if tags are inlined, then flatten - $attrs_str = join(' ', - array_map('map_attrs', - array_keys($attrs), - array_values($attrs) ) ); - - $this->append_content( "<$element $attrs_str>" ); - - array_unshift( $this->stack, $el ); - } - - // Atom support many links per containging element. - // Magpie treats link elements of type rel='alternate' - // as being equivalent to RSS's simple link element. - // - elseif ($this->_format == 'atom' and $el == 'link' ) - { - if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) - { - $link_el = 'link'; - } - else { - $link_el = 'link_' . $attrs['rel']; - } - - $this->append($link_el, $attrs['href']); - } - // set stack[0] to current element - else { - array_unshift($this->stack, $el); - } - } - - - /** - * Triggered when CDATA is found - * - * @param $p - * @param $element Tag - * @param $attrs Attributes of tags - */ - function feed_cdata ($p, $text) { - if ($this->_format == 'atom' and $this->incontent) - { - $this->append_content( $text ); - } - else { - $current_el = join('_', array_reverse($this->stack)); - $this->append($current_el, $text); - } - } - - /** - * Triggered when closed tag is found - * - * @param $p - * @param $element Tag - */ - function feed_end_element ($p, $el) { - $el = strtolower($el); - - if ( $el == 'item' or $el == 'entry' ) - { - $this->items[] = $this->current_item; - $this->current_item = array(); - $this->initem = false; - } - elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput' ) - { - $this->intextinput = false; - } - elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image' ) - { - $this->inimage = false; - } - elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) ) - { - $this->incontent = false; - } - elseif ($el == 'channel' or $el == 'feed' ) - { - $this->inchannel = false; - } - elseif ($this->_format == 'atom' and $this->incontent ) { - // balance tags properly - // note: i don't think this is actually neccessary - if ( $this->stack[0] == $el ) - { - $this->append_content(""); - } - else { - $this->append_content("<$el />"); - } - - array_shift( $this->stack ); - } - else { - array_shift( $this->stack ); - } - - $this->current_namespace = false; - } - - - /** - * To concat 2 string with no warning if an operand is not defined - * - * @param $str1 - * @param $str2 - */ - function concat (&$str1, $str2="") { - if (!isset($str1) ) { - $str1=""; - } - $str1 .= $str2; - } - - /** - */ - function append_content($text) { - if ( $this->initem ) { - $this->concat( $this->current_item[ $this->incontent ], $text ); - } - elseif ( $this->inchannel ) { - $this->concat( $this->channel[ $this->incontent ], $text ); - } - } - - /** - * smart append - field and namespace aware - */ - function append($el, $text) { - if (!$el) { - return; - } - if ( $this->current_namespace ) - { - if ( $this->initem ) { - $this->concat( - $this->current_item[ $this->current_namespace ][ $el ], $text); - } - elseif ($this->inchannel) { - $this->concat( - $this->channel[ $this->current_namespace][ $el ], $text ); - } - elseif ($this->intextinput) { - $this->concat( - $this->textinput[ $this->current_namespace][ $el ], $text ); - } - elseif ($this->inimage) { - $this->concat( - $this->image[ $this->current_namespace ][ $el ], $text ); - } - } - else { - if ( $this->initem ) { - $this->concat( - $this->current_item[ $el ], $text); - } - elseif ($this->intextinput) { - $this->concat( - $this->textinput[ $el ], $text ); - } - elseif ($this->inimage) { - $this->concat( - $this->image[ $el ], $text ); - } - elseif ($this->inchannel) { - $this->concat( - $this->channel[ $el ], $text ); - } - - } - } - -} - - -/** - * Function to convert an XML object into an array - */ -function xml2php($xml) -{ - $fils = 0; - $tab = false; - $array = array(); - foreach($xml->children() as $key => $value) - { - $child = xml2php($value); - - //To deal with the attributes - foreach($value->attributes() as $ak=>$av) - { - $child[$ak] = (string)$av; - - } - - //Let see if the new child is not in the array - if($tab==false && in_array($key,array_keys($array))) - { - //If this element is already in the array we will create an indexed array - $tmp = $array[$key]; - $array[$key] = NULL; - $array[$key][] = $tmp; - $array[$key][] = $child; - $tab = true; - } - elseif($tab == true) - { - //Add an element in an existing array - $array[$key][] = $child; - } - else - { - //Add a simple element - $array[$key] = $child; - } - - $fils++; - } - - - if($fils==0) - { - return (string)$xml; - } - - return $array; - -} - + + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * \file htdocs/core/class/rssparser.class.php + * \ingroup core + * \brief File of class to parse rss feeds + */ +class RssParser +{ + var $db; + var $error; + + protected $_format=''; + protected $_urlRSS; + protected $_language; + protected $_generator; + protected $_copyright; + protected $_lastbuilddate; + protected $_imageurl; + protected $_link; + protected $_title; + protected $_description; + protected $_lastfetchdate; // Last successful fetch + protected $_rssarray=array(); + + // Accessors + public function getFormat() { return $this->_format; } + public function getUrlRss() { return $this->_urlRSS; } + public function getLanguage() { return $this->_language; } + public function getGenerator() { return $this->_generator; } + public function getCopyright() { return $this->_copyright; } + public function getLastBuildDate() { return $this->_lastbuilddate; } + public function getImageUrl() { return $this->_imageurl; } + public function getLink() { return $this->_link; } + public function getTitle() { return $this->_title; } + public function getDescription() { return $this->_description; } + public function getLastFetchDate() { return $this->_lastfetchdate; } + public function getItems() { return $this->_rssarray; } + + + // For parsing with xmlparser + var $stack = array(); // parser stack + var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); + + + /** + * Constructor + */ + public function RssParser($db) + { + $this->db=$db; + } + + + /** + * Parse rss URL + * + * @param urlRSS Url to parse + * @param maxNb Max nb of records to get (0 for no limit) + * @param cachedelay 0=No cache, nb of seconds we accept cache files (cachedir must also be defined) + * @param cachedir Directory where to save cache file + * @return int <0 if KO, >0 if OK + */ + public function parser($urlRSS, $maxNb=0, $cachedelay=60, $cachedir='') + { + include_once(DOL_DOCUMENT_ROOT.'/lib/files.lib.php'); + + $str=''; // This will contain content of feed + + // Check parameters + if (! dol_is_url($urlRSS)) + { + $this->error="ErrorBadUrl"; + return -1; + } + + $this->_urlRSS = $urlRSS; + $newpathofdestfile=$cachedir.'/'.md5($this->_urlRSS); + $newmask='0644'; + + //dol_syslog("RssPArser::parser parse url=".$urlRSS." => cache file=".$newpathofdestfile); + $nowgmt = dol_now(); + + // Search into cache + $foundintocache=0; + if ($cachedelay > 0 && $cachedir) + { + $filedate=dol_filemtime($newpathofdestfile); + if ($filedate >= ($nowgmt - $cachedelay)) + { + //dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we use it."); + $foundintocache=1; + + $this->_lastfetchdate=$filedate; + } + else + { + dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not found or older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we can't use it."); + } + } + + // Load file into $str + if ($foundintocache) // Cache file found and is not too old + { + $str = file_get_contents($newpathofdestfile); + } + else + { + try { + ini_set("user_agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)"); + ini_set("max_execution_time", 10); + $str = file_get_contents($this->_urlRSS); + } + catch (Exception $e) { + print 'Error retrieving URL '.$this->urlRSS.' - '.$e->getMessage(); + } + } + + // Convert $str into xml + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) + { + //print 'xx'.LIBXML_NOCDATA; + libxml_use_internal_errors(false); + $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA); + } + else + { + $xmlparser=xml_parser_create(''); + if (!is_resource($xmlparser)) { $this->error="ErrorFailedToCreateParser"; return -1; } + + xml_set_object( $xmlparser, $this ); + xml_set_element_handler($xmlparser, 'feed_start_element', 'feed_end_element' ); + xml_set_character_data_handler( $xmlparser, 'feed_cdata' ); + $status = xml_parse( $xmlparser, $str ); + xml_parser_free( $xmlparser ); + $rss=$this; + //var_dump($this);exit; + } + + // If $rss loaded + if ($rss) + { + // Save file into cache + if (empty($foundintocache) && $cachedir) + { + dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is saved onto disk."); + if (! dol_is_dir($cachedir)) dol_mkdir($cachedir); + $fp = fopen($newpathofdestfile, 'w'); + fwrite($fp, $str); + fclose($fp); + if (! empty($conf->global->MAIN_UMASK)) $newmask=$conf->global->MAIN_UMASK; + @chmod($newpathofdestfile, octdec($newmask)); + + $this->_lastfetchdate=$nowgmt; + } + + unset($str); // Free memory + + if (empty($rss->_format)) // If format not detected automatically + { + $rss->_format='rss'; + if (empty($rss->channel)) $rss->_format='atom'; + } + + $items=array(); + + // Save description entries + if ($rss->_format == 'rss') + { + //var_dump($rss); + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) + { + if (!empty($rss->channel->language)) $this->_language = (string) $rss->channel->language; + if (!empty($rss->channel->generator)) $this->_generator = (string) $rss->channel->generator; + if (!empty($rss->channel->copyright)) $this->_copyright = (string) $rss->channel->copyright; + if (!empty($rss->channel->lastbuilddate)) $this->_lastbuilddate = (string) $rss->channel->lastbuilddate; + if (!empty($rss->channel->image->url[0])) $this->_imageurl = (string) $rss->channel->image->url[0]; + if (!empty($rss->channel->link)) $this->_link = (string) $rss->channel->link; + if (!empty($rss->channel->title)) $this->_title = (string) $rss->channel->title; + if (!empty($rss->channel->description)) $this->_description = (string) $rss->channel->description; + } + else + { + if (!empty($rss->channel['rss_language'])) $this->_language = (string) $rss->channel['rss_language']; + if (!empty($rss->channel['rss_generator'])) $this->_generator = (string) $rss->channel['rss_generator']; + if (!empty($rss->channel['rss_copyright'])) $this->_copyright = (string) $rss->channel['rss_copyright']; + if (!empty($rss->channel['rss_lastbuilddate'])) $this->_lastbuilddate = (string) $rss->channel['rss_lastbuilddate']; + if (!empty($rss->image['rss_url'])) $this->_imageurl = (string) $rss->image['rss_url']; + if (!empty($rss->channel['rss_link'])) $this->_link = (string) $rss->channel['rss_link']; + if (!empty($rss->channel['rss_title'])) $this->_title = (string) $rss->channel['rss_title']; + if (!empty($rss->channel['rss_description'])) $this->_description = (string) $rss->channel['rss_description']; + } + + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) $items=$rss->channel->item; // With simplexml + else $items=$rss->items; // With xmlparse + //var_dump($items);exit; + } + else if ($rss->_format == 'atom') + { + //var_dump($rss); + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) + { + if (!empty($rss->generator)) $this->_generator = (string) $rss->generator; + if (!empty($rss->lastbuilddate)) $this->_lastbuilddate = (string) $rss->modified; + if (!empty($rss->link->href)) $this->_link = (string) $rss->link->href; + if (!empty($rss->title)) $this->_title = (string) $rss->title; + if (!empty($rss->description)) $this->_description = (string) $rss->description; + } + else + { + //if (!empty($rss->channel['rss_language'])) $this->_language = (string) $rss->channel['rss_language']; + if (!empty($rss->channel['generator'])) $this->_generator = (string) $rss->channel['generator']; + //if (!empty($rss->channel['rss_copyright'])) $this->_copyright = (string) $rss->channel['rss_copyright']; + if (!empty($rss->channel['modified'])) $this->_lastbuilddate = (string) $rss->channel['modified']; + //if (!empty($rss->image['rss_url'])) $this->_imageurl = (string) $rss->image['rss_url']; + if (!empty($rss->channel['link'])) $this->_link = (string) $rss->channel['link']; + if (!empty($rss->channel['title'])) $this->_title = (string) $rss->channel['title']; + //if (!empty($rss->channel['rss_description'])) $this->_description = (string) $rss->channel['rss_description']; + } + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) { $tmprss=xml2php($rss); $items=$tmprss['entry'];} // With simplexml + else $items=$rss->items; // With xmlparse + //var_dump($items);exit; + } + + $i = 0; + // Loop on each record + foreach($items as $item) + { + //var_dump($item);exit; + if ($rss->_format == 'rss') + { + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) + { + $itemLink = (string) $item->link; + $itemTitle = (string) $item->title; + $itemDescription = (string) $item->description; + $itemPubDate = (string) $item->pubDate; + $itemId = ''; + $itemAuthor = ''; + } + else + { + $itemLink = (string) $item['rss_link']; + $itemTitle = (string) $item['rss_title']; + $itemDescription = (string) $item['rss_description']; + $itemPubDate = (string) $item['rss_pubdate']; + $itemId = (string) $item['rss_guid']; + $itemAuthor = (string) $item['rss_author']; + } + + // Loop on each category + $itemCategory=array(); + if (is_array($item->category)) + { + foreach ($item->category as $cat) + { + $itemCategory[] = (string) $cat; + } + } + } + else if ($rss->_format == 'atom') + { + if (! empty($conf->global->EXTERNALRSS_USE_SIMPLEXML)) + { + $itemLink = (string) $item['link']['href']; + $itemTitle = (string) $item['title']; + $itemDescription = (string) $item['summary']; + $itemPubDate = (string) $item['created']; + $itemId = (string) $item['id']; + $itemAuthor = (string) ($item['author']?$item['author']:$item['author_name']); + } + else + { + $itemLink = (string) $item['link']['href']; + $itemTitle = (string) $item['title']; + $itemDescription = (string) $item['summary']; + $itemPubDate = (string) $item['created']; + $itemId = (string) $item['id']; + $itemAuthor = (string) ($item['author']?$item['author']:$item['author_name']); + } + } + else print 'ErrorBadFeedFormat'; + + // Add record to result array + $this->_rssarray[$i] = array( + 'link'=>$itemLink, + 'title'=>$itemTitle, + 'description'=>$itemDescription, + 'pubDate'=>$itemPubDate, + 'category'=>$itemCategory, + 'id'=>$itemId, + 'author'=>$itemAuthor); + + $i++; + + if ($i > $maxNb) break; // We get all records we want + } + + return 1; + } + else + { + $this->error='ErrorFailedToLoadRSSFile'; + return -1; + } + } + + + + /** + * Triggered when opened tag is found + * + * @param $p + * @param $element Tag + * @param $attrs Attributes of tags + */ + function feed_start_element($p, $element, &$attrs) + { + $el = $element = strtolower($element); + $attrs = array_change_key_case($attrs, CASE_LOWER); + + // check for a namespace, and split if found + $ns = false; + if ( strpos( $element, ':' ) ) { + list($ns, $el) = explode( ':', $element, 2); + } + if ( $ns and $ns != 'rdf' ) { + $this->current_namespace = $ns; + } + + // if feed type isn't set, then this is first element of feed identify feed from root element + if (empty($this->_format)) + { + if ( $el == 'rdf' ) { + $this->_format = 'rss'; + $this->feed_version = '1.0'; + } + elseif ( $el == 'rss' ) { + $this->_format = 'rss'; + $this->feed_version = $attrs['version']; + } + elseif ( $el == 'feed' ) { + $this->_format = 'atom'; + $this->feed_version = $attrs['version']; + $this->inchannel = true; + } + return; + } + + if ( $el == 'channel' ) + { + $this->inchannel = true; + } + elseif ($el == 'item' or $el == 'entry' ) + { + $this->initem = true; + if ( isset($attrs['rdf:about']) ) { + $this->current_item['about'] = $attrs['rdf:about']; + } + } + + // if we're in the default namespace of an RSS feed, + // record textinput or image fields + elseif ( + $this->_format == 'rss' and + $this->current_namespace == '' and + $el == 'textinput' ) + { + $this->intextinput = true; + } + + elseif ( + $this->_format == 'rss' and + $this->current_namespace == '' and + $el == 'image' ) + { + $this->inimage = true; + } + + # handle atom content constructs + elseif ( $this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) ) + { + // avoid clashing w/ RSS mod_content + if ($el == 'content' ) { + $el = 'atom_content'; + } + + $this->incontent = $el; + + + } + + // if inside an Atom content construct (e.g. content or summary) field treat tags as text + elseif ($this->_format == 'atom' and $this->incontent ) + { + // if tags are inlined, then flatten + $attrs_str = join(' ', + array_map('map_attrs', + array_keys($attrs), + array_values($attrs) ) ); + + $this->append_content( "<$element $attrs_str>" ); + + array_unshift( $this->stack, $el ); + } + + // Atom support many links per containging element. + // Magpie treats link elements of type rel='alternate' + // as being equivalent to RSS's simple link element. + // + elseif ($this->_format == 'atom' and $el == 'link' ) + { + if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) + { + $link_el = 'link'; + } + else { + $link_el = 'link_' . $attrs['rel']; + } + + $this->append($link_el, $attrs['href']); + } + // set stack[0] to current element + else { + array_unshift($this->stack, $el); + } + } + + + /** + * Triggered when CDATA is found + * + * @param $p + * @param $element Tag + * @param $attrs Attributes of tags + */ + function feed_cdata ($p, $text) { + if ($this->_format == 'atom' and $this->incontent) + { + $this->append_content( $text ); + } + else { + $current_el = join('_', array_reverse($this->stack)); + $this->append($current_el, $text); + } + } + + /** + * Triggered when closed tag is found + * + * @param $p + * @param $element Tag + */ + function feed_end_element ($p, $el) { + $el = strtolower($el); + + if ( $el == 'item' or $el == 'entry' ) + { + $this->items[] = $this->current_item; + $this->current_item = array(); + $this->initem = false; + } + elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput' ) + { + $this->intextinput = false; + } + elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image' ) + { + $this->inimage = false; + } + elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS) ) + { + $this->incontent = false; + } + elseif ($el == 'channel' or $el == 'feed' ) + { + $this->inchannel = false; + } + elseif ($this->_format == 'atom' and $this->incontent ) { + // balance tags properly + // note: i don't think this is actually neccessary + if ( $this->stack[0] == $el ) + { + $this->append_content(""); + } + else { + $this->append_content("<$el />"); + } + + array_shift( $this->stack ); + } + else { + array_shift( $this->stack ); + } + + $this->current_namespace = false; + } + + + /** + * To concat 2 string with no warning if an operand is not defined + * + * @param $str1 + * @param $str2 + */ + function concat (&$str1, $str2="") { + if (!isset($str1) ) { + $str1=""; + } + $str1 .= $str2; + } + + /** + */ + function append_content($text) { + if ( $this->initem ) { + $this->concat( $this->current_item[ $this->incontent ], $text ); + } + elseif ( $this->inchannel ) { + $this->concat( $this->channel[ $this->incontent ], $text ); + } + } + + /** + * smart append - field and namespace aware + */ + function append($el, $text) { + if (!$el) { + return; + } + if ( $this->current_namespace ) + { + if ( $this->initem ) { + $this->concat( + $this->current_item[ $this->current_namespace ][ $el ], $text); + } + elseif ($this->inchannel) { + $this->concat( + $this->channel[ $this->current_namespace][ $el ], $text ); + } + elseif ($this->intextinput) { + $this->concat( + $this->textinput[ $this->current_namespace][ $el ], $text ); + } + elseif ($this->inimage) { + $this->concat( + $this->image[ $this->current_namespace ][ $el ], $text ); + } + } + else { + if ( $this->initem ) { + $this->concat( + $this->current_item[ $el ], $text); + } + elseif ($this->intextinput) { + $this->concat( + $this->textinput[ $el ], $text ); + } + elseif ($this->inimage) { + $this->concat( + $this->image[ $el ], $text ); + } + elseif ($this->inchannel) { + $this->concat( + $this->channel[ $el ], $text ); + } + + } + } + +} + + +/** + * Function to convert an XML object into an array + */ +function xml2php($xml) +{ + $fils = 0; + $tab = false; + $array = array(); + foreach($xml->children() as $key => $value) + { + $child = xml2php($value); + + //To deal with the attributes + foreach($value->attributes() as $ak=>$av) + { + $child[$ak] = (string)$av; + + } + + //Let see if the new child is not in the array + if($tab==false && in_array($key,array_keys($array))) + { + //If this element is already in the array we will create an indexed array + $tmp = $array[$key]; + $array[$key] = NULL; + $array[$key][] = $tmp; + $array[$key][] = $child; + $tab = true; + } + elseif($tab == true) + { + //Add an element in an existing array + $array[$key][] = $child; + } + else + { + //Add a simple element + $array[$key] = $child; + } + + $fils++; + } + + + if($fils==0) + { + return (string)$xml; + } + + return $array; + +} + ?> \ No newline at end of file