<?php
// ZebraFeeds - copyright (c) 2006 Laurent Cazalet
// http://www.cazalet.org/zebrafeeds
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
//
// ZebraFeeds RSS fetch layer

if (!defined('ZF_VER')) exit;

require_once($zf_path . 'includes/magpierss/rss_fetch.inc');



/**
 * Fetch and parse a feed through MagpieRSS, with a per-feed cache lifetime.
 *
 * Adapted from MagpieRSS's fetch_rss(): the original uses one cache age for
 * all feeds, this version takes it as an argument. It should be the only
 * call into Magpie.
 *
 * Meaning of $refreshtime (expressed in minutes):
 *   -1 (infinite): use the cached version whatever its age; the publisher is
 *                  only contacted when nothing usable is cached
 *    0           : force a refresh from the publisher
 *   otherwise    : cache lifetime in minutes
 *
 * @param array  $channel      channel array as defined in aggregator.php;
 *                             'xmlurl' is required, 'link' is overwritten
 *                             with the feed's own link after a fresh fetch
 * @param int    $refreshtime  see above
 * @param string $resultString receives a short description on failure
 * @return mixed the parsed RSS object, or false on failure
 */
function zf_custom_fetch_rss (&$channel, $refreshtime, &$resultString) {
    // initialize constants
    init();

    if ( !isset($channel['xmlurl']) ) {
        error("zf_custom_fetch_rss called without a url");
        return false;
    }

    $url = $channel['xmlurl'];
    if ( MAGPIE_DEBUG > 1) {
        debug('<br/>-- ZebraFeeds: refreshing '.$url, E_USER_NOTICE);
    }

    // cache is disabled: fetch the file and parse it, nothing else
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( $resp and is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        error("Failed to fetch $url and cache is off");
        $resultString = 'Failed to fetch and cache is off:';
        return false;
    }

    // cache is ON. Flow:
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    /* ZF change here: instead of a constant, use our variable
    magpieRSS cache age is in seconds, but $refreshtime is in minutes */
    $cache = new RSSCache( MAGPIE_CACHE_DIR, $refreshtime*60 );
    if ( MAGPIE_DEBUG > 1) {
        debug("ZebraFeeds: Magpie Cache time ".$refreshtime*60, E_USER_NOTICE);
    }

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    // A string (not 0) so that the comparisons below can be strict:
    // before PHP 8, 0 == 'HIT' evaluates to true.
    $cache_status    = '';      // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // cached RSS object, if one was loaded
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key       = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key);
        if ( MAGPIE_DEBUG > 1) {
            // the cache file does not exist on a MISS: do not call filemtime on it
            $cache_file = MAGPIE_CACHE_DIR.'/'.md5($cache_key);
            $modified = file_exists($cache_file)
                ? date("F d Y H:i:s.", filemtime($cache_file))
                : 'n/a';
            debug("ZebraFeeds: Cache age ".$cache->cache_age($cache_key).', '.md5($cache_key). ' modif:'.$modified, E_USER_NOTICE);
        }
    }

    // if object cached, and cache is fresh, return cached obj
    // ZebraFeeds tweak: use cache only if refresh not forced or if explicitly requested
    if ($refreshtime != 0 && ($cache_status === 'HIT' || $refreshtime == -1)) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("ZF using MagpieRSS Cache ($cache_status,$refreshtime)", E_USER_NOTICE);
            }
            /* channel customization was already applied before the object
               was cached; only the item -> channel references have to be
               rebuilt after unserialization */
            zf_bindChannel($rss->items, $rss->channel);
            return $rss;
        }
        if ( MAGPIE_DEBUG > 1) {
            debug("MagpieRSS: invalid Cache ($cache_status, $refreshtime)", E_USER_NOTICE);
        }
    }

    // else attempt a conditional get

    // ZebraFeeds tweak: no headers if force refresh
    if ( $cache_status === 'STALE' && $refreshtime != 0 ) {
        $rss = $cache->get( $cache_key );
        if ( $rss and $rss->etag and $rss->last_modified ) {
            $request_headers['If-None-Match'] = $rss->etag;
            // the HTTP validator header for dates is If-Modified-Since
            $request_headers['If-Modified-Since'] = $rss->last_modified;
        }
    }

    if ( MAGPIE_DEBUG > 1) {
        debug("MagpieRSS: fetching remote file ".$url, E_USER_NOTICE);
    }
    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        // a 304 is only usable when we actually hold a cached copy
        if ( $resp->status == '304' && $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            // bind after caching, so the references are not serialized
            zf_bindChannel($rss->items, $rss->channel);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // parse into a separate variable: a parse failure must not
            // discard the stale cached copy we may still fall back on
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                /* one shot: add our extra data and do our post processing
                BEFORE storing to cache */
                /* we need the link in the raw channel array for normalize */
                $channel['link'] = $fresh->channel['link'];
                zf_normalizeRSSChannel($fresh, $channel);
                zf_normalizeRSSItems($fresh, $channel);

                /* set channel data, like title and description from what's
                  configured in the subscription list -
                  do it before saving to cache */
                zf_customizeRSSChannel($fresh, $channel);
                // add object to cache
                $cache->set( $cache_key, $fresh );
                /* for each item: $item['channel'] = &$rss->channel
                must be done after caching */
                zf_bindChannel($fresh->items, $fresh->channel);
                return $fresh;
            }
            $errormsg = "Failed to parse RSS file from $url";
            $resultString = 'Failed to parse';
        }
        else {
            $errormsg = "Failed to fetch $url ";
            $resultString = 'Failed to fetch';
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
        $resultString = 'Failed to fetch, unknown reason';
    }

    // the fetch (or the parsing) failed: attempt to return the cached object
    if ($rss) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        /* set channel data, like title and description from what's
           configured in the subscription list */
        zf_customizeRSSChannel($rss, $channel);
        /* for each item: $item['channel'] = &$rss->channel */
        zf_bindChannel($rss->items, $rss->channel);
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end custom fetch_rss()


/**
 * Complete the parsed feed's channel array with the fields ZebraFeeds needs:
 * id, isvirtual, xmlurl, last_modified and last_fetched.
 *
 * These values end up in the cache, so this is meant to run only once,
 * right after the feed has been fetched over HTTP.
 *
 * @param object $rss     parsed feed; its 'channel' array is modified
 * @param array  $channel subscription entry; 'xmlurl' is read
 */
function zf_normalizeRSSChannel(&$rss, &$channel) {

    /* fall back to 0 when the parsed feed carries no last_modified value */
    if (isset($rss->last_modified)) {
        $feedLastModified = $rss->last_modified;
    } else {
        $feedLastModified = 0;
    }

    /* everything below is fine to store in the cache */
    $rss->channel['id']            = zf_makeId($channel['xmlurl'], '');
    $rss->channel['isvirtual']     = false;
    $rss->channel['xmlurl']        = $channel['xmlurl'];
    $rss->channel['last_modified'] = $feedLastModified;
    $rss->channel['last_fetched']  = time();
}

/**
 * Override the feed's title and description with the values configured in
 * the subscription list, when they are set there.
 *
 * Dilemma: doing this before saving to cache makes the custom values
 * available to ajax requests, but they are then frozen in the cache and a
 * later change by the user is not reflected there; doing it on the fly has
 * the opposite effects.
 *
 * @param object $rss     parsed feed; its 'channel' array is modified
 * @param array  $channel subscription entry; 'title' and 'description' are read
 */
function zf_customizeRSSChannel(&$rss, &$channel) {
    foreach (array('title', 'description') as $field) {
        if (!isset($channel[$field])) {
            continue;
        }
        $rss->channel[$field] = $channel[$field];
    }
}

/**
 * Run zf_normalizeItem() on every item of the parsed feed, in place.
 *
 * The items are walked by key rather than with array_walk(): handing
 * $channel to array_walk() by reference required call-time
 * pass-by-reference, which is a fatal error since PHP 5.4.
 *
 * @param object $rss     parsed feed; each entry of its 'items' array is modified
 * @param array  $channel channel array handed to zf_normalizeItem()
 */
function zf_normalizeRSSItems(&$rss, &$channel) {
    if (!isset($rss->items) || !is_array($rss->items)) {
        return;
    }
    foreach (array_keys($rss->items) as $key) {
        zf_normalizeItem($rss->items[$key], $key, $channel);
    }
}

/**
 * Give each item a reference to its channel: $item['channel'] = &$channel.
 *
 * Kept as a separate step performed AFTER the feed object is written to the
 * cache, because the effect of serializing these references is uncertain.
 *
 * The actual keys of $items are walked, so a list that is not indexed
 * 0..n-1 does not get empty entries created for the missing indexes.
 *
 * @param array $items   list of item arrays, modified in place
 * @param array $channel channel array every item will reference
 */
function zf_bindChannel(&$items, &$channel) {
    if (!is_array($items)) {
        return;
    }
    foreach (array_keys($items) as $key) {
        $items[$key]['channel'] = &$channel;
    }
}


/**
 * Helper for zf_normalizeItem(): derive date_timestamp from a raw date string.
 *
 * When the string cannot be parsed, date_timestamp is set to -1 and the raw
 * string is kept in $item['pubdate']. strtotime() reports failure with false
 * since PHP 5.1 and with -1 before that; both are treated as failure.
 *
 * @param array  $item       item array, modified in place
 * @param string $rawdate    date string as found in the feed
 * @param string $debuglabel prefix of the debug line
 */
function _zf_applyItemDate(&$item, $rawdate, $debuglabel) {
    $timestamp = zf_cleanupDate($rawdate);
    if ($timestamp === false || $timestamp == -1) {
        $timestamp = -1;
        $item['pubdate'] = $rawdate;
    }
    $item['date_timestamp'] = $timestamp;
    if (ZF_DEBUG==2) {
        zf_debug($debuglabel . $item['date_timestamp']);
    }
}

/**
 * All sorts of processing applied to one feed item, in place.
 * Everything that happens here ends up in the cache:
 *  - set the item id
 *  - normalize the item's date and description
 *  - make relative paths absolute in the item's description and summary
 *  - make sure title and summary are filled in
 *
 * The signature (item, key, userdata) is the one of an array_walk() callback.
 *
 * @param array $item    item array, modified in place
 * @param mixed $key     key of the item in the items list (unused)
 * @param array $channel channel array; 'xmlurl', 'link' and, when no date can
 *                       be found in the item, the 'history' object are read
 */
function zf_normalizeItem(&$item, $key, &$channel){

    /* build our id, used as CSS element id */
    $item['id'] = zf_makeId($channel['xmlurl'], isset($item['link']) ? $item['link'] : null);


    /* try to get a valid date. timestamp should be given by magpie, but some times it's not */

    if ( !isset($item['date_timestamp'])) {
        /* every candidate that is present overrides the previous one,
           so the last one listed here wins */
        if (isset($item['date'])) {
            _zf_applyItemDate($item, $item['date'], '-- using date ');
        }
        if (isset($item['dc']['date'])) {
            _zf_applyItemDate($item, $item['dc']['date'], '--using dc date ');
        }
        if (isset($item['issued'])) {
            _zf_applyItemDate($item, $item['issued'], '-- using issued ');
        }
        if (isset($item['updated'])) {
            _zf_applyItemDate($item, $item['updated'], '-- using updated ');
        }

        // finally, if still not set, let our history management system
        // decide; when it does not know the item either, use the current time
        if ( !isset($item['date_timestamp'])) {
            $firstseen = 0;
            if (isset($channel['history']) && is_object($channel['history'])) {
                $firstseen = $channel['history']->getDateFirstSeen($item['id']);
            }
            if ($firstseen == 0) {
                $firstseen = time();
            }
            $item['date_timestamp'] = $firstseen;
            if (ZF_DEBUG==2) {
                zf_debug('-- using history time '. $item['date_timestamp']);
            }
        }
    } elseif (isset($item['pubdate']) && is_string($item['pubdate'])) {
        /* timestamp is set, but sometimes it can be wrong, especially
           when publisher puts non standard (french) dates
           in the pubDate element...

           stolen and adapted from magpie code
        */

        # regex to match a french date: dd/mm/yy hh:mm
        $pat = "/(\d{2})\/(\d{2})\/(\d{2}) (\d{2}):(\d{2})/";

        if ( preg_match( $pat, $item['pubdate'], $match ) ) {
            # mktime() interprets the values in the server's default timezone
            $item['date_timestamp'] = mktime(
                (int) $match[4],  // hours
                (int) $match[5],  // minutes
                0,                // seconds
                (int) $match[2],  // month
                (int) $match[1],  // day
                (int) $match[3]   // two-digit year
            );
        }
    }


    // if no description, try the summary, then the atom content,
    // then the atom summary; the first non-empty one is used
    if ( !isset($item['description'])) {
        if (isset($item['summary']) && (strlen($item['summary']) != 0) ) {
            $item['description'] = $item['summary'];
            if (ZF_DEBUG==2) {
                zf_debug('-- forcing summary ');
            }
        } elseif (isset($item['atom_content']) && (strlen($item['atom_content']) != 0) ) {
            $item['description'] = $item['atom_content'];
        } elseif (isset($item['atom']['summary']) && (strlen($item['atom']['summary']) != 0) ) {
            $item['description'] = $item['atom']['summary'];
        }
    }

    // give priority to the encoded version
    if ( isset($item['content']['encoded']) && (ZF_FORCE_ENCODED_CONTENT =='yes') ) {
        if (strlen ($item['content']['encoded']) != 0 ) {
            $item['description'] = $item['content']['encoded'];
        }
        if (ZF_DEBUG==2) {
            zf_debug('-- forcing encoded content');
        }
    }

    if ( !isset($item['title']) || strlen($item['title']) == 0) {
        $item['title'] = "&nbsp;&nbsp;...&nbsp;&nbsp;";
    }


    //Caution: if some links are relative, it's relative to the channel's site url, not the xml address
    $siteurl = isset($channel['link']) ? $channel['link'] : '';
    $description = isset($item['description']) ? $item['description'] : '';
    $item['description'] = zf_makeAbsolute($description, $siteurl);


    if ( !isset($item['summary']) || strlen($item['summary']) == 0 ) {
        $item['summary'] = $item['description'];
    } else {
        $item['summary'] = zf_makeAbsolute($item['summary'], $siteurl);
    }

    // too long a summary is stripped of its tags and truncated
    if (strlen($item['summary']) >= ZF_MAX_SUMMARY_LENGTH ) {
        $item['summary'] = substr(strip_tags($item['summary']), 0, ZF_SUMMARY_TRUNCATED_LENGTH).'...';
    }
}

/**
 * Make the relative URLs found in href, src and background attributes of an
 * HTML fragment absolute.
 *
 * Originally taken from a forum post (thanks to the author):
 * http://www.howtoforge.com/forums/showthread.php?t=4
 *
 * Rules applied to each attribute value:
 *  - it starts with a URI scheme (http:, https:, ftp:, mailto:, ...) or with
 *    "//" (protocol-relative): left untouched
 *  - it starts with "/": prefixed with scheme://host[:port] of $base_url
 *  - anything else: prefixed with $base_url, to which a trailing "/" is
 *    added when missing
 *
 * The text is returned unchanged when $base_url has no scheme and host.
 *
 * @param string $txt      HTML fragment
 * @param string $base_url absolute URL the relative URLs are relative to
 * @return string the fragment with absolute URLs
 */
function zf_makeAbsolute($txt, $base_url) {
    $txt = (string) $txt;
    $base_url = (string) $base_url;
    if ($txt === '') {
        return '';
    }

    $needles = array('href="', 'src="', 'background="', "href='", "src='", "background='");

    if (substr($base_url, -1) != '/') {
        $base_url .= '/';
    }

    $base_url_parts = parse_url($base_url);
    if (!is_array($base_url_parts)
        || !isset($base_url_parts['scheme'])
        || !isset($base_url_parts['host'])) {
        // nothing sensible can be built from such a base
        return $txt;
    }
    $site_root = $base_url_parts['scheme'].'://'.$base_url_parts['host'];
    if (isset($base_url_parts['port'])) {
        $site_root .= ':'.$base_url_parts['port'];
    }

    foreach ($needles as $needle) {
        $needle_len = strlen($needle);
        $new_txt = '';
        $offset = 0;

        // compare with false explicitly: a needle at offset 0 is a valid match
        while (($pos = strpos($txt, $needle, $offset)) !== false) {
            $url_start = $pos + $needle_len;
            // copy everything up to and including the needle
            $new_txt .= substr($txt, $offset, $url_start - $offset);

            $url_head = (string) substr($txt, $url_start, 40);
            if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url_head)
                || substr($url_head, 0, 2) == '//') {
                // already absolute, or protocol-relative: keep as is
            } elseif (substr($url_head, 0, 1) == '/') {
                // relative to the site root; decided per URL so that it
                // does not affect the following ones
                $new_txt .= $site_root;
            } else {
                $new_txt .= $base_url;
            }
            $offset = $url_start;
        }
        $txt = $new_txt . (string) substr($txt, $offset);
    }
    return $txt;
}


/**
 * Completely empirical function that tries to cope with exotic date formats
 * so that strtotime() accepts them and gives a real timestamp.
 *
 *   2006-02-14T14:50:04Z           -> 2006-02-14T14:50:04
 *   2006-02-15T23:25:00+00:00      -> 2006-02-15T23:25:00
 *   2006-02-15T23:25:00-08:00      -> 2006-02-15T23:25:00
 *   2006-02-15T23:25:00.736+01:00  -> 2006-02-15T23:25:00
 *   2006-02-15T23:25:00.736Z       -> 2006-02-15T23:25:00
 *
 * Drawback: the timezone information is discarded, the date is interpreted
 * in the server's default timezone.
 *
 * @param string $datestr a date string with an unhandled format
 * @return int unix timestamp coming from strtotime(), or -1 when the date
 *             cannot be parsed (callers test for -1; strtotime() itself
 *             returns false on failure since PHP 5.1)
 */
function zf_cleanupDate($datestr) {
    // The fractional-second variants come first: once the bare "Z" or the
    // bare offset has been removed they can no longer match.
    $search = array(
        "/\.[0-9]+Z$/",
        "/Z$/",
        "/\.[0-9]+[\+-][0-9]+:[0-9]+/",
        "/[\+-][0-9]+:[0-9]+/",
    );
    $newdate = preg_replace($search, '', trim((string) $datestr));

    $timestamp = strtotime($newdate);
    if ($timestamp === false) {
        return -1;
    }
    return $timestamp;
}


?>
