<?php
// Command-line RSS importer: reads every feed listed in $feedchannel and
// stores entries that are not yet present in the `feedentry` table.

// Remove the execution time limit for this run.
set_time_limit(0);

// Directory appended to the include path so that the Zend library resolves.
define('zend_dir', '/www/lib');

// PATH_SEPARATOR is a case-sensitive built-in constant; the lowercase
// spelling is an undefined constant and corrupts the include path.
set_include_path(get_include_path() . PATH_SEPARATOR . zend_dir);

// NOTE(review): canonical casing restored for the library file and class
// names. The Zend loader is understood to derive file paths from class
// names, so lowercase names fail on case-sensitive filesystems -- confirm
// against the installed library layout.
require_once('Zend.php');
Zend::loadClass('Zend_Feed');
Zend::loadClass('Zend_Filter_Input');

// NOTE: the mysql_* functions belong to ext/mysql, which is not available
// in PHP 7 and later; this script therefore targets PHP 5.
$link = mysql_connect('localhost', 'user', 'pwd')
    or die('could not connect: ' . mysql_error());
mysql_select_db('phpeye') or die('could not select database');

// Feeds to import; add a URL here to subscribe to another feed.
$feedchannel = array(
    'http://www.phpdeveloper.org/phpdev.rdf',
    'http://www.planet-php.net/rss/'
);

foreach ($feedchannel as $channel) {
    readrssfeed($channel);
}

echo "done!";
/**
 * Echo a message and append it to feedreader.log in the working directory.
 *
 * @param string $message Text to print and log (caller supplies newlines).
 */
function feedreader_log($message){
    echo $message;
    // FILE_APPEND is case-sensitive; without it the log would be overwritten.
    file_put_contents('feedreader.log', $message, FILE_APPEND);
}

/**
 * Import one RSS feed and insert its unseen items into `feedentry`.
 *
 * An item counts as already stored when a row with the same `entitle`
 * exists. A summary line (or the import error) is echoed and appended to
 * feedreader.log. Relies on the MySQL connection opened by the calling
 * script (ext/mysql, PHP 5 only).
 *
 * @param string $feedaddress URL of the feed to import.
 * @return void Terminates the script if the feed cannot be imported or a
 *              query fails.
 */
function readrssfeed($feedaddress){
    try {
        $rss = Zend_Feed::import($feedaddress);
    } catch (Zend_Feed_Exception $e) {
        // Feed import failed: report it, log it, and stop the run.
        $msg = "exception caught importing feed: {$e->getMessage()}\n ";
        feedreader_log($msg . ' ' . date('Y-m-d H:i:s') . "\n\n");
        exit;
    }

    $count = 0;

    // Feed item data is only reachable through accessor methods.
    foreach ($rss as $item) {
        // Strip markup first and escape last, so that every value placed
        // inside the SQL string literals below is escaped in its final form.
        $title = mysql_real_escape_string(strip_tags($item->title()));
        $link = mysql_real_escape_string(strip_tags($item->link()));
        $description = mysql_real_escape_string($item->description());

        // Skip items whose title is already stored.
        $query = "select id from feedentry where entitle = '$title' limit 1";
        $result = mysql_query($query) or die('query failed: ' . mysql_error());
        if (mysql_num_rows($result) > 0) {
            continue;
        }

        $query = "insert into feedentry (entitle,link,endescription,addtime) values ('$title','$link','$description',now())";
        mysql_query($query) or die('query failed: ' . mysql_error());
        $count++;
    }

    // Four-digit year and 24-hour clock keep log timestamps unambiguous.
    $datetime = date('Y-m-d H:i:s');
    if ($count > 0) {
        feedreader_log($count . " entrys read successfully! " . $datetime . "\n\n");
    } else {
        feedreader_log('read nothing...' . $datetime . "\n\n");
    }
}
?>
文件中除了 RSS 的读取、解析和入库外,还有 debug 和日志功能。
数据表结构如下:
-- Storage for imported feed entries, one row per feed item.
-- The importer looks rows up by `entitle` to decide whether an item is new,
-- and sets `addtime` to now() when it inserts a row.
create table `feedentry` (
`id` int(11) not null auto_increment,
`entitle` varchar(200) not null default '',
`link` varchar(200) not null default '',
`endescription` mediumtext not null,
`category` varchar(50) not null default '',
`comments` text not null,
`publishtime` datetime not null default '0000-00-00 00:00:00',
`addtime` datetime not null default '0000-00-00 00:00:00',
primary key (`id`)
-- ENGINE replaces the TYPE table option, which MySQL 5.5 no longer accepts.
) engine=myisam ;
呵呵,把这个文件保存,然后利用 Linux 的 cron 让它定时在命令行下运行。下面让它每小时的 40 分运行一次(脚本路径请按实际情况修改):
40 * * * * php /path/to/feedreader.php
如果要增加新的 feed,只需在 $feedchannel 这个数组中添加元素,非常方便。当然如果你有兴趣,可以写一个 web 界面来管理,那样就更方便了。
改进:
新闻热点
疑难解答