// $items is an array of arrays: each element represents one <item> block
// and is itself an associative array with the keys
// ("title", "link", "description").
$items = array();
/**
 * Opening tag handler (registered with xml_set_element_handler()).
 *
 * Stores the name of the element being opened in the global $currenttag
 * and updates the global $flag: 1 when entering an <item> block, 2 when
 * entering a <channel> block. Any other element leaves $flag unchanged.
 *
 * @param mixed  $parser     parser invoking the handler (unused)
 * @param string $name       name of the element being opened
 * @param array  $attributes attributes of the element (unused)
 */
function elementbegin($parser, $name, $attributes)
{
    global $currenttag, $flag;

    $currenttag = $name;

    // Compare case-insensitively: when case folding is enabled the parser
    // reports element names in upper case ("ITEM"), otherwise as written.
    $tag = strtolower($name);

    // set flag if entering <channel> or <item> block
    if ($tag === "item") {
        $flag = 1;
    } elseif ($tag === "channel") {
        $flag = 2;
    }
}
/**
 * Closing tag handler (registered with xml_set_element_handler()).
 *
 * Clears the global $currenttag. When an <item> block closes, the global
 * item counter $count is advanced and $flag is reset to 0; when a
 * <channel> block closes, only $flag is reset to 0.
 *
 * @param mixed  $parser parser invoking the handler (unused)
 * @param string $name   name of the element being closed
 */
function elementend($parser, $name)
{
    global $currenttag, $flag, $count;

    $currenttag = "";

    // Compare case-insensitively: when case folding is enabled the parser
    // reports element names in upper case ("ITEM"), otherwise as written.
    $tag = strtolower($name);

    // set flag if exiting <channel> or <item> block
    if ($tag === "item") {
        // the next <item> is collected under a fresh index
        $count++;
        $flag = 0;
    } elseif ($tag === "channel") {
        $flag = 0;
    }
}
/**
 * Character data handler (registered with
 * xml_set_character_data_handler()).
 *
 * HTML-escapes and trims the received text and, when the current element
 * is <title>, <link> or <description>, appends it to the matching key of
 * either the current entry of the global $items array ($flag == 1) or
 * the global $channel array ($flag == 2). Text of any other element is
 * ignored.
 *
 * @param mixed  $parser parser invoking the handler (unused)
 * @param string $data   chunk of character data
 */
function characterdata($parser, $data)
{
    global $currenttag, $flag, $items, $count, $channel;

    // NOTE(review): each chunk is trimmed separately, so whitespace at a
    // chunk boundary is lost if the parser delivers one text node in
    // several pieces.
    $data = trim(htmlspecialchars($data));

    // Normalise the tag name once: it is upper case when case folding is
    // enabled, while the array keys are always lower case.
    $key = strtolower($currenttag);
    if ($key !== "title" && $key !== "link" && $key !== "description") {
        return;
    }

    // add data to $channel[] or $items[] array
    if ($flag == 1) {
        // create the key first so that .= never touches an undefined index
        if (!isset($items[$count][$key])) {
            $items[$count][$key] = "";
        }
        $items[$count][$key] .= $data;
    } elseif ($flag == 2) {
        if (!isset($channel[$key])) {
            $channel[$key] = "";
        }
        $channel[$key] .= $data;
    }
}
// create parser
$xp = xml_parser_create();

// set element and character data handlers
xml_set_element_handler($xp, "elementbegin", "elementend");
xml_set_character_data_handler($xp, "characterdata");

// parser options (the option constants are upper case)
xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, true);
xml_parser_set_option($xp, XML_OPTION_SKIP_WHITE, true);

// read xml file
if (!($fp = fopen($file, "r"))) {
    die("could not read $file");
}

// parse data in 4 KB chunks; feof() tells the parser which chunk is last
while ($xml = fread($fp, 4096)) {
    if (!xml_parse($xp, $xml, feof($fp))) {
        die("xml parser error: " .
            xml_error_string(xml_get_error_code($xp)));
    }
}

// release the file handle once the whole document has been read
fclose($fp);

// destroy parser
xml_parser_free($xp);

// now iterate through $items[] array
// and print each item as a table row
foreach ($items as $item) {
    // an <item> may lack any of the three elements
    $link        = isset($item["link"]) ? $item["link"] : "";
    $title       = isset($item["title"]) ? $item["title"] : "";
    $description = isset($item["description"]) ? $item["description"] : "";

    // The values were HTML-escaped by characterdata(); quoting the href
    // keeps a link containing spaces from spilling into extra attributes.
    echo "<tr><td><a href=\"" . $link . "\">" . $title .
        "</a><br>" . $description . "</td></tr>";
}
?>
</table>
</body>
</html>
与先前的那段的主要区别在于,这段脚本创建了两个数组,用于保存分析过程中所提取的信息。其中,$channel是联合性数组(associative array),存放被处理的频道的基本描述信息,而$items是一个二维数组,包含关于单独的频道条目(channel items)的信息。$items数组中的每一个元素本身又是一个联合性数组,包含title,link和description关键字。$items数组中元素总数与RDF文档中的<item>区块总数相同。
还需注意$flag变量的变化,根据被处理的是<channel></channel>区块还是<item></item>区块,它现在保存两个值。这一点很有必要,因为只有这样,分析器才能把信息放入正确的数组里面。
一旦文档分析完毕,事情就简单了——遍历$items 数组,以表格形式打印其中的每一个条目(item)。运行结果如下:
7)返回到类(back to class)
既然你有这么大的权力,那么究竟为什么要把自己限制在仅仅是单个的rdf来源呢?就象我早先说过的一样,大多数主要的站点都经常为他们所提供的内容做快照。其实将所有这些不同的来源插入到你的站点当中是相当简单的。让我们看看是如何做的。
首先,我们把前面例子中的代码模块化。这样一来,你就无须为每一个单个的来源都一遍又一遍的重写相同的代码了。简化的方法就是将之打包成类,再把这个类包含到我的php脚本当中。
类代码如下:
<?
class rdfparser
{
//
// variables
//
// set up local variables for this class
var $currenttag = "",
var $flag = "",
var $count = 0,
// this is an associative array of channel data with keys
("title", "link", "description")
var $channel = array(),
// this is an array of arrays, with each array element
representing an <item>
// each outer array element is itself an associative array
// with keys ("title", "link", "description")
var $items = array(),
//
// methods
//
// set the name of the rdf file to parse
// this is usually a local file
// you may set it to a remote file if your php build supports
url fopen()
function setresource($file)
{
$this->file = $file,
}
// parse the rdf file set with setresource()
// this populates the $channel and $items arrays
function parseresource()
{
// create parser
$this->xp = xml_parser_create(),
// set object reference
xml_set_object($this->xp, $this),
// set handlers and parser options
xml_set_element_handler($this->xp, "elementbegin",
"elementend"),
xml_set_character_data_handler($this->xp,
"characterdata"),
xml_parser_set_option($this->xp,
xml_option_case_folding, true),
xml_parser_set_option($this->xp, xml_option_skip_white,
true),
// read xml file
新闻热点
疑难解答