IIS日志分析搜索引擎爬虫记录程序

2024-08-29 03:13:51

字体：大中小

来源：转载

供稿：网友

使用注意：

　　修改iis.php文件中iis日志的绝对路径

　　例如：$folder="c:/windows/system32/logfiles/站点日志目录/"; //末尾一定要带斜杠(/)。

　　( 用虚拟空间的不懂查看你的站点绝对路径?上传个探针查看!

　　直接查看法：http://站点域名/iis.php

　　本地查看法：把日志下载到本地 http://127.0.0.1/iis.php )

　　注意：

　　//站点日志目录，注意该目录必须要有站点用户读取权限!

　　//如果把日志下载到本地，请把 show() 函数中 $domain 的网址改为您网站的网址；此操作不是必需的，不影响分析结果。

　　//修改文件名称iis.php 需要同时修改对应代码 ctrl+h 把 iis.php全部替换成您要修改的文件名否则程序运行出错。

　　//如果 IIS 日志文件过大，可能会导致程序超时，这种情况下不建议使用本程序!

以下是php源代码：
<?php
/*
 Niuzi IIS-log spider crawl analyzer v1.1 (PHP, gb2312 edition)
 Author: Niuzi
 QQ: 172379201
 Email: 17gd$163.com (replace $ with @)
*/
//===================================================
header("content-type:text/html; charset=gb2312");
// Directory holding the IIS log files; the web-site user needs read permission on it.
// When analysing logs that were downloaded to a local machine, change the site URL
// ($domain) used by show() to your own site. This is optional and does not affect
// the analysis itself.
// If this file is renamed from iis.php, replace every "iis.php" in the code with the
// new name, otherwise the generated links break.
$folder = "d:/vhost/webroot/jooker82465/www/wordpress/uploads/w3svc87164023/"; // must end with a slash (/)
$pagesize = 50; // rows shown per page
//=========================
// Supported spiders: token searched for in the log => label shown on the index page.
$spiders = array(
    'baiduspider' => '百度(baidu)',
    'googlebot'   => '谷歌(google)',
    'yahoo'       => '雅虎(yahoo)',
    'yodaobot'    => '有道(yodao)',
    'sosospider'  => '搜搜(soso)',
    'sogou'       => '搜狗(sogou)',
    'msnbot'      => '微软(msn)',
);

// Request parameters. "type" travels base64-encoded in the links built below.
$type = (isset($_GET['type']) && is_string($_GET['type'])) ? (string) base64_decode($_GET['type']) : '';
// Keep only the file-name component so the request cannot escape $folder.
$showfile = (isset($_GET['showfile']) && is_string($_GET['showfile']))
    ? basename(str_replace('\\', '/', $_GET['showfile']))
    : '';
$page = isset($_GET['page']) ? (int) $_GET['page'] : 1;
if ($page < 1) {
    $page = 1;
}
//============================
if (!$type) {
    // No spider selected: list every log file with one link per spider.
    $fp = is_dir($folder) ? opendir($folder) : false;
    if ($fp === false) {
        echo "该日志目录不存在或权限不足，请检查设置！";
        exit();
    }

    $arr_file = array();
    // Strict comparison: a file literally named "0" must not end the listing.
    while (false !== ($file = readdir($fp))) {
        if ($file != '.' && $file != '..') {
            $arr_file[] = $file;
        }
    }
    closedir($fp);

    $indexstr = '';
    // Rows are emitted in reverse directory order, as the original script did.
    for ($i = count($arr_file) - 1; $i >= 0; $i--) {
        $name = $arr_file[$i];
        $indexstr .= "\n<tr><td height=\"25\" width=\"10%\">" . date("y-m-d", filectime($folder . $name)) . "</td>";
        foreach ($spiders as $token => $label) {
            $indexstr .= "\n<td height=\"25\" width=\"10%\" align=\"center\">"
                . "\n<a href=\"iis.php?type=" . rawurlencode(base64_encode($token))
                . "&amp;showfile=" . rawurlencode($name) . "\">" . $label . "</a></td>";
        }
        $indexstr .= "\n</tr>";
    }

    $html = indexhtml();
    $copy = mycopy();
    $html = str_replace("[showlog]", $indexstr, $html);
    $html = str_replace("[copy]", $copy, $html);
    echo $html;
} elseif (isset($spiders[$type])) {
    // Known spider: render its crawl report. Unknown types produce no output.
    echo show($type, $folder, $showfile, $page, $pagesize);
}

/**
 * Return one whitespace-separated log field, HTML-escaped for the gb2312 page.
 *
 * @param array $fields Fields of one log line.
 * @param int   $index  Field position.
 * @return string Escaped field, or an empty string when the field is absent.
 */
function _show_cell($fields, $index)
{
    return isset($fields[$index])
        ? htmlspecialchars(trim($fields[$index]), ENT_QUOTES, 'GB2312')
        : '';
}

/**
 * Build the crawl report for one spider in one log file.
 *
 * Every line of the log that contains $type (case-insensitively) is kept. Lines are
 * split on single spaces; the script reads field 0 and 1 as date/time, field 5 as the
 * crawled URL, field 9 as the spider IP and field 11 as the HTTP status. Entries are
 * listed newest-first (bottom of the file first), $pagesize per page.
 *
 * Prints an error message and exits when the log file does not exist.
 *
 * @param string $type     Spider token to search for, e.g. 'baiduspider'.
 * @param string $folder   Log directory, ending with a slash.
 * @param string $showfile Log file name inside $folder.
 * @param int    $page     1-based page number.
 * @param int    $pagesize Rows per page.
 * @param string $domain   Site URL prefixed to each crawled path; no trailing slash.
 * @return string|null Rendered HTML, or null when $type, $folder or $showfile is empty.
 */
function show($type,$folder,$showfile,$page,$pagesize,$domain="http://tarr.cn")
{
    $titles = array(
        'baiduspider' => '百度',
        'googlebot'   => '谷歌',
        'yahoo'       => '雅虎',
        'yodaobot'    => '有道',
        'sosospider'  => '搜搜',
        'sogou'       => '搜狗',
        'msnbot'      => 'msn',
    );
    $title = isset($titles[$type]) ? $titles[$type] : '';

    // Only the file-name component is honoured, so the request cannot leave $folder.
    $showfile = basename(str_replace('\\', '/', (string) $showfile));
    if (!($type && $folder && $showfile)) {
        return null;
    }

    $path = $folder . $showfile;
    $fp = is_file($path) ? fopen($path, "r") : false;
    if ($fp === false) {
        echo "该日志文件不存在，请检查设置！";
        exit;
    }

    $matches = array();
    $t = 0; // status 200
    $h = 0; // status 304
    $y = 0; // status 404
    while (($line = fgets($fp)) !== false) {
        $line = iconv("utf-8", "gb2312//ignore", $line);
        // stripos() + strict false check: user-agent casing varies and a hit at
        // offset 0 must still count.
        if ($line === false || stripos($line, $type) === false) {
            continue;
        }
        $matches[] = $line;
        $fields = explode(" ", $line);
        $status = isset($fields[11]) ? trim($fields[11]) : '';
        if ($status === '200') {
            $t++;
        } elseif ($status === '304') {
            $h++;
        } elseif ($status === '404') {
            $y++;
        }
    }
    fclose($fp);

    // Pagination over the matches, newest entry first.
    $count = count($matches);
    $pagesize = max(1, (int) $pagesize);
    $pagecount = max(1, (int) ceil($count / $pagesize));
    $page = min(max(1, (int) $page), $pagecount);
    $first = $count - ($page - 1) * $pagesize - 1;
    // Clamp at 0 so the last page also includes the oldest entries.
    $last = max(0, $count - $page * $pagesize);

    $show = '';
    for ($i = $first; $i >= $last; $i--) {
        $num = explode(" ", $matches[$i]);
        $url = _show_cell($num, 5);
        $link = htmlspecialchars($domain, ENT_QUOTES, 'GB2312') . $url;
        $show .= "\n<tr onmouseout=\"this.style.backgroundColor='#ffffff'\" onmouseover=\"this.style.backgroundColor='#f6f6f6'\">"
            . "\n<td class=\"c\" width=\"200;\">" . _show_cell($num, 0) . " " . _show_cell($num, 1) . "</td>"
            . "\n<td class=\"c\">" . _show_cell($num, 9) . "</td>"
            . "\n<td class=\"pl\"><a href=\"" . $link . "\" target=\"_blank\">" . $url . "</a></td>"
            . "\n<td class=\"c\">" . _show_cell($num, 11) . "</td>"
            . "\n</tr>";
    }
    unset($matches);

    // Pager: first / previous / next / last.
    $query = "?type=" . rawurlencode(base64_encode($type)) . "&amp;showfile=" . rawurlencode($showfile);
    $showpage = "<td colspan=\"4\" height=\"30\" align=\"center\">每页 " . $pagesize . " 条当前" . $page . "/" . $pagecount;
    $showpage .= " <a href=\"" . $query . "\">首页</a>";
    if ($page != 1) {
        $showpage .= " <a href=\"" . $query . "&amp;page=" . ($page - 1) . "\">上一页</a>";
    }
    if ($page != $pagecount) {
        $showpage .= " <a href=\"" . $query . "&amp;page=" . ($page + 1) . "\">下一页</a>";
        $showpage .= " <a href=\"" . $query . "&amp;page=" . $pagecount . "\">尾页</a>";
    }
    $showpage .= "</td>";

    $htmltitle = "牛仔iis日志蜘蛛爬行记录分析器茄咧啡修改版"; // attribution, please keep

    // strtr() substitutes all placeholders in one pass, so placeholder-looking text
    // inside log data is never substituted again.
    return strtr(pagehtml(), array(
        "[title]"     => $title,
        "[htmltitle]" => $htmltitle,
        "[show]"      => $show,
        "[count]"     => $count,
        "[page]"      => $showpage,
        "[y]"         => $y,
        "[t]"         => $t,
        "[h]"         => $h,
        "[copy]"      => mycopy(),
    ));
}
/**
 * Return the HTML template of the log index page.
 *
 * Placeholders: [showlog] receives the table rows (one complete <tr> per log file,
 * with a date cell and seven spider cells) and [copy] receives the footer.
 *
 * @return string
 */
function indexhtml()
{
return '<html>
<head>
<meta http-equiv="content-language" content="zh-cn">
<meta http-equiv="content-type" content="text/html; charset=gb2312">
<title>牛仔iis日志蜘蛛爬行记录分析器 v1.1</title>
<style>

</style>
</head>
<body>
<table border="1" width="100%" id="table1" cellspacing="0" cellpadding="0" >
<tr>
 <td colspan="8" bgcolor="#808080" height="30" align="center">
 牛仔iis日志蜘蛛爬行记录分析器茄咧啡修改版</td>
</tr>
<tr>
 <td height="25" align="center" width="260">日期</td>
 <td colspan="7" height="25" align="center">引擎</td>
</tr>
 [showlog]
</table>
[copy]
</body>
</html>';
}
/**
 * Return the HTML template of the per-spider report page.
 *
 * Placeholders: [title], [htmltitle], [count], [t], [y], [h], [show] (table rows),
 * [page] (pager cell) and [copy] (footer).
 *
 * @return string
 */
function pagehtml()
{
return '<html>
<head>
<meta http-equiv="content-language" content="zh-cn">
<meta http-equiv="content-type" content="text/html; charset=gb2312">
<title>[title]蜘蛛爬行分析 - [htmltitle]</title>
<style>

</style>
</head>
<body>
<table border="1" width="100%" id="table1" cellspacing="0" cellpadding="0" height="74">
<tr>
<td><a href="iis.php">返回日志目录</a> | <a href="http://www.dj965.com">dj965</a></td>
 <td colspan="3" bgcolor="#808080" height="30" align="center">
 [title]蜘蛛爬行分析</td>
</tr>
 <tr>
 <td colspan="4" height="20" align="center">本日志[title]蜘蛛共爬行 [count] 次，其中正常 [t] 个，死链 [y] 个，缓存 [h] 个</td>
</tr>
<tr>
 <td align="center" width="200px;">时间</td>
 <td align="center" width="150px;">蜘蛛ip</td>
 <td align="center">被爬url</td>
 <td align="center" width="100px;">爬行结果</td>
</tr>
[show]
<tr>
 [page]
</tr>
</table>
[copy]
</body>
</html>';
}
/**
 * Return the HTML footer shared by both pages: an explanation of the 200 / 404 / 304
 * result categories followed by the program and author credits.
 *
 * @return string
 */
function mycopy()
{
return '<table border="1" width="100%" id="table2" cellspacing="0" cellpadding="0" height="402">
<tr>
 <td height="35" bgcolor="#c0c0c0" align="center">注备说明</td>
</tr>
<tr>
 <td height="170">
  正常：表示该面页蜘蛛访问正常，并已经下载。爬行状态返回200。
  死链：表示蜘蛛访问的面页不存在或链接错误，爬行状态返回404。
  缓存：表示蜘蛛之前已经爬过的面页且该面页未更新过，蜘蛛缓存区已存在该文件，不再下载该面页内容。爬行状态返回304。
  注意：蜘蛛爬过的面页不一定会放出来，因为蜘蛛爬回去的数据须经过引擎规则筛选后才会放出来，至于详细请查看引擎收录帮助。
 </td>
</tr>
<tr>
 <td>
  程序名称：<a target="_blank" href="http://tarr.cn/?p=23">牛仔iis日志蜘蛛爬行记录分析器 - 茄咧啡修改版</a> 修改者：<a href="http://www.tarr.cn/" target="_blank">茄咧啡</a>
 *******************************************************
  原程序名称：<a target="_blank" href="http://www.niuzi.com/">牛仔iis日志蜘蛛爬行记录分析器</a>
  原作者：牛仔
  qq：172379201
  email:17gd$163.com ($转换@)
  注意：本程序只供大家学习使用，请勿用作商业用途。
 </td>
</tr>
</table>';
}
?>

上一篇：IIS与PHP水火也相容

下一篇：Windows服务器中IIS返回的网页错误代码大汇总及原因解