研究了两天搞出以下代码,共享给各个同行,我只贴过程函数出来,百度是网上找来的,其实其他三个也有代码,只是对应的网页代码改了,数据抓取不对,我重写了这三个函数,尤其是alexa罗索....
sub GoogleRank(strurl,id)
Set R=Server.CreateObject("Microsoft.XmlHttp")
R.Open "GET",Url,False,"",""
R.SetRequestHeader "Referer",Url
R.Send
str1=B2S(R.ResponseBody)
str1=replace(str1,",","")
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<font color=#FB5E3C>"
str_bottom="</font>"
reg.Pattern=""&str_top&"((.|/n)*?)"&str_bottom&""
Set matches = reg.execute(str1)
str1=""
For Each match1 in matches
str1=match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1=replace(replace(str1,str_top,""),str_bottom,"")
conn.execute("update webtable set pr='"&str1&"' where id="&id)
end sub
Sub Error(str)
select case str
case 1
response.write "<BR> 搜索引擎为空,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a>"
case 2
response.write "<BR> 站点名字为空,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a>"
case 3
response.write "<BR> 你输入的搜索引擎本程序不支持,请联系<a href=mailto:zhming1112@hotmail.com>zhming1112@hotmail.com</a></body></html>"
case 4
response.write "<BR> 未知错误--抓取不到数据 请<font color=blue><a href=javascript:location.reload();>刷新</a></font>重试</body></html>"
end select
response.end
End Sub
Sub google(strurl,id,all)
url="http://www.google.cn/search?complete=1&hl=zh-CN&q=site%3A"&strUrl&"&meta="
str= getHTTPPage(url)
if str="" then
conn.execute("update webtable set google='0' where id="&id)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top="<td align=right nowrap><font size=-1>"
str_bottom="</font></td></tr></table>"
reg.Pattern=""&str_top&"((.)*)"&str_bottom&""
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
if instr(str,"<html>")=1 then
str2=0
else
str=split(str,"</b>")
str1=str(3)
str2=split(str1,"<b>")(1)
end if
if str2="" or len(str2)>200 then
conn.execute("update webtable set google='0' where id="&id)
else
conn.execute("update webtable set google='"&str2&"' where id="&id)
end if
end if
End Sub
Sub baidu(str,id,all)
'call print_do("baidu")
if all="n" then
url="http://www.baidu.com/s?wd=site%3A"&str&"&cl=3"
else
strext=split(str,".")
url="http://www.baidu.com/s?wd="&strext(0)&"&cl=3"
end if
'response.Write("<br>baidu's url:"&url)
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
if str="" then
Call Error(4)
else
set reg=new Regexp
reg.Multiline=True
reg.Global=False
reg.IgnoreCase=true
str_top="<td align=""right"" nowrap>"
str_bottom="</td>"
reg.Pattern=""&str_top&"((.|/n)*?)"&str_bottom&""
Set matches = reg.execute(str)
For Each match1 in matches
str=match1.value
Next
Set matches = Nothing
Set reg = Nothing
response.write "<BR>"
'response.write " "
if str="" or len(str)>200 then
conn.execute("update webtable set baidu='0' where id="&id)
else
if instr(str,"约")=0 then
keyw="页"
else
keyw="约"
end if
str=Mid(str,(InStr(str,keyw)+1),(InStr(str,"篇")-InStr(str,keyw)-1))
response.write str
conn.execute("update webtable set baidu='"&replace(replace(str,",","")," ","")&"' where id="&id)
end if
end if
End Sub
Sub alexa(strurl,id)
url="http://www.alexa.com/data/details/traffic_details?q=&url="&strurl
Set R=Server.CreateObject("Microsoft.XmlHttp")
R.Open "GET",Url,False,"",""
R.SetRequestHeader "Referer",Url
R.Send
str1=Bytes2bStr(R.ResponseBody)
str1=replace(str1,",","")
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<!--Did you know"
str_bottom="</span><br>"
reg.Pattern=""&str_top&"((.|/n)*?)"&str_bottom&""
Set matches = reg.execute(str1)
str1=""
For Each match1 in matches
str1=str1&match1.value
Next
Set matches = Nothing
Set reg = Nothing
str1 = Replace(str1,"<!--Did you know? Alexa offers this data programmatically. Visit http://aws.amazon.com/awis for more information about the Alexa Web Information Service.-->","")
if str1<>"" then
str1=replace(str1,"<span class","")
str1=replace(str1,"</span></span>","")
str1=replace(str1,"""","")
str1=replace(str1," ","")
str1=split(str1,"<br>")(0)
if cstr(right(str1,7))="</span>" then
str1=left(trim(str1),len(str1)-7)
end if
if isnumeric(str1) then
num=str1
else
csstxt=GetAlexaCss()
num=""
str1=split(str1,"</span>")
for i=0 to ubound(str1)
str2=str1(i)
if left(str2,instr(str2,"="))<>"" then
num=num&left(str2,instr(str2,"=")-1)
str2=right(str2,len(str2)-instr(str2,"="))
end if
str3=split(str2,">")
for j=0 to ubound(str3)
next
if str3(0)<>"" then
if not isnumeric(str3(0)) then
if instr(csstxt,str3(0))=0 then
num=num&str3(1)
end if
end if
end if
next
end if
else
num=0
end if
sql="update webtable set alexa='"&num&"' where id="&id
response.Write("<script>alert('"&sql&"')</script>")
conn.execute(sql)
End Sub
Function B2S(Str)
Dim O
Set O = Server.CreateObject("adodb.stream")
O.Type = 1
O.Mode =3
O.Open
O.Write Str
O.Position = 0
O.Type = 2
O.Charset = "GB2312"
B2S=O.ReadText
O.Close
Set O = nothing
End Function
'获取alexa的样式表
Function GetAlexaCss()
url="http://client.alexa.com/common/css/scramble.css"
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
GetAlexaCss=str
end function
Sub print_do(str)
response.write "<script>"
response.write "function HiddenLoad()"
response.write "{"
response.write "parent.do"&str&".style.display='none';"
response.write "}"
response.write "</script>"
response.write "<body leftmargin=0 topmargin=0 marginwidth=0 marginheight=0 bgcolor=#f2f2f2 onload=HiddenLoad()>"
end sub
Function getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject("Microsoft.XMLHTTP")
Http.open "GET",url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End function
Function Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject("ADODB.Stream")
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = "GB2312"
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End Function
Function getaspHTTPPage(url)
if url="" then
exit function
end if
Set HttpObj = Server.CreateObject("AspHTTP.Conn")
'设置代理
服务器,通过代理上网的用户需要设置此选项
If ProxyIP=1 Then
HttpObj.Proxy="192.168.5.254:808"
end if
HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = "GET"
getaspHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End function
Function IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString)
If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write "当前组件 ASPHTTP"
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Set xTestObj = Nothing
Err = 0
End Function