XMLHTTP批量抓取远程资料
2024-07-09 22:40:53
供稿:网友
可以在此基础上结合正则表达式实现更好的效果,也希望大家能分享一下 XMLHTTP 的 session 共享技术。
<html>
<head>
<title>AUTOGET</title>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312">
</head>
<body bgcolor="#FFFFFF" >
<%
'=================================================
'FileName: Getit.Asp
'Intro : Auto Get Data From Remote WebSite
'Author: Babyt(阿泰)
'URL: http://blog.csdn.net/babyt
'createAt: 2002-02 Lastupdate:2004-09
'DB Table : data
'Table Field:
' UID -> Long -> Keep ID Of the pages
' UContent -> Text -> Keep Content Of the Pages(HTML)
'
'Flow of this script block:
' 1. open the Access database getit.mdb and a recordset over table "data"
' 2. set the remote URL prefix, the two error-marker strings and the ID range
' 3. call GetPart(intMin), then write a completion message to the response
' 4. close and release the recordset and the connection
'=================================================
' Raise the ASP script timeout (the value is in seconds) for this long-running batch job.
Server.ScriptTimeout=5000
' Error suppression is left commented out, so runtime errors in this block are not swallowed.
'on error resume next
' Open the Jet/Access database file getit.mdb, resolved relative to this page via Server.MapPath.
Set conn = Server.createObject("ADODB.Connection")
conn.open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Server.MapPath("getit.mdb")
' Open a recordset over the whole "data" table.
' The arguments 1 and 3 are the ADO values adOpenKeyset (cursor type) and adLockOptimistic (lock type).
Set rs = Server.createObject("ADODB.Recordset")
sql="select * from data"
rs.open sql,conn,1,3
' NOTE(review): this declares myErr and myCount, but the code below assigns myErr1 and myErr2,
' which are never declared. It only works because Option Explicit is not visible in this chunk;
' confirm against the rest of the file before enabling it.
Dim comeFrom,myErr,myCount
'========================================================
' URL prefix of the remote page; presumably a numeric ID is appended by code outside this block - TODO confirm.
comeFrom="http://www.xxx.com/U.asp?ID="
' Marker strings ("record does not exist" / "record is hidden"); presumably compared against the
' fetched page by code outside this block - TODO confirm.
myErr1="该资料不存在"
myErr2="该资料已隐藏"
'========================================================
'***************************************************************
' Author's note: only edit the values below - the start (intMin) and end (intMax)
' of the ID range i, and the step (intStep).
' Author's note: set each range to roughly 50,000 IDs; that is estimated to take
' a little over two hours and needs no manual intervention while it runs.
'****************************************************************
intMin=0
intMax=10000
' Step size
intStep=100
'==========================================================
' Do not modify the code below
'==========================================================
' GetPart is not defined in this block; presumably it does the actual fetching starting at intMin - TODO confirm.
Call GetPart (intMin)
' Write a completion message ("conversion finished for the data between intMin ~~ intMax") to the response.
Response.write "已经转换完成" & intMin & "~~" & intMax & "之间的数据"
' Release the recordset and the connection.
rs.close
Set rs=Nothing
conn.Close
set conn=nothing
%>
</body>
</html>
<%
' Fetch the given URL via XMLHTTP and process the returned content
Function GetBody(Url)
Dim objXML
On Error Resume Next
Set objXML = createObject("Microsoft.XMLHTTP")
With objXML
.Open "Get", Url, False, "", ""
.Send
GetBody = .ResponseBody