/*############################################
版权声明:
文章内容为本站编辑、创作。你可以任意转载、发布、使用,但请务必以明文标注文章原始出处及本声明
http://www.opent.cn 作者:浪淘沙
############################################*/
using System;
using System.Configuration;
using System.Data;
using System.Globalization;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using MSXML2;
namespace ec
{
/// <summary>
/// Remote-content helper: downloads pages and files over HTTP and offers small
/// text utilities (tag stripping, link and image extraction) for the downloaded HTML.
/// </summary>
/// <remarks>
/// Typical usage:
/// <code>
/// using (getremoteobj o = new getremoteobj())
/// {
///     string html  = o.replaceenter(o.getremotehtmlcode("http://www.baidu.com"));
///     string title = o.regreplace(o.getregvalue("&lt;title&gt;.+?&lt;/title&gt;", html), "&lt;title&gt;", "&lt;/title&gt;");
///     string links = o.gethref(html);
///     string imgs  = o.getimgsrc(html, "http://www.baidu.com/");
///     string saved = o.remotesave("http://www.baidu.com/img/logo.gif", Server.MapPath("html/"));
/// }
/// </code>
/// The class and its public members keep their lower-case names so that existing
/// callers continue to compile. The instance holds no state and no unmanaged resources.
/// </remarks>
public class getremoteobj : IDisposable
{
    // Compiled once and shared; Regex instances are safe to use concurrently for matching.

    /// <summary>Any markup tag; Singleline lets a tag span several lines.</summary>
    private static readonly Regex HtmlTagPattern =
        new Regex("<.+?>", RegexOptions.Singleline);

    /// <summary>
    /// An href attribute. Group 1 is everything after "href =": the optional opening quote,
    /// the URL characters (word characters, '\', '/', '.', ':', '-') and the optional
    /// closing quote / trailing blanks / '>'.
    /// </summary>
    private static readonly Regex HrefPattern = new Regex(
        @"href *= *((?:'|"")?[\w\\/.:\-]+(?:'|""| *|>)?)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>An &lt;img ...&gt; tag, in any letter case.</summary>
    private static readonly Regex ImgTagPattern = new Regex(
        @"<img\b[^>]+>",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// The src attribute of a tag; the "url" group holds the value without its quotes.
    /// The look-behind keeps attributes such as data-src from matching.
    /// </summary>
    private static readonly Regex SrcAttributePattern = new Regex(
        @"(?<![\w-])src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    #region Construction and disposal

    /// <summary>
    /// Creates the helper. There is nothing to initialise.
    /// </summary>
    public getremoteobj()
    {
    }

    /// <summary>
    /// Releases the helper. The class owns no resources, so this only tells the GC that
    /// no finalization is needed (relevant if a derived class adds a finalizer).
    /// </summary>
    public void dispose()
    {
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// <see cref="IDisposable"/> entry point so the helper can be used in a
    /// <c>using</c> statement; forwards to <see cref="dispose"/>.
    /// </summary>
    void IDisposable.Dispose()
    {
        dispose();
    }

    #endregion

    #region Timestamp-based random name

    /// <summary>
    /// Builds a 17-digit name from the current local time (yyyyMMddHHmmss) followed by a
    /// three-digit random number.
    /// </summary>
    /// <param name="ra">Random number source supplied by the caller.</param>
    /// <returns>The generated name, e.g. <c>20070223114405123</c>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ra"/> is null.</exception>
    public string daterndname(Random ra)
    {
        if (ra == null)
        {
            throw new ArgumentNullException("ra");
        }

        // Invariant culture: always Gregorian calendar and ASCII digits, whatever the
        // thread culture is.
        string stamp = DateTime.Now.ToString("yyyyMMddHHmmss", CultureInfo.InvariantCulture);

        // The upper bound of Random.Next is exclusive, so 1000 yields 100..999.
        string suffix = ra.Next(100, 1000).ToString(CultureInfo.InvariantCulture);

        return stamp + suffix;
    }

    #endregion

    #region File extension

    /// <summary>
    /// Returns the extension (without the dot) of a file name, path or URL.
    /// </summary>
    /// <param name="filename">File name, path or URL, e.g. <c>/xrssfile/2007-2/23/200722311844445.gif</c>.</param>
    /// <returns>
    /// The text after the last dot of the last path segment; <c>null</c> when
    /// <paramref name="filename"/> is null/empty or that segment has no dot.
    /// </returns>
    public string getfileextends(string filename)
    {
        if (string.IsNullOrEmpty(filename))
        {
            return null;
        }

        string name = filename;

        // A query string or fragment is not part of the file name.
        int cut = name.IndexOfAny(new char[] { '?', '#' });
        if (cut >= 0)
        {
            name = name.Substring(0, cut);
        }

        // Only the last path segment can carry the extension; dots in the host name or
        // in directory names must not be mistaken for it.
        int separator = name.LastIndexOfAny(new char[] { '/', '\\' });
        if (separator >= 0)
        {
            name = name.Substring(separator + 1);
        }

        int dot = name.LastIndexOf('.');
        if (dot < 0)
        {
            return null;
        }

        return name.Substring(dot + 1);
    }

    #endregion

    #region Download remote source text

    /// <summary>
    /// Downloads the document at <paramref name="url"/> and returns it as text.
    /// </summary>
    /// <param name="url">Absolute URL of the remote document.</param>
    /// <returns>
    /// The response body decoded with <see cref="Encoding.Default"/>; any charset
    /// declared by the server or the document is not taken into account.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
    public string getremotehtmlcode(string url)
    {
        if (url == null)
        {
            throw new ArgumentNullException("url");
        }

        return Encoding.Default.GetString(DownloadBytes(url));
    }

    #endregion

    #region Save a remote file

    /// <summary>
    /// Downloads the file at <paramref name="url"/> and stores it in
    /// <paramref name="path"/> under a generated name (see <see cref="daterndname"/>)
    /// that keeps the extension found in the URL.
    /// </summary>
    /// <param name="url">Absolute URL of the remote file.</param>
    /// <param name="path">Target directory, e.g. <c>Server.MapPath("html/")</c>.</param>
    /// <returns>The generated file name (without the directory).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="WebException">The request failed or the server returned an error status.</exception>
    public string remotesave(string url, string path)
    {
        if (url == null)
        {
            throw new ArgumentNullException("url");
        }

        string fileName = daterndname(new Random());
        string extension = getfileextends(url);
        if (!string.IsNullOrEmpty(extension))
        {
            // No extension in the URL: do not leave a dangling "." on the name.
            fileName += "." + extension;
        }

        // Download first so that a failed request never leaves an empty file behind.
        byte[] content = DownloadBytes(url);

        // Path.Combine works whether or not the directory ends with a separator;
        // WriteAllBytes creates or overwrites the file and always closes the handle.
        File.WriteAllBytes(Path.Combine(path ?? "", fileName), content);

        return fileName;
    }

    /// <summary>
    /// Performs a blocking HTTP GET and returns the raw response body.
    /// Uses the managed <see cref="WebClient"/>, which is suitable for server-side code.
    /// </summary>
    private static byte[] DownloadBytes(string url)
    {
        using (WebClient client = new WebClient())
        {
            return client.DownloadData(url);
        }
    }

    #endregion

    #region Strip line breaks and double quotes

    /// <summary>
    /// Removes every double quote and every line break (CR, LF or CRLF) from the text,
    /// which lets single-line regular expressions run over the whole document.
    /// </summary>
    /// <param name="htmlcode">HTML source text.</param>
    /// <returns>The cleaned text; an empty string when the input is null or empty.</returns>
    public string replaceenter(string htmlcode)
    {
        if (string.IsNullOrEmpty(htmlcode))
        {
            return "";
        }

        return htmlcode.Replace("\"", "").Replace("\r", "").Replace("\n", "");
    }

    #endregion

    #region First regular-expression match

    /// <summary>
    /// Returns the first match of a regular expression in a text.
    /// </summary>
    /// <param name="regexstring">Regular expression, e.g. <c>&lt;title&gt;.+?&lt;/title&gt;</c>.</param>
    /// <param name="remotestr">Text to search, usually HTML source.</param>
    /// <returns>The matched text, or an empty string when nothing matches or the text is null.</returns>
    public string getregvalue(string regexstring, string remotestr)
    {
        if (remotestr == null)
        {
            return "";
        }

        Match match = Regex.Match(remotestr, regexstring);
        return match.Success ? match.Value : "";
    }

    #endregion

    #region Strip HTML tags

    /// <summary>
    /// Removes everything that looks like a markup tag (<c>&lt;...&gt;</c>) from the text.
    /// </summary>
    /// <param name="htmlcode">HTML source text.</param>
    /// <returns>The text without tags; an empty string when the input is null or empty.</returns>
    public string removehtml(string htmlcode)
    {
        if (string.IsNullOrEmpty(htmlcode))
        {
            return "";
        }

        // One pass removes exactly the matched spans.
        return HtmlTagPattern.Replace(htmlcode, "");
    }

    #endregion

    #region Extract links

    /// <summary>
    /// Collects the values of all <c>href</c> attributes found in the text.
    /// </summary>
    /// <param name="htmlcode">HTML source text.</param>
    /// <returns>
    /// The values in document order, each followed by <c>||</c>. A value is returned as
    /// matched, i.e. including its surrounding quote characters when present, and with
    /// its original letter case. Matching stops at the first character that is not a
    /// word character, '\', '/', '.', ':' or '-'. Empty string when the input is null
    /// or contains no link.
    /// </returns>
    public string gethref(string htmlcode)
    {
        if (string.IsNullOrEmpty(htmlcode))
        {
            return "";
        }

        StringBuilder links = new StringBuilder();
        foreach (Match match in HrefPattern.Matches(htmlcode))
        {
            // Group 1 already excludes the "href =" prefix, so no case-changing
            // text replacement is needed to strip it.
            links.Append(match.Groups[1].Value.Trim()).Append("||");
        }
        return links.ToString();
    }

    #endregion

    #region Extract image addresses

    /// <summary>
    /// Collects the address of every <c>&lt;img&gt;</c> tag found in the text.
    /// </summary>
    /// <param name="htmlcode">HTML source text.</param>
    /// <param name="imghttp">
    /// Base URL prepended to relative image addresses, e.g. <c>http://www.baidu.com/</c>;
    /// may be null or empty when the page only uses absolute addresses.
    /// </param>
    /// <returns>
    /// One entry per image tag as produced by <see cref="getimg"/>, each followed by
    /// <c>||</c>. Empty string when the input is null or contains no image tag.
    /// </returns>
    public string getimgsrc(string htmlcode, string imghttp)
    {
        if (string.IsNullOrEmpty(htmlcode))
        {
            return "";
        }

        StringBuilder images = new StringBuilder();
        foreach (Match tag in ImgTagPattern.Matches(htmlcode))
        {
            images.Append(getimg(tag.Value, imghttp)).Append("||");
        }
        return images.ToString();
    }

    /// <summary>
    /// Extracts the image address from a single <c>&lt;img src="" /&gt;</c> tag.
    /// </summary>
    /// <param name="imgstring">The <c>&lt;img ... /&gt;</c> tag text.</param>
    /// <param name="imghttp">Base URL prepended to a relative address; may be null or empty.</param>
    /// <returns>
    /// The <c>src</c> value without quotes and with its original letter case. A value that
    /// is already absolute (<c>http://</c>, <c>https://</c> or <c>//</c>) is returned as is;
    /// otherwise <paramref name="imghttp"/> is prepended by plain concatenation. When the
    /// tag has no <c>src</c>, only <paramref name="imghttp"/> (or an empty string) is returned.
    /// </returns>
    public string getimg(string imgstring, string imghttp)
    {
        string source = "";
        if (!string.IsNullOrEmpty(imgstring))
        {
            Match match = SrcAttributePattern.Match(imgstring);
            if (match.Success)
            {
                source = match.Groups["url"].Value.Trim();
            }
        }

        if (IsAbsoluteUrl(source))
        {
            return source;
        }

        return imghttp + source;
    }

    /// <summary>
    /// Tells whether an address already names its host and therefore needs no base URL.
    /// </summary>
    private static bool IsAbsoluteUrl(string address)
    {
        return address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase)
            || address.StartsWith("//", StringComparison.Ordinal);
    }

    #endregion

    #region Remove the delimiters of a regular-expression match

    /// <summary>
    /// Removes the delimiter texts that a regular expression matched together with the
    /// wanted value, e.g. <c>regreplace("&lt;title&gt;Hi&lt;/title&gt;", "&lt;title&gt;", "&lt;/title&gt;")</c>
    /// returns <c>Hi</c>. Every occurrence of each delimiter is removed, not only the
    /// first and last.
    /// </summary>
    /// <param name="regvalue">Text to clean, usually the result of <see cref="getregvalue"/>.</param>
    /// <param name="regstart">Leading delimiter to remove; ignored when null or empty.</param>
    /// <param name="regend">Trailing delimiter to remove; ignored when null or empty.</param>
    /// <returns>The cleaned text; <paramref name="regvalue"/> unchanged when it is null or empty.</returns>
    public string regreplace(string regvalue, string regstart, string regend)
    {
        if (string.IsNullOrEmpty(regvalue))
        {
            return regvalue;
        }

        string result = regvalue;
        if (!string.IsNullOrEmpty(regstart))
        {
            result = result.Replace(regstart, "");
        }
        if (!string.IsNullOrEmpty(regend))
        {
            result = result.Replace(regend, "");
        }
        return result;
    }

    #endregion
}
}
// 新闻热点
// 疑难解答
// 图片精选