首页 > 编程 > HTML > 正文

利用正则表达式去掉HTML代码

2024-08-26 00:15:37
字体:
来源:转载
供稿:网友

using System.Text.RegularExpressions; // required for Regex, RegexOptions, Match and MatchEvaluator

  // 利用正则表达式去掉"<"和">"之间的内容
  private string stripht(string strhtml)
  {
   regex regex=new regex("<.+?>",regexoptions.ignorecase);
   string stroutput=regex.replace(strhtml,"");
   return stroutput;
  }


// Method 2: converts an HTML fragment to plain text.
//
// The original note on this method said it drove the CPU to 100% for unknown
// reasons. The most likely cause was its tag pattern, which repeated a group
// that itself began with \w+; a backtracking regex engine can need
// exponential time on such a pattern when a tag fails to match. The tag
// pattern below uses alternatives that cannot match the same character, so
// that blow-up cannot occur.
//
// strhtml: the markup to clean; may be null or empty.
// Returns: the text with scripts, comments and tags removed, line breaks
//          followed by whitespace removed, the listed entities decoded, and
//          any remaining "<", ">" and "\r" characters deleted. Returns an
//          empty string when the input is null or empty.
public static string drophtml(string strhtml)
{
    if (string.IsNullOrEmpty(strhtml))
    {
        return string.Empty;
    }

    // Singleline lets '.' cross line breaks so multi-line scripts and
    // comments are matched as a whole.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string stroutput = strhtml;

    // Script elements, including their content.
    stroutput = Regex.Replace(stroutput, @"<script[^>]*?>.*?</script>", "", options);

    // Comments; an unterminated comment runs to the end of the input.
    stroutput = Regex.Replace(stroutput, @"<!--.*?(?:-->|\z)", "", options);

    // Opening, closing and declaration tags. Quoted attribute values are
    // consumed as a unit so a ">" inside quotes does not end the tag.
    stroutput = Regex.Replace(
        stroutput,
        @"<(?:/\s*)?!?(?:\w+:)?\w+(?:[^>""']|""[^""]*""|'[^']*')*>",
        "",
        options);

    // A line break together with the whitespace that follows it.
    stroutput = Regex.Replace(stroutput, @"[\r\n]\s+", "", options);

    // Decode entities in a single pass so that text produced by one
    // replacement (e.g. "&amp;lt;" -> "&lt;") is never decoded a second time.
    stroutput = Regex.Replace(
        stroutput,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new MatchEvaluator(decodehtmlentity),
        options);

    // String.Replace returns a new string; the results must be kept.
    return stroutput.Replace("<", "").Replace(">", "").Replace("\r", "");
}

// Maps one matched entity (group 1 holds its name or "#<digits>") to its
// replacement text. Numeric references other than the ones listed are
// removed, as the original replacement table did.
private static string decodehtmlentity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            return "";
    }
}


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表