首页 > 学院 > 开发设计 > 正文

HtmlEntities

2019-11-17 02:32:51
字体:
来源:转载
供稿:网友

HtmlEntities

#region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode

        // Adapted from the ZetaHtmlEditControl sample:
        // http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&zep=Control%2FHtmlEditControl.cs&obid=43954&obtid=2&ovid=13

        /// <summary>
        /// Reduces an HTML fragment to plain text.
        /// Source line breaks become spaces, closing paragraph tags become a blank line,
        /// line-break tags become a newline, comments / style / script elements and all
        /// remaining tags are dropped, a fixed set of entities is decoded and runs of
        /// spaces are collapsed to a single space.
        /// </summary>
        /// <param name="htmlCode">The HTML markup to convert. Null or empty input is returned unchanged.</param>
        /// <returns>The text content of <paramref name="htmlCode"/>.</returns>
        private static string getOnlyTextFromHtmlCode(string htmlCode)
        {
            if (string.IsNullOrEmpty(htmlCode))
            {
                return htmlCode;
            }

            const RegexOptions options = RegexOptions.Singleline | RegexOptions.IgnoreCase;
            string paragraphBreak = Environment.NewLine + Environment.NewLine;

            // Line breaks in the markup itself carry no meaning in HTML: flatten them to spaces.
            // "\r\n" has to go first so that a CR/LF pair yields one space, not two.
            htmlCode = htmlCode.Replace("\r\n", " ");
            htmlCode = htmlCode.Replace("\r", " ");
            htmlCode = htmlCode.Replace("\n", " ");

            // </p> (any casing) ends a paragraph: emit a blank line.
            htmlCode = Regex.Replace(htmlCode, @"</p\s*>", paragraphBreak, options);

            // HTML comments.
            htmlCode = Regex.Replace(htmlCode, @"<!--.*?-->", string.Empty, options);

            // <br>, <br/>, <br clear="all"> ... become a single newline.
            htmlCode = Regex.Replace(htmlCode, @"<br[^>]*>", Environment.NewLine, options);

            // Elements whose content must not leak into the text.
            htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
            htmlCode = removeTagFromHtmlCode(@"script", htmlCode);

            // Every remaining tag. Singleline lets '.' span the newlines inserted above.
            htmlCode = Regex.Replace(htmlCode, @"<.+?>", string.Empty, options);

            // Entities are decoded only after the tags are gone.
            htmlCode = unescapeHtmlEntities(htmlCode);

            // Collapse runs of spaces (newlines are left alone).
            htmlCode = Regex.Replace(htmlCode, @" +", @" ", options);

            return htmlCode;
        }

        // Entity -> replacement text, applied in order by unescapeHtmlEntities.
        // See http://dev.w3.org/html5/html-author/charref for the entity names.
        private static readonly string[][] htmlEntityReplacements =
        {
            new[] { "&nbsp;", " " },

            // German umlauts and sharp s.
            new[] { "&Auml;", "Ä" },
            new[] { "&auml;", "ä" },
            new[] { "&Ouml;", "Ö" },
            new[] { "&ouml;", "ö" },
            new[] { "&Uuml;", "Ü" },
            new[] { "&uuml;", "ü" },
            new[] { "&szlig;", "ß" },

            // Symbols.
            new[] { "&pound;", "£" },
            new[] { "&sect;", "§" },
            new[] { "&copy;", "©" },
            new[] { "&reg;", "®" },
            new[] { "&micro;", "µ" },
            new[] { "&para;", "¶" },
            new[] { "&Oslash;", "Ø" },
            new[] { "&oslash;", "ø" },
            new[] { "&divide;", "÷" },
            new[] { "&times;", "×" },
        };

        /// <summary>
        /// Replaces the named HTML entities listed in <c>htmlEntityReplacements</c> with the
        /// characters they stand for. Entity names are matched case-sensitively; entities
        /// that are not in the table are left untouched.
        /// </summary>
        /// <param name="htmlCode">Text that may contain named entities. Null or empty input is returned unchanged.</param>
        /// <returns><paramref name="htmlCode"/> with the known entities decoded.</returns>
        private static string unescapeHtmlEntities(string htmlCode)
        {
            if (string.IsNullOrEmpty(htmlCode))
            {
                return htmlCode;
            }

            foreach (string[] pair in htmlEntityReplacements)
            {
                htmlCode = htmlCode.Replace(pair[0], pair[1]);
            }

            return htmlCode;
        }

        /// <summary>
        /// Removes every <c>&lt;tag ...&gt;...&lt;/tag&gt;</c> element, including its content,
        /// from the given markup. Matching is case-insensitive and spans line breaks.
        /// </summary>
        /// <param name="tag">The element name, e.g. <c>script</c>.</param>
        /// <param name="htmlCode">The markup to strip.</param>
        /// <returns><paramref name="htmlCode"/> without the matched elements.</returns>
        private static string removeTagFromHtmlCode(
            string tag,
            string htmlCode)
        {
            // Escape the name so it is matched literally; \b keeps "style" from matching "<styles>".
            string pattern = string.Format(@"<{0}\b.*?</{0}\s*>", Regex.Escape(tag));

            return Regex.Replace(
                htmlCode,
                pattern,
                string.Empty,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
        }

#endregion

  


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表