panel
This commit is contained in:
227
NetPanel.Help/HtmlTag.cs
Normal file
227
NetPanel.Help/HtmlTag.cs
Normal file
@@ -0,0 +1,227 @@
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace NetPanel.Help
|
||||
{
|
||||
/// <summary>
/// A lightweight, regex-based view of one HTML element: its tag name, the raw
/// text of its opening tag, the raw markup between the opening and closing
/// tags, and the quoted attributes parsed from the opening tag.
/// Instances are only created by the FindTag / FindTagByAttr methods.
/// </summary>
public class HtmlTag
{
    private String m_Name;
    private String m_BeginTag;
    private String m_InnerHTML;

    // Attribute names are compared ordinally and case-insensitively so lookups do
    // not depend on the current culture (ToUpper() is culture-sensitive).
    private Hashtable m_Attributes = new Hashtable(StringComparer.OrdinalIgnoreCase);

    // name = "value" or name = 'value'. Group 1 is the attribute name, group 3 the
    // value. The closing quote must be the same character as the opening one, so a
    // value may contain the other quote character (e.g. title="it's").
    static readonly Regex attrReg = new Regex(
        @"([a-zA-Z0-9_-]+)\s*=\s*(\x27|\x22)(.*?)\2",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Dates of the form yyyy-M-d with a year between 1600 and 3999.
    static readonly Regex dateReg = new Regex(@"(?<date>((1[6-9]|[2-3]\d)\d{2})-(\d{1,2})-(\d{1,2}))");

    // Patterns used by checkStr / checkStr2. The block patterns are lazy so that
    // text between two separate script/iframe/frameset blocks is preserved.
    static readonly Regex scriptBlockReg = new Regex(@"<script[\s\S]+?</script *>", RegexOptions.IgnoreCase);
    static readonly Regex scriptHrefReg = new Regex(@" href *= *[^<>]*?script *:", RegexOptions.IgnoreCase);
    static readonly Regex eventAttrReg = new Regex(@" on\w+\s*=", RegexOptions.IgnoreCase);
    static readonly Regex iframeBlockReg = new Regex(@"<iframe[\s\S]+?</iframe *>", RegexOptions.IgnoreCase);
    static readonly Regex framesetBlockReg = new Regex(@"<frameset[\s\S]+?</frameset *>", RegexOptions.IgnoreCase);
    static readonly Regex imgTagReg = new Regex(@"\<img[^\>]+\>", RegexOptions.IgnoreCase);
    static readonly Regex paragraphCloseReg = new Regex(@"</p>", RegexOptions.IgnoreCase);
    static readonly Regex paragraphOpenReg = new Regex(@"<p>", RegexOptions.IgnoreCase);
    static readonly Regex anyTagReg = new Regex(@"<[^>]*>", RegexOptions.IgnoreCase);

    // Patterns used by HtmlToText.
    static readonly Regex textScriptReg = new Regex(@"<script[^>]*>[\s\S]*?</script[^>]*>", RegexOptions.IgnoreCase);
    static readonly Regex textStyleReg = new Regex(@"<style[^>]*>[\s\S]*?</style[^>]*>", RegexOptions.IgnoreCase);
    static readonly Regex textSelectReg = new Regex(@"<select[^>]*>[\s\S]*?</select[^>]*>", RegexOptions.IgnoreCase);
    // NOTE(review): the second alternative is a plain space in this copy of the
    // file; it may originally have been an HTML entity such as a non-breaking
    // space - confirm against the repository.
    static readonly Regex textTagOrSpaceReg = new Regex("(<[^>]+?>)| ", RegexOptions.IgnoreCase);
    static readonly Regex whitespaceRunReg = new Regex("(\\s)+");

    // HTML elements that never have a closing tag or content.
    static readonly string[] voidElements =
    {
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr"
    };

    private HtmlTag(string name, string beginTag, string innerHTML)
    {
        m_Name = name;
        m_BeginTag = beginTag;
        m_InnerHTML = innerHTML;

        // When an attribute is repeated, the last occurrence wins.
        foreach (Match attr in attrReg.Matches(beginTag))
        {
            m_Attributes[attr.Groups[1].Value] = attr.Groups[3].Value;
        }
    }

    /// <summary>Returns the raw text of the opening tag as it was matched.</summary>
    public string GetBeginTag()
    {
        return m_BeginTag;
    }

    /// <summary>
    /// Finds elements called <paramref name="name"/> inside this element's inner HTML.
    /// The name is inserted into a regular expression without escaping.
    /// </summary>
    public List<HtmlTag> FindTag(String name)
    {
        return FindTag(m_InnerHTML, name, String.Format(@"<{0}(\s[^<>]*|)>", name));
    }

    /// <summary>Finds img elements that carry a src attribute inside this element's inner HTML.</summary>
    public List<HtmlTag> FindImgTag()
    {
        return FindTag(m_InnerHTML, "img", @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");
    }

    /// <summary>
    /// Finds elements inside this element's inner HTML whose opening tag matches the
    /// regular expression <paramref name="format"/>.
    /// </summary>
    public List<HtmlTag> FindTag(String name, String format)
    {
        return FindTag(m_InnerHTML, name, format);
    }

    /// <summary>
    /// Finds elements inside this element's inner HTML with the given tag name whose
    /// attribute <paramref name="attrName"/> has the quoted value <paramref name="attrValue"/>.
    /// </summary>
    public List<HtmlTag> FindTagByAttr(String tagName, String attrName, String attrValue)
    {
        return FindTagByAttr(m_InnerHTML, tagName, attrName, attrValue);
    }

    /// <summary>The tag name this element was searched by.</summary>
    public String TagName
    {
        get { return m_Name; }
    }

    /// <summary>The raw markup between the opening and the closing tag.</summary>
    public String InnerHTML
    {
        get { return m_InnerHTML; }
    }

    /// <summary>The inner HTML after it has been passed through <see cref="checkStr"/>.</summary>
    public String InnerText
    {
        get { return checkStr(m_InnerHTML); }
    }

    /// <summary>
    /// Returns the value of a quoted attribute of the opening tag, or null when the
    /// attribute is absent. The lookup ignores case.
    /// </summary>
    public String GetAttribute(string name)
    {
        if (name == null)
        {
            return null;
        }
        return m_Attributes[name] as String;
    }

    /// <summary>
    /// The first yyyy-M-d date found in <see cref="InnerText"/>, or null when the
    /// text contains no such date.
    /// </summary>
    public String FindDate
    {
        get
        {
            String text = InnerText;
            if (String.IsNullOrEmpty(text))
            {
                return null;
            }

            // Match.Groups always contains at least the whole-match group, so its
            // count cannot tell a hit from a miss; Success is the correct test.
            Match found = dateReg.Match(text);
            if (found.Success)
            {
                return found.Groups["date"].Value;
            }
            return null;
        }
    }

    // Shared first stage of checkStr / checkStr2: removes script, iframe and
    // frameset blocks including their content, neutralises script hrefs and
    // on... event attributes, and removes img tags.
    private static string RemoveActiveContent(string html)
    {
        html = scriptBlockReg.Replace(html, "");
        // Bounded to a single tag so it cannot swallow text between tags.
        html = scriptHrefReg.Replace(html, "");
        html = eventAttrReg.Replace(html, " _disibledevent=");
        html = iframeBlockReg.Replace(html, "");
        html = framesetBlockReg.Replace(html, "");
        html = imgTagReg.Replace(html, "");
        return html;
    }

    /// <summary>
    /// Strips markup from <paramref name="html"/> like <see cref="checkStr"/>, but keeps
    /// the exact tokens "&lt;br&gt;" and "&lt;br/&gt;" in the result.
    /// Null or empty input is returned unchanged.
    /// </summary>
    public static string checkStr2(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        // Hide line breaks behind placeholders so the tag stripping below keeps them.
        html = html.Replace("<br>", "$br$");
        html = html.Replace("<br/>", "$br/$");

        html = RemoveActiveContent(html);
        html = anyTagReg.Replace(html, "");

        // NOTE(review): the search string is a plain space in this copy of the file;
        // it may originally have been an HTML entity - confirm.
        html = html.Replace(" ", "");

        html = html.Replace("$br$", "<br>");
        html = html.Replace("$br/$", "<br/>");
        return html;
    }

    /// <summary>
    /// Strips script, iframe and frameset blocks together with their content, then
    /// removes every remaining tag from <paramref name="html"/>.
    /// Null or empty input is returned unchanged.
    /// </summary>
    public static string checkStr(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return html;
        }

        html = RemoveActiveContent(html);
        html = paragraphCloseReg.Replace(html, "");
        html = paragraphOpenReg.Replace(html, "");
        html = anyTagReg.Replace(html, "");

        // NOTE(review): both calls below use plain-space literals in this copy of
        // the file, which makes the second one a no-op. The originals were probably
        // HTML entities or special space characters - confirm.
        html = html.Replace(" ", "");
        html = html.Replace(" ", " ");
        return html;
    }

    /// <summary>
    /// Converts HTML to text: drops script, style and select blocks with their
    /// content, removes all tags, and collapses each run of whitespace into a single
    /// space. Null or empty input is returned unchanged.
    /// </summary>
    public static string HtmlToText(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        string text = textScriptReg.Replace(str, "");
        text = textStyleReg.Replace(text, "");
        text = textSelectReg.Replace(text, "");
        text = textTagOrSpaceReg.Replace(text, "");
        return whitespaceRunReg.Replace(text, " ");
    }

    /// <summary>
    /// Searches <paramref name="html"/> for every element named <paramref name="tagName"/>
    /// whose attribute <paramref name="attrName"/> has the quoted value <paramref name="attrValue"/>.
    /// Example: FindTagByAttr(html, "div", "class", "demo") returns all div elements
    /// whose class is demo. The arguments are inserted into a regular expression
    /// without escaping.
    /// </summary>
    public static List<HtmlTag> FindTagByAttr(String html, String tagName, String attrName, String attrValue)
    {
        String format = String.Format(@"<{0}\s[^<>]*{1}\s*=\s*(\x27|\x22){2}(\x27|\x22)[^<>]*>", tagName, attrName, attrValue);
        return FindTag(html, tagName, format);
    }

    /// <summary>
    /// Scans <paramref name="html"/> for opening tags matching the regular expression
    /// <paramref name="format"/> and pairs each one with its closing tag, counting
    /// nested elements of the same <paramref name="name"/>. Elements nested inside a
    /// returned element are not returned separately. Void elements such as img or br
    /// are returned with empty inner HTML. Scanning stops at the first opening tag
    /// that has no matching closing tag. Null or empty input yields an empty list.
    /// </summary>
    public static List<HtmlTag> FindTag(String html, String name, String format)
    {
        List<HtmlTag> tags = new List<HtmlTag>();
        if (string.IsNullOrEmpty(html))
        {
            return tags;
        }

        Regex openReg = new Regex(format, RegexOptions.IgnoreCase);
        // Matches both opening and closing tags of this name; group 1 is "/" for a closing tag.
        Regex anyReg = new Regex(String.Format(@"<(\/|)({0})(\s[^<>]*|)>", name), RegexOptions.IgnoreCase);
        bool isVoid = IsVoidElement(name);

        int start = 0;
        while (start <= html.Length)
        {
            Match open = openReg.Match(html, start);
            if (!open.Success)
            {
                break;
            }

            int innerStart = open.Index + open.Length;

            if (isVoid)
            {
                // A void element has no closing tag, so there is nothing to pair it with.
                tags.Add(new HtmlTag(name, open.Value, String.Empty));
                // Always advance, even if a caller-supplied pattern matched nothing.
                start = open.Length > 0 ? innerStart : innerStart + 1;
                continue;
            }

            // Walk forward over same-named tags until the nesting depth returns to zero.
            int depth = 1;
            int cursor = innerStart;
            Match close = null;
            while (depth > 0)
            {
                Match next = anyReg.Match(html, cursor);
                if (!next.Success)
                {
                    break;
                }
                cursor = next.Index + next.Length;
                if (next.Groups[1].Value == "/")
                {
                    depth--;
                }
                else
                {
                    depth++;
                }
                if (depth == 0)
                {
                    close = next;
                }
            }

            if (close == null)
            {
                break;
            }

            tags.Add(new HtmlTag(name, open.Value, html.Substring(innerStart, close.Index - innerStart)));
            start = cursor;
        }

        return tags;
    }

    // True when name is an HTML void element (compared without regard to case).
    private static bool IsVoidElement(string name)
    {
        if (name == null)
        {
            return false;
        }
        foreach (string candidate in voidElements)
        {
            if (String.Equals(candidate, name, StringComparison.OrdinalIgnoreCase))
            {
                return true;
            }
        }
        return false;
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user