c#获取html代码中的网址

发布日期:2018-02-05    浏览次数:447
 private static List GetHyperLinks(string htmlCode, string url)
        {
            ArrayList al = new ArrayList();
            bool IsGenxin = false;
            StringBuilder weburlSB = new StringBuilder();//SQL 
            StringBuilder linkSb = new StringBuilder();//展示数据 
            List Weburllistzx = new List();//新增 
            List Weburllist = new List();//旧的 
            string ProductionContent = htmlCode;
            Regex reg = new Regex(@"http(s)?://([\w-]+\.)+[\w-]+/?");
            string wangzhanyuming = reg.Match(url, 0).Value;
            MatchCollection mc = Regex.Matches(ProductionContent.Replace("href="/", "href="" + wangzhanyuming).Replace("href='/", "href='" + wangzhanyuming).Replace("href=/", "href=" + wangzhanyuming).Replace("href="./", "href="" + wangzhanyuming), @"<[aA][^>]* href=[^>]*>", RegexOptions.Singleline);
            int Index = 1;
            foreach (Match m in mc)
            {
                MatchCollection mc1 = Regex.Matches(m.Value, @"[a-zA-z]+://[^\s]*", RegexOptions.Singleline);
                if (mc1.Count > 0)
                {
                    foreach (Match m1 in mc1)
                    {
                        string linkurlstr = string.Empty;
                        linkurlstr = m1.Value.Replace(""", "").Replace("'", "").Replace(">", "").Replace(";", "");
                        weburlSB.Append("$-$");
                        weburlSB.Append(linkurlstr);
                        weburlSB.Append("$_$");
                        if (!Weburllist.Contains(linkurlstr) && !Weburllistzx.Contains(linkurlstr))
                        {
                            IsGenxin = true;
                            Weburllistzx.Add(linkurlstr);
                            linkSb.AppendFormat("{0}
", linkurlstr); } } } else { if (m.Value.IndexOf("javascript") == -1) { string amstr = string.Empty; string wangzhanxiangduilujin = string.Empty; wangzhanxiangduilujin = url.Substring(0, url.LastIndexOf("/") + 1); amstr = m.Value.Replace("href="", "href="" + wangzhanxiangduilujin).Replace("href='", "href='" + wangzhanxiangduilujin); MatchCollection mc11 = Regex.Matches(amstr, @"[a-zA-z]+://[^\s]*", RegexOptions.Singleline); foreach (Match m1 in mc11) { string linkurlstr = string.Empty; linkurlstr = m1.Value.Replace(""", "").Replace("'", "").Replace(">", "").Replace(";", ""); weburlSB.Append("$-$"); weburlSB.Append(linkurlstr); weburlSB.Append("$_$"); if (!Weburllist.Contains(linkurlstr) && !Weburllistzx.Contains(linkurlstr)) { IsGenxin = true; Weburllistzx.Add(linkurlstr); linkSb.AppendFormat("{0}
", linkurlstr); } } } } Index++; } return Weburllistzx; }

本文网址:https://www.wyxxw.cn/blog-detail-2-6-61.html

返回列表

非特殊说明,本文版权归原作者所有,转载请注明出处

提示:本站所有资源仅供学习与参考,请勿用于商业用途。图片来自互联网~如侵犯您的权益,请联系QQ:1067507709.

提示:转载请注明来自:https://www.wyxxw.cn/blog-detail-2-6-61.html 。 本文发布者:momo