using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Tool;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;

namespace Search
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// FIFO queue of strings, used to hold URLs that are waiting to be processed.
        /// </summary>
        public class Queue
        {
            // The queue is backed by a linked list: enqueue at the tail, dequeue at the head.
            private LinkedList<string> queue = new LinkedList<string>();

            /// <summary>Appends <paramref name="t"/> to the tail of the queue.</summary>
            public void enQueue(string t)
            {
                queue.AddLast(t);
            }

            /// <summary>Removes and returns the element at the head of the queue.</summary>
            /// <returns>The oldest element still in the queue.</returns>
            /// <exception cref="InvalidOperationException">The queue is empty.</exception>
            public string deQueue()
            {
                if (queue.Count == 0)
                {
                    throw new InvalidOperationException("Cannot dequeue from an empty queue.");
                }

                // Take from the head: enQueue adds at the tail, so this yields FIFO order.
                string head = queue.First.Value;
                queue.RemoveFirst();
                return head;
            }

            /// <summary>Returns true when the queue holds no elements.</summary>
            public bool isQueueEmpty()
            {
                return queue.Count == 0;
            }

            /// <summary>Returns true when the queue already contains <paramref name="t"/>.</summary>
            public bool contians(string t)
            {
                return queue.Contains(t);
            }

            /// <summary>Returns the number of elements currently in the queue.</summary>
            public int getcount()
            {
                return queue.Count;
            }
        }

        /// <summary>
        /// Process-wide bookkeeping of URLs: a set of visited URLs and a queue of
        /// URLs that have not been visited yet.
        /// </summary>
        public class LinkQueue
        {
            // Set of URLs that have already been visited.
            private static readonly ISet<string> visitedUrl = new HashSet<string>();

            // Queue of URLs that are still waiting to be visited.
            private static readonly Queue unVisitedUrl = new Queue();

            /// <summary>Returns the queue of not-yet-visited URLs.</summary>
            public static Queue getUnVisitedUrl()
            {
                return unVisitedUrl;
            }

            /// <summary>Records <paramref name="url"/> as visited.</summary>
            public static void addVisitedUrl(String url)
            {
                visitedUrl.Add(url);
            }

            /// <summary>Removes <paramref name="url"/> from the visited set.</summary>
            public static void removeVisitedUrl(String url)
            {
                visitedUrl.Remove(url);
            }

            /// <summary>Dequeues and returns the next not-yet-visited URL.</summary>
            /// <exception cref="InvalidOperationException">No unvisited URL is queued.</exception>
            public static Object unVisitedUrlDeQueue()
            {
                return unVisitedUrl.deQueue();
            }

            /// <summary>
            /// Enqueues <paramref name="url"/> for a later visit unless it is null or blank,
            /// has already been visited, or is already queued, so that each URL is
            /// visited at most once.
            /// </summary>
            public static void addUnvisitedUrl(String url)
            {
                if (string.IsNullOrWhiteSpace(url))
                {
                    return;
                }

                if (visitedUrl.Contains(url) || unVisitedUrl.contians(url))
                {
                    return;
                }

                unVisitedUrl.enQueue(url);
            }

            /// <summary>Returns the number of URLs recorded as visited.</summary>
            public static int getVisitedUrlNum()
            {
                return visitedUrl.Count;
            }

            /// <summary>Returns true when no unvisited URL is queued.</summary>
            public static bool unVisitedUrlsEmpty()
            {
                return unVisitedUrl.isQueueEmpty();
            }
        }

        string[] urlarr = new string[100];

        /// <summary>
        /// Fetches the page named in textBox1 (or a default URL when the box is empty),
        /// lists the links extracted from it in richTextBox1, then passes every
        /// &lt;img&gt; source found in the page to zzHttp.downfile with target d:\pic\ and
        /// shows the number of images handled in label1.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            zzHttp http = new zzHttp();
            CookieContainer cookie = new CookieContainer();
            string url = textBox1.Text != "" ? textBox1.Text : "http://image.baidu.com/";
            string content = http.SendDataByGET(url, "", ref cookie);

            string baseUri = Utility.GetBaseUri(url);
            string[] links = Parser.ExtractLinks(baseUri, content);

            // Build the link listing once instead of re-assigning the control's whole
            // text on every iteration.
            StringBuilder linkListing = new StringBuilder();
            foreach (string link in links)
            {
                linkListing.Append(link).Append("\n");
            }
            richTextBox1.AppendText(linkListing.ToString());

            Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

            // Collect the src value of every <img> tag in the page.
            MatchCollection matches = regImg.Matches(content);
            Queue que = new Queue();
            foreach (Match match in matches)
            {
                que.enQueue(match.Groups["imgUrl"].Value);
            }

            // Drain the queue completely. Comparing a rising counter against the
            // shrinking queue length would stop after roughly half of the images.
            int downloaded = 0;
            while (!que.isQueueEmpty())
            {
                string picurl = que.deQueue();
                richTextBox1.AppendText(picurl + "\n");

                // The file name is whatever follows the last '/' of the URL.
                string[] segments = picurl.Split('/');
                string picname = segments[segments.Length - 1];
                if (picname.Length == 0)
                {
                    // Empty src, or a URL ending in '/': there is no file name to save under.
                    continue;
                }

                // NOTE(review): picurl is passed on as found in the page; relative image
                // URLs are not resolved against the page URL here - confirm downfile copes.
                zzHttp.downfile(picurl, picname, @"d:\pic\");
                downloaded++;
            }

            label1.Text = downloaded + "张";
        }