Step 1: Download the Html Agility Pack from the following link:
http://htmlagilitypack.codeplex.com/
Add a reference to the library in your project.
Step 2: Download the HTML web page to parse
Use the following code to download the web page.
Step 3: Parse the downloaded web page for the HTML table using the Html Agility Pack
/// <summary>
/// Starts an asynchronous download of the page at <paramref name="url"/> into the
/// local file <c>c:\temp.html</c>. <c>client_DownloadFileCompleted</c> is subscribed
/// to the client's <c>DownloadFileCompleted</c> event before the download is started.
/// </summary>
/// <param name="url">Address of the web page to download.</param>
private void Download(string url)
{
    WebClient webClient = new WebClient();
    webClient.DownloadFileCompleted += client_DownloadFileCompleted;

    Uri pageAddress = new Uri(url);
    webClient.DownloadFileAsync(pageAddress, @"c:\temp.html");
}

/// <summary>
/// Handler subscribed to the <c>DownloadFileCompleted</c> event in <c>Download</c>.
/// </summary>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    // Placeholder: process the downloaded file here.
}

// Use the following code in client_DownloadFileCompleted() to scan the tables in the web page for the address.
/// <summary>
/// Scans every table in the parsed page for a cell whose text contains the label
/// "详细地址" ("detailed address") and prints the trimmed text of the cell that
/// follows it in the same row. If several rows match, the last match is printed;
/// if none match, an empty address is printed.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Completion details; not used.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    string address = "";

    // NOTE(review): `doc` is not declared in this snippet -- presumably an HtmlDocument
    // already loaded from the downloaded file; confirm where it is populated.
    // SelectNodes yields null (not an empty collection) when nothing matches, so each
    // result is checked before it is enumerated.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables != null)
    {
        foreach (HtmlNode table in tables)
        {
            HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                continue;
            }

            foreach (HtmlNode row in rows)
            {
                HtmlNodeCollection cells = row.SelectNodes("td");
                if (cells == null)
                {
                    // Rows without any <td> cells cannot hold the label/value pair.
                    continue;
                }

                // True from the moment the label cell is seen until the next cell is consumed.
                bool takeNextCell = false;
                foreach (HtmlNode cell in cells)
                {
                    // The original also required !cell.InnerText.Contains(""), but every
                    // string contains the empty string, so that test was always false and
                    // the label was never recognised. The text it was meant to exclude is
                    // not recoverable from this snippet, so the clause is dropped.
                    if (cell.InnerText.Contains("详细地址"))
                    {
                        takeNextCell = true;
                    }
                    else if (takeNextCell)
                    {
                        address = cell.InnerText.Trim();
                        takeNextCell = false;
                    }
                }
            }
        }
    }

    Console.WriteLine("Found Address in Web Page: {0}", address);
}
No comments:
Post a Comment