Step 1: Download Html Agility Pack from the following link:
http://htmlagilitypack.codeplex.com/
Add a reference to the library in your project.
Step 2: Download the HTML web page to parse
Use the following code to download the web page.
Step 3: Parse the downloaded web page for the html table using Html Agility Pack
/// <summary>
/// Starts an asynchronous download of the page at <paramref name="url"/> and
/// saves it to c:\temp.html.
/// </summary>
/// <param name="url">Absolute address of the web page to download.</param>
/// <remarks>
/// The method returns as soon as the transfer has been started;
/// client_DownloadFileCompleted is invoked when the transfer ends.
/// </remarks>
private void Download(string url)
{
    // Build the Uri first so that a malformed address fails before a client is allocated.
    Uri pageAddress = new Uri(url);

    WebClient client = new WebClient();
    client.DownloadFileCompleted += new AsyncCompletedEventHandler(client_DownloadFileCompleted);

    // WebClient is IDisposable, but the transfer is still running when this method
    // returns, so a using block would dispose it too early. Release it from a second
    // completion handler, which runs after client_DownloadFileCompleted.
    client.DownloadFileCompleted += delegate { client.Dispose(); };

    try
    {
        client.DownloadFileAsync(pageAddress, @"c:\temp.html");
    }
    catch
    {
        // The transfer never started, so no completion event will dispose the client.
        client.Dispose();
        throw;
    }
}
// Handler for WebClient.DownloadFileCompleted; Download() subscribes it before
// starting the asynchronous transfer.
// sender: the object that raised the event.
// e: completion details for the asynchronous operation.
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
// Placeholder: code that processes the downloaded file goes here.
}
Use the following code in the client_DownloadFileCompleted() to enumerate all links in the web page.
/// <summary>
/// Runs when the download has ended: scans the rows of every table in the parsed
/// page for a cell containing the label "详细地址" (Chinese for "detailed address")
/// and prints the trimmed text of the cell that follows it.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Completion details; a failed or cancelled download is reported and not parsed.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    // A cancelled or failed download leaves nothing reliable to parse.
    if (e.Cancelled || e.Error != null)
    {
        Console.WriteLine("Download failed: {0}", e.Error != null ? e.Error.Message : "cancelled");
        return;
    }

    string address = "";

    // NOTE(review): doc is not declared anywhere in this snippet; presumably it is an
    // HtmlDocument loaded from the downloaded c:\temp.html -- confirm before use.
    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so every result is checked before it is enumerated.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table");
    if (tables != null)
    {
        foreach (HtmlNode table in tables)
        {
            HtmlNodeCollection rows = table.SelectNodes("tr");
            if (rows == null)
            {
                continue;
            }

            foreach (HtmlNode row in rows)
            {
                // Rows without any td (for example header rows made of th) have no cells to inspect.
                HtmlNodeCollection cells = row.SelectNodes("td");
                if (cells == null)
                {
                    continue;
                }

                // True while the previous cell was the label and its value cell is still pending.
                bool expectAddress = false;
                foreach (HtmlNode cell in cells)
                {
                    // The original test also required !InnerText.Contains(""), which is
                    // always false (every string contains the empty string), so the label
                    // could never match. The excluded text was lost; only the label is tested.
                    if (cell.InnerText.Contains("详细地址"))
                    {
                        expectAddress = true;
                    }
                    else if (expectAddress)
                    {
                        // A later match overwrites an earlier one: the last address on the page wins.
                        address = cell.InnerText.Trim();
                        expectAddress = false;
                    }
                }
            }
        }
    }

    Console.WriteLine("Found Address in Web Page: {0}", address);
}
No comments:
Post a Comment