Step 1: Download Html Agility Pack from the following link:
http://htmlagilitypack.codeplex.com/
Add a reference to the library in your project.
Step 2: Download the HTML web page to parse
Use the following code to download the web page.
/// <summary>
/// Starts an asynchronous download of the page at <paramref name="url"/> and
/// saves it to c:\temp.html. client_DownloadFileCompleted is invoked when the
/// download finishes, fails, or is cancelled.
/// </summary>
/// <param name="url">Absolute URL of the web page to download.</param>
private void Download(string url)
{
    // Build the Uri first so an invalid URL throws before a WebClient exists.
    Uri address = new Uri(url);

    WebClient client = new WebClient();
    client.DownloadFileCompleted += delegate(object sender, AsyncCompletedEventArgs e)
    {
        try
        {
            client_DownloadFileCompleted(sender, e);
        }
        finally
        {
            // WebClient is IDisposable; release it once the download is over,
            // even if the handler throws.
            client.Dispose();
        }
    };

    try
    {
        client.DownloadFileAsync(address, @"c:\temp.html");
    }
    catch
    {
        // The download never started, so the completion callback will not run.
        client.Dispose();
        throw;
    }
}
/// <summary>
/// Handler for WebClient.DownloadFileCompleted; Download() subscribes it
/// before starting the asynchronous download. Placeholder: the body is
/// intentionally empty here.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Completion data for the asynchronous download.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
// Placeholder: process the downloaded file here.
}
Step 3: Parse the downloaded web page using Html Agility Pack
Use the following code in client_DownloadFileCompleted() to enumerate all options of the "SearchFood" select element in the web page.
/// <summary>
/// Handler for WebClient.DownloadFileCompleted. Loads c:\temp.html with
/// Html Agility Pack and collects every &lt;option&gt; under the
/// &lt;select name="SearchFood"&gt; element into a dictionary that maps the
/// option's text to its value.
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Completion data; checked for failure or cancellation.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    // A failed or cancelled download leaves no trustworthy file to parse.
    if (e.Cancelled || e.Error != null)
    {
        return;
    }

    // Maps option text -> option value. Local to this handler; use or return
    // it from here as the application requires.
    Dictionary<string, string> search_foods = new Dictionary<string, string>();

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

    // Html Agility Pack flags <option> as an empty element by default, which
    // keeps its text out of InnerText; removing the flag makes the text
    // available. ElementsFlags is static, so this affects every document
    // parsed afterwards.
    HtmlNode.ElementsFlags.Remove("option");
    doc.Load("c:\\temp.html", Encoding.UTF8);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection select_node_collection = doc.DocumentNode.SelectNodes("//select[@name='SearchFood']//option");
    if (select_node_collection == null)
    {
        return;
    }

    foreach (HtmlNode node in select_node_collection)
    {
        string option_text = node.InnerText;
        // An <option> without a value attribute submits its text as the value,
        // so fall back to the text instead of dereferencing a missing attribute.
        string option_value = node.GetAttributeValue("value", option_text);
        search_foods[option_text] = option_value;
    }
}
No comments:
Post a Comment