Sunday, July 21, 2013

Parsing HTML form input elements in a web page using C#

Step 1: Download Html Agility Pack and add a reference to it in your project
Download Html Agility Pack from the following link:

http://htmlagilitypack.codeplex.com/

Add a reference to the library in your project.

Step 2: Download the HTML web page to parse

Use the following code to download the web page.

/// <summary>
/// Starts an asynchronous download of the page at <paramref name="url"/>
/// and saves it to c:\temp.html.
/// </summary>
/// <param name="url">Address of the web page to download.</param>
private void Download(string url)
{
    WebClient downloader = new WebClient();

    // Method-group subscription: client_DownloadFileCompleted is raised
    // through the DownloadFileCompleted event of this WebClient.
    downloader.DownloadFileCompleted += client_DownloadFileCompleted;

    Uri pageAddress = new Uri(url);
    downloader.DownloadFileAsync(pageAddress, @"c:\temp.html");
}

/// <summary>
/// Handler attached to the WebClient's DownloadFileCompleted event in Download().
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Completion details of the asynchronous operation.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    // Placeholder: the body is intentionally empty at this step.
}

Step 3: Parse the downloaded web page using Html Agility Pack

Use the following code in the client_DownloadFileCompleted() handler to enumerate the form input elements in the web page and read their values.

/// <summary>
/// Handles completion of the download started by Download(): loads the saved page
/// with Html Agility Pack and reads the values of the input elements whose ids are
/// "storesadd", "storesname" and "bestfood".
/// </summary>
/// <param name="sender">The object that raised the event.</param>
/// <param name="e">Completion details; the page is parsed only when the download succeeded.</param>
void client_DownloadFileCompleted(object sender, AsyncCompletedEventArgs e)
{
    // A failed or cancelled download leaves no complete file to parse.
    if (e.Error != null || e.Cancelled)
    {
        return;
    }

    string storesAdd = "";
    string storesName = "";
    string bestFood = "";

    // Load the file written by Download(); the path must match the one passed to
    // DownloadFileAsync there. The type is fully qualified because
    // System.Windows.Forms also declares an HtmlDocument type.
    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.Load(@"c:\temp.html");

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so both results are checked before they are enumerated.
    HtmlNodeCollection formNodes = doc.DocumentNode.SelectNodes("//form");
    if (formNodes == null)
    {
        return;
    }

    // "//input" is an absolute XPath: it matches every input element in the document
    // no matter which node it is evaluated from. The previous code evaluated it once
    // per form with identical results, so it is evaluated a single time here.
    // NOTE(review): some Html Agility Pack versions are reported to parse <form> as an
    // empty element, in which case a form-relative ".//input" finds nothing - confirm
    // against the referenced version before narrowing this query.
    HtmlNodeCollection inputNodes = doc.DocumentNode.SelectNodes("//input");
    if (inputNodes == null)
    {
        return;
    }

    foreach (HtmlNode inputNode in inputNodes)
    {
        // Only inputs that carry both an id and a value are of interest.
        if (!inputNode.Attributes.Contains("id") || !inputNode.Attributes.Contains("value"))
        {
            continue;
        }

        string inputValue = inputNode.Attributes["value"].Value;
        if (string.IsNullOrEmpty(inputValue))
        {
            continue;
        }

        switch (inputNode.Attributes["id"].Value)
        {
            case "storesadd":
                storesAdd = inputValue;
                break;
            case "bestfood":
                bestFood = inputValue;
                break;
            case "storesname":
                storesName = inputValue;
                break;
        }
    }

    // TODO: consume storesAdd, storesName and bestFood here; as written they are
    // discarded when the handler returns.
}

No comments:

Post a Comment