Screen Scraper Code Sample 01

Screen Scraper Sample 01

Once I had to make a simple, one-page, basic, screen scraper sample.

The code is below.

This is the code-in-front…

using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;

/// <summary>
/// One-page screen-scraper sample. Downloads the document at a URL and displays the first
/// span of text that begins with a start token and ends with an end token.
/// The controls referenced here (UrlTextBox, StartTokenTextBox, EndTokenTextBox,
/// OutputTextBox, StatusLabel and the buttons) are not declared in this listing;
/// presumably they come from the page markup / designer part of this partial class.
/// </summary>
public partial class ScreenScraperExample4 : System.Web.UI.Page
{
	#region MemberConstants

	/// <summary>Format string for the time stamp written to the status label.</summary>
	public const string DefaultDateTimeFormat = "yyyy-MM-dd HH:mm:ss.fffffff tt zzz";

	/// <summary>Minimum accepted length of the trimmed URL.</summary>
	public const int DefaultUrlLengthMin = 16;

	/// <summary>Minimum accepted length of the trimmed start token.</summary>
	public const int DefaultStartTokenLengthMin = 1;

	/// <summary>Minimum accepted length of the trimmed end token.</summary>
	public const int DefaultEndTokenLengthMin = 1;

	/// <summary>URL placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultUrl = @"http://www.Google.com";

	/// <summary>Start token placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultStartToken = @"Web";

	/// <summary>End token placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultEndToken = @"News";

	#endregion //MemberConstants

	#region HelperMethods

	/// <summary>
	/// Null-safe trim plus minimum-length check shared by all input validation.
	/// </summary>
	/// <param name="value">Raw input; null is treated as an empty string.</param>
	/// <param name="minLength">Smallest accepted length of the trimmed value.</param>
	/// <param name="label">Name of the input, used as the first word of the error message.</param>
	/// <returns>The trimmed value.</returns>
	/// <exception cref="System.NotSupportedException">The trimmed value is shorter than <paramref name="minLength"/>.</exception>
	private static string RequireMinLength(string value, int minLength, string label)
	{
		// Concatenating with "" turns null into an empty string before trimming.
		string trimmedValue = (value + "").Trim();

		if (trimmedValue.Length < minLength)
		{
			throw new System.NotSupportedException(
				label + " must be greater-than-or-equal-to '" + minLength.ToString() + "'.");
		}

		return trimmedValue;
	}

	/// <summary>
	/// Downloads <paramref name="targetUrl"/> and returns the first match that starts with
	/// <paramref name="targetTokenStart"/> and ends with <paramref name="targetTokenEnd"/>,
	/// both tokens included, compared case-insensitively.
	/// </summary>
	/// <param name="targetUrl">Address to request; trimmed, at least <see cref="DefaultUrlLengthMin"/> characters.</param>
	/// <param name="targetTokenStart">Pattern that opens the match; trimmed, at least <see cref="DefaultStartTokenLengthMin"/> characters.</param>
	/// <param name="targetTokenEnd">Pattern that closes the match; trimmed, at least <see cref="DefaultEndTokenLengthMin"/> characters.</param>
	/// <returns>The matched text, or an empty string when nothing matches.</returns>
	/// <exception cref="System.NotSupportedException">An argument is shorter than its minimum length.</exception>
	private string GetDataValue(string targetUrl, string targetTokenStart, string targetTokenEnd)
	{
		//Special thanks to the following link for starting point, etc,...
		//http://www.eggheadcafe.com/community/aspnet/2/10065129/parsing-datas-from-the-ht.aspx

		// Validate in the original order: URL first, then start token, then end token.
		targetUrl = RequireMinLength(
			targetUrl, ScreenScraperExample4.DefaultUrlLengthMin, "URL");
		targetTokenStart = RequireMinLength(
			targetTokenStart, ScreenScraperExample4.DefaultStartTokenLengthMin, "Start-token");
		targetTokenEnd = RequireMinLength(
			targetTokenEnd, ScreenScraperExample4.DefaultEndTokenLengthMin, "End-token");

		string myContent;

		WebRequest myRequest = WebRequest.Create(targetUrl);

		// Dispose the response, its stream and the reader even when reading throws;
		// otherwise each call leaves a connection open until finalization.
		using (WebResponse myResponse = myRequest.GetResponse())
		using (Stream myStream = myResponse.GetResponseStream())
		using (StreamReader myReader = new StreamReader(myStream))
		{
			myContent = myReader.ReadToEnd();
		}

		// NOTE: the tokens are concatenated into the pattern unescaped, so regex
		// metacharacters typed into them are interpreted as regex syntax.
		// [\s\S]*? lazily matches any character including newlines, the same text the
		// former (.|\n)*? matched, without an alternation evaluated per character.
		Regex myRegex = new Regex(
			targetTokenStart + @"([\s\S]*?)" + targetTokenEnd, RegexOptions.IgnoreCase);

		// Match.Value is an empty string when the pattern does not match.
		return myRegex.Match(myContent).Value;
	}

	/// <summary>Writes the current time stamp followed by ": Done." to the status label.</summary>
	private void SetStatusDone()
	{
		this.StatusLabel.Text =
			DateTime.Now.ToString(ScreenScraperExample4.DefaultDateTimeFormat) + ": Done.";
	}

	/// <summary>Restores the default URL and tokens and empties the output box.</summary>
	private void ResetFormNow()
	{
		this.UrlTextBox.Text = ScreenScraperExample4.DefaultUrl;
		this.StartTokenTextBox.Text = ScreenScraperExample4.DefaultStartToken;
		this.EndTokenTextBox.Text = ScreenScraperExample4.DefaultEndToken;
		this.OutputTextBox.Text = "";
	}

	/// <summary>Empties the URL, token and output boxes.</summary>
	private void ClearFormNow()
	{
		this.UrlTextBox.Text = "";
		this.StartTokenTextBox.Text = "";
		this.EndTokenTextBox.Text = "";
		this.OutputTextBox.Text = "";
	}

	#endregion //HelperMethods

	#region HandlerMethods

	/// <summary>
	/// Attaches the client-side confirmation prompts on every request and, on the first
	/// (non-postback) request, fills the form with its defaults.
	/// </summary>
	private void Page_Load(object sender, System.EventArgs e)
	{
		try
		{
			this.ResetButton.OnClientClick = @"return confirm('Discard data and reset?');";
			this.ClearButton.OnClientClick = @"return confirm('Discard data and clear?');";
			this.CancelButton.OnClientClick = @"return confirm('Discard data and exit?');";

			if (!this.IsPostBack)
			{
				this.ResetFormNow();
				this.SetStatusDone();
			}
		}
		catch (Exception ex)
		{
			// Sample-style error reporting: the full exception text goes to the page.
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>
	/// Runs the scrape with the values currently in the form and shows the result in the
	/// output box; any failure is written to the status label instead.
	/// </summary>
	protected void TestInlineButton_Click(object sender, EventArgs e)
	{
		try
		{
			// GetDataValue trims and validates the URL and both tokens itself, so no
			// separate pre-check is needed here.
			this.OutputTextBox.Text = this.GetDataValue(
				this.UrlTextBox.Text, this.StartTokenTextBox.Text, this.EndTokenTextBox.Text);

			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Restores the form defaults and reports completion.</summary>
	protected void ResetButton_Click(object sender, EventArgs e)
	{
		try
		{
			this.ResetFormNow();
			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Empties the form and reports completion.</summary>
	protected void ClearButton_Click(object sender, EventArgs e)
	{
		try
		{
			this.ClearFormNow();
			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Leaves the page by redirecting to Default.aspx.</summary>
	protected void CancelButton_Click(object sender, EventArgs e)
	{
		try
		{
			// NOTE(review): on ASP.NET, Redirect(string) is documented to end the response
			// via a ThreadAbortException, which would pass through the catch below before
			// the runtime re-raises it - confirm whether that is intended.
			this.Response.Redirect("Default.aspx");
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	#endregion //HandlerMethods
}

This is the code-behind…


using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;

/// <summary>
/// One-page screen-scraper sample. Downloads the document at a URL and displays the first
/// span of text that begins with a start token and ends with an end token.
/// The controls referenced here (UrlTextBox, StartTokenTextBox, EndTokenTextBox,
/// OutputTextBox, StatusLabel and the buttons) are not declared in this listing;
/// presumably they come from the page markup / designer part of this partial class.
/// </summary>
public partial class ScreenScraperExample4 : System.Web.UI.Page
{
	#region MemberConstants

	/// <summary>Format string for the time stamp written to the status label.</summary>
	public const string DefaultDateTimeFormat = "yyyy-MM-dd HH:mm:ss.fffffff tt zzz";

	/// <summary>Minimum accepted length of the trimmed URL.</summary>
	public const int DefaultUrlLengthMin = 16;

	/// <summary>Minimum accepted length of the trimmed start token.</summary>
	public const int DefaultStartTokenLengthMin = 1;

	/// <summary>Minimum accepted length of the trimmed end token.</summary>
	public const int DefaultEndTokenLengthMin = 1;

	/// <summary>URL placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultUrl = @"http://www.Google.com";

	/// <summary>Start token placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultStartToken = @"Web";

	/// <summary>End token placed in the form by <see cref="ResetFormNow"/>.</summary>
	public const string DefaultEndToken = @"News";

	#endregion //MemberConstants

	#region HelperMethods

	/// <summary>
	/// Null-safe trim plus minimum-length check shared by all input validation.
	/// </summary>
	/// <param name="value">Raw input; null is treated as an empty string.</param>
	/// <param name="minLength">Smallest accepted length of the trimmed value.</param>
	/// <param name="label">Name of the input, used as the first word of the error message.</param>
	/// <returns>The trimmed value.</returns>
	/// <exception cref="System.NotSupportedException">The trimmed value is shorter than <paramref name="minLength"/>.</exception>
	private static string RequireMinLength(string value, int minLength, string label)
	{
		// Concatenating with "" turns null into an empty string before trimming.
		string trimmedValue = (value + "").Trim();

		if (trimmedValue.Length < minLength)
		{
			throw new System.NotSupportedException(
				label + " must be greater-than-or-equal-to '" + minLength.ToString() + "'.");
		}

		return trimmedValue;
	}

	/// <summary>
	/// Downloads <paramref name="targetUrl"/> and returns the first match that starts with
	/// <paramref name="targetTokenStart"/> and ends with <paramref name="targetTokenEnd"/>,
	/// both tokens included, compared case-insensitively.
	/// </summary>
	/// <param name="targetUrl">Address to request; trimmed, at least <see cref="DefaultUrlLengthMin"/> characters.</param>
	/// <param name="targetTokenStart">Pattern that opens the match; trimmed, at least <see cref="DefaultStartTokenLengthMin"/> characters.</param>
	/// <param name="targetTokenEnd">Pattern that closes the match; trimmed, at least <see cref="DefaultEndTokenLengthMin"/> characters.</param>
	/// <returns>The matched text, or an empty string when nothing matches.</returns>
	/// <exception cref="System.NotSupportedException">An argument is shorter than its minimum length.</exception>
	private string GetDataValue(string targetUrl, string targetTokenStart, string targetTokenEnd)
	{
		// Validate in the original order: URL first, then start token, then end token.
		targetUrl = RequireMinLength(
			targetUrl, ScreenScraperExample4.DefaultUrlLengthMin, "URL");
		targetTokenStart = RequireMinLength(
			targetTokenStart, ScreenScraperExample4.DefaultStartTokenLengthMin, "Start-token");
		targetTokenEnd = RequireMinLength(
			targetTokenEnd, ScreenScraperExample4.DefaultEndTokenLengthMin, "End-token");

		string myContent;

		WebRequest myRequest = WebRequest.Create(targetUrl);

		// Dispose the response, its stream and the reader even when reading throws;
		// otherwise each call leaves a connection open until finalization.
		using (WebResponse myResponse = myRequest.GetResponse())
		using (Stream myStream = myResponse.GetResponseStream())
		using (StreamReader myReader = new StreamReader(myStream))
		{
			myContent = myReader.ReadToEnd();
		}

		// NOTE: the tokens are concatenated into the pattern unescaped, so regex
		// metacharacters typed into them are interpreted as regex syntax.
		// [\s\S]*? lazily matches any character including newlines, the same text the
		// former (.|\n)*? matched, without an alternation evaluated per character.
		Regex myRegex = new Regex(
			targetTokenStart + @"([\s\S]*?)" + targetTokenEnd, RegexOptions.IgnoreCase);

		// Match.Value is an empty string when the pattern does not match.
		return myRegex.Match(myContent).Value;
	}

	/// <summary>Writes the current time stamp followed by ": Done." to the status label.</summary>
	private void SetStatusDone()
	{
		this.StatusLabel.Text =
			DateTime.Now.ToString(ScreenScraperExample4.DefaultDateTimeFormat) + ": Done.";
	}

	/// <summary>Restores the default URL and tokens and empties the output box.</summary>
	private void ResetFormNow()
	{
		this.UrlTextBox.Text = ScreenScraperExample4.DefaultUrl;
		this.StartTokenTextBox.Text = ScreenScraperExample4.DefaultStartToken;
		this.EndTokenTextBox.Text = ScreenScraperExample4.DefaultEndToken;
		this.OutputTextBox.Text = "";
	}

	/// <summary>Empties the URL, token and output boxes.</summary>
	private void ClearFormNow()
	{
		this.UrlTextBox.Text = "";
		this.StartTokenTextBox.Text = "";
		this.EndTokenTextBox.Text = "";
		this.OutputTextBox.Text = "";
	}

	#endregion //HelperMethods

	#region HandlerMethods

	/// <summary>
	/// Attaches the client-side confirmation prompts on every request and, on the first
	/// (non-postback) request, fills the form with its defaults.
	/// </summary>
	private void Page_Load(object sender, System.EventArgs e)
	{
		try
		{
			this.ResetButton.OnClientClick = @"return confirm('Discard data and reset?');";
			this.ClearButton.OnClientClick = @"return confirm('Discard data and clear?');";
			this.CancelButton.OnClientClick = @"return confirm('Discard data and exit?');";

			if (!this.IsPostBack)
			{
				this.ResetFormNow();
				this.SetStatusDone();
			}
		}
		catch (Exception ex)
		{
			// Sample-style error reporting: the full exception text goes to the page.
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>
	/// Runs the scrape with the values currently in the form and shows the result in the
	/// output box; any failure is written to the status label instead.
	/// </summary>
	protected void TestInlineButton_Click(object sender, EventArgs e)
	{
		try
		{
			// GetDataValue trims and validates the URL and both tokens itself, so no
			// separate pre-check is needed here.
			this.OutputTextBox.Text = this.GetDataValue(
				this.UrlTextBox.Text, this.StartTokenTextBox.Text, this.EndTokenTextBox.Text);

			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Restores the form defaults and reports completion.</summary>
	protected void ResetButton_Click(object sender, EventArgs e)
	{
		try
		{
			this.ResetFormNow();
			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Empties the form and reports completion.</summary>
	protected void ClearButton_Click(object sender, EventArgs e)
	{
		try
		{
			this.ClearFormNow();
			this.SetStatusDone();
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	/// <summary>Leaves the page by redirecting to Default.aspx.</summary>
	protected void CancelButton_Click(object sender, EventArgs e)
	{
		try
		{
			// NOTE(review): on ASP.NET, Redirect(string) is documented to end the response
			// via a ThreadAbortException, which would pass through the catch below before
			// the runtime re-raises it - confirm whether that is intended.
			this.Response.Redirect("Default.aspx");
		}
		catch (Exception ex)
		{
			this.StatusLabel.Text = ex.ToString();
		}
	}

	#endregion //HandlerMethods
}
Advertisements

Author: mkamoski1

n/a