0

I'm trying to the html information on a certain a website so I can parse out the info for our database. The problem is that the second & third responseFromServer info comes back the same. However, when I follow the links on inside a real web browser, I get the right information (correct page).

I'm thinking that each WebRequest is basically opening a 'new' instance of the web and what I want it to do is use the same instance.

Is there a way to specify (using a WebClient?) so that each request is contained in the 'same browser' (for lack of a better term)

static void CountyInfo(string Address)
            {
        WebClient webClient = new WebClient();
        webClient.Headers.Add("Cache-Control: private");
        webClient.Headers.Add("Content-Type: text/html; charset=utf-8");
        webClient.Headers.Add("Server: Microsoft-IIS/6.0");
        webClient.Headers.Add("X-AspNet-Version: 4.0.30319");
        webClient.Headers.Add("X-Powered-By: ASP.NET");
        webClient.Headers.Add("X-UA-Compatible: IE=8, IE=9, IE=10, IE=11");

                Address = Address.Replace(" ", "+");
                string url1 = "http://mcassessor.maricopa.gov/?s=" + Address;
                WebRequest request1 = WebRequest.Create(url1);
                WebResponse response1 = request1.GetResponse();
                //Stream dataStream1 = response1.GetResponseStream();
           Stream dataStream1 = webClient.OpenRead(url1);
                StreamReader reader1 = new StreamReader(dataStream1);

                string responseFromServer1 = reader1.ReadToEnd();
                string ParcelNum = getBetween(responseFromServer1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
                ParcelNum = new String(ParcelNum.Where(Char.IsDigit).ToArray());
                //reader1.Close();
                //response1.Close();           

                //NEW GET request
                string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=" + ParcelNum;
                WebRequest request2 = WebRequest.Create(url2);
                WebResponse response2 = request2.GetResponse();
                //Stream dataStream2 = response2.GetResponseStream();
           Stream dataStream2 = webClient.OpenRead(url2);
                StreamReader reader2 = new StreamReader(dataStream2);
                string responseFromServer2 = reader2.ReadToEnd();
                //reader2.Close();
                //response2.Close();

                //NEW GET request
                string url3 = "http://treasurer.maricopa.gov/Parcel/" + "TaxDetails.aspx?taxyear=2013";
                WebRequest request3 = WebRequest.Create(url3);
                WebResponse response3 = request3.GetResponse();
                //Stream dataStream3 = response3.GetResponseStream();
           Stream dataStream3 = webClient.OpenRead(url3);
                StreamReader reader3 = new StreamReader(dataStream3);
                string responseFromServer3 = reader3.ReadToEnd();
                reader3.Close();
                response3.Close();
            }

EDIT: just saw this. request1 gives me the correct page (the query results page) but request 2 and 3 return me back to the "Home Page" of the website. Even though i am passing in url2 and url3 into the requests2 & 3 respectively.

MaylorTaylor
  • 4,671
  • 16
  • 47
  • 76
  • 1
    The only thing that "in the same browser" can relate to is the request headers that are sent (this includes cookies). Why don't you download fiddler2 and inspect the differences between the requests sent from the browser and those that are sent by your app? You'll be in a much better position to figure out what's going wrong. – spender Dec 10 '13 at 18:52
  • Ok, so I have done this, I added most of the headers into the code as they appeared in Fiddler and still the same results. Then I looked into using the WebClient class which produced the same results. Any more ideas? (changes are in the OP) – MaylorTaylor Dec 10 '13 at 22:43
  • Can no one help me even in the slightest? – MaylorTaylor Dec 12 '13 at 15:35

2 Answers2

1

You need to manage the cookies. Get the cookies served by the web server and pass them back on the next request.

spender
  • 117,338
  • 33
  • 229
  • 351
0

Sorry about the long code post, but this is how i got it working. Let me know if you can condense it any.

static public CookieContainer cookieJar;

  static void Main(string[] args)
    {
        string ControlNumber = "######";
        GetOrderInfo newOrder = new GetOrderInfo(ControlNumber);

        obtainCookies();
        MARICOPAcounty(newOrder.OrderAddress);
    }

static void obtainCookies()
        {
            string postData = "http://mcassessor.maricopa.gov/";
            CookieContainer tempCookies = new CookieContainer();
            UTF8Encoding encoding = new UTF8Encoding();
            byte[] byteData = encoding.GetBytes(postData);

            HttpWebRequest postReq = (HttpWebRequest)WebRequest.Create("http://mcassessor.maricopa.gov/");

            postReq.Method = "POST";
            postReq.KeepAlive = true;
            Cookie chocoChip = new Cookie("_ga", "GA1.2.1813386723.1386802842") { Domain = "http://mcassessor.maricopa.gov/" };

            postReq.CookieContainer = new CookieContainer();
            postReq.CookieContainer.Add(chocoChip);
            postReq.ContentType = "text/html; charset=utf-8";
            postReq.Referer = "http://mcassessor.maricopa.gov/";
            postReq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
            postReq.ContentLength = byteData.Length;

            Stream postReqStream = postReq.GetRequestStream();
            postReqStream.Write(byteData, 0, byteData.Length);
            postReqStream.Close();
            HttpWebResponse postResponse;

            postResponse = (HttpWebResponse)postReq.GetResponse();
            postResponse.Cookies.Add(chocoChip);                           
            StreamReader postReqReader = new StreamReader(postResponse.GetResponseStream());

            tempCookies.Add(chocoChip);
            cookieJar = tempCookies;
            string soureCode = postReqReader.ReadToEnd();

            if (postReq != null)
            {
                Console.WriteLine("\r\n\r\n postResponse COOKIES");
                Console.WriteLine(postResponse.Cookies[0]);

                Console.WriteLine("\r\n\r\n postReq Headers");
                Console.WriteLine(postReq.Headers.ToString());

                Console.WriteLine("\r\n\r\n postResponse Headers");
                Console.Write("\t" + postResponse.Headers);
            }
        }


    public static string MARICOPAcounty(string Address)
{
//-------------------------------------------------------------------------------------------------------------------------------//
            //-------------------------------------------------------------------------------------------------------------------------------//
            Address = Address.Replace(" ", "+");
            string url1 = "http://mcassessor.maricopa.gov/?s=" + Address;
            HttpWebRequest request1 = (HttpWebRequest)WebRequest.Create(url1);


            request1.CookieContainer = cookieJar;                
            request1.Method = "GET";
            request1.Accept = "text/html, application/xhtml+xml, */*";
            request1.Headers.Add("Accept-Language: en-US");
            request1.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
            request1.Headers.Add("Accept-Enconding: gzip, deflate");

            HttpWebResponse response1 = (HttpWebResponse)request1.GetResponse();
            StreamReader sr1 = new StreamReader(response1.GetResponseStream());
            string sourceCode1 = sr1.ReadToEnd();
            string ParcelNum = getBetween(sourceCode1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
            ParcelNum = new String(ParcelNum.Where(Char.IsDigit).ToArray());

            if (response1 != null)
            {
                Console.WriteLine("\r\n\r\n request1 Headers");
                Console.WriteLine(request1.Headers.ToString());

                Console.WriteLine("\r\n\r\nresponse1 Headers");
                Console.Write("\t" + response1.Headers);
            }

            sr1.Close();
            response1.Close();
            //-------------------------------------------------------------------------------------------------------------------------------//
            //-------------------------------------------------------------------------------------------------------------------------------//
            //NEW GET request

            string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=50423370"; //+ ParcelNum;           
            HttpWebRequest request2 = (HttpWebRequest)WebRequest.Create(url2);

            request2.CookieContainer = cookieJar;
            request2.Method = "GET";
            request2.Accept = "text/html, application/xhtml+xml, */*";
            request2.Headers.Add("Accept-Language: en-US");
            request2.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
            request2.Headers.Add("Accept-Encoding: gzip, deflate");
            request2.Referer = url1;

            HttpWebResponse response2 = (HttpWebResponse)request2.GetResponse();
            StreamReader sr2 = new StreamReader(response2.GetResponseStream());
            string sourceCode2 = sr2.ReadToEnd();

            if (response2 != null)
            {
                Console.WriteLine("\r\n\r\nrrequest2 Headers");
                Console.WriteLine(request2.Headers);
                Console.WriteLine(request2.CookieContainer);

                Console.WriteLine("\r\n\r\nresponse2 Headers");
                Console.Write("\t" + response2.Headers);
            }

            sr2.Close();
            response2.Close();

            //-------------------------------------------------------------------------------------------------------------------------------//
            //-------------------------------------------------------------------------------------------------------------------------------//
            //new GET request
            string url3 = "http://treasurer.maricopa.gov/Parcel/TaxDetails.aspx?taxyear=2013";
            HttpWebRequest request3 = (HttpWebRequest)WebRequest.Create(url3);

            request3.CookieContainer = cookieJar;
            request3.Method = "GET";
            request3.Accept = "text/html, application/xhtml+xml, */*";
            request3.Headers.Add("Accept-Language: en-US");
            request3.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
            request3.Headers.Add("Accept-Encoding: gzip, deflate");
            request3.Referer = url2;

            HttpWebResponse response3 = (HttpWebResponse)request3.GetResponse();
            StreamReader sr3 = new StreamReader(response3.GetResponseStream());
            string sourceCode3 = sr3.ReadToEnd();

            if (response3 != null)
            {
                Console.WriteLine("\r\n\r\nrrequest3 Headers");
                Console.WriteLine(request3.Headers);

                Console.WriteLine("\r\n\r\nresponse3 Headers");
                Console.Write("\t" + response3.Headers);
            }

            sr3.Close();
            response3.Close();

            return sourceCode3;
}
MaylorTaylor
  • 4,671
  • 16
  • 47
  • 76