Protected Sub Button1_Click(sender As Object, e As EventArgs) Handles Button1.Click
Dim Imagelink As String = ""
Dim Text As String = TextBox1.Text
Dim request As HttpWebRequest = DirectCast(HttpWebRequest.Create(Text), HttpWebRequest)
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)"
Dim respons As HttpWebResponse
respons = DirectCast(request.GetResponse(), HttpWebResponse)
Dim enc As Encoding
Try
enc = Encoding.GetEncoding(respons.CharacterSet)
Catch ex As Exception
enc = Encoding.GetEncoding("ISO-8859-1")
End Try
Dim reader As New StreamReader(respons.GetResponseStream(), enc)
Dim sr As String = reader.ReadToEnd()
Dim Pattern As String = "<img([^s]|s[^r]|sr[^c]|src[^=]|src=[^'""])*src=['""](?<SRC>[^'""]*)['""]"
Dim m As MatchCollection = Regex.Matches(sr, Pattern)
For Each mm As Match In m
Dim link_ As String = mm.Groups("SRC").Value
' Dim x_ As String = link_.Substring(0, 7)
If link_.Substring(0, 7) = "http://" Then
Response.Write(mm.Groups("SRC").Value + "" + "<br>")
Imagelink = link_
End If
Next
Dim image_ As New Image
image_.Attributes("src") = Imagelink
PlaceHolder1.Controls.Add(image_)
End Sub
This is the code i use to send in a request to a webpage get its content and extract the image links off the webpage. However in some webpages it doesn't return the charset in the web-header it returns "". When i however try to parse with default encoding it doesn't give the proper content either ? this is really frustrating any one has come across this type of situation before ? If anyone could point me in the right direction as to how to overcome this or Predict what kind of encoding to use , it would be great thanks.
Example of site which gives no charset in response header