This program basically goes to a websites and gets all the links in a specific div tag. It then navigates to each link and gets the links in those pages as well.
However, after getting the first links successfully and traveling to the first website, it recheck the windows form data and resets the class variables to their original values, thus losing all the links.
Why does it reload the windows form data and how could I keep the previous data?
Imports System.Text.RegularExpressions
Public Class Form
Private stage As String = "Getting Page Links"
Dim PageUrls() As String = {}
Dim PageHtml() As String = {}
Private Sub Form_Load(sender As Object, e As System.EventArgs) Handles Me.Load
WebBrowser.Navigate("websiteurlhidden")
End Sub
Private Sub WebBrowser_DocumentCompleted(sender As Object, e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted
If WebBrowser.ReadyState = WebBrowserReadyState.Complete Then
Try
Select Case stage
Case "Getting Page Links"
Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
For Each htmlElement As HtmlElement In HtmlElementCollection
Dim imgUrl As String = htmlElement.GetAttribute("classname")
If imgUrl = " nine " Then
Dim linkIndex As Integer = 0
Dim index2 As Integer = 0
For Each link As HtmlElement In htmlElement.GetElementsByTagName("a")
If linkIndex >= 26 Then
If Not String.IsNullOrEmpty(link.GetAttribute("href")) Then
ReDim Preserve PageUrls(index2)
PageUrls(index2) = link.GetAttribute("href")
' MessageBox.Show(link.GetAttribute("href"))
' MessageBox.Show(PageUrls(linkIndex))
index2 = index2 + 1
End If
End If
linkIndex = linkIndex + 1
Next
For Each str As String In PageUrls
' MessageBox.Show(str)
Next
stage = "Going through pages"
End If
Next
GoThroughPages()
Case "Going through pages"
Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
Dim linkIndex As Integer = 0
For Each htmlElement As HtmlElement In HtmlElementCollection
Dim imgUrl As String = htmlElement.GetAttribute("classname")
If imgUrl = " nine " Then
ReDim Preserve PageHtml(linkIndex)
'need to make permanent.
PageHtml(linkindex) = htmlElement.ToString()
Dim PageDownloadLinks = htmlElement.GetElementsByTagName("a")
End If
End
Next
' GoThroughPages()
Case Else
MessageBox.Show("case else")
End Select
Catch ex As Exception
' MessageBox.Show(ex.Message & " " & ex.ToString)
End Try
End If
End Sub
Private Sub GoThroughPages()
For linkIndex As Integer = 0 To PageUrls.Count - 1
MessageBox.Show(PageUrls(linkIndex))
WebBrowser.Navigate(PageUrls(linkIndex))
Delay(3)
While (WebBrowser.IsBusy)
Application.DoEvents()
End While
Next
End Sub
Sub Delay(ByVal dblSecs As Double)
Const OneSec As Double = 1.0# / (1440.0# * 60.0#)
Dim dblWaitTil As Date
Now.AddSeconds(OneSec)
dblWaitTil = Now.AddSeconds(OneSec).AddSeconds(dblSecs)
Do Until Now > dblWaitTil
Application.DoEvents() ' Allow windows messages to be processed
Loop
End Sub
End Class