0

This program basically goes to a websites and gets all the links in a specific div tag. It then navigates to each link and gets the links in those pages as well.

However, after getting the first links successfully and traveling to the first website, it recheck the windows form data and resets the class variables to their original values, thus losing all the links.

Why does it reload the windows form data and how could I keep the previous data?

Imports System.Text.RegularExpressions

Public Class Form

Private stage As String = "Getting Page Links"
Dim PageUrls() As String = {}
Dim PageHtml() As String = {}

Private Sub Form_Load(sender As Object, e As System.EventArgs) Handles Me.Load

    WebBrowser.Navigate("websiteurlhidden")


End Sub

Private Sub WebBrowser_DocumentCompleted(sender As Object, e As System.Windows.Forms.WebBrowserDocumentCompletedEventArgs) Handles WebBrowser.DocumentCompleted
    If WebBrowser.ReadyState = WebBrowserReadyState.Complete Then

        Try




            Select Case stage
                Case "Getting Page Links"

                    Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
                    Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")

                    For Each htmlElement As HtmlElement In HtmlElementCollection

                        Dim imgUrl As String = htmlElement.GetAttribute("classname")
                        If imgUrl = " nine " Then



                            Dim linkIndex As Integer = 0
                            Dim index2 As Integer = 0
                            For Each link As HtmlElement In htmlElement.GetElementsByTagName("a")
                                If linkIndex >= 26 Then
                                    If Not String.IsNullOrEmpty(link.GetAttribute("href")) Then
                                        ReDim Preserve PageUrls(index2)

                                        PageUrls(index2) = link.GetAttribute("href")
                                        ' MessageBox.Show(link.GetAttribute("href"))
                                        '  MessageBox.Show(PageUrls(linkIndex))
                                        index2 = index2 + 1
                                    End If
                                End If
                                linkIndex = linkIndex + 1
                            Next

                            For Each str As String In PageUrls
                                '          MessageBox.Show(str)
                            Next
                            stage = "Going through  pages"

                        End If

                    Next

                    GoThroughPages()

                Case "Going through  pages"

                    Dim htmlDocument As HtmlDocument = Me.WebBrowser.Document
                    Dim htmlElementCollection As HtmlElementCollection = htmlDocument.GetElementsByTagName("DIV")
                    Dim linkIndex As Integer = 0
                    For Each htmlElement As HtmlElement In HtmlElementCollection

                        Dim imgUrl As String = htmlElement.GetAttribute("classname")
                        If imgUrl = " nine " Then
                            ReDim Preserve PageHtml(linkIndex)
                            'need to make permanent.
                            PageHtml(linkindex) = htmlElement.ToString()

                            Dim PageDownloadLinks = htmlElement.GetElementsByTagName("a")

                        End If
                        End
                    Next

                    ' GoThroughPages()

                Case Else
                    MessageBox.Show("case else")

            End Select

        Catch ex As Exception
            '  MessageBox.Show(ex.Message & " " & ex.ToString)
        End Try
    End If

End Sub

Private Sub GoThroughPages()

    For linkIndex As Integer = 0 To PageUrls.Count - 1


        MessageBox.Show(PageUrls(linkIndex))
        WebBrowser.Navigate(PageUrls(linkIndex))
        Delay(3)
        While (WebBrowser.IsBusy)

            Application.DoEvents()
        End While
    Next

End Sub

Sub Delay(ByVal dblSecs As Double)

    Const OneSec As Double = 1.0# / (1440.0# * 60.0#)
    Dim dblWaitTil As Date
    Now.AddSeconds(OneSec)
    dblWaitTil = Now.AddSeconds(OneSec).AddSeconds(dblSecs)
    Do Until Now > dblWaitTil
        Application.DoEvents() ' Allow windows messages to be processed
    Loop

End Sub

End Class
SysDragon
  • 9,692
  • 15
  • 60
  • 89
Moraki
  • 243
  • 1
  • 3
  • 9
  • Maybe you are not successfully counting the new bound of your arrays. Use Generics so that you just have to Add the items to your lists and not worry about its size. – Hanlet Escaño Feb 19 '13 at 22:28

1 Answers1

0

The simple solution is to change:

Dim linkIndex As Integer = 0

to

Static linkIndex As Integer = 0

This will cause linkIndex to retain its value between calls and the ReDim Preserve PageHtml(linkIndex) will not be reset on each call.

Mark Hurd
  • 10,665
  • 10
  • 68
  • 101