1

I wrote a macro to download data from a website, after the website is fully loaded, it will scrap the data by the html tag, however, sometimes the data is incorrectly scraped due to unknown error, I want to add a checking after each variant 'x' completed, e.g. If the activesheet contains the word "中报",then go back to the step "'Select the Report Type" to re-do the scraping. Also, I know some of the variables/data types are not set at the very beginning. Could anyone help to solve this? Thanks in advance!

Sub GetFinanceData()

    Dim x As Variant
    Dim IE As Object
    For x = 1 To 1584
    Dim URL As String, elemCollection As Object
    Dim t As Integer, r As Integer, c As Integer

    Worksheets("Stocks").Select
    Worksheets("Stocks").Activate

    'Open IE and Go to the Website

    'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
    URL = Cells(x, 1)

    Set IE = CreateObject("InternetExplorer.Application")
    With IE
        .navigate URL
        .Visible = False

        Do While .Busy = True Or .readyState <> 4
            Loop
        DoEvents

    Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = _
    ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value     'You could even simplify it and just state the name as Cells(x,2)


    'Select the Report Type

    Set selectItems = IE.Document.getElementsByTagName("select")
        For Each i In selectItems
            i.Value = "zero"
            i.FireEvent ("onchange")
            Application.Wait (Now + TimeValue("0:00:05"))
        Next i

        Do While .Busy: DoEvents: Loop

    ActiveSheet.Range("A1:K2000").ClearContents

    ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
    ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
    ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)

    'Find and Get Table Data

    tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
    tblStartRow = 6
    Set elemCollection = .Document.getElementsByTagName("TABLE")
    For t = 0 To elemCollection.Length - 1
        For r = 0 To (elemCollection(t).Rows.Length - 1)
            For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
                ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
            Next c
        Next r

        ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
        tblStartRow = tblStartRow + r + 4

    Next t

        End With

        ' cleaning up memory

        IE.Quit

    Next x

End Sub
MatthewD
  • 6,719
  • 5
  • 22
  • 41
Nicholas Kan
  • 161
  • 1
  • 3
  • 14
  • Are you looking for 中报 in cells on the activesheet or in the sheet name? – MatthewD Sep 02 '15 at 12:58
  • @MatthewD, Hi MattewD, I am looking for "中报" in cells on the activesheet, if it is found, then re-do from selecting the report type, actually, "中报" means interim report, but what I want is annual report, hope it is clear. Thanks very much for your quick answer, could you please give me a hand? – Nicholas Kan Sep 02 '15 at 13:10
  • I am unclear as to where you want to search, but look into the range find. https://msdn.microsoft.com/en-us/library/office/ff839746.aspx If you find it call SelectReportType shown in my answer. – MatthewD Sep 02 '15 at 13:19
  • @MatthewD Hi Matthew, I got an error 'Type Mis-match' for below line tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4)) – Nicholas Kan Sep 02 '15 at 14:36
  • try tblNameArr = str(Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))) – MatthewD Sep 02 '15 at 14:50

1 Answers1

0

This is cleaned up quite a bit.

I added a SelectReportType: line label. Whenever you want to go back to that condition, use insert the line

Goto SelectReportType

And it will take you to that spot. The better way to do it would be to place that code in a separate function so you can call it anytime your test for "中报" is true. But I'm not following your code well enough to understand what you are doing to assist you with that.

Sub GetFinanceData()

    Dim x As Variant
    Dim IE As Object
    Dim URL As String, elemCollection As Object
    Dim t As Integer, r As Integer, c As Integer
    Dim selectItems As Variant, i As Variant
    Dim tblNameArr() As String
    Dim tblStartRow As Long

    Worksheets("Stocks").Select
    Worksheets("Stocks").Activate

    For x = 1 To 1584

        'Open IE and Go to the Website

        'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
        URL = Cells(x, 1)

        Set IE = CreateObject("InternetExplorer.Application")
        With IE
            .Navigate URL
            .Visible = False

            Do While .Busy = True Or .ReadyState <> 4
                Loop
            DoEvents

            Worksheets.Add(After:=Worksheets(Worksheets.count)).name = _
            ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value     'You could even simplify it and just state the name as Cells(x,2)

SelectReportType:
            'Select the Report Type

            Set selectItems = IE.Document.getElementsByTagName("select")
                For Each i In selectItems
                    i.Value = "zero"
                    i.FireEvent ("onchange")
                    Application.Wait (Now + TimeValue("0:00:05"))
                Next i

                Do While .Busy: DoEvents: Loop

                ActiveSheet.Range("A1:K2000").ClearContents

                ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
                ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
                ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)

                'Find and Get Table Data

                tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
                tblStartRow = 6
                Set elemCollection = .Document.getElementsByTagName("TABLE")
                For t = 0 To elemCollection.Length - 1
                    For r = 0 To (elemCollection(t).Rows.Length - 1)
                        For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
                            ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
                        Next c
                    Next r

                    ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
                    tblStartRow = tblStartRow + r + 4

                Next t

        End With

        ' cleaning up memory

        IE.Quit

    Next x

End Sub
MatthewD
  • 6,719
  • 5
  • 22
  • 41