2

I am trying to create a VBA function that parses VBA code. I'm at the stage where I'm trying to put in an array variable all the instructions present on a line of code. For example the following code contains two instructions:

strVar = "Some text"
lngVar = 2

Those 2 instructions can also be written as following:

strVar = "Some text": lngVar = 2

I specify that I already have a personal function that transforms a "multiline" line into a simple line:

strVar = "From text": _
  lngVar = 2 'Multiline'

So the CodeLine argument (the line of code to be parsed) of my custom function always contains a single code line. I thought I had reached the desired result since I get the right result with some twisted lines such as :

str = " : 1": str = " : 2": str = " : 2"

or

str = """ : 1""": str = """ : 2""": str = """ :"" 2""

But I realize that I don't get the desired result for this kind of lines:

Next vntSubString: CommentPosition = IIf(blnStringMode, 0, InStr(1, CodeLine, ": "))

I'd really like to do something generic and quick to execute, I think I'm not too far from the final result, but I'm a bit blocked.

Here's what my personal function looks like at the moment:

Public Function SplitInstructions(ByVal CodeLine As String) As Variant()

  Dim vntResult() As Variant
  Dim vntSubString¹ As Variant
  Dim blnIsStringMode As Boolean 'Determines if we are in a subtext between quotes or not
  Dim vntSubString² As Variant

  Let vntResult = VBA.Array

  If InStr(1, CodeLine, ": ") = 0 Then 'A single instruction
    Let vntResult = VBA.Array(CodeLine)
  ElseIf InStr(1, CodeLine, """") = 0 Then 'Several statements, but no quotes => On Split
    Do Until VBA.InStr(1, CodeLine, "::") = 0
      Let CodeLine = VBA.Replace(CodeLine, "::", ":")
    Loop: Call AddToArray(vntResult, Split(CodeLine, ":"))
  Else 'it gets complicated
    For Each vntSubString¹ In Split(CodeLine, """")
      If blnIsStringMode Then
        Let vntResult(UBound(vntResult)) = Trim$(vntResult(UBound(vntResult)) & """" & vntSubString¹ & """")
      Else
        For Each vntSubString² In Split(vntSubString¹, ": ")
          If vntSubString² <> vbNullString Then Call AddToArray(vntResult, vntSubString²)
        Next vntSubString²
      End If
      Let blnIsStringMode = Not blnIsStringMode
    Next vntSubString¹
  End If

  Let SplitInstructions = vntResult

End Function

Private Sub AddToArray(ByRef Arr() As Variant, ByVal Value As Variant)

  Dim vntValue As Variant

  If VBA.IsArray(Value) Then
    For Each vntValue In Value
      Call AddToArray(Arr, vntValue)
    Next vntValue
  Else
    ReDim Preserve Arr(LBound(Arr) To UBound(Arr) + 1)
    Let Arr(UBound(Arr)) = Value
  End If

End Sub

In advance, thank you for your help!

Edit : here is the function I use to determine then comment position of a code line

Private Function CommentPosition(ByVal CodeLine As String) As Long

  Dim vntSubString As Variant
  Dim blnStringMode As Boolean
  Dim x As Long

  For Each vntSubString In VBA.Split(CodeLine, """")
    If Not blnStringMode Then
      Let x = VBA.InStr(1, vntSubString, "'")
      If x > 0 Then
        Let CommentPosition = CommentPosition + x
        Exit Function
      End If
    End If
    Let blnStringMode = Not blnStringMode
    Let CommentPosition = CommentPosition + VBA.Len(vntSubString) + 1
  Next vntSubString

  Let CommentPosition = VBA.IIf(blnStringMode, 0, VBA.InStr(1, CodeLine, "'"))

End Function
ODEXT
  • 19
  • 4

3 Answers3

0

Building on Dick's answer to parse the string, but utilising InStr to look ahead to the next character of interest:

Sub test()
    Dim CodeLine As String
    Dim CodeLines() As String
    
    CodeLine = "str = """""" : 1"""""": str = """""" : 2"""""": str = """""" :"""" 2"""""

    CodeLines = SplitInstructions(CodeLine)
    
    Stop
End Sub

Function SplitInstructions(ByVal CodeLine As String) As String()
    Dim CharOfInterest As String
    Dim idx As Long
    Dim aReturn() As String
    Dim NumLines As Long
    
    ReDim aReturn(1 To 1000)
    NumLines = 1
    aReturn(1) = CodeLine
    idx = 1
    Do
        Debug.Print aReturn(NumLines), idx
        CharOfInterest = GetNextCharOfInterest(aReturn(NumLines), idx)
        Select Case CharOfInterest
            Case """"
                ' Ignore remainder of quoted string
                idx = GetStringClose(aReturn(NumLines), idx) + 1
            Case ":"
                ' Break on :
                aReturn(NumLines + 1) = Trim$(Mid$(aReturn(NumLines), idx + 1))
                aReturn(NumLines) = Trim$(Left$(aReturn(NumLines), idx - 1))
                NumLines = NumLines + 1
                idx = 1
            Case "'", vbNullString
                ' Comment, or end of code
                ReDim Preserve aReturn(1 To NumLines)
                Exit Do
        End Select
    Loop
    SplitInstructions = aReturn
End Function

' Look ahead to end of Quoted string
Function GetStringClose(CodeLine As String, ByRef idx As Long)
    Dim i As Long
    If Mid$(CodeLine, idx, 1) = """" Then 'verfiy
        i = InStr(idx + 1, CodeLine, """")
        Do
            If Mid$(CodeLine, i + 1, 1) = """" Then
                ' delimited "
                i = i + 1
                i = InStr(i + 1, CodeLine, """")
            Else
                ' end of quoted string
                i = IIf(i = 0, Len(CodeLine) + 1, i)
                GetStringClose = i
                Exit Do
            End If
        Loop
    Else
        'invalid call
        Stop
    End If
End Function


Function GetNextCharOfInterest(CodeLine As String, idx As Long) As String
    Dim Quote As Long
    Dim Colon As Long
    Dim Comment As Long
    Dim MinPos As Long
    
    If idx > Len(CodeLine) Then
        GetNextCharOfInterest = vbNullString
        Exit Function
    End If
    Quote = InStr(idx, CodeLine, """")
    Colon = InStr(idx, CodeLine, ":")
    Comment = InStr(idx, CodeLine, "'")
    
    If Quote + Colon + Comment = 0 Then
        GetNextCharOfInterest = vbNullString
    Else
        Quote = IIf(Quote = 0, Len(CodeLine) + 1, Quote)
        Colon = IIf(Colon = 0, Len(CodeLine) + 1, Colon)
        Comment = IIf(Comment = 0, Len(CodeLine) + 1, Comment)
        
        MinPos = Application.Min(Quote, Colon, Comment)
        If Quote = MinPos Then
            GetNextCharOfInterest = """"
            idx = Quote
        ElseIf Colon = MinPos Then
            GetNextCharOfInterest = ":"
            idx = Colon
        Else
            GetNextCharOfInterest = "'"
            idx = Comment
        End If
    End If
End Function

The test result

enter image description here

chris neilsen
  • 52,446
  • 10
  • 84
  • 123
-1

If you want to split on double quotes, you'd need to determine if you're inside a set of parentheses and keep all of that together. I think that's a lot of work. What if you just iterate through the string sequentially?

Public Function SplitInstructions2(ByVal CodeLine As String) As String()
    
    Dim i As Long
    Dim lLastPos As Long
    Dim aReturn() As String
    Dim bInString As Boolean
    Dim lCnt As Long
    
    ReDim aReturn(1 To 1000)
    lLastPos = 1
    
    For i = 1 To Len(CodeLine)
        If Mid$(CodeLine, i, 1) = ":" And Not bInString Then
            lCnt = lCnt + 1
            aReturn(lCnt) = Trim$(Mid$(CodeLine, lLastPos, i - lLastPos))
            lLastPos = i + 1
        ElseIf Mid$(CodeLine, i, 1) = """" Then
            bInString = Not bInString
        End If
    Next i
    
    lCnt = lCnt + 1
    aReturn(lCnt) = Trim$(Mid$(CodeLine, lLastPos, Len(CodeLine) - lLastPos + 1))
    
    ReDim Preserve aReturn(1 To lCnt)
    
    SplitInstructions2 = aReturn
    
End Function
Dick Kusleika
  • 32,673
  • 4
  • 52
  • 73
  • Hello Dick. Thanks for your answer. I already implemented such a solution, but I want to use the Split function to make it way faster. For instance, I used the Split function to determine the comment position of a code line faster than with a sequential iteration – ODEXT Mar 11 '22 at 18:40
  • I edited the message with the comment position function. – ODEXT Mar 11 '22 at 18:45
  • @dick wouldn't you have to handle escaped `"`'s when in a string. As is I think this will reset bInString too soon. FWIW it's a shame OP seems to have rejected this out of hand. – chris neilsen Mar 11 '22 at 19:23
  • @chrisneilsen, I did not reject his solution. I already had implemented it, and it works just fine. I just wanted to find a faster algorithm. AAMOF, I did find a method that is twice faster. – ODEXT Mar 12 '22 at 00:54
  • @odext did you try my version? How did it compare? – chris neilsen Mar 12 '22 at 01:11
  • @chrisneilsen, I did try it and with the examples I chose (400 000 code lines), here are the results : 3.1 sec with the solution I posted 5.7 sec with the sequential approach 16+ sec with your code (I modified all string() arrays as variant() arrays to fit my AddToArray procedure – ODEXT Mar 12 '22 at 02:11
-1

I kept digging, and found a way. It can probably be enhanced, but here's the full code :

Option Explicit

Public Sub DurationTest()

  Dim StartTime As Single
  Dim Count As Long
  Dim vntCodeLine As Variant
  Dim vntInstructions() As Variant

  Let StartTime = VBA.Timer

  For Count = 1 To 100000
    For Each vntCodeLine In VBA.Array( _
      "str = "" : 1"": str = "" : 2"": str = "" : 2""", _
      "var = 2: var = 2", _
      "str = """""" : 1"""""": str = "" """": 2"""""": str = "" """":"""" 2""", _
      "Next vntSubString: CommentPosition = IIf(blnStringMode, 0, InStr(1, CodeLine, "": ""))")

      Let vntInstructions = SplitInstructions(vntCodeLine)
    Next vntCodeLine
  Next Count

  Call VBA.MsgBox("Solution 1: " & VBA.Timer - StartTime)

  Let StartTime = VBA.Timer

  For Count = 1 To 100000
    For Each vntCodeLine In VBA.Array( _
      "str = "" : 1"": str = "" : 2"": str = "" : 2""", _
      "var = 2: var = 2", _
      "str = """""" : 1"""""": str = "" """": 2"""""": str = "" """":"""" 2""", _
      "Next vntSubString: CommentPosition = IIf(blnStringMode, 0, InStr(1, CodeLine, "": ""))")

      Let vntInstructions = SplitInstructions2(vntCodeLine)
    Next vntCodeLine
  Next Count

  Call VBA.MsgBox("Solution 2: " & VBA.Timer - StartTime)

End Sub

Public Function SplitInstructions(ByVal CodeLine As String) As Variant() 'Use Split function, faster than SplitInstructions2

  Dim vntResult() As Variant
  Dim vntSubString As Variant
  Dim blnStringMode As Boolean
  Dim x As Long, y As Long
  Dim lngStart As Long

  Let vntResult = VBA.Array

  If VBA.InStr(1, CodeLine, """") = 0 Then
    For Each vntSubString In VBA.Split(CodeLine, ":")
      Let vntSubString = VBA.Trim$(ReplaceAll(vntSubString, "::", ":"))
      If vntSubString <> VBA.vbNullString Then Call AddToArray(vntResult, vntSubString)
    Next vntSubString
  ElseIf VBA.InStr(1, CodeLine, ": ") = 0 Then
    Let vntSubString = VBA.Trim$(ReplaceAll(CodeLine, "::", ":"))
    If vntSubString <> VBA.vbNullString Then Call AddToArray(vntResult, vntSubString)
  Else
    For Each vntSubString In VBA.Split(CodeLine, """")
      If Not blnStringMode Then
        Let x = VBA.InStr(1, vntSubString, ":")
        If x > 0 Then
          Call AddToArray(vntResult, VBA.Mid$(CodeLine, lngStart + 1, x + y - lngStart - 1))
          Let lngStart = y + VBA.InStrRev(vntSubString, ":") + 1
        End If
      End If
      Let blnStringMode = Not blnStringMode
      Let y = y + VBA.Len(vntSubString) + 1
    Next vntSubString
    If x + y - lngStart - 1 > 0 Then
      Call AddToArray(vntResult, VBA.Mid$(CodeLine, lngStart + 1, x + y - lngStart - 1))
    End If
  End If

  Let SplitInstructions = vntResult

End Function

Public Function SplitInstructions2(ByVal CodeLine As String) As Variant() 'Sequential iterations, slower than SplitInstructions

  Dim vntResult() As Variant
  Dim lngStart As Long, k As Long
  Dim blnStringMode As Boolean

  Let vntResult = VBA.Array
  Let lngStart = 1

  
  For k = 1 To VBA.Len(CodeLine)
    If VBA.Mid$(CodeLine, k, 1) = """" Then
      Let blnStringMode = Not blnStringMode
    ElseIf VBA.Mid$(CodeLine, k, 1) = ":" Then
      If Not blnStringMode Then
        If k > lngStart Then Call AddToArray(vntResult, VBA.Trim$(VBA.Mid$(CodeLine, lngStart, k - lngStart)))
      Let lngStart = k + 1: End If
    End If
  Next k
  If k > lngStart Then Call AddToArray(vntResult, VBA.Trim$(VBA.Mid$(CodeLine, lngStart, k - lngStart)))

  Let SplitInstructions2 = vntResult

End Function

Private Sub AddToArray(ByRef Arr() As Variant, ByVal Value As Variant)

  Dim vntValue As Variant

  If VBA.IsArray(Value) Then
    For Each vntValue In Value
      Call AddToArray(Arr, vntValue)
    Next vntValue
  Else
    ReDim Preserve Arr(LBound(Arr) To UBound(Arr) + 1)
    Let Arr(UBound(Arr)) = Value
  End If

End Sub

Public Function ReplaceAll(ByVal Expression As String, ByVal Find As String, ByVal Replace As String) As String

  Do Until VBA.InStr(1, Expression, Find) = 0
    Let Expression = VBA.Replace(Expression, Find, Replace)
  Loop: Let ReplaceAll = Expression

End Function

Thanks for reading me. Take care everyone.

ODEXT
  • 19
  • 4