3

I would like to ask how to use the CommandLineToArgvW function in x86 assembly. I'm having trouble with it. All I can print as of today is the number of the arguments along with the program execution in cmd. I would like to save the arguments in different variables. How can I do it?

My code looks like this:

include \masm32\include\masm32rt.inc

; Calls GetCommandLineW + CommandLineToArgvW and prints the argument count.
; 32-bit MASM32 program; arguments are pushed right-to-left before each call.

.data
  Format db "%d", 10, 0               ; format string for crt_printf: the count followed by a newline

.data?
  Arguments db 100 dup(?)             ; 100-byte buffer; NOTE(review): nothing in this program ever writes to it

.code

start:

  mov esi, offset Arguments           ; NOTE(review): dead store, esi is overwritten below before it is read

  ; Stack frame with one DWORD local at [ebp-4] that receives the argument count.
  push ebp
  mov ebp, esp
  sub esp, 4

  call GetCommandLineW                ; eax = result, passed on as the command-line argument below

  lea ecx, dword ptr[ebp - 4]         ; ecx = address of the local at [ebp-4]
  push ecx                            ; 2nd argument: where the argument count is stored
  push eax                            ; 1st argument: the value returned by GetCommandLineW
  call CommandLineToArgvW

  mov esi, eax                        ; esi = result of CommandLineToArgvW; never used afterwards
  push offset Arguments               ; NOTE(review): prints the untouched buffer, not the parsed arguments,
  call StdOut                         ; which matches the reported output (only the count shows up)

  ; Print the count stored at [ebp-4].
  push dword ptr [ebp - 4]
  push offset Format
  call crt_printf
  add esp, 8                          ; remove the two pushed arguments again


  ; NOTE(review): the result of CommandLineToArgvW is never passed to LocalFree before exiting.
  push 0
  call ExitProcess

end start

My output as of now is the number of arguments. For example:

  • D:\masm32>Hello.exe I am Hello
  • 4
  • D:\masm32>
Pentagon
  • 58
  • 9

1 Answer

5

CommandLineToArgvW has at least three quirks you have to watch out for:

  1. The result is an array of pointers to wide-character strings.

    The MASM32 function crt_printf uses the function printf from the Microsoft VC runtime library (msvcrt.dll). Therefore, you can use an uppercase 'S' as the type field character. Take a look at printf Type Field Characters on MSDN.

  2. The result is the address of the first element of an array of pointers to a string.

    Most print functions expect a pointer to a string, not a pointer to a pointer to a string. You have to dereference that address to get a pointer to the string. A command line "Hello.exe I am Hello" will be split into four strings: "Hello.exe", "I", "am", "Hello". The pointers to these strings are stored in an array of 4 pointers: [pointer to "Hello.exe"], [pointer to "I"], and so on. Assume the function CommandLineToArgvW has a return value EAX=0x001445A8. The hex dump looks like

    Address   Hex dump                                         ASCII
    001445A8  B8 45 14 00|CC 45 14 00|D0 45 14 00|D6 45 14 00| ¸E.ÌE.ÐE.ÖE.
    001445B8  48 00 65 00|6C 00 6C 00|6F 00 2E 00|65 00 78 00| H.e.l.l.o...e.x.
    001445C8  65 00 00 00|49 00 00 00|61 00 6D 00|00 00 48 00| e...I...a.m...H.
    001445D8  65 00 6C 00|6C 00 6F 00|00 00 00 00|00 00 00 00| e.l.l.o.........
    

    At address 0x001445A8 is a pointer to 0x001445B8 (displayed in the dump in little-endian format), and this is the beginning of "Hello.exe" in wide-character format. The next pointer is stored 4 bytes after 0x001445A8, at 0x001445AC; it holds 0x001445CC, which points to "I". The next pointer is another 4 bytes away, and so on. You can quickly go through that array just by adding 4. And you can easily get the address of a string in the middle of the list by multiplying the index by 4 - the pointer to the third string ("am", index: 2) is at 0x001445A8 + 2 * 4 = 0x001445B0 => 0x001445D0 => "am".

  3. The function allocates memory, which has to be manually freed with LocalFree.

I changed your program as little as possible:

include \masm32\include\masm32rt.inc

;-----------------------------------------------------------------------
; Prints every command-line argument on its own line, then the argument
; count ("argc: N").
;
; Target: 32-bit Windows, MASM32. Win32 APIs are stdcall (callee cleans
;         the stack), crt_printf is cdecl (caller cleans the stack).
; Exit code: 0 on success, 1 if CommandLineToArgvW fails.
; Registers: esi = cursor into the pointer array, ebx = arguments left.
;            Both are callee-saved, so they survive the calls in the loop.
;-----------------------------------------------------------------------

.data
    Format db "argc: %d", 10, 0
    fmt db "%S",10,0                ; %S: printf wide-character string / wprintf single-character string

.data?
    szArglist dd ?                  ; Array of wide-string pointers returned by CommandLineToArgvW

.code

start:

    push ebp
    mov ebp, esp
    sub esp, 4                  ; Local DWORD at [ebp-4] receives the argument count

    ; https://msdn.microsoft.com/library/windows/desktop/ms683156.aspx
    call GetCommandLineW        ; EAX = pointer to the command line

    ; https://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
    lea ecx, dword ptr [ebp - 4] ; Get the current address of [ebp-4]
    push ecx                    ; int *pNumArgs (Pointer to a SDWORD, here at ebp-4)
    push eax                    ; LPCWSTR lpCmdLine (from GetCommandLineW)
    call CommandLineToArgvW

    test eax, eax               ; NULL means the call failed: there is no array
    jz args_failed              ; to walk and nothing to hand to LocalFree

    mov [szArglist], eax        ; Keep the array address for LocalFree

    mov esi, eax                ; ESI = address of the first pointer in the array
    mov ebx, [ebp-4]            ; EBX = number of arguments still to print
    test ebx, ebx
    jle args_done               ; Defensive: never enter the countdown with a count <= 0

args_loop:
    push dword ptr [esi]        ; Pointer to a wide string (dereferenced esi)
    push OFFSET fmt             ; Format string
    call crt_printf             ; printf ("%S\n", [esi])
    add esp, 8                  ; cdecl: caller removes the two arguments
    add esi, 4                  ; Advance to the next pointer in the array
    dec ebx                     ; One argument less to print
    jnz args_loop

args_done:
    push dword ptr [szArglist]
    call LocalFree              ; Free the memory allocated by CommandLineToArgvW

    push dword ptr [ebp - 4]    ; Value that is stored in [ebp-4]
    push offset Format          ; Pointer to format string
    call crt_printf             ; printf ("argc: %d\n", [ebp-4])
    add esp, 8                  ; cdecl: caller removes the two arguments

    push 0
    call ExitProcess

args_failed:
    push 1                      ; Non-zero exit code signals the failure
    call ExitProcess

end start

The MASM32 function StdOut cannot handle wide-character-strings. You have to convert them first to ANSI strings. The Windows function for that purpose is WideCharToMultiByte:

include \masm32\include\masm32rt.inc

;-----------------------------------------------------------------------
; Prints every command-line argument on its own line with the MASM32
; StdOut routine, then the argument count.
;
; StdOut only handles ANSI strings, so each wide-character argument is
; first converted into buf with WideCharToMultiByte.
;
; Target: 32-bit Windows, MASM32, stdcall for every call made here.
; Exit code: 0 on success, 1 if CommandLineToArgvW fails.
; Registers: esi = cursor into the pointer array, ebx = arguments left.
;            Both are callee-saved, so they survive the calls in the loop.
;-----------------------------------------------------------------------

.data
    crlf db 13, 10, 0           ; New line

.data?
    szArglist dd ?              ; Array of wide-string pointers returned by CommandLineToArgvW
    buf db 1024 DUP (?)         ; Scratch buffer: one converted argument, later the count as text

.code

start:

    push ebp
    mov ebp, esp
    sub esp, 4                  ; Local DWORD at [ebp-4] receives the argument count

    ; https://msdn.microsoft.com/library/windows/desktop/ms683156.aspx
    call GetCommandLineW        ; EAX = pointer to the command line

    ; https://msdn.microsoft.com/library/windows/desktop/bb776391.aspx
    lea ecx, dword ptr [ebp - 4] ; Get the current address of [ebp-4]
    push ecx                    ; int *pNumArgs (Pointer to a SDWORD, here at ebp-4)
    push eax                    ; LPCWSTR lpCmdLine (from GetCommandLineW)
    call CommandLineToArgvW

    test eax, eax               ; NULL means the call failed: there is no array
    jz args_failed              ; to walk and nothing to hand to LocalFree

    mov [szArglist], eax        ; Keep the array address for LocalFree

    mov esi, eax                ; ESI = address of the first pointer in the array
    mov ebx, [ebp-4]            ; EBX = number of arguments still to print
    test ebx, ebx
    jle args_done               ; Defensive: never enter the countdown with a count <= 0

args_loop:

    ; https://msdn.microsoft.com/library/windows/desktop/dd374130.aspx
    push NULL                   ; LPBOOL  lpUsedDefaultChar
    push NULL                   ; LPCSTR  lpDefaultChar
    push SIZEOF buf             ; int     cbMultiByte
    push OFFSET buf             ; LPSTR   lpMultiByteStr
    push -1                     ; int     cchWideChar (-1: source is NUL-terminated)
    push dword ptr [esi]        ; LPCWSTR lpWideCharStr (dereferenced esi)
    push 0                      ; DWORD   dwFlags
    push 0                      ; UINT    CodePage (0 = CP_ACP)
    call WideCharToMultiByte

    ; If the argument does not fit into buf the conversion fails and buf is
    ; not guaranteed to be NUL-terminated. Terminate the last byte so StdOut
    ; can never read past the buffer (output is truncated in that case).
    mov byte ptr [buf + SIZEOF buf - 1], 0

    push OFFSET buf             ; Pointer to an ANSI string
    call StdOut
    push OFFSET crlf            ; New line
    call StdOut

    add esi, 4                  ; Advance to the next pointer in the array
    dec ebx                     ; One argument less to print
    jnz args_loop

args_done:
    push dword ptr [szArglist]
    call LocalFree              ; Free the memory allocated by CommandLineToArgvW

    push OFFSET buf             ; lpBuffer: receives the decimal text
    push dword ptr [ebp - 4]    ; dwValue: the argument count
    call dwtoa                  ; Convert the count to an ANSI decimal string in buf
    push OFFSET buf             ; Pointer to a string
    call StdOut                 ; Print the argument count
    push OFFSET crlf
    call StdOut

    push 0
    call ExitProcess

args_failed:
    push 1                      ; Non-zero exit code signals the failure
    call ExitProcess

end start
rkhb
  • 14,159
  • 7
  • 32
  • 60
  • Can i ask how you can save each arguments into variables? I can't seem to save them in a variable. Thanks in advance! – Pentagon May 31 '17 at 02:37
  • @Pentagon: Frankly, I don't know what you want to achieve. The arguments are already saved. `szArglist` is an array of pointers to the strings. You've got it saved as long as you don't destroy them with `LocalFree`. Further processing - like comparing with a constant string - isn't as easy as in a high-level programming language, and you don't need to transfer them to a "variable". The approach is quite different depending on your intentions. I will be pleased to show you an example if you describe your intention in more detail. – rkhb May 31 '17 at 06:45
  • I would like to call each argument one by one with the use of StdOut function. I can't seem to do it using the szArglist array. – Pentagon May 31 '17 at 09:33
  • @Pentagon: Please look at my reworked answer. I did my best ;-) – rkhb May 31 '17 at 13:58
  • Thank you for your time, @rkhb ! I get the idea now. Thanks again, brooooooo. – Pentagon Jun 01 '17 at 01:32
  • I have a question. It seems that the saved string is not null terminated. How can I do it? – Pentagon Jun 07 '17 at 09:24
  • @Pentagon: This can happen if `buf` is too small. Increase its size, e.g. increase it to 2000 bytes: `buf db 2000 DUP (?)`. Or put a null at the end of buf: `mov buf + SIZEOF buf - 1, 0`. – rkhb Jun 07 '17 at 09:56