0

I'm trying to debug a program that gives the error: Abort (core dumped). Valgrind detects a stack smashing and gives a LEAK SUMMARY with 1 block still reachable. It signals to line 12 of a the function downloadAndOpen where I have an fopen that I thought was closed at the end of main but it seems like it isn't. I would appreciate help with this bug. The valgrind output is:

*** stack smashing detected ***: ./mtg terminated
==9594== 
==9594== HEAP SUMMARY:
==9594==     in use at exit: 352 bytes in 1 blocks
==9594==   total heap usage: 1 allocs, 0 frees, 352 bytes allocated
==9594== 
==9594== 352 bytes in 1 blocks are still reachable in loss record 1 of 1
==9594==    at 0x402A17C: malloc (in /usr/lib/valgrind/vgpreload_memcheck-x86-linux.so)
==9594==    by 0x40BE62B: __fopen_internal (iofopen.c:73)
==9594==    by 0x40BE70A: fopen@@GLIBC_2.1 (iofopen.c:103)
==9594==    by 0x8048729: downloadAndOpen (downloadAndOpen.c:12)
==9594==    by 0x80485B5: main (mtg.c:15)
==9594== 
==9594== LEAK SUMMARY:
==9594==    definitely lost: 0 bytes in 0 blocks
==9594==    indirectly lost: 0 bytes in 0 blocks
==9594==      possibly lost: 0 bytes in 0 blocks
==9594==    still reachable: 352 bytes in 1 blocks
==9594==         suppressed: 0 bytes in 0 blocks
==9594== 
==9594== For counts of detected and suppressed errors, rerun with: -v
==9594== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
Aborted (core dumped)

Edit: After fixing the first problem I got a similar one after the program worked correctly until page 18. The Valgrind report is:

==11845== Invalid read of size 4
==11845==    at 0x40C5F35: getc (getc.c:38)
==11845==    by 0x80487EA: download (download.c:12)
==11845==    by 0x2020201F: ???
==11845==  Address 0x420ba20 is 248 bytes inside a block of size 352 free'd
==11845==    at 0x402B358: free (in /usr/lib/valgrind/vgpreload_memcheck-x86-linux.so)
==11845==    by 0x40BDDB9: fclose@@GLIBC_2.1 (iofclose.c:85)
==11845==    by 0x804866B: main (mtg.c:26)
==11845== 
==11845== Invalid read of size 4
==11845==    at 0x40C5F3E: getc (getc.c:38)
==11845==    by 0x80487EA: download (download.c:12)
==11845==    by 0x2020201F: ???
==11845==  Address 0x420ba68 is 320 bytes inside a block of size 352 free'd
==11845==    at 0x402B358: free (in /usr/lib/valgrind/vgpreload_memcheck-x86-linux.so)
==11845==    by 0x40BDDB9: fclose@@GLIBC_2.1 (iofclose.c:85)
==11845==    by 0x804866B: main (mtg.c:26)
==11845== 
==11845== Invalid read of size 4
==11845==    at 0x40C5F48: getc (getc.c:38)
==11845==    by 0x80487EA: download (download.c:12)
==11845==    by 0x2020201F: ???
==11845==  Address 0x8 is not stack'd, malloc'd or (recently) free'd
==11845== 
==11845== 
==11845== Process terminating with default action of signal 11 (SIGSEGV)
==11845==  Access not within mapped region at address 0x8
==11845==    at 0x40C5F48: getc (getc.c:38)
==11845==    by 0x80487EA: download (download.c:12)
==11845==    by 0x2020201F: ???
==11845==  If you believe this happened as a result of a stack
==11845==  overflow in your program's main thread (unlikely but
==11845==  possible), you can try to increase the size of the
==11845==  main thread stack using the --main-stacksize= flag.
==11845==  The main thread stack size used in this run was 8388608.
==11845== 
==11845== HEAP SUMMARY:
==11845==     in use at exit: 352 bytes in 1 blocks
==11845==   total heap usage: 18 allocs, 17 frees, 6,336 bytes allocated
==11845== 
==11845== 352 bytes in 1 blocks are still reachable in loss record 1 of 1
==11845==    at 0x402A17C: malloc (in /usr/lib/valgrind/vgpreload_memcheck-x86-linux.so)
==11845==    by 0x40BE62B: __fopen_internal (iofopen.c:73)
==11845==    by 0x40BE70A: fopen@@GLIBC_2.1 (iofopen.c:103)
==11845==    by 0x8048729: downloadAndOpen (downloadAndOpen.c:12)
==11845==    by 0x80485B5: main (mtg.c:15)
==11845== 
==11845== LEAK SUMMARY:
==11845==    definitely lost: 0 bytes in 0 blocks
==11845==    indirectly lost: 0 bytes in 0 blocks
==11845==      possibly lost: 0 bytes in 0 blocks
==11845==    still reachable: 352 bytes in 1 blocks
==11845==         suppressed: 0 bytes in 0 blocks
==11845== 
==11845== For counts of detected and suppressed errors, rerun with: -v
==11845== ERROR SUMMARY: 3 errors from 3 contexts (suppressed: 0 from 0)
Segmentation fault (core dumped)

I've isolated the problem but I still can't see what's wrong:

#include <stdio.h>
#include <stdlib.h> // for using system calls
#include <stdbool.h>

int main (void)
{
    char url[200], cmd[200];
    int pos = 0, c, j;
    bool found = false;
    FILE *fp;
    fp = fopen ("file.txt", "w+");
    fprintf (fp, "\"http://4.bp.blogspot.com/-mIE4JlppKMU/T9_mxKR__wI/AAAAAAAAASs/deHLBL21ZbE/s640/Temple Garden.png\"");
    while (!found)
    {
        if ( (c = getc (fp)) == EOF ) {
            printf ("Image not found\n");
            return 1;
        }
        printf ("%c", c);
        url[pos] = c;
        if ( pos > 0 && url[pos-1] == 'g' && url[pos] == '\"' )
        {
            found = true;
        }
        ++pos;
    }
    --pos;
    char url2[pos];
    for ( j = 1; j < pos; j++ )
    {
        url2[j - 1] = url[j];
    }
    url2[j - 1] = '\0';

    //http://joequery.me/code/snprintf-c/
    // wget -q for quiet -nc, --no-clobber   skip downloads that would download to existing files (no sobreescribir)
    snprintf(cmd, 200, "wget -q -nc -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", 1, url2);
    system(cmd);
    return 0;
}

Here is the code of the main function:

#include "helpers.h"

char  postBegin[] = "forum-post-body-content", postEnd[] = "p-comment-notes", img[] = "img src=";
int length1 = 22, length2 = 14, length3 = 7;
int pos1 = 0, pos2 = 0, pos3 = 0;

int main ()
{
    bool inPost = false;
    FILE *fp;
    int c;

    for ( int i = 1; i <= 151; i++ )
    {
        downloadAndOpen (&fp, i);
        while ( (c = getc (fp)) != EOF ) {
            if ( search (postBegin, length1, c, &pos1) )
                inPost = true;
            if (inPost) {
                if ( search (postEnd, length2, c, &pos2) )
                    inPost = false;
                if ( search (img, length3, c, &pos3) )
                    download (&fp);
            }
        }
        fclose (fp);
    }
}

And this is the function where Valgrind complains:

#include "helpers.h"

    void downloadAndOpen (FILE **fp, int i)
    {
        char cmd[128]={0}, file[20];
        // download web page
        sprintf (cmd, "wget -q -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'", i, i);
        system (cmd);

        // open text file
        sprintf (file, "page%d.txt", i);
        *fp = fopen (file, "r");
    }

The rest of the program is here:

#include <stdio.h>
#include <stdlib.h> // for using system calls
#include <stdbool.h>
#include <string.h> // for strlen

void downloadAndOpen (FILE **fp, int i);
bool search (char needle[], int length, char c, int *pos);
void download (FILE* *fp);

#include "helpers.h"

void download (FILE **fp)
{
    char url[128], cmd[128];
    int pos = 0, c, j;
    static int num = 1;
    bool found = false;

    while (!found)
    {
        if ( (c = getc (*fp)) == EOF ) {
            printf ("Image not found\n");
            return;
        }
        url[pos] = c;
        if ( pos > 0 && url[pos-1] == 'g' && url[pos] == '\"' )
        {
            found = true;
        }
        ++pos;
    }
    --pos;
    char url2[pos];
    for ( j = 1; j < pos; j++ )
    {
        url2[j - 1] = url[j];
    }
    url2[j - 1] = '\0';
    sprintf(cmd, "wget -q -O /home/arturo/Dropbox/Digital_Renders/%d \'%s\'", num++, url2);
    system(cmd);
}

#include "helpers.h"

bool search (char needle[], int length, char c, int *pos)
{
    if (needle[*pos] == c)
    {
        if (*pos == length)
        {
            return true;
            *pos = -1;
        }
        (*pos)++;
    }
    else
    {
        if(*pos > 0)
        *pos = 0;
    }
    return false;
}

And the Makefile:

CC = gcc
CFLAGS = -ggdb3 -O0 -std=c99 -Wall -Werror

all: mtg

mtg: mtg.c downloadAndOpen.c search.c download.c helpers.h
    $(CC) $(CFLAGS) -o mtg mtg.c downloadAndOpen.c search.c download.c

clean:
    rm -f *.o a.out core mtg
  • 1
    Does `"wget -q -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'"` actually end up fitting into the 128 chars of `cmd`? It seems like you might not have the space for a terminating null character, and that you'd not have enough space if you need too many digits for the result of `%d`. – Joshua Taylor Dec 22 '14 at 18:54
  • I tried it before with a function and got 122 for the length. `#include #include int main (void) { char *link = "wget -q -O page1.txt http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=1"; printf ("%i\n", strlen (link)); }` –  Dec 22 '14 at 19:01
  • @JoshuaTaylor is right. once you get to i=10, the cmd string is 127 bytes long. If i is ever three digits, you will smash the stack, and in your main loop, `i` goes over 99, so there it is. When i=100, you need 130 chars to hold the string with trailing `'\0'` – JohnH Dec 22 '14 at 19:02
  • 1
    Also note that while it won't help you allocate enough space, using snprintf with a bound matching your buffer would have kept you from trying to write outside the buffer here. Then you could have used a simple printf to see what the cmd ended up being, and it would have been clear that it didn't fit. – Joshua Taylor Dec 22 '14 at 19:05
  • Note, in your 'I tried it' example, you left out the single ticks around the URL, which add two bytes. – JohnH Dec 22 '14 at 19:05
  • Ok I checked with cmd[200] and it works. Thanks –  Dec 22 '14 at 19:05
  • @Alejandria - it is 129 byte, if you include space for the trailing null at the end of the string. `int main (void) { char *link = "wget -q -O page150.txt 'http://www.mtgsalvation.com/forums/creativity/artwork /340782-official-digital-rendering-thread?page=150'"; printf ("%i\n", strlen (link)+1); }'` – JohnH Dec 22 '14 at 19:12
  • Note the case for using `snprintf()` instead of `sprintf()`, and for checking the result of `snprintf()`. You might also have `asprintf()` available, which would also help you avoid this problem. – Jonathan Leffler Dec 22 '14 at 20:02
  • is there any reason that `length1`, `length2`, `length3` are one less than the length of the 3 strings on the previous line? – M.M Dec 22 '14 at 22:43
  • in the function search I do `if (*pos == length)` so length has to be the max position of the array. –  Dec 22 '14 at 22:55
  • @Alejandria for people reading your code (or yourself reading it at a later date!) it'd be better to update the search function so that `length` is actually the length of the string it's searching for – M.M Dec 22 '14 at 23:30
  • the code should always check the returned value from any of the malloc family of functions to assure to memory allocation was successful – user3629249 Dec 23 '14 at 02:32
  • regarding the makefile: 1) cc := /usr/lib/gcc 2) globbing all the compiles and the link into a single line is not a good nor flexable plan. Suggest breaking out into a compile rule from one .c to one .o and then using the list of .o files on a separate rule that performs the link. 3) the 'all' and 'clean' do not produce a file with those names. suggest adding the line: .PHONY: all clean – user3629249 Dec 23 '14 at 02:37
  • the code needs to check the returned value from fopen() to assure the operation was successful – user3629249 Dec 23 '14 at 02:41
  • the wget command can fail, so the wget line should route stderr to a file, then after system(cmd); should open/read that file to assure the wget was successful – user3629249 Dec 23 '14 at 02:47
  • this declaration: char cmd[128]={0}, will fail when page number exceeds 9 and may fail anyway due to the need to include the nul byte terminator on the end of the string. Also, will your command line implementation in the OS handle a line greater than 128 bytes in length? – user3629249 Dec 23 '14 at 02:51
  • the main() function will not compile cleanly because the declaration (as it should) indicates are int returned value, but no return (someint) line exists in the execution paths – user3629249 Dec 23 '14 at 03:08
  • in the main() function, what is the magic number '151' about? Better to #define that number, with a descriptive name and a comment – user3629249 Dec 23 '14 at 03:10

1 Answers1

6

You may have buffer overflows in both of these sprintf commands:

char cmd[128]={0}, file[20];

sprintf (cmd, "wget -q -O page%d.txt 'http://www.mtgsalvation.com/forums/creativity/artwork/340782-official-digital-rendering-thread?page=%d'", i, i);
sprintf (file, "page%d.txt", i);

Rather than try to analyze whether a particular instance overflowed or not, fix your code:

int n = snprintf(cmd, sizeof cmd, "............
if ( n < 0 || n >= sizeof cmd )
    exit(EXIT_FAILURE);    // or other error handling

and

n = snprintf(file, sizeof file, "page%d.txt", i);
if ( n < 0 || n >= sizeof file )
    exit(EXIT_FAILURE);    // or other error handling

If you make this change and still get the problem then update your post.

M.M
  • 138,810
  • 21
  • 208
  • 365
  • or, even better, malloc a large enough area, then free it after calling system(cmd); – user3629249 Dec 23 '14 at 03:11
  • Yep although you've still got to use `snprintf` (possibly wrapped in an asprintf-like function) – M.M Dec 23 '14 at 03:12
  • I found the problem and fixed it. There was an empty image environment and getc was getting characters until a buffer overflow. I'll change the sprintf anyways. –  Dec 23 '14 at 10:35