0

I'm trying to write a VERY basic shell program in C. The problem I am facing is trying to fill my argv array of character pointers with the words taken from input. When I attempt to print out the contents of the argv array after attempting to fill it using the parse() function below I get a segmentation fault. I know this means that I am probably trying to access part of the argv array that is out of bounds. However, even when supplying only one argument to fill the array, I still get the segfault. The printf call used to print argc returns the correct value for argc based on input, but the second printf call with *argv[0] is the one causing the segfault. I am wondering if my error is in the way I am attempting to print the contents of argv, or if the error is because I am attempting to fill argv incorrectly.

EDIT: I should add that the getword() function takes in a line of text and returns the first word delimited by spaces, and a number of other delimiters. I can post all the delimiters it breaks the words up by if necessary, but I do not think the problem is because of getword().

EDIT 2: Added in the header file and included the #include statement in main.

EDIT 3: Added the getword function under main(), and getword.h below p2.h

Here is p2.h, the header file included in main:

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "getword.h"
#include <signal.h>

#define MAXITEM 100

getword.h:

#include <stdio.h>
#include <string.h>
#include <strings.h>

#define STORAGE 255

int getword(char *w);

int parse(char *, char *[]);

Here is the main function :

#include "p2.h"
int main() {
    pid_t pid, child_pid;
    int argc, inputRedirect;
    char *devNull;
    devNull = (char *) malloc(10);
    strcpy(devNull, "/dev/null");
    char *argv[MAXITEM];
    char commandLine[STORAGE];


    for (;;) {
        printf("p2: ");
        scanf("%s", commandLine);
        argc = parse(commandLine, argv);
        printf("argc = %d\n", argc);

        if(argc == 0)
            continue;
        printf("*argv = %s\n", *argv[0]);
        child_pid = fork();
        if (child_pid < 0) {
            printf("Cannot fork! Terminating...");
            exit(1);
        } else if (child_pid == 0) {
            inputRedirect = open(devNull, O_RDONLY);
            dup2(inputRedirect, STDIN_FILENO);
            close(inputRedirect);
            execvp(*argv, argv);
        }
        else {
            for(;;) {
                pid = wait(NULL);
                if(pid == child_pid)
                   break;
            }
            printf("Child's pid is %d\n", child_pid);
        }
    }
    killpg(getpid(), SIGTERM);
    printf("p2 Terminated.\n");
    exit(0);
}

int parse(char *commandLine, char *argv[]) {
    int i, argc = 0;
    char *commandPointer = commandLine;
    while (*commandPointer != '\0') {
        *argv = commandPointer;
        argc++;
        getword(commandPointer);
    }
    *commandPointer = '\0';
    *argv = '\0';
    return argc;
}

getword.c:

#include "getword.h"
#include <stdlib.h>

/*Function Prototypes*/
int tilde(char *p, int i);
int BSFollowedByMetaCharacter(int c, char *w);


int getword(char *w) {

    int c;
    int index = 0;

    /*This while loop removes all leading blanks and whitespace characters
     * The if statement then tests if the first character is a new line or
     *  semicolon metacharacter*/
    while ((c = getchar()) == ' ' || c == '\t' || c == '\n' || c == ';') {
        if (c == '\n' || c == ';') {
            w[index] = '\0';
            return 0;
        }
    }

    /*This if statement calls ungetc() to push whatever character was taken
     * from the input stream in the previous while loop back to the input
     * stream. If EOF was taken from the input stream, ungetc() will return EOF,
     * which will then cause getword() to return -1, signalling that it reached
     * the End Of File. */
    if (ungetc(c, stdin) == EOF)
        return -1;

    /*This if statement deals with some of the "non-special" metacharacters.
     * If one of these metacharacters is pulled from the input stream by getchar(),
     * it is stored in w and null-terminated. getword() then returns the length of
     * the current string stored in w. If getchar() pulls anything besides one of the
     * specified metacharacters from the input stream, it is then returned using ungetc() after
     * the if statement.*/
    if ((c = getchar()) == '<' || c == '>' || c == '|' || c == '&') {
        w[index++] = c;
        int d = getchar();
        if (c == '>' && d == '>')
            w[index++] = d;
        else {
            ungetc(d, stdin);
        }
        w[index] = '\0';
        return index;
    }
    ungetc(c, stdin);

    /*This while statement handles plain text from the input stream, as well as a few 'special'
     * metacharacters. It also ensures that the word scanned is shorter than STORAGE-1 bytes.*/
    while ((c = getchar()) != ' ' && c != '<' && c != '>' && c != '|'
        && c != ';' && c != '&' && c != '\t' && c != '\n' && c != '\0'
        && index <= STORAGE - 1) {
        if (c == '~') {
            int *ip = &index;
            index = tilde(&w[index], *ip);
            continue;
        }/*END IF*/
        else if (c == '\\') {
            int d = c;
            c = getchar();
            if (BSFollowedByMetaCharacter(c, w)) {
                w[index++] = c;
                continue;
            } else {
                w[index++] = d;
            }

        }/*END ELSE IF*/
        w[index] = c;
        index++;
    }/*END WHILE*/

    ungetc(c, stdin);/*This final ungetc() call is used to push any meta characters*/
    w[index] = '\0'; /*used as delimiters back to the input stream, to be retrieved*/
    return index;    /*at the next call of getword().                                      */
}/*END getword()*/

int tilde(char *cp, int i) {
    int *ip;
    ip = &i;
    char *p = cp;
    char *o;
    o = (strcpy(p, getenv("HOME")));
    int offset = strlen(o);
    *ip = *ip + offset;
    return i;
}

int BSFollowedByMetaCharacter(int c, char *w) {
    if (c == '~' || c == '<' || c == '>' || c == '|' || c == ';' || c == '&'
        || c == ' ' || c == '\t' || c == '\\') {
        return 1;
    } else {
        return 0;
    }
}
trawww
  • 103
  • 1
  • 6
  • 14
  • 1
    `char *devNull; devNull = (char *) malloc(10); strcpy(devNull, "/dev/null");` is the worst thing you could do. You are heap-allocating for no reason, failing to free the allocated memory, thus leaking it. You are also casting the return value of `malloc()`, which is wrong. Why not simply write `const char *devNull = "/dev/null";`? –  Nov 25 '13 at 06:33
  • Thanks for the input @H2CO3, I made the changes you suggested. I thought that since the devNull pointer is going to be used throughout the whole program that allocating space on the heap would make more sense, but overlooked the idea of a const char *. However I am still getting the segfault when attempting to print out argv. should i attempt to print it by dereferencing argv[0] twice? like: "printf("*argv = %s\n", **argv[0]);"? – trawww Nov 25 '13 at 06:40
  • did you try running your code in a debugger, by the way? –  Nov 25 '13 at 06:41
  • As you're trying to post the code you're working with (instead of a piece of code), I suggest you post the _full_ code, which includes necessary headers and macro definitions, and shall be able to compile & run & reproduce your problem. – starrify Nov 25 '13 at 06:48
  • @H2CO3 Yes, using gdb on the program confirms that the segfault occurs at the printf call attempting to print contents of argv[0] – trawww Nov 25 '13 at 06:48
  • 2
    @trawww then you shall try modify that printf line into ` printf("*argv = %s\n", argv[0]);` since `argv[0]` is already of type `char*` and shall not be de-referenced when being used for %s`` – starrify Nov 25 '13 at 06:50
  • @starrify After making that change and running p2 through gdb, I still get a segfault, but now it is saying its at "0xff242cb0 in strlen () from /lib/libc.so.1". The only instance of strlen() in my whole program is in the getword() function, which i will upload after this comment, but only is called when there is a tilde character present in input. I entered "hello world" as input and get the error, so I don't understand why strlen is even being called. – trawww Nov 25 '13 at 07:14
  • @trawww Well, I don't know the reason for now but I suggest you to check the return value of `getenv`, since it may return `NULL` if the item was not found, then `strcpy` may do nothing (however I think it shall trigger an exception) and leave string `p` unchanged. Then `strlen` may fail since the data comes from an uninitialized array `commandLine`. – starrify Nov 25 '13 at 08:13
  • In your code listing, you do not change `*commandPointer`. So, the loop should be infinite. Also, in `index = tilde(&w[index], *ip);`, why not send just `ip` and handle it in the function? – asif Nov 25 '13 at 08:48
  • checking return value of getenv still gives me the same error message running through gdb, which i copy and pasted below. "Program received signal SIGSEGV, Segmentation fault. 0xff242cb0 in strlen () from /lib/libc.so.1" – trawww Nov 25 '13 at 08:51
  • @asif I thought that the call "getword(commandPointer);" advances the pointer, which would then change commandPointer. I successfully print the value of argc which occurs AFTER the call to parse(commandLine, argv) in main(). If the loop was infinite wouldn't it never return from parse? Or is this incorrect? – trawww Nov 25 '13 at 09:29

1 Answers1

0

The functions in getword.c seems correct. Your problem is in function parse.

To use execvp, contents of argv should be following (input:"hello world"):

argv[0] -> "hello"
argv[1] -> "world"
argv[2] -> NULL

Here, argv is an array of character pointers. But, in parse function, you are treating argv like simple character pointers in here:

*argv = commandPointer;

and here:

*argv = '\0';

Change your parse function into something like this:

int parse(char *commandLine, char *argv[]) {
    int argc = 0;
    char *commandPointer;
    argv[argc++] = commandLine;

    do{
        commandPointer = (char*)malloc(sizeof(char) * STORAGE);
        argv[argc++] = commandPointer;
        getword(commandPointer);
    }while(*commandPointer != '\0');
    argv[argc] = NULL;
    return argc;
}

Now, you should free allocated memory after if-else tree like:

for(int i = 0; i < argc; i++) free(argv[i]);
asif
  • 975
  • 8
  • 16
  • i implemented the parse function verbatim, but still got the segfault. After changing – trawww Nov 25 '13 at 13:14
  • Which function is giving you segfault now? Does it returns form `parse` correctly? Does it still prints correct value of `argc`? – asif Nov 25 '13 at 14:54
  • removing the first * in "*argv[argc++] = commandPointer;" seemed to do it. argv is not filled with any input i enter in main(), and i am able to iterate and print through each array entry. Thank you @asif ! – trawww Nov 25 '13 at 16:50
  • sorry, my bad...:( Edited and fixed. – asif Nov 26 '13 at 04:40