1

I am having a hard time removing a substring from a string in C. For example, suppose I have:

// Tokenize a command line on spaces and record where the first input
// redirection ("<") occurs. isredirected, infileindex, inputredirectionindex,
// tokenscounter and commandsArray are declared elsewhere (not shown here).
//
// NOTE(review): buff points at a string literal, and strtok() writes into the
// string it scans. Modifying a string literal is undefined behaviour; a
// writable array (char buff[] = "...") would avoid that.
// NOTE(review): the option below is written with an en dash ("–v"), not an
// ASCII hyphen ("-v") — presumably a copy/paste artifact; confirm.
char *buff = "cat –v <  x y z | ";
char *p = strtok (buff, " ");   // first space-delimited token
while (p != NULL)
{
    // Only the first "<" token is treated as the redirection operator.
    if (!strcmp(p, "<") && !isredirected)
    {
        isredirected = 1;
        // "<" itself is stored at tokenscounter (below), so the token that
        // follows it will land at tokenscounter + 1.
        infileindex  = tokenscounter + 1;
        inputredirectionindex = tokenscounter;
    }
    // Every token, including "<" and whatever follows it, is stored.
    commandsArray[tokenscounter++]  = p;
    p = strtok (NULL, " ");     // continue scanning the same string

}

From this buff string, I would like to remove everything that lies between '<' and '|', that is, remove x y z. I used strtok to parse all the tokens, but I cannot work out how to remove x y z. Once I find '<', I want to get rid of all the tokens that come after '<' and before '|'.

Jean-François Fabre
  • 137,073
  • 23
  • 153
  • 219
Samun
  • 151
  • 1
  • 1
  • 12

3 Answers3

1

I would generally recommend regexes for this, and certainly not strtok, and even less on a string literal (undefined behaviour, see C's strtok() and read only string literals)

One solution with only basic libs would be to:

  • look for the start string/character
  • look for the end string/character
  • rebuild a string (as long or shorter) with the part before the start string assembled with the part after the end string.

I'm using strstr for this. It's built-in, doesn't need a loop, and works for multi-char patterns.

#include<stdio.h>
#include<string.h>
#include<stdlib.h>

/*
 * Demonstrates removing the span that begins at the first occurrence of
 * start_pattern and ends with the first following occurrence of end_pattern
 * (both patterns included) from a string.
 *
 * Prints the shortened string as "Result:<string>". Prints nothing when
 * either pattern is missing. Returns EXIT_FAILURE only if memory allocation
 * fails, 0 otherwise.
 */
int main()
{
  const char *buff = "cat -v <  x y z | hello";
  const char *start_pattern = "<";
  const char *end_pattern = "|";

  const char *start = strstr(buff,start_pattern);
  if (start)
  {
    // search for the end pattern only after the start pattern
    const char *end = strstr(start,end_pattern);
    if (end)
    {
      // the result is never longer than the input, so this is always enough
      char *newbuff = malloc(strlen(buff)+1);
      if (!newbuff)
      {
        fputs("out of memory\n",stderr);
        return EXIT_FAILURE;
      }

      // length of the leading part of the string we want to keep
      size_t startlen = (size_t)(start-buff);

      // memcpy copies exactly the prefix; it does not terminate the string,
      // the strcpy below appends the tail together with the final '\0'
      memcpy(newbuff,buff,startlen);
      strcpy(newbuff+startlen,end+strlen(end_pattern));

      printf("Result:%s\n",newbuff);
      free(newbuff);   // free the memory
    }
  }

  return 0;
}

EDIT: some code has been added to the question in the meantime, which explains why I didn't take it into account, as I was trying to write a not-so-clunky solution.

Jean-François Fabre
  • 137,073
  • 23
  • 153
  • 219
0

There is (almost) no built-in solution:

  • either you want to modify inplace: in that case you have to "move" the end of your string (basically |);
  • or create a new string, copying the relevant parts and skipping what is not needed.
Aif
  • 11,015
  • 1
  • 30
  • 44
0

An alternative would be a simple function that iterates over the source string copying characters that are not to be removed to a destination string as in the following.

char *  CopyStringRemove(char *pDest, const char *pSrc)
{
    // Copies pSrc into pDest, dropping the characters found between a '<'
    // and the next '|'. Both delimiter characters themselves are kept.
    //
    // pDest may be the same buffer as pSrc: at most one character is written
    // for each character read, so the write position never passes the read
    // position.
    //
    // Returns pDest. When pDest is null nothing is written; when pSrc is null
    // pDest is set to the empty string.
    char       *pOut = pDest;
    const char *pIn  = pSrc;
    int         bSkipping = 0;   // non-zero while between '<' and '|'

    if (!pDest) {
        return pDest;
    }

    while (pIn && *pIn) {
        const char ch = *pIn++;

        if (ch == '<') {
            bSkipping = 1;       // start dropping characters after this one
            *pOut++ = ch;
        } else if (ch == '|') {
            bSkipping = 0;       // resume copying, starting with the '|'
            *pOut++ = ch;
        } else if (!bSkipping) {
            *pOut++ = ch;
        }
    }

    *pOut = 0;
    return pDest;
}

This function provides quite a bit of flexibility in that the source can be a constant or not. The destination may be an array on the stack or an array malloced from the heap. If the source array is not const then you can do an inplace change by calling the CopyStringRemove() function with both source and destination being the same buffer.

It also tolerates malformed input, such as a string that contains no '<' character or no '|' character.

A test harness such as:

// Runs CopyStringRemove() on buff in three configurations and prints the
// string before and after each call:
//   1. constant source, separate destination buffer
//   2. writable copy of the source, separate destination buffer
//   3. writable copy of the source, modified in place
//
// buff must be shorter than 128 characters: the first case copies straight
// from buff into a 128-byte destination.
void testfunc(const char *buff)
{
    {
        char destbuff[128] = { 0 };
        printf("    orig string \"%s\"\n", buff);
        CopyStringRemove(destbuff, buff);
        printf("        new     \"%s\"\n", destbuff);
    }

    {
        char destbuff[128] = { 0 };
        char buff2[128] = { 0 };
        // snprintf is standard C99, bounds the copy and always terminates it
        // (strcpy_s is an optional Annex K function that many C libraries
        // do not provide)
        snprintf(buff2, sizeof(buff2), "%s", buff);
        printf("    orig string \"%s\"\n", buff2);
        CopyStringRemove(destbuff, buff2);
        printf("        new     \"%s\"\n", destbuff);
    }

    {
        char buff2[128] = { 0 };
        snprintf(buff2, sizeof(buff2), "%s", buff);
        printf("    orig string \"%s\"\n", buff2);
        CopyStringRemove(buff2, buff2);   // source and destination are the same buffer
        printf("        new     \"%s\"\n", buff2);
    }

}

// Runs testfunc() on four sample command lines: with both '<' and '|',
// with only '<', with only '|', and with neither. Each run is preceded by a
// "test #N" heading.
void main_xfun(void)
{
    // String literals must not be modified, so they are referenced through
    // pointers to const.
    static const char *const samples[] = {
        "cat -v <  x y z | ",
        "cat -v <  x y z  ",
        "cat -v   x y z | ",
        "cat -v   x y z  ",
    };
    const unsigned count = (unsigned)(sizeof samples / sizeof samples[0]);

    for (unsigned i = 0; i < count; i++) {
        printf("\ntest #%u\n", i + 1);
        testfunc(samples[i]);
    }
}

yields a result of:

test #1
    orig string "cat -v <  x y z | "
        new     "cat -v <| "
    orig string "cat -v <  x y z | "
        new     "cat -v <| "
    orig string "cat -v <  x y z | "
        new     "cat -v <| "

test #2
    orig string "cat -v <  x y z  "
        new     "cat -v <"
    orig string "cat -v <  x y z  "
        new     "cat -v <"
    orig string "cat -v <  x y z  "
        new     "cat -v <"

test #3
    orig string "cat -v   x y z | "
        new     "cat -v   x y z | "
    orig string "cat -v   x y z | "
        new     "cat -v   x y z | "
    orig string "cat -v   x y z | "
        new     "cat -v   x y z | "

test #4
    orig string "cat -v   x y z  "
        new     "cat -v   x y z  "
    orig string "cat -v   x y z  "
        new     "cat -v   x y z  "
    orig string "cat -v   x y z  "
        new     "cat -v   x y z  "
Richard Chambers
  • 16,643
  • 4
  • 81
  • 106