2

I have an array of char pointers (string array), which contains some duplicate values. I've found an algorithm that truncates the array by removing its duplicate values.

Here is a code sample :

int i, j , k;
int n = 10;
char *teams[n];

for(i=0;i<n;i++){

    for(j=i+1;j<n;){

       if(*(team[j]==*(team[i])){

           for(k=j;k<n;k++){

               //strcpy(team[k], team[k+1]);
               team[k] = team[k+1];
           }
           n--;
      }else{
           j++;
      }
   }
}

I've read that the only way to copy strings between string arrays is to use strcpy(s1, s2). But in my case I can't use it, because the strcpy function only allows copying s2 into s1 if s2 has a length equal to or bigger than the length of s1. So how can I implement this algorithm if I can't put the string pointed to by the pointer team[k+1] into team[k]?

Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
davideAlbertini
  • 93
  • 2
  • 11

2 Answers

0
#include <stdio.h>
#include <string.h>


/*
 * Remove duplicate strings from arr[0..count) in place.
 *
 * Entries are compared by content with strcmp(); only the pointers are
 * moved, the strings themselves are never copied.
 *
 * If PRESERVE_ORDER is defined to a nonzero value, the surviving entries
 * keep their relative order (the tail is shifted down with memmove()).
 * Otherwise the freed slot is refilled with the current last entry, so the
 * order of the result may differ from the input.
 *
 * Returns the number of entries remaining; they occupy arr[0..result).
 */
unsigned dedup(char **arr, unsigned count)
{
    unsigned keep = 0;

    while (keep < count) {
        unsigned probe = keep + 1;

        while (probe < count) {
            if (strcmp(arr[probe], arr[keep]) != 0) {
                probe++;
            } else {
                /* Duplicate of arr[keep]: drop it, then re-test the same
                 * slot because it now holds a different entry. */
                count--;
#if PRESERVE_ORDER
                memmove(&arr[probe], &arr[probe + 1],
                        (count - probe) * sizeof *arr);
#else
                arr[probe] = arr[count];
#endif
            }
        }
        keep++;
    }

    return count;
}

/* Sample input containing several repeated strings. */
char *array[] = { "one", "two", "three", "two", "one", "four", "five", "two" };

/*
 * Deduplicate the sample array with dedup() and print each remaining
 * string on its own line.
 */
int main(void)
{
    /* Derive the element count from the array itself so it cannot drift
     * out of sync with the initializer above. */
    unsigned count = dedup(array, (unsigned)(sizeof array / sizeof array[0]));

    for (unsigned index = 0; index < count; index++) {
        printf("%s\n", array[index]);
    }
    return 0;
}

[UPDATED]: I added the PRESERVE_ORDER version

wildplasser
  • 43,142
  • 8
  • 66
  • 109
  • 1
    This would be a great answer if you wrote a bit about how it works and explained to the OP why he doesn't need to copy strings. –  Jan 29 '17 at 17:16
  • You change the order of the array with `arr[that] = arr[--count];`! You do not only remove the duplicates! – Paul Ogilvie Jan 29 '17 at 17:17
  • I've never seen 'this' keyword in a procedural language, what does it stands for ? – davideAlbertini Jan 29 '17 at 17:46
  • It is not a keyword in C. The syntax highlighting confuses C and C++, I guess. @PaulOgilvie There is no restriction in the question that the order should be preserved. Besides: if duplicates are possible, who cares about order? – wildplasser Jan 29 '17 at 17:50
  • @wildplasser OK! Now I figure out what it does ! Thank you – davideAlbertini Jan 29 '17 at 17:51
0

It seems you need to remove duplicated string representations instead of duplicated addresses to strings.

If so, then this if statement (after adding the missing closing parenthesis)

if( *(team[j] ) ==*( team[i] ) ){

compares only first characters of strings instead of comparing strings pointed to by the pointers.

In this loop

      for(k=j;k<n;k++){

           //strcpy(team[k], team[k+1]);
           team[k] = team[k+1];
       }

each time a duplicate string is found, the whole remainder of the array of pointers is copied down by one position. Moreover, there is an attempt to access memory beyond the array in this statement when k is equal to n-1

           team[k] = team[k+1];
                          ^^^^

You can write a separate function that will "remove" the duplicates. The function can, for example, return a pointer to the position just past the last unique element in the modified array.

#include <stdio.h>
#include <string.h>

/*
 * Compact s[0..n) in place so that its leading elements are the distinct
 * strings (compared with strcmp()) in order of first occurrence.
 *
 * Only pointers are moved; the strings themselves are not copied.
 * Returns a pointer one past the last kept element, so the number of
 * distinct strings is (return value - s).
 */
char ** unique( char *s[], size_t n )
{
    size_t kept = 0;

    for ( size_t src = 0; src < n; src++ )
    {
        /* Has s[src] already been kept in s[0..kept)? */
        int seen = 0;
        for ( size_t prev = 0; prev < kept; prev++ )
        {
            if ( strcmp( s[prev], s[src] ) == 0 )
            {
                seen = 1;
                break;
            }
        }

        if ( seen ) continue;

        /* First occurrence: append it to the kept prefix. */
        if ( kept != src ) s[kept] = s[src];
        kept++;
    }

    return s + kept;
}

/*
 * Demo: print a sample array, remove its duplicates with unique(), then
 * print the compacted prefix.
 */
int main(void)
{
    char * s[] = { "A", "B", "A", "C", "A" };
    const size_t N = sizeof( s ) / sizeof( *s );

    /* The array as given. */
    for ( char **cur = s; cur != s + N; ++cur )
    {
        printf( "%s ", *cur );
    }
    printf( "\n" );

    /* unique() returns one past the last kept element. */
    char **last = unique( s, N );

    for ( char **cur = s; cur != last; ++cur )
    {
        printf( "%s ", *cur );
    }
    printf( "\n" );

    return 0;
}

The program output is

A B A C A 
A B C 
Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335