0

I'm having a very strange problem with this bit of code, sorry its pretty messy. Basically its a pagerank algorithm. Each struct webpage is contained in the dynamic array "pages". The pages vector is put through the algorithm until its absolute value (|P|) is smaller than 'epsilon'. Now the issue is with lines 195-201. If i remove the iteration over the array in those lines (i.e. an empty while loop), it works for cases that only require one iteration. However, when i do have the for loop (even for one iteration cases), it throws error6 (line 179, debugging shows e == NULL) without even having run over the inserted loop. Ive set breakpoints etc, and still gives error6 without even having read the extra code. What's going on here? Im pretty new to C and parallel programming so its probably something fundamental. Would appreciate any help!

input format:

number_of_cores
number_of_pages
...
page_names
...
page_links

output format:

...
page_rank
...

code

#include <assert.h>
#include <math.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const double D = 0.85;
static const double EPSILON = 0.005;
int ncores;
int npages;
struct webpage** pages;
int maxdepth;

struct webpage* has(char s[20], int e);
void* threadf(void* ptr);
int quit(void);
double rec(int s, int f, int depth);




struct webpage {
    char name[20];
    double oldrank;
    double rank;
    struct node* in;
    int incount;
    int outcount;

};

struct node {
    struct webpage* data;
    struct node* next;
};

struct arg {
    int s;
    int f;
    int depth;
    double ret;
};

struct webpage*
has(char s[20], int e) {
    int p;
    for (p=0; p<e; ++p) {
        if (strcmp(s, pages[p]->name) == 0) {
            return pages[p];
        }
    }
    return NULL;
}

void *
threadf(void* ptr) {
    struct arg* curr = (struct arg*)ptr;
    curr->ret = rec(curr->s, curr->f, curr->depth);
}
int
quit(void) {
    int i;
    for(i=0; i<npages; ++i) {
        struct node* curr = pages[i]->in;
        struct node* next;
        while(curr != NULL) {
            next = curr->next;
            free(curr);
            curr = next;
        }
        free(pages[i]);
    }
    free(pages);
    return 0;
}

double 
seq(int s, int f) {
    double sum;
    sum = 0;
    int w;
    for (w=s; w<=f; w++) {
        struct webpage* curr = pages[w];
        double ser;
        ser = 0;
        struct node* currn = curr->in;
        while (currn != NULL) {
            struct webpage* n = currn->data;
            ser = ser + ((n->oldrank)/(n->outcount));
            currn = currn->next;
        }

        double temp = (((1-D)/npages) + (D*ser)); 
        sum = sum + pow((temp - curr->oldrank), 2);
        curr->oldrank = curr->rank;
        curr->rank = temp;
    }
    return sum;
}


double 
rec(int s, int f, int depth) {
    if (depth == maxdepth ) {
        return seq(s, f);
    } else {
        if (s < f){
            int m;
            m = (s+f)/2;
            struct arg l;
            struct arg r;
            l.s = s;
            l.f = m;
            l.depth = depth+1;
            r.s = m+1;
            r.f = f;
            r.depth = depth+1;
            pthread_t left, right;
            pthread_create(&left, NULL, threadf, (void*) &l);
            pthread_create(&right, NULL, threadf, (void*) &r);
            pthread_join(left, NULL);
            pthread_join(right, NULL);
            double res;
            res = l.ret + r.ret;
            return res;
        } 
        return seq(s, f);

    }
}

int
main(void) {
    if (scanf("%d", &ncores) != 1) {
        printf("error1\n");
        return quit();
    }
    if (scanf(" %d", &npages) != 1) {
        printf("error2\n");
        return quit();
    }
    int i;
    char n[20];
    pages = (struct webpage**)malloc(npages*sizeof(struct webpage*));
    for (i=0; i<npages; ++i) {

        if (scanf(" %c", n) != 1 || has(n, i) != NULL) {
            printf("error3\n");
            return quit();
        }
        pages[i] = (struct webpage*)malloc(sizeof(struct webpage));
        struct webpage* curr = pages[i];
        strcpy(curr->name, n);
        curr->oldrank = 1/npages;
        curr->in = NULL;
        curr->incount = 0;
        curr->outcount = 0;

    }

    int nedges;
    if (scanf(" %d", &nedges) != 1) {
        printf("error4\n");
        return quit();
    }
    for (i=0; i<nedges; ++i) {
        char f[20], t[20];
        if (scanf(" %s %s", f, t) != 2) {
            printf("error5\n"); 
            return quit();
        }
        char from[20], to[20];
        strcpy(from, f);
        strcpy(to, t);
        struct webpage* s = has(from, npages);
        struct webpage* e = has(to, npages);
        if (s == NULL || e == NULL) {
            printf("error6\n");
            return quit();
        }
        s->outcount++;
        e->incount++;
        struct node* new;
        new = (struct node*)malloc(sizeof(struct node));
        new->data = s;
        if (e->in == NULL) {
            e->in = new;
        } else {
            new->next = e->in;
            e->in = new;
        }
    }
    maxdepth = (log(ncores))/(log(2)) + 0.5;
    while (sqrt(rec(0, npages-1, 0)) > EPSILON){
        int c;
        for (c=0; c<npages; ++c) {
            struct webpage* curr = pages[c];
            curr->oldrank = curr->rank;
        }
    }
    int z;
    for (z=0; z<npages; ++z) {
        struct webpage* curr = pages[z];
        printf("%s %.4lf\n", curr->name, curr->rank);
    }

    return quit();

}

sample input:

8
4
a
b
c
d
4
a a

output:

error6
Milk
  • 647
  • 1
  • 6
  • 18
  • To increase the chance that someone answers you, you might want to condense the code in such a way that only the relevant part is left and the problem still occurs. – gspr May 13 '11 at 10:32
  • yes its a bit much to digest isn't it. Maybe I'll repost with a bit more condensed code and when the americas are awake. cheers! – Milk May 13 '11 at 11:30
  • Americas have had their coffee now, but it's not helping much. If you could pastebin some sample input that would help. The only multithreading I see is in `rec`, where you create a pair of threads and then join them (correct?) If you create the thread for `left`, then `join` on that, then create `right` and `join` on it, you'll remove any question of the error being in your parallel part. Might help narrow it down. (EDIT: shouldn't say "in your parallel part," more "in the parallelism.") – jtniehof May 13 '11 at 14:30
  • To clarify: the only place pthreads is used in this program is in the `rec()` function. The 'kickoff' for the recursive (indirectly via threads) `rec()` function is the `while` loop at line 195. But you see the error thrown *before* you get to that `while` loop (and therefore before any threads are spun up)? And you only see the error if the while loop is there? Even though the error occurs before the while loop is hit? – Michael Burr May 13 '11 at 16:16
  • As jtniehof mentioned, sample data that repros the problem would be helpful. – Michael Burr May 14 '11 at 00:23
  • yep so the recursive function is called on ln 194, but when i run the sample input (see OP), with breakpoints at 194-201, it still returns error6 without even having run over the added code. If I remove lines 196-199, it runs fine for that specific case. – Milk May 14 '11 at 01:58
  • and just to clarify, the recursive function spawns 2 threads that work on each half of the sub-array (specified by start and finish bound parameters) until a single element is left, or the maxdepth is reached - in which case it will switch to iterative/sequential method. – Milk May 14 '11 at 02:04

1 Answers1

1
char n[20];
[ ... ]
    if (scanf(" %c", n) != 1 || has(n, i) != NULL) {

The %c format specifier for scanf reads only one character. So n consists of the character you typed plus whatever garbage happened to be on the stack before you called scanf(). If you use %s, it will consist of the character you typed plus a NUL byte for terminating the string plus garbage you don't care about.

Also note that you can limit the amount of characters scanf() reads by using a width specifier, as in:

scanf("%19s", n)

(meaning: read 19 characters and add a NUL byte). Otherwise, your buffer could overflow, possibly leading to arbitrary code execution (or at least a crash when used by non-malicious users).

ninjalj
  • 42,493
  • 9
  • 106
  • 148
  • thanks a lot mate I never would've found that, width specifier will come in very handy aswell. Cheers! – Milk May 15 '11 at 01:07