1

I have this code:

static void XMLCALL
hackHandler(void *data, const XML_Char *name, const XML_Char **attr)
{
SetPointers* sets = static_cast<SetPointers*>(data);
if (strcmp(name, "instruction") == 0 || strcmp(name, "load") == 0 ||
    strcmp(name, "modify") == 0||strcmp(name, "store") == 0) {
    long address(0);
    long page(0);
    int offset(0);
    long size(0);
    int i(0);
    for (i = 0; attr[i]; i += 2) {
        if (strcmp(attr[i], "address") == 0) {
            address = strtol(attr[i+1], NULL, 16);
            page = address >> 12;
            offset = address & 0xFFF;
            continue;
        }
        if (strcmp(attr[i], "size") == 0) {
            size = strtol(attr[i + 1], NULL, 16);
        }
    }
    map<long, bitset<4096> >::iterator itLocal;

    itLocal = sets->lCount->find(page);
    if (itLocal == sets->lCount->end()) {
        sets->lCount->insert(pair<long, bitset<4096> >
            (page, bitset<4096>()));
        itLocal = sets->lCount->find(page);
    }
    //now mark the bitmap
    for (i = 0; i < size; i++) {
        (itLocal->second)[i + offset] = 1;
    }

    if (strcmp(name, "instruction") == 0) {
        itLocal = sets->lCode->find(page);
        if (itLocal == sets->lCode->end()) {
            sets->lCode->insert(pair<long, bitset<4096> >
                (page, bitset<4096>()));
            itLocal = sets->lCode->find(page);
        }
        for (i = 0; i < size; i++) {
            (itLocal->second)[i + offset] = 1;
        }
    } else {
        itLocal = sets->lMemory->find(page);
        if (itLocal == sets->lMemory->end()) {
            sets->lMemory->insert(pair<long, bitset<4096> >
                (page, bitset<4096>()));
            itLocal = sets->lMemory->find(page);
        }
        for (i = 0; i < size; i++) {
            (itLocal->second)[i + offset] = 1;
        }
    }
}
}

This aims to mark a bitset, 4096 bits long, with a 1 when that byte of a page is accessed.

This code works well on my test machine, when I use about 1GB of XML to test. But when I run it on the full thing (220GB of XML) it gives a segmentation fault on:

 sets->lCode->insert(pair<long, bitset<4096> >
            (page, bitset<4096>()));

But it does this very early on in the run, so it's difficult to think this is a product of the size of the data. In any case I am have had no problem in analysing this larger data set using some very similar code (check my github repo at https://github.com/mcmenaminadrian - this project is memsize, but pagestat uses very siumilar code). The only differentiating factor with this code seems to be the use of bitset.

Can someone spot the error which has eluded me so far?

(The code is multithreaded - is bitset thread safe? Could this be a library issue - my test system is Mac OSX, but the "production" system is Linux - Ubuntu 12.04 LTS?)

adrianmcmenamin
  • 1,081
  • 1
  • 15
  • 44
  • 1
    is `i + offset` guaranteed to be less than `4096`? – R Sahu Apr 22 '14 at 16:18
  • That's the obvious question - and it should be. I guess there could be a bad piece of XML which is causing this though – adrianmcmenamin Apr 22 '14 at 16:19
  • I doubt that any common c++ container implementation is thread-safe in a way that it allows concurrent read/write operations. See docs for [g++](http://gcc.gnu.org/onlinedocs/libstdc++/manual/using_concurrency.html) and [msvc](http://msdn.microsoft.com/en-us/library/c9ceah3b.aspx) – Stephan Apr 22 '14 at 16:19
  • There shouldn't be any of that - each thread deals with it's own XML fragment – adrianmcmenamin Apr 22 '14 at 16:21
  • @RSahu - seems you were right, when I use 'set' - which checks bounds, the code throws an out of bounds exception. If you would like to make your comment an answer I will accept it. – adrianmcmenamin Apr 22 '14 at 16:26

1 Answers1

1

There are no checks to make sure that i + offset is less than 4096. That could be the source of the problem.

R Sahu
  • 204,454
  • 14
  • 159
  • 270