0

I'm writing a web crawler and want to spawn a number of threads, each with its own libev event loop, to monitor network activity. Here is my code so far:

/*
 * libev I/O callback attached to the FIFO watcher.
 *
 * w->data is read as the GlobalInfo that owns the watcher. Each invocation
 * calls add_url() at most MAX_LOAD times, and never more often than the
 * number of free connection slots (MAX_CONNECTIONS minus
 * g->concurrent_connections) observed on entry.
 *
 * The counter is a local variable that is advanced on every iteration: the
 * previous version assigned an `i` that was not declared in this function
 * and never incremented it, so MAX_LOAD was not enforced and the loop could
 * only end if add_url() changed g->concurrent_connections.
 */
static void my_cb(EV_P_ struct ev_io *w, int revents)
{
  GlobalInfo *g = (GlobalInfo *)w->data;
  int budget = MAX_CONNECTIONS - g->concurrent_connections;

  (void)revents;

  /* Cap the batch so a single callback cannot queue more than MAX_LOAD. */
  if (budget > MAX_LOAD)
  {
    budget = MAX_LOAD;
  }

  /* budget <= 0 means every slot is in use; the loop body is then skipped. */
  for (int i = 0; i < budget; i++)
  {
    add_url(g);
  }
}

/*
 * Opens "myfile" non-blocking, wraps the descriptor in g->input and starts
 * the read watcher g->fifo_event (callback my_cb) on g->loop.
 *
 * g->loop must already be set by the caller. Returns 0 on success; if
 * open() or fdopen() fails the error is reported with perror() and the
 * process is terminated with exit(1).
 */
static int init(GlobalInfo *g)
{
  int fd = open("myfile", O_RDWR | O_NONBLOCK, 0);
  if (fd == -1) {
    perror("open");
    exit(1);
  }

  g->input = fdopen(fd, "r");
  if (g->input == NULL) {
    /* Same policy as the open() failure above: report and bail out. */
    perror("fdopen");
    exit(1);
  }

  ev_io_init(&g->fifo_event, my_cb, fd, EV_READ);
  ev_io_start(g->loop, &g->fifo_event);

  /* The function is declared int, so it must return a value on success. */
  return 0;
}

void *crawler(void *threadid)
{
  GlobalInfo g;

  long tid;
  tid = (long)threadid;
  printf("Initalised thread #%ld!\n", tid);

  memset(&g, 0, sizeof(GlobalInfo));
  g.loop = ev_loop_new(EVFLAG_AUTO);

  g.done = 0;
  g.downloaded = 0;
  g.head = 0;
  g.added = 0;
  g.concurrent_connections = 0;

  init(&g);
  g.multi = curl_multi_init();

  ev_timer_init(&g.timer_event, timer_cb, 0., 0.);
  g.timer_event.data = &g;
  g.fifo_event.data = &g;
  curl_multi_setopt(g.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
  curl_multi_setopt(g.multi, CURLMOPT_SOCKETDATA, &g);
  curl_multi_setopt(g.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
  curl_multi_setopt(g.multi, CURLMOPT_TIMERDATA, &g);

  /* we don't call any curl_multi_socket*() function yet as we have no handles
     added! */

  ev_loop(g.loop, 0);
  curl_multi_cleanup(g.multi);
  pthread_exit(NULL);
}

/*
 * Program entry point: starts MySQL, spawns NUM_THREADS crawler threads,
 * waits for all of them to finish and then shuts MySQL down.
 *
 * The workers are joined rather than leaving main via pthread_exit():
 * pthread_exit() never returns, so the MySQL shutdown calls that followed
 * it could never run.
 *
 * Returns 0 after all threads have been joined; exits with status -1 if a
 * thread cannot be created.
 */
int main(int argc, char **argv)
{
  (void)argc;
  (void)argv;

  mysql_start();

  pthread_t threads[NUM_THREADS];
  long t;

  for (t = 0; t < NUM_THREADS; t++) {
    int rc = pthread_create(&threads[t], NULL, crawler, (void *)t);
    if (rc) {
      printf("ERROR; return code from pthread_create() is %d\n", rc);
      exit(-1);
    }
  }

  /* Block until every crawler has returned so cleanup below is safe. */
  for (t = 0; t < NUM_THREADS; t++) {
    int rc = pthread_join(threads[t], NULL);
    if (rc) {
      printf("ERROR; return code from pthread_join() is %d\n", rc);
    }
  }

  mysql_stop();
  mysql_library_end();
  return 0;
}

As you can see I attempt to make a new event loop in each thread with ev_loop_new. However, the program aborts with the following backtrace:

#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007ffff75bb859 in __GI_abort () at abort.c:79
#2  0x00007ffff76263ee in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7750285 "%s\n") at ../sysdeps/posix/libc_fatal.c:155
#3  0x00007ffff762e47c in malloc_printerr (str=str@entry=0x7ffff7752690 "double free or corruption (!prev)") at malloc.c:5347
#4  0x00007ffff763012c in _int_free (av=0x7ffff7781b80 <main_arena>, p=0x5555569ff1e0, have_lock=<optimized out>) at malloc.c:4317
#5  0x00007ffff79518c4 in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#6  0x00007ffff795215a in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#7  0x00007ffff78ffd0e in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#8  0x00007ffff78fffa5 in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#9  0x00007ffff7900155 in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#10 0x00007ffff7903795 in ?? () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#11 0x00007ffff7905804 in mysql_real_query_nonblocking () from /lib/x86_64-linux-gnu/libmysqlclient.so.21
#12 0x0000555555559a4e in add_url (g=0x7ffff6e71e50) at threads.c:1202
#13 0x000055555555a69b in my_cb (loop=0x7ffff0000f70, w=0x7ffff6e71e58, revents=1) at threads.c:1457
#14 0x00007ffff7810bc3 in ev_invoke_pending () from /lib/x86_64-linux-gnu/libev.so.4
#15 0x00007ffff7814b93 in ev_run () from /lib/x86_64-linux-gnu/libev.so.4
#16 0x0000555555556a73 in ev_loop (loop=0x7ffff0000f70, flags=0) at /usr/include/ev.h:842
#17 0x000055555555a990 in crawler (threadid=0x0) at threads.c:1517
#18 0x00007ffff78b1609 in start_thread (arg=<optimized out>) at pthread_create.c:477
#19 0x00007ffff76b8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) q

As you can see, the offending line appears to be `ev_loop(g.loop, 0);`, which leads to the abort. I thought I was doing the right thing by making a new event loop in each thread with `ev_loop_new`.

What am I doing wrong?

John Bollinger
  • 160,171
  • 8
  • 81
  • 157
Yaakov Roth
  • 101
  • 1
  • "*As you can see the offending line is `ev_loop(g.loop, 0);`*" -- yes and no. Any and everything that happens in libev or any callback it calls will have `ev_loop()` in its backtrace. Since it seems to successfully be calling one of your callbacks, it is very likely that the problem is in the callback, or in one of the functions it calls. As a guess based primarily on the backtrace, I suspect that there is something wrong with the way your `add_url()` function is trying to use `mysql_real_query_nonblocking()`. – John Bollinger Nov 30 '20 at 02:33
  • @JohnBollinger I only get the problem if `NUM_THREADS` is greater than 1. `mysql_real_query_nonblocking()` causes no problems in serial versions of my program or when `NUM_THREADS` = 1. – Yaakov Roth Nov 30 '20 at 03:21
  • And? How does that tend to implicate your use of libev more than your use of libmysqlclient, or more than issues in your callback that are not directly related to either library? I'm pointing the finger at `add_url()` because that's the one of your own functions closest to the point where the error was raised. – John Bollinger Nov 30 '20 at 03:32
  • And also because the error is related to freeing dynamically-allocated memory, and your `crawler()` function does not appear to allocate any memory, or at least none that your callbacks could touch. – John Bollinger Nov 30 '20 at 03:38

0 Answers