#include <stdio.h>
#include <stdlib.h>     /* system(3) */
#include <unistd.h>     /* sleep(3), unlink(2) */

void dump_log(int size){
    char cmd[1024];
    snprintf(cmd, sizeof(cmd)/sizeof(cmd[0]), "dd if=/dev/zero of=from.bin bs=1024 count=%d", size);
    int ret = system(cmd);
    if (ret<0){
        perror("system");
    }

}

int main(){    
    const char *filepath = "from.bin";

    while(1){
        dump_log(1024*100);
        sleep(10);
        unlink(filepath);
    }

    return 0;
}

strace -T ./a.out shows this:

unlink("from.bin")                      = 0 <0.019916>

Unlinking a 100 MB file takes about 19 ms. What happens when a file is unlinked? Why is it so slow?

System information: Linux 3.13.0-57-generic, Ubuntu 14.04.2 LTS, ext4

    This is pretty close to one revolution of the disk, which is also the time a write barrier takes. So I'll bet that you're using a rotating disk and you have write barriers enabled. – David Schwartz Jul 16 '15 at 08:15
  • http://pubs.opengroup.org/onlinepubs/9699919799/functions/unlink.html – gengisdave Jul 16 '15 at 08:46
  • @DavidSchwartz It seems to have nothing to do with write barriers; enabling/disabling them doesn't make a big difference. – zzn Jul 18 '15 at 07:53

1 Answer


If you unlink(2) the last link of a huge file, the kernel doesn't release the inode until all of its block pointers have been returned to the free block list. You can check the difference in time by making a second hard link first: the unlink then only removes the name you are deleting, without freeing any blocks. By specification, the code that releases all those blocks runs in the context of your process (in kernel mode, not user mode; there is no dedicated kernel process to return the blocks to the free list), and the system call does not return until every block has been released.
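For instance, here is a minimal sketch (not from the original answer; big.bin is a placeholder name for a large file created beforehand, e.g. with dd) showing the asymmetry: the first unlink(2) only drops a directory entry, while the last one has to free all the blocks.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
    /* Give the inode a second name, so its link count becomes 2. */
    if (link("big.bin", "big.bin.keep") < 0) {
        perror("link");
        return 1;
    }

    /* Removes a directory entry and decrements the link count only;
     * no data blocks are freed, so this returns almost immediately. */
    if (unlink("big.bin") < 0) {
        perror("unlink big.bin");
        return 1;
    }

    /* Removing the last remaining link forces the kernel to walk the
     * file's block/extent metadata and return every block to the free
     * list, so this is the slow call. */
    if (unlink("big.bin.keep") < 0) {
        perror("unlink big.bin.keep");
        return 1;
    }
    return 0;
}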

Example:

The following code will illustrate this:

#include <errno.h>
#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>     /* unlink(2) */

#define D(X) "%s:%d:%s: " X, __FILE__, __LINE__, __func__

int main(int argc, char **argv)
{
    int opt, i;

    while ((opt = getopt(argc, argv, "")) != -1) { /* getopt(3) returns -1 at the end of the options */
        switch (opt) {
        } /* switch */
    } /* while */

    argc -= optind; argv += optind;

    for (i = 0; i < argc; i++) {
        struct timespec now, then; 
        int res;

        res = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
        if (res < 0) {
            fprintf(stderr,
                    D("ERROR: %s (errno = %d)\n"),
                    strerror(errno), errno);
            exit(EXIT_FAILURE);
        } /* if */

        res = unlink(argv[i]);
        if (res < 0) {
            fprintf(stderr, D("ERROR: %s: %s (errno = %d)\n"),
                    argv[i], strerror(errno), errno);
            continue;
        } /* if */

        res = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &then);
        if (res < 0) {
            fprintf(stderr, D("ERROR: %s (errno = %d)\n"),
                    strerror(errno), errno);
            exit(EXIT_FAILURE);
        } /* if */
        then.tv_nsec -= now.tv_nsec;
        then.tv_sec -= now.tv_sec;
        if (then.tv_nsec < 0) {
            then.tv_nsec += 1000000000L;
            then.tv_sec--;
        } /* if */
        printf(D("%s: %ld.%09ld s. (CPU time)\n"),
                argv[i], (long)then.tv_sec, (long)then.tv_nsec);
    } /* for */
    exit(EXIT_SUCCESS);
} /* main */

Then I construct a 2 GB file with this command:

$ yes | dd of=pepe bs=1M iflag=fullblock count=2048

Then I make 32 links to this file:

i=0
while [ "$i" -lt 32 ]
do ln pepe pepe$i
   i=$(expr $i + 1)
done

Then I compile the program above as unlink and run it on all the links (it reports only CPU time):

$ ./unlink pepe[0-9]* pepe
unlink.c:47:main: pepe0: 0.000074272 s. (CPU time)
unlink.c:47:main: pepe1: 0.000022722 s. (CPU time)
unlink.c:47:main: pepe10: 0.000015034 s. (CPU time)
unlink.c:47:main: pepe11: 0.000013254 s. (CPU time)
unlink.c:47:main: pepe12: 0.000012827 s. (CPU time)
unlink.c:47:main: pepe13: 0.000012462 s. (CPU time)
unlink.c:47:main: pepe14: 0.000012241 s. (CPU time)
unlink.c:47:main: pepe15: 0.000012753 s. (CPU time)
unlink.c:47:main: pepe16: 0.000012517 s. (CPU time)
unlink.c:47:main: pepe17: 0.000012245 s. (CPU time)
unlink.c:47:main: pepe18: 0.000013104 s. (CPU time)
unlink.c:47:main: pepe19: 0.000012491 s. (CPU time)
unlink.c:47:main: pepe2: 0.000012662 s. (CPU time)
unlink.c:47:main: pepe20: 0.000012606 s. (CPU time)
unlink.c:47:main: pepe21: 0.000012803 s. (CPU time)
unlink.c:47:main: pepe22: 0.000012597 s. (CPU time)
unlink.c:47:main: pepe23: 0.000012391 s. (CPU time)
unlink.c:47:main: pepe24: 0.000012582 s. (CPU time)
unlink.c:47:main: pepe25: 0.000012557 s. (CPU time)
unlink.c:47:main: pepe26: 0.000012386 s. (CPU time)
unlink.c:47:main: pepe27: 0.000012261 s. (CPU time)
unlink.c:47:main: pepe28: 0.000012245 s. (CPU time)
unlink.c:47:main: pepe29: 0.000012351 s. (CPU time)
unlink.c:47:main: pepe3: 0.000011940 s. (CPU time)
unlink.c:47:main: pepe30: 0.000013003 s. (CPU time)
unlink.c:47:main: pepe31: 0.000012231 s. (CPU time)
unlink.c:47:main: pepe4: 0.000012777 s. (CPU time)
unlink.c:47:main: pepe5: 0.000012546 s. (CPU time)
unlink.c:47:main: pepe6: 0.000012461 s. (CPU time)
unlink.c:47:main: pepe7: 0.000013129 s. (CPU time)
unlink.c:47:main: pepe8: 0.000012311 s. (CPU time)
unlink.c:47:main: pepe9: 0.000012446 s. (CPU time)
unlink.c:47:main: pepe: 0.195457587 s. (CPU time)

As you can see, removing each of the extra links takes around 12 microseconds of CPU time, while removing the last link (pepe) takes almost two tenths of a second.
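If you want to know in advance whether a given unlink(2) will be the expensive one, you can inspect the inode's link count first. A small sketch (not from the original answer; the file name is taken from the command line): the blocks are only freed when the link count drops to zero and no process still has the file open.

#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
    struct stat st;

    if (argc < 2) {
        fprintf(stderr, "usage: %s file\n", argv[0]);
        return EXIT_FAILURE;
    }
    if (stat(argv[1], &st) < 0) {
        perror("stat");
        return EXIT_FAILURE;
    }

    /* st_nlink == 1 means the next unlink(2) of this name frees the
     * file's blocks (once no process keeps it open); a higher count
     * means only a directory entry goes away. */
    printf("%s: %lu link(s), %lld blocks allocated\n", argv[1],
           (unsigned long)st.st_nlink, (long long)st.st_blocks);
    return EXIT_SUCCESS;
}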

  • At least we have extent-based filesystems now, like ext4 (with extents enabled), XFS, and almost everything newer than ext2. In ext2, there was an actual block bitmap, and every block used by a file had to be listed. Now, the info of what blocks are used by an inode is stored much more compactly, as start+length ranges. Before extents, large files often took a very noticeable amount of time to `rm(1)`. (I use XFS, where unlink is fast even for large files (unless hugely fragmented), even on rotational media.) – Peter Cordes Jul 17 '15 at 11:38
  • @PeterCordes, I don't fully understand what you mean. I have tried to illustrate the phenomenon on a four-core, 8 GB RAM machine with an ext4 filesystem. At least I think the difference between all the **unlink(2)**s and the last one is significant enough to illustrate that there's nothing but the process that executes the **unlink(2)** system call to arrange for all blocks in the file to be returned to the free list (whatever form it takes in the filesystem). – Luis Colorado Jul 17 '15 at 11:42
  • I'm saying that things used to be worse for large files, before we had filesystems that were good at them. Removing the last link is going to be slower than any of the others, because they just decrement the ref count, but it's milliseconds, not seconds. (Your 0.2s time is surprisingly high. Is your ext4 FS mounted with extents enabled?) – Peter Cordes Jul 17 '15 at 11:57
  • Oops... sorry, I misunderstood you :) I don't know whether I have extents active or not. I have made another test, erasing a sparse file (a file made with `dd if=/dev/zero of=pepe bs=1k count=1 seek=128000000`) that occupies only one block on disk, with results of the same magnitude. – Luis Colorado Jul 17 '15 at 11:59
  • @PeterCordes Extents are enabled if it's ext4, and disabled if it's ext3 but mounted as ext4. Am I right? – zzn Jul 18 '15 at 08:35
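As a side note on the extents discussion above, here is a small, Linux-specific sketch (not from the thread; the file name comes from the command line) that reports whether a file's inode uses extents, via the same FS_IOC_GETFLAGS ioctl that lsattr(1) relies on.

#include <fcntl.h>
#include <linux/fs.h>   /* FS_IOC_GETFLAGS, FS_EXTENT_FL (Linux-only) */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd, flags = 0;

    if (argc < 2) {
        fprintf(stderr, "usage: %s file\n", argv[0]);
        return 1;
    }
    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
        perror("ioctl(FS_IOC_GETFLAGS)");
        close(fd);
        return 1;
    }
    /* Extent-mapped inodes (ext4 with extents, for example) carry the
     * FS_EXTENT_FL flag; old-style ext2/ext3 indirect-block files don't. */
    printf("%s: %s\n", argv[1],
           (flags & FS_EXTENT_FL) ? "uses extents" : "uses indirect blocks");
    close(fd);
    return 0;
}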