9

I want to do some bitwise operations on files in Linux (for example, XOR two files), and I have no idea how to do that. Is there a command for this?

any help will be appreciated.

Sina
  • 1,632
  • 3
  • 15
  • 21

4 Answers4

9

You can map the file with mmap, apply bitwise operations on the mapped memory, and close it.

Alternatively, reading chunks into a buffer, applying the operation on the buffer, and writing out the buffer works too.

Here's an example (in C rather than C++; everything except the error handling is the same) that inverts all bits:

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
/*
 * Invert every bit of the file named by argv[1], in place.
 *
 * The file is opened read/write and mapped with MAP_SHARED, so stores into
 * the mapping are carried through to the file itself.
 *
 * Exit status: 0 on success, 1 on wrong usage, 2 if the file cannot be
 * opened, 3 if its size cannot be determined, 4 if it cannot be mapped.
 */
int main(int argc, char* argv[]) {
    /* Usage errors are diagnostics, so they go to stderr. */
    if (argc != 2) {fprintf(stderr, "Usage: %s file\n", argv[0]); exit(1);}

    int fd = open(argv[1], O_RDWR);
    if (fd == -1) {perror("Error opening file for writing"); exit(2);}

    struct stat st;
    if (fstat(fd, &st) == -1) {perror("Can't determine file size"); exit(3);}

    /* mmap() rejects a zero length with EINVAL. An empty file has no bits
       to invert, so treat it as a successful no-op instead of an error. */
    if (st.st_size == 0) {
        close(fd);
        return 0;
    }

    /* unsigned char: the contents are raw bytes, not text, and the
       signedness of plain char is implementation-defined. */
    unsigned char* file = mmap(NULL, (size_t)st.st_size, PROT_READ | PROT_WRITE,
                               MAP_SHARED, fd, 0);
    if (file == MAP_FAILED) {
        perror("Can't map file");
        exit(4);
    }

    for (off_t i = 0; i < st.st_size; i++) {
        /* Binary operation goes here.
        If speed is an issue, you may want to do it on a 32 or 64 bit value at
        once, and handle any remaining bytes in special code. */
        file[i] = (unsigned char)~file[i];
    }

    munmap(file, (size_t)st.st_size);
    close(fd);
    return 0;
}
phihag
  • 278,196
  • 72
  • 453
  • 469
  • Thank you very much for your answer . Would you mind explaining the first solution a little more? – Sina Jul 31 '11 at 11:22
  • @Sina I added an example program (I don't trust my C++ skills, so I wrote it in C - you may want to C++-ify the error handling). Does this help? – phihag Jul 31 '11 at 11:35
  • I'm so sorry but , how can I do it on a 32 or 64 bit value at once? – Sina Jul 31 '11 at 13:36
  • 1
    @Sina First, calculate how many entries you have (`num = st.st_size/sizeof(entry)`, where `entry` is `int32_t` or so). Cast `file` to a pointer to the desired type, and replace `st.st_size` with `num` in the for loop. Don't forget to handle the remaining `st.st_size - num * sizeof(entry)` bytes! – phihag Jul 31 '11 at 13:47
  • Thanks a lot again , is there a way to get the HEX value we want to xor with the file as an argument ? how? and can we write sth in head and tail of the file ? – Sina Jul 31 '11 at 14:26
  • 1
    @Sina You can use [strtol](http://www.cplusplus.com/reference/clibrary/cstdlib/strtol/) to convert a hex string argument to a number. There's [no way](http://stackoverflow.com/questions/6537727/efficient-way-to-change-the-header-of-a-file-in-python/6537817#6537817) to write the head of a file without completely rewriting it. You [can append to a memory-mapped file](http://stackoverflow.com/questions/4460507/appending-to-a-memory-mapped-file) . – phihag Jul 31 '11 at 14:36
2

A quick internet search revealed Monolith, a dedicated open-source program for the purpose of XORing two files. I found it because Bruce Schneier blogged about it, and the purposes of this seem to be of legal nature.

Kerrek SB
  • 464,522
  • 92
  • 875
  • 1,084
2

Thanks to "phihag", this code is for doing binary operations on 2 files.
Ex.1: You have two files and want to compare those two, so you do a binary XOR on those.
Ex.2: You have downloaded a file with jdownloader or something similar, and you moved the unfinished download to another folder; the download manager then continues with the unfinished parts and creates another file. So you have two separate files that complete each other. If you do a binary OR on these two files, you get a complete file.

WARNING: The larger file (File2 if both have the same size) will be overwritten with the operation result.

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <string.h>

/*
 * Combine the first Len bytes of Src into Dst with the bitwise operator Op:
 * Dst[i] = Dst[i] Op Src[i], where Op is '|', '&' or '^'.
 *
 * Returns 0 on success, or -1 (touching nothing) if Op is not one of the
 * supported operators. With Len == 0 neither pointer is dereferenced.
 */
static int combine_bytes(unsigned char *Dst, const unsigned char *Src, size_t Len, char Op)
{
    size_t i;

    switch (Op)
    {
        case '|':
            for (i = 0; i < Len; i++)
                Dst[i] |= Src[i];
            return 0;
        case '&':
            for (i = 0; i < Len; i++)
                Dst[i] &= Src[i];
            return 0;
        case '^':
            for (i = 0; i < Len; i++)
                Dst[i] ^= Src[i];
            return 0;
        default:
            return -1;
    }
}

/*
 * Usage: prog File1 File2 Operator
 *
 * Applies Operator ('|', '&' or '^'; only its first character is examined)
 * byte by byte over the common prefix of the two files. The result is
 * written in place into the larger file (File2 when both have the same
 * size), which is then renamed to "<name>-Mapped".
 *
 * Exit status: 0 on success, 1 wrong usage, 2 open failed, 3 fstat failed,
 * 4 mmap failed, 5 unknown operator, 6 rename failed.
 */
int main(int argc, char* argv[])
{
    if (argc != 4)
    {
        fprintf(stderr, "Usage: %s File1 File2 Operator\n", argv[0]);
        exit(1);
    }

    //Opening both files and reading their sizes
    int FP1 = open(argv[1], O_RDWR);
    if (FP1 == -1)
    {
        perror("Error opening file1 for writing");
        exit(2);
    }

    struct stat St1;
    if (fstat(FP1, &St1) == -1)
    {
        perror("Can't determine file1 size");
        exit(3);
    }

    int FP2 = open(argv[2], O_RDWR);
    if (FP2 == -1)
    {
        perror("Error opening file2 for writing");
        exit(2);
    }

    struct stat St2;
    if (fstat(FP2, &St2) == -1)
    {
        perror("Can't determine file2 size");
        exit(3);
    }
    //======================

    //The larger file receives the result; File2 wins a tie.
    int Dst2 = (St1.st_size <= St2.st_size);
    const char *DstPath = Dst2 ? argv[2] : argv[1];
    //Only the common prefix is ever touched, so that is all we map.
    size_t Len = (size_t) (Dst2 ? St1.st_size : St2.st_size);

    //Mapping. mmap() rejects a zero length with EINVAL, so an empty input
    //file simply means there is nothing to map or combine.
    unsigned char *File1 = NULL, *File2 = NULL;
    if (Len > 0)
    {
        File1 = mmap(NULL, Len, PROT_READ | PROT_WRITE, MAP_SHARED, FP1, 0);
        if (File1 == MAP_FAILED)
        {
            perror("Can't map file1");
            exit(4);
        }

        File2 = mmap(NULL, Len, PROT_READ | PROT_WRITE, MAP_SHARED, FP2, 0);
        if (File2 == MAP_FAILED)
        {
            perror("Can't map file2");
            exit(4);
        }
    }
    //======================

    //Binary operations
    if (combine_bytes(Dst2 ? File2 : File1, Dst2 ? File1 : File2, Len, *(argv[3])) == -1)
    {
        //Not a system-call failure, so errno is meaningless here: no perror().
        fprintf(stderr, "Unknown binary operator\n");
        exit(5);
    }
    //======================

    if (Len > 0)
    {
        munmap(File1, Len);
        munmap(File2, Len);
    }
    close(FP1);
    close(FP2);

    //Renaming the changed file and make output.
    //rename() is used instead of building an "mv" shell command: there is no
    //fixed-size buffer to overflow, file names containing quotes or other
    //shell metacharacters are handled literally, and a failure is reported.
    static const char Suffix[] = "-Mapped";
    size_t PathLen = strlen(DstPath);
    char *NewPath = malloc(PathLen + sizeof Suffix);
    if (NewPath == NULL)
    {
        perror("Unable to rename the new file.");
        exit(6);
    }
    memcpy(NewPath, DstPath, PathLen);
    memcpy(NewPath + PathLen, Suffix, sizeof Suffix); //sizeof includes the '\0'

    if (rename(DstPath, NewPath) == -1)
    {
        perror("Unable to rename the new file.");
        free(NewPath);
        exit(6);
    }
    printf("%s is mapped.\n", DstPath);
    free(NewPath);
    //======================

    return 0;
}
AshkanVZ
  • 681
  • 6
  • 14
0

For those who prefer a Python script:

#!/usr/bin/env python3

import binascii
import sys

blocksize = 4096


def xor_files(path1, path2, out_path, chunk_size=blocksize):
    """XOR two files byte by byte and write the result to ``out_path``.

    The inputs are read in chunks of ``chunk_size`` bytes. If one input is
    shorter than the other, a warning is written to stderr, the output is
    truncated to the length of the shorter input, and processing stops.

    All three files are closed on return, including when an error occurs.
    """
    with open(path1, 'rb') as input1, \
            open(path2, 'rb') as input2, \
            open(out_path, 'wb') as output:
        while True:
            block1 = input1.read(chunk_size)
            block2 = input2.read(chunk_size)
            if not block1 and not block2:
                break  # reached EOF in both files
            premature_eof = len(block1) != len(block2)
            if premature_eof:
                sys.stderr.write('Premature EOF, truncating to shorter file\n')
            # apply logical operator: xor
            # zip() stops at the shorter block, which performs the truncation
            # and also copes with one block being empty.
            output.write(bytes(a ^ b for a, b in zip(block1, block2)))
            if premature_eof:
                break  # one input is exhausted; nothing left to combine


if __name__ == '__main__':
    if len(sys.argv) != 4:
        sys.stderr.write('Usage: %s input1 input2 output\n' % sys.argv[0])
        sys.exit(1)
    xor_files(sys.argv[1], sys.argv[2], sys.argv[3])

With files of different lengths, it warns and stops. In some applications, it may be preferable to pad the shorter input with zero bytes or to wrap back to the start of the input file. This can be achieved on the command line by concatenating the shorter file with itself or output from /dev/zero.

Joachim Wagner
  • 860
  • 7
  • 16