After trying warc2warc without success, I wrote the following small Python script to accomplish this task. It seems to work reasonably well!
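Usage: python warcgz-to-warc compressed.warc.gz [-o output.warc]
(The -o option is optional; without it, the output path is the input path with its last extension removed.)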
import argparse
import gzip
import shutil
import os
def convert_warc(input_file_path, output_file_path=None):
    """Decompress a gzip-compressed WARC file into a plain WARC file.

    Args:
        input_file_path: Path to the gzip-compressed input file.
        output_file_path: Path to write the decompressed data to. When
            ``None``, the input path with its last extension removed is
            used (``foo.warc.gz`` -> ``foo.warc``).

    Raises:
        ValueError: If the output path resolves to the same file as the
            input path (for example, an input without any extension and
            no explicit output path).
        OSError: If either file cannot be opened, or the input cannot be
            read as gzip data. The output file may already have been
            created or partially written when this happens.
    """
    if output_file_path is None:
        output_file_path = os.path.splitext(input_file_path)[0]

    # Opening the output in 'wb' mode truncates it immediately. If it is the
    # same file as the input, that would destroy the data before it is read,
    # so refuse up front instead of silently wiping the archive.
    if os.path.realpath(output_file_path) == os.path.realpath(input_file_path):
        raise ValueError(
            'Output path %r is the same file as the input path %r; '
            'pass an explicit, different output path.'
            % (output_file_path, input_file_path)
        )

    # Stream the decompressed bytes so large archives are never held fully
    # in memory.
    with gzip.open(input_file_path, 'rb') as f_in, \
            open(output_file_path, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)
if __name__ == '__main__':
    # Command-line entry point: one positional input path and an optional
    # -o/--output_file_path, both handed straight to convert_warc.
    cli = argparse.ArgumentParser(
        description='Convert a WARC file compressed with gzip to a WARC file.',
    )
    cli.add_argument(
        'input_file_path',
        help='The path to the input WARC file.',
    )
    cli.add_argument(
        '-o',
        '--output_file_path',
        help=(
            'The path to the output WARC file. If not provided, the output '
            'file will have the same name as the input file with the ".gz" '
            'extension removed.'
        ),
    )
    options = cli.parse_args()
    convert_warc(options.input_file_path, options.output_file_path)