I looked all over the internet for threads about the same error (complaints about missing SHA1 sums), but no one ever seemed to have a solution. So I decided to look at the metadata files manually and write a script that simply "fills in" the entries with missing SHA1 sums.
You can run the script by typing fix_metadata.py <your backup directory>.
- The original metadata file is kept as a timestamped backup. So far in my testing this seems to fix the problem, but I would still like to know why so many files had missing checksums to begin with.
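For context, each metadata file is a series of plain-text entries, one per backed-up path, and the script only keys on two things: the "File " line that starts an entry and the " SHA1Digest" line that a complete entry for a regular file carries. The sketch below is only meant to illustrate that shape; the values are placeholders and the surrounding fields in your own metadata may look different:

File documents/report.txt
 Type reg
 Size 4096
 SHA1Digest da39a3ee5e6b4b0d3255bfef95601890afd80709
 ...

Entries without such a SHA1Digest line are the ones the script fills in.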
#!/usr/bin/python3
# Usage: fix_metadata.py <backup folder>
import re, gzip, hashlib, os, time, sys


def digest(filename, chunk=1024**2):
    """Compute the SHA1 digest of a file, reading it in 1 MiB chunks."""
    h = hashlib.sha1()
    print("Calculating hash of:", filename, end='')
    count = 0
    with open(filename, 'rb') as f:
        while True:
            data = f.read(chunk)
            if not data:
                print()
                return h.hexdigest()
            h.update(data)
            count += 1
            if not count % 100:
                # Print a progress dot for every 100 MiB read.
                print('.', end='', flush=True)
def fix_entry(entry):
    """Insert a SHA1Digest line into the entry of an existing regular file.
    Returns True if a digest was added."""
    filename = re.sub('^File', '', entry[0]).strip()
    if not os.path.exists(filename):
        print("Could not find file:", filename)
        return False
    if not os.path.isfile(filename):
        # Directories, symlinks etc. never carry a digest; leave them alone.
        return False
    # Slot the digest line in where entries that already have one carry it.
    entry.insert(3, ' SHA1Digest ' + digest(filename) + '\n')
    return True


os.chdir(sys.argv[1])

for meta in os.listdir('rdiff-backup-data'):
    if not ('metadata' in meta and meta.endswith('.gz')):
        continue
    meta = os.path.join('rdiff-backup-data', meta)
    fixed = meta + '.fixed'
    print("Processing:", meta)

    entry = []    # Lines of the entry currently being read
    sha1 = False  # Did we find a SHA1 hash in the current entry?
    updated = 0   # Number of entries updated

    with gzip.open(meta, 'rt') as f, gzip.open(fixed, 'w') as output:
        for line in f:
            if line.startswith('File '):
                # A new entry begins: repair and flush the previous one.
                if entry and not sha1 and fix_entry(entry):
                    updated += 1
                output.write(''.join(entry).encode())
                entry = []
                sha1 = False
            if line.startswith(' SHA1Digest'):
                sha1 = True
            entry.append(line)
        # The final entry is not followed by another 'File ' line, so flush it here too.
        if entry:
            if not sha1 and fix_entry(entry):
                updated += 1
            output.write(''.join(entry).encode())

    if updated:
        print(updated, "files fixed")
        # Keep the original metadata file around as a timestamped backup.
        os.rename(meta, meta + '.' + str(int(time.time())) + '.original.bak')
        os.rename(fixed, meta)
        print('Finished:', meta)
    else:
        # Nothing was missing in this file; discard the unchanged copy.
        os.remove(fixed)
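If you want a quick sanity check afterwards, the same two line tests can be reused to count how many entries in a metadata file still have no digest. The count will never be exactly zero, since directories, symlinks and other non-regular files legitimately have no SHA1Digest; the point is only that the number should drop after the fix. This is just a rough sketch, not part of the fix itself (pass the path of one mirror_metadata*.gz file as the first argument):

#!/usr/bin/python3
# check_metadata.py <path to a mirror_metadata...gz file>
# Counts entries that have no SHA1Digest line (a rough sanity check).
import gzip, sys

total = missing = 0
sha1 = True  # So the very first 'File ' line does not count a nonexistent previous entry
with gzip.open(sys.argv[1], 'rt') as f:
    for line in f:
        if line.startswith('File '):
            if not sha1:
                missing += 1
            total += 1
            sha1 = False
        if line.startswith(' SHA1Digest'):
            sha1 = True
# The last entry in the file is not followed by another 'File ' line.
if total and not sha1:
    missing += 1
print(missing, "of", total, "entries have no SHA1Digest line")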