0

When I run this code specifying a 16TB disk:

//
//  main.swift
//  walking_on_directories
//
//

import Foundation
import CommonCrypto
import Foundation

/// Computes the SHA-256 checksum of a file, streaming it in 1 MB chunks so the
/// whole file never has to be resident in memory.
///
/// - Parameter path: Filesystem path of the file to hash.
/// - Returns: The digest as a 64-character lowercase hexadecimal string, or
///   `nil` if the file cannot be opened or a read fails part-way through.
func sha256Checksum(forFileAtPath path: String) -> String? {
    // Open the file for reading
    guard let file = FileHandle(forReadingAtPath: path) else {
        return nil
    }
    defer {
        // Nothing useful can be done if closing fails; the digest is already computed.
        try? file.close()
    }

    // Initialize the SHA-256 context
    var context = CC_SHA256_CTX()
    CC_SHA256_Init(&context)

    // Read the file in chunks and update the context.
    // `read(upToCount:)` is used instead of `readData(ofLength:)` because the
    // latter raises an Objective-C exception on I/O errors, which Swift cannot
    // catch and which therefore terminates the process.
    let chunkSize = 1024 * 1024 // 1 MB
    var reachedEnd = false
    var readFailed = false
    while !reachedEnd && !readFailed {
        // Drain temporaries created by each read so memory stays flat on large files.
        autoreleasepool {
            do {
                guard let data = try file.read(upToCount: chunkSize), !data.isEmpty else {
                    reachedEnd = true
                    return
                }
                data.withUnsafeBytes { buffer in
                    _ = CC_SHA256_Update(&context, buffer.baseAddress, CC_LONG(buffer.count))
                }
            } catch {
                readFailed = true
            }
        }
    }

    // A digest over a partially read file would be misleading, so report failure instead.
    guard !readFailed else {
        return nil
    }

    // Finalize the context and get the checksum
    var digest = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
    CC_SHA256_Final(&digest, &context)
    let checksum = digest.map { String(format: "%02hhx", $0) }.joined()

    return checksum
}

// Walk every file beneath `startLocation`, hash it, and delete any file whose
// SHA-256 checksum matches a file already seen earlier in the walk.
let startLocation =  "/Volumes/16-1-23/debug_file_tracker/images"
let fileManager = FileManager.default

// `enumerator(atPath:)` returns nil when the directory cannot be opened (for
// example, the volume is not mounted); fail with a message instead of a
// force-unwrap crash.
guard let enumerator = fileManager.enumerator(atPath: startLocation) else {
    print("Cannot enumerate " + startLocation)
    exit(1)
}

// Number of duplicate files successfully deleted.
var count = 0

// Checksums of every file seen so far. This must live outside the loop:
// re-creating it per file would leave it empty on every lookup, so no
// duplicate would ever be detected.
var existing_files_checksum = Set<String>()

while let element = enumerator.nextObject() as? String {
    var isDirectory: ObjCBool = false
    // The enumerator yields paths relative to `startLocation`.
    let filePath = (startLocation as NSString).appendingPathComponent(element)

    guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
        continue
    }

    if isDirectory.boolValue {
        print("Skipping directory: " + filePath)
        continue
    }

    guard let checksum = sha256Checksum(forFileAtPath: filePath) else {
        print("Failed to calculate checksum for " + filePath)
        continue
    }

    if existing_files_checksum.contains(checksum) {
        print("deleting " + filePath)
        do {
            try fileManager.removeItem(atPath: filePath)
            // Only count files that were actually removed.
            count = count + 1
        } catch {
            // One undeletable file should not abort a multi-hour run.
            print("Failed to delete " + filePath + ": " + error.localizedDescription)
        }
    } else {
        print("adding " + checksum)
        existing_files_checksum.insert(checksum)
    }
}
print("deleted " + String(count) + " files")
    

It runs for hours, printing checksums. It calculates the checksum for about 17,000 files. Then my disk breaks.

Here's what I mean by breaks:

Before running the program, the disk checks out with diskutil; it is fine:

ls -al  /Volumes/16-1-23/

shows

drwx------@  14 sam  staff      448 Apr 15 15:42 .
drwxr-xr-x    9 sam  wheel      288 Apr 25 10:44 ..
-rw-r--r--@   1 sam  staff    14340 Apr 17 19:48 .DS_Store
d--x--x--x    8 sam  staff      256 Apr 25 10:44 .DocumentRevisions-V100
drwx------    4 sam  staff      128 Jan 25 16:15 .Spotlight-V100
d-wx--x--t    4 sam  staff      128 Feb  1 11:04 .TemporaryItems
d-wx--x--t    3 sam  staff       96 Jan 26 13:39 .Trashes
drwx------    3 sam  staff       96 Apr 25 10:44 .fseventsd
drwx------@   5 sam  staff      160 Apr 16 14:41 debug_file_tracker

But then, this program crashes because it can't find the next file to checksum.

When I do an ls -al /volumes/16-1-23 the command returns permission denied.

Using a heavier hammer:

sudo ls -al
total 0
d--x--x--x   2 root  wheel   64 Apr 23 19:16 .
drwxr-xr-x  23 sam   wheel  736 Apr 24 09:03 ..

Yikes, where did everything go?

If I do an ls /volumes I see this:

ls -al
total 96
drwxr-xr-x  23 sam   wheel    736 Apr 24 09:19 .
drwxr-xr-x  20 root  wheel    640 Apr  1 12:46 ..
d--x--x--x   2 root  wheel     64 Apr 23 19:16 16-1-23
d--x--x--x   2 root  wheel     64 Apr 23 21:59 16-1-23 1
d--x--x--x   2 root  wheel     64 Apr 24 04:09 16-1-23 10
d--x--x--x   2 root  wheel     64 Apr 24 08:13 16-1-23 11
drwx------@ 14 sam   staff    448 Apr 15 15:42 16-1-23 12
d--x--x--x   2 root  wheel     64 Apr 23 22:14 16-1-23 2
d--x--x--x   2 root  wheel     64 Apr 23 23:04 16-1-23 3
d--x--x--x   2 root  wheel     64 Apr 23 23:04 16-1-23 4
d--x--x--x   2 root  wheel     64 Apr 24 00:13 16-1-23 5
d--x--x--x   2 root  wheel     64 Apr 24 00:13 16-1-23 6
d--x--x--x   2 root  wheel     64 Apr 24 01:08 16-1-23 7
d--x--x--x   2 root  wheel     64 Apr 24 02:01 16-1-23 8
d--x--x--x   2 root  wheel     64 Apr 24 03:05 16-1-23 9
drwxrwxr-x@  9 sam   staff    288 Apr  7 13:06 16-3-23

I'd really love to know what's going on.

Why has my disk been broken into a bunch of unreadable disks? (Sorry I can't find a better term for this.)

I can produce this error with Python code that does that same thing.

I was hoping that the problem would go away if I wrote it in Swift.

It didn't.

I guess that's good.

However, now I am really stuck.

Oh, one last thing. When I reboot, the drive returns to "normal". However, remounting the drive does not fix it.

Super stuck here; the Apple Developer forum has been silent.

Any advice appreciated.

Thank you in advance.

  • I don't have an answer, I'm afraid but it looks like this might be some file system bug. I'd probably raise a bug report with Apple. – JeremyP Apr 26 '23 at 08:55

0 Answers0