Please read before quoting “repost” - I am aware that similar questions have been asked, but I have yet to find a satisfactory answer.
My goal is to provide a tree-like directory structure of disk space usage allowing the user to drill down the hierarchy in order to locate sizable folders.
The program TreeSize is an excellent example of this, and I am looking to get the same response times as this program.
My current code can iterate through my 480GB of files in approximately 25 seconds using the MFT. From this point I am looking to start building directory sizes by getting the file information (the MFT contains only fileName and parentId, not the full file path).
To get the file information from an MFT journal entry, my current code calls:
TCHAR filePath[MAX_PATH];
HANDLE hh = OpenFileById(hDevice, &(getFileIdDescriptor(pRecord->FileReferenceNumber)), 0, 0, 0, 0);
GetFinalPathNameByHandle(hh, filePath, MAX_PATH, 0);
Unfortunately this code increases the overall execution time of the program from 25 seconds to 5 minutes.
Is there a better way to get the file information?
Thank you if you were going to suggest FindFirstFile and FindNextFile, but for processing large directories these options are too slow.
The code is below (I’m not a C programmer, as you might notice!):
#include <iostream>
#include <string>
#include <fstream>
#include <windows.h>
#include <fstream>
#include <atlbase.h>
#include <windows.h>
#include <stdio.h>
using namespace std;
typedef std::basic_string<TCHAR> tstring;
// Builds the FILE_ID_DESCRIPTOR that OpenFileById takes for a 64-bit file
// reference number (such as USN_RECORD::FileReferenceNumber).
//
// fileId: the file reference number to describe.
// Returns a descriptor of type FileIdType whose FileId carries fileId.
FILE_ID_DESCRIPTOR getFileIdDescriptor(const DWORDLONG fileId)
{
    // Zero the whole structure first: only the 64-bit FileId member of the
    // identifier union is assigned below, so any remaining bytes of the
    // descriptor would otherwise be returned as indeterminate stack contents.
    FILE_ID_DESCRIPTOR fileDescriptor = { 0 };
    fileDescriptor.dwSize = sizeof(fileDescriptor);
    fileDescriptor.Type = FileIdType;
    fileDescriptor.FileId.QuadPart = fileId;
    return fileDescriptor;
}
bool ReadMFT()
{
HANDLE hDevice = CreateFile(TEXT("\\\\.\\C:"),
GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
0,
OPEN_EXISTING,
FILE_FLAG_OVERLAPPED,
0);
if (hDevice == INVALID_HANDLE_VALUE) // cannot open the drive
{
printf("Error %d", GetLastError());
return (FALSE);
}
USN_JOURNAL_DATA ujd = { 0 };
DWORD cb = 0;
BYTE pData[sizeof(DWORDLONG) + 0x10000] = { 0 };
if (!DeviceIoControl(hDevice, FSCTL_QUERY_USN_JOURNAL, NULL, 0, &ujd, sizeof(USN_JOURNAL_DATA), &cb, NULL))
{
printf("Error %d", GetLastError());
return (FALSE);
}
MFT_ENUM_DATA med = { 0 };
med.StartFileReferenceNumber = 0;
med.LowUsn = 0;
med.HighUsn = ujd.NextUsn;
while (TRUE)
{
if (!DeviceIoControl(hDevice, FSCTL_ENUM_USN_DATA, &med, sizeof(med), pData, sizeof(pData), &cb, NULL))
{
printf("Error %d", GetLastError());
break;
}
PUSN_RECORD pRecord = (PUSN_RECORD)&pData[sizeof(USN)];
//Inner Loop
while ((PBYTE)pRecord < (pData + cb))
{
tstring sz((LPCWSTR)
((PBYTE)pRecord + pRecord->FileNameOffset),
pRecord->FileNameLength / sizeof(WCHAR));
pRecord = (PUSN_RECORD)((PBYTE)pRecord + pRecord->RecordLength);
// *******************************************************************************
// APPROACH 1
// Adding these lines of code increases the time from 25 seconds to 340 seconds
// Although it may be possible to push this onto a queue and run these in parrallel
// I still think it's an expensive option
/*TCHAR filePath[MAX_PATH];
HANDLE hh = OpenFileById(hDevice, &(getFileIdDescriptor(pRecord->FileReferenceNumber)), 0, 0, 0, 0);
GetFinalPathNameByHandle(hh, filePath, MAX_PATH, 0);*/
}
med.StartFileReferenceNumber = *(DWORDLONG *)pData;
}
}
// Program entry point: performs a single ReadMFT pass. The pass's result is
// deliberately not inspected, so the process always exits with status 0.
int main()
{
    (void)ReadMFT();
    return 0;
}
Many thanks