0

I am trying to improve the runtime of my Go code. I am still quite new to Go, so please forgive my slightly messy code.

I am iterating over a very large number of files, comparing the file information and creating a new struct from the compared file information. The matching row in the Postgres DB should then be updated with that struct. It looks roughly like this:

// Each iteration issues two queryFilePath calls and one
// updateNewFileTableRow call for the current path.
for _, path := range changedFilePaths {
        previous := queryFilePath(path, db, "old_file_info")
        current := queryFilePath(path, db, "new_file_info")

        // Both records go through StructToMap before compareKeys; the
        // resulting map is converted back to a File for the update.
        merged := compareKeys(StructToMap(previous), StructToMap(current))
        updateNewFileTableRow(db, convertMapToFile(merged))
    }
// updateNewFileTableRow updates the new_file_info row whose path equals
// file.Path, setting every other column from the matching field of file.
//
// The statement is prepared on db under the name "update_new_file_info" and
// then executed by that name. Errors from both steps are passed to
// CheckError. After execution a status line and the affected-row count are
// printed to stdout. timeTrack is deferred with the start time and the
// function name.
func updateNewFileTableRow(db *pgx.Conn, file File) {
    defer timeTrack(time.Now(), "updateNewFileTableRow")

    const stmtName = "update_new_file_info"
    const updateSQL = `UPDATE new_file_info
    SET timestamp = $1, inode = $2, mode = $3, gid = $4, uid = $5, size = $6, mtime = $7, hash = $8, status_inode = $9, status_mode = $10, status_gid = $11, status_uid = $12, status_size = $13, status_mtime = $14, status_hash = $15, status_deleted = $16, status_new = $17, preservation_copy_status = $18, preservation_copy_hash = $19, restore_status = $20, restore_hash = $21
    WHERE path = $22`

    _, prepErr := db.Prepare(stmtName, updateSQL)
    CheckError(prepErr)

    // Positional arguments in placeholder order: $1..$21 are the column
    // values, $22 is the path used in the WHERE clause.
    params := []interface{}{
        file.Timestamp, file.Inode, file.Mode, file.Gid, file.Uid, file.Size,
        file.Mtime, file.Hash,
        file.StatusInode, file.StatusMode, file.StatusGid, file.StatusUid,
        file.StatusSize, file.StatusMtime, file.StatusHash,
        file.StatusDeleted, file.StatusNew,
        file.PreservationCopyStatus, file.PreservationCopyHash,
        file.RestoreStatus, file.RestoreHash,
        file.Path,
    }

    tag, execErr := db.Exec(stmtName, params...)
    CheckError(execErr)

    fmt.Println("Updated status keys")
    fmt.Printf("Rows Affected: %d\n", tag.RowsAffected())
}

I stumbled across the Batch functionality provided by pgx and tried to use it, but I can't get it working. I don't really know which arguments to provide or how they need to be structured, as I find the documentation really hard to understand.

// updateNewFileTableRowBatch queues a single UPDATE of new_file_info on
// batch, keyed by file.Path and carrying every other field of file as a
// column value.
//
// This function only calls batch.Queue; sending the batch and reading its
// results is left to the caller.
func updateNewFileTableRowBatch(batch *pgx.Batch, file File) {
    const updateSQL = `UPDATE new_file_info
    SET timestamp = $1, inode = $2, mode = $3, gid = $4, uid = $5, size = $6, mtime = $7, hash = $8, status_inode = $9, status_mode = $10, status_gid = $11, status_uid = $12, status_size = $13, status_mtime = $14, status_hash = $15, status_deleted = $16, status_new = $17, preservation_copy_status = $18, preservation_copy_hash = $19, restore_status = $20, restore_hash = $21
    WHERE path = $22`

    // Arguments follow placeholder order: $1..$21 column values, $22 path.
    batch.Queue(
        updateSQL,
        file.Timestamp, file.Inode, file.Mode, file.Gid, file.Uid, file.Size,
        file.Mtime, file.Hash,
        file.StatusInode, file.StatusMode, file.StatusGid, file.StatusUid,
        file.StatusSize, file.StatusMtime, file.StatusHash,
        file.StatusDeleted, file.StatusNew,
        file.PreservationCopyStatus, file.PreservationCopyHash,
        file.RestoreStatus, file.RestoreHash,
        file.Path,
    )
}

I was wondering whether that's even the most sensible approach and, if so, whether anyone could give me some pointers on how to implement it. That would be highly appreciated.

Sebastian
  • 61
  • 6
  • I would suggest trying `CopyFrom` for your purposes. It will increase the speed drastically! https://pkg.go.dev/github.com/jackc/pgx/v5#hdr-Copy_Protocol – Pavlo Golub Apr 04 '23 at 08:43

0 Answers0