4

Given a binary format with a header that includes the number of records, followed by records of the format:

{ type : Int8, timestamp : UInt32, user_id : UInt64 }

0000 0004 0153 0927 d139 6747 c045 d991
2100 53d1 6287 4fd2 69fd 8e5f 0475 0153
f323 a72b 4984 a40b 8d54 db00 53a0 78d4
1db8 b1a6 4129 1651

I'm coming from Ruby and have the following solution, which works, but I think there might be a more elegant or 'crystal' way to read the bytes when the data is structured?

# One parsed record: an account kind plus the raw timestamp and user id fields.
class User
  # Lookup from the record's numeric type to its symbolic name.
  USER_TYPES = {0 => :admin, 1 => :user}

  property user_type : Symbol
  property timestamp : UInt32
  property user_id : UInt64

  # Builds a user from the raw record fields.
  #
  # Raises KeyError when *user_type* has no entry in USER_TYPES.
  def initialize(user_type : Int8, timestamp : UInt32, user_id : UInt64)
    @user_type = USER_TYPES[user_type]
    @timestamp = timestamp
    @user_id = user_id
  end
end

# Reads User records from a binary file laid out as a big-endian UInt32
# record count followed by 13-byte records
# (Int8 type, UInt32 timestamp, UInt64 user id).
class Parser
  # Users collected by every #parse call made on this instance.
  property users : Array(User)

  def initialize
    @users = [] of User
  end

  # Parses the file at *file_path*, appends one User per record to #users
  # and returns #users.
  #
  # Raises IO::EOFError if the file ends before the announced number of
  # records has been read.
  def parse(file_path : String)
    File.open(file_path) do |file|
      offset = 0
      count : UInt32 = seek_and_unpack(file, offset, UInt32)
      offset += 4

      # `count.times` runs exactly `count` iterations. The inclusive range
      # `0..count` would run count + 1 times and read past the last record.
      count.times do
        user_type = seek_and_unpack(file, offset, Int8)
        timestamp = seek_and_unpack(file, offset + 1, UInt32)
        user_id = seek_and_unpack(file, offset + 5, UInt64)
        @users << User.new(user_type, timestamp, user_id)
        offset += 13
      end
      @users
    end
  end

  # Moves *file* to the absolute byte *offset* and reads one big-endian value
  # of *read_type* from there.
  private def seek_and_unpack(file : File, offset : Int32, read_type)
    file.seek(offset)
    file.read_bytes(read_type, IO::ByteFormat::BigEndian)
  end
end

# Parse the sample file and print the resulting users.
parser = Parser.new
puts parser.parse("my_file.dat")
# [#<User:0x102805fe0 @user_type=:user, @timestamp=1393108945, @user_id=4136353673894269217>,
# #<User:0x102805fc0 @user_type=:admin, @timestamp=1406231175, @user_id=5751776211841778805>,
# #<User:0x102805fa0 @user_type=:user, @timestamp=1408443303, @user_id=3119170057034093787>,
# #<User:0x102805f80 @user_type=:admin, @timestamp=1403025620, @user_id=2141656950430570065>]
felixbuenemann
  • 579
  • 3
  • 18
kreek
  • 8,774
  • 8
  • 44
  • 69

3 Answers3

2

You can get rid of the seeks, since read_bytes already advances the IO position, and wrap the unpack operation in a macro to make it more readable:

# Sequential parser: every read continues where the previous one stopped,
# so no explicit seeking is needed.
class Parser
  # Expands to a big-endian read of *type* from the local variable `file`
  # that must exist at the call site.
  macro unpack(type)
    file.read_bytes({{type}}, IO::ByteFormat::BigEndian)
  end

  # Users collected by every #parse call made on this instance.
  property users = [] of User

  # Reads the UInt32 record count from the file at *path*, then that many
  # records, appending one User per record. Returns the accumulated users.
  def parse(path)
    File.open(path) do |file|
      record_count = unpack(UInt32)

      record_count.times do
        kind = unpack(Int8)
        stamp = unpack(UInt32)
        id = unpack(UInt64)
        @users << User.new(user_type: kind, timestamp: stamp, user_id: id)
      end

      @users
    end
  end
end
felixbuenemann
  • 579
  • 3
  • 18
0

You can also use https://github.com/spider-gazelle/bindata, which is a little more declarative and also handles endianness for you.

require "bindata"

# Account kinds, with explicit numeric values for both members.
enum UserType
  Admin = 0
  User  = 1
end

# Declarative description of one record, built on the bindata shard's
# `BinData` base class.
# NOTE(review): the field declarations below are bindata DSL; their exact
# semantics depend on the installed shard version — confirm against its docs.
class User < BinData
  # Declares big-endian byte order for this class's fields.
  endian big

  # Declared as a UInt8 on disk, exposed as `type` with UserType::User as default.
  # NOTE(review): the question specifies this byte as Int8; presumably
  # equivalent for the values 0 and 1 — confirm.
  enum_field UInt8, type : UserType = UserType::User
  uint32 :timestamp
  uint64 :user_id
end

# Read the big-endian UInt32 record count, then that many User records.
users = [] of User
File.open(path) do |file|
  # IO#read_bytes takes an IO::ByteFormat for the byte order. IO::ByteFormat
  # is a module, not an enum, so a symbol such as :big_endian is not accepted.
  count = file.read_bytes(UInt32, IO::ByteFormat::BigEndian)

  count.times do
    users << file.read_bytes(User)
  end
end
users
0

There are multiple possibilities to make your code, as you described it, more 'crystal'.

You could drop the explicit IO#seek calls and read the fields sequentially, since each read continues where the previous one stopped:

# Reads the big-endian UInt32 record count from the file at *file_path*, then
# that many records, appending one User per record to @users. Returns @users.
#
# Crystal's IO has no typed `read(Type)`; the typed read is IO#read_bytes.
# It consumes bytes from the current position, so consecutive calls walk the
# file without any explicit seek.
def parse(file_path : String)
  File.open(file_path, "rb") do |file|
    count = file.read_bytes(UInt32, IO::ByteFormat::BigEndian)

    # `count.times` runs exactly `count` iterations; the inclusive range
    # `0..count` would attempt one record too many.
    count.times do
      user_type = file.read_bytes(Int8, IO::ByteFormat::BigEndian)
      timestamp = file.read_bytes(UInt32, IO::ByteFormat::BigEndian)
      user_id = file.read_bytes(UInt64, IO::ByteFormat::BigEndian)
      @users << User.new(user_type, timestamp, user_id)
    end
    @users
  end
end

Adding to this, you could read each fixed-size 13-byte record in one go and decode its fields from that slice, instead of incrementing an offset:

# Reads the big-endian UInt32 record count from the file at *file_path*, then
# loads each 13-byte record into a buffer and decodes its fields from there.
# Appends one User per record to @users and returns @users.
#
# Crystal has no IO#each_slice, Array#pack or String#unpack; the stdlib
# equivalents are IO#read_fully (fill a Bytes buffer) and
# IO::ByteFormat#decode (turn a Bytes sub-slice into a number).
def parse(file_path : String)
  File.open(file_path, "rb") do |file|
    count = file.read_bytes(UInt32, IO::ByteFormat::BigEndian)

    # One buffer is reused for every record; the decoded values are plain
    # numbers, so overwriting the buffer afterwards is safe.
    record = Bytes.new(13)

    count.times do
      # Raises IO::EOFError if fewer than 13 bytes remain.
      file.read_fully(record)

      user_type = record[0].to_i8! # reinterpret the raw byte as Int8
      timestamp = IO::ByteFormat::BigEndian.decode(UInt32, record[1, 4])
      user_id = IO::ByteFormat::BigEndian.decode(UInt64, record[5, 8])
      @users << User.new(user_type, timestamp, user_id)
    end
    @users
  end
end

You could also unpack each record directly into a struct:

# One raw record as stored in the file: Int8 type, UInt32 timestamp,
# UInt64 user id, in that order.
#
# Crystal has no IO#each_struct. The stdlib protocol for reading a custom
# type is to define `from_io(io, format)`; `io.read_bytes(Record, format)`
# then calls it.
struct Record
  getter type : Int8
  getter timestamp : UInt32
  getter user_id : UInt64

  def initialize(type_byte : Int8, timestamp : UInt32, user_id : UInt64)
    @type = type_byte
    @timestamp = timestamp
    @user_id = user_id
  end

  # Reads the three fields from *io* in file order, using *format* for the
  # byte order of the multi-byte fields.
  def self.from_io(io : IO, format : IO::ByteFormat) : self
    type_byte = io.read_bytes(Int8, format)
    timestamp = io.read_bytes(UInt32, format)
    user_id = io.read_bytes(UInt64, format)
    new(type_byte, timestamp, user_id)
  end
end

# Reads the big-endian UInt32 record count from the file at *file_path*, then
# that many Record values, appending one User per record to @users.
# Returns @users.
def parse(file_path : String)
  File.open(file_path, "rb") do |file|
    count = file.read_bytes(UInt32, IO::ByteFormat::BigEndian)

    count.times do
      record = file.read_bytes(Record, IO::ByteFormat::BigEndian)
      # User.new takes the raw Int8 type and maps it to a symbol itself.
      @users << User.new(record.type, record.timestamp, record.user_id)
    end
    @users
  end
end
noah1400
  • 1,282
  • 1
  • 4
  • 15
  • Hi, @noah1400! I stumbled upon your answer in a search and got hooked. Could you tell me where you saw the description of `each_struct` and `unpack` methods in Crystal? – Sergey Fedorov Apr 30 '23 at 13:54