3

I came across this problem while working on a question that was raised earlier.

This is likely specific to ObjectInputStream and not binary reading in general, so the title may be misleading.

Basically, the problem from there goes like this: the author has serialized a hashmap of strings to doubles. The author's custom serialization format for each entry in the hashmap is pretty simple:

int n        // length of string key as a 4-byte integer
byte[n] key  // a string of length n
double value // the value associated with the key

Now for some reason, during the serialization process, one of the strings 2010-00-008.html was serialized with two extra bytes, shown here.

img

So 18 bytes were written instead of 16. This is bound to cause problems, because the length prefix still says the string is 16 bytes long.

However, for some reason, you can write the hash map out and read it back in perfectly! It seems like given an 18 byte string, you can read 16 bytes and still read the whole thing.

Test code

Here's the code. It's basically the code in the other question except I made it so that you should be able to just change the path and run it. After you run it, you'll get a sequence of write statements followed by a sequence of read statements. Inspect the file and you should notice the extra bytes in the string, but the program does not crash.

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
/**
 * Round-trips a map of string keys to double values through a file, using a
 * hand-rolled record layout written via {@link ObjectOutputStream} and read
 * back via {@link ObjectInputStream}.
 *
 * <p>Logical layout: an {@code int} entry count, then for each entry an
 * {@code int} key length, the key bytes (UTF-8), and a {@code double} value.
 *
 * <p>Note that the object streams add their own framing (stream header and
 * block-data markers) around this payload, so the raw file is not a plain
 * concatenation of the fields above. The framing is stripped transparently
 * as long as the file is read back through {@link ObjectInputStream}.
 */
public class Test {

    // customize the path as needed
    public static String path = "C:\\temp\\sample.dat";

    HashMap<String, Double> map = new HashMap<String, Double>();

    /** Populates {@link #map} with the fixed sample data set. */
    public Test() {
        map.put("2010-00-027.html",21732.994621513037); map.put("2010-00-020.html",3466.5169348296736); map.put("2010-00-051.html",12528.648992702407); map.put("2010-00-062.html",3354.8950010256385);
        map.put("2010-00-024.html",10295.095511718278); map.put("2010-00-052.html",5381.513344679818);  map.put("2010-00-007.html",16466.33813960735);  map.put("2010-00-017.html",9484.969198176652);
        map.put("2010-00-054.html",15423.873112634772); map.put("2010-00-022.html",8123.842752870753);  map.put("2010-00-033.html",21238.496665104063); map.put("2010-00-028.html",7578.792651786424);
        map.put("2010-00-048.html",3566.4118233046393); map.put("2010-00-040.html",2681.0799941861724); map.put("2010-00-049.html",14308.090890746222); map.put("2010-00-058.html",5911.342406606804);
        map.put("2010-00-045.html",2284.118716145881);  map.put("2010-00-031.html",2859.565771680721);  map.put("2010-00-046.html",4555.187022907964);  map.put("2010-00-036.html",8479.709295569426);
        map.put("2010-00-061.html",846.8292195815125);  map.put("2010-00-023.html",14108.644025417952); map.put("2010-00-041.html",22686.232732684934); map.put("2010-00-025.html",9513.539663409734);
        map.put("2010-00-012.html",459.6427911376829);  map.put("2010-00-005.html",0.0);    map.put("2010-00-013.html",2646.403220496738);  map.put("2010-00-065.html",5808.86423609936);
        map.put("2010-00-056.html",12154.250518054876); map.put("2010-00-008.html",10811.15198506469);  map.put("2010-00-042.html",9271.006516004005);  map.put("2010-00-000.html",4387.4162586468965);
        map.put("2010-00-059.html",4456.211623469774);  map.put("2010-00-055.html",3534.7511584735325); map.put("2010-00-057.html",8745.640098512009);  map.put("2010-00-032.html",4993.295735075575);
        map.put("2010-00-021.html",3852.5805998017922); map.put("2010-00-043.html",4108.020033536286);  map.put("2010-00-053.html",2.2446400279239946); map.put("2010-00-030.html",17853.541210836203);
    }

    /**
     * Writes {@link #map} to the file at {@link #path}, logging each entry to
     * standard output before it is written.
     *
     * <p>Failures are reported on standard error rather than thrown, matching
     * {@link #read()}; the streams are closed on every path.
     */
    public void write() {
        // Two separate resources so the file handle is released even if the
        // ObjectOutputStream constructor itself fails.
        try (FileOutputStream fos = new FileOutputStream(path);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeInt(map.size()); // write size of the map
            for (Map.Entry<String, Double> entry : map.entrySet()) { // iterate entries
                System.out.println("writing ("+ entry.getKey() +","+ entry.getValue() +")");
                // Fixed charset: the length prefix counts encoded bytes, so the
                // encoding must not depend on the platform default.
                byte[] bytes = entry.getKey().getBytes(StandardCharsets.UTF_8);
                oos.writeInt(bytes.length); // length of key string
                oos.write(bytes); // key string bytes
                oos.writeDouble(entry.getValue()); // value
            }
        } catch (IOException | RuntimeException e) {
            // Best-effort like read(): report, do not propagate.
            e.printStackTrace();
        }
    }

    /**
     * Reads the file at {@link #path} back into a fresh map, logging each
     * decoded entry to standard output.
     *
     * <p>Failures (including a corrupt count or key length) are reported on
     * standard error rather than thrown; the streams are closed on every path.
     */
    public void read() {
        try (FileInputStream f = new FileInputStream(path);
             ObjectInputStream ois = new ObjectInputStream(f)) {
            int size = ois.readInt(); // read size of the map
            if (size < 0) {
                throw new IOException("corrupt file: negative entry count " + size);
            }
            HashMap<String, Double> newMap = new HashMap<>(size);
            for (int i = 0; i < size; i++) { // iterate entries
                int length = ois.readInt(); // length of key string
                if (length < 0) {
                    throw new IOException("corrupt file: negative key length " + length);
                }
                byte[] bytes = new byte[length];
                // readFully, not read: read() may return fewer bytes than
                // requested and would leave the key partially filled.
                ois.readFully(bytes);
                String key = new String(bytes, StandardCharsets.UTF_8);
                double value = ois.readDouble(); // value
                newMap.put(key, value);
                System.out.println("read ("+ key +","+ value +")");
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /** Writes the sample map to {@link #path} and immediately reads it back. */
    public static void main(String[] args) {
        Test t = new Test();
        t.write();
        t.read();
    }
}
Community
  • 1
  • 1
MxLDevs
  • 19,048
  • 36
  • 123
  • 194
  • 1
    I'm downvoting this question because of several inaccuracies and omissions, which I have corrected, which make it unanswerable in its original form. Fortunately I had been following the original question that gave rise to this one. – user207421 May 30 '14 at 01:26
  • If you just want to write a string directly to a file, look into using `DataOutputStream` instead of `ObjectOutputStream` (and `DataInputStream` instead of `ObjectInputStream`). It will write exactly what you tell it to write, with no cleverness. – user253751 May 30 '14 at 01:31

1 Answer

4

You need to read the Protocol chapter of the Object Serialization Specification. The stream is full of type and block markers in addition to the actual data. This is one of them, and it is filtered out by ObjectInputStream when the stream is read correctly.

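EDIT: The extra bytes are 77 64, which is a TC_BLOCKDATA marker (0x77) followed by a one-byte length of 0x64, i.e. the header of a block-data record announcing that 100 bytes of data follow.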

user207421
  • 305,947
  • 44
  • 307
  • 483