0

I'm trying to create a Trie structure in Zig using Zig's StringHashMap. I am able to get it to work to some extent, but only by using an `inline for` loop, which is not really usable because it requires the paths to be known at compile time :-(

Any help/explanation would be much appreciated :-)

The code:

const std = @import("std");
const Allocator = std.mem.Allocator;
const print = std.debug.print;
const expect = std.testing.expect;

/// One node of the trie: a payload byte plus the outgoing edges to its
/// children, looked up by string key.
const HashMap = struct {
    const Node = @This();

    /// Payload stored at this node.
    value: u8,
    /// Child nodes keyed by path segment. The map holds pointers to nodes,
    /// not copies of them.
    children: std.StringHashMap(*Node),
};

/// Builds a node holding `value` with an empty child map backed by
/// `allocator`. The node is returned by value, so it lives wherever the
/// caller stores the result.
fn newHashMap(allocator: Allocator, value: u8) HashMap {
    const no_children = std.StringHashMap(*HashMap).init(allocator);
    return .{
        .value = value,
        .children = no_children,
    };
}

/// Walks the trie from `root`, following `keys` one level at a time, and
/// prints each lookup and its outcome. The walk stops at the first key that
/// has no entry at the current level.
fn showTree(root: *std.StringHashMap(*HashMap), keys: [3][]const u8) void {
    var hashMap = root;
    for (keys) |key| {
        print("get key {s}\n", .{key});
        // The lookup result is never reassigned, so bind it as a constant and
        // handle the miss right here instead of in a trailing `else`.
        const node = hashMap.get(key) orelse {
            print("no value for {s}\n", .{key});
            break;
        };
        print("we got a value for {s}:{}\n", .{ key, node.value });
        // Descend: the next key is looked up among this node's children.
        hashMap = &node.children;
    }
}

// Builds a chain of nested maps ("a" -> "b" -> "c") and prints it with
// showTree. This is the version the question is about: it only produces the
// expected output while the loop is marked `inline`.
test "HashMap" {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const gpaAllocator = gpa.allocator();
    // All nodes' child maps allocate from the arena, which is released in one go.
    var arena = std.heap.ArenaAllocator.init(gpaAllocator);
    defer {
        arena.deinit();
        const leaked = gpa.deinit();
        if (leaked) expect(false) catch @panic("TEST FAIL"); //fail test; can't try in defer as defer is executed after we return
    }

    const allocator = arena.allocator();
    var root = &std.StringHashMap(*HashMap).init(allocator);
    // `hashMap` always points at the map the next key is inserted into.
    var hashMap = root;
    const keys = [_][]const u8{ "a", "b", "c" };
    const values: [3]u8 = .{ 1, 2, 3 };
  
    // create tree
    inline for (keys) |key, i| {
        print("put key {s}:{}\n", .{ key, values[i] });
        // NOTE(review): `newNode` is a local of the loop body, so `&newNode`
        // is a pointer to stack storage. With `inline` the loop is unrolled
        // and each iteration has its own `newNode`; with a plain `for` the
        // reported output ("a:3", then no value for "b") indicates that the
        // stored pointers all end up referring to the last node written.
        var newNode = newHashMap(allocator, values[i]);
        try hashMap.put(key, &newNode);
        showTree(root,keys);
        // Descend so the next key is inserted below the node just added.
        hashMap = &newNode.children;
    }
   
    showTree(root,keys);
}

This prints:

Test [1/1] test "HashMap"... 
put key a:1
put key b:2
put key c:3
get key a
we got a value for a:1
get key b
we got a value for b:2
get key c
we got a value for c:3
All 1 tests passed.

as expected.

Removing the 'inline' results in:

Test [1/1] test "HashMap"... 
put key a:1
put key b:2
put key c:3
get key a
we got a value for a:3
get key b
no value for b
All 1 tests passed.
seriousme
  • 11
  • 2

1 Answer

0

The answer turned out to be quite obvious (with hindsight ;-)), as explained in the Zig language reference [1]:

var declarations inside functions are stored in the function's stack frame. Once a function returns, any Pointers to variables in the function's stack frame become invalid references, and dereferencing them becomes unchecked Undefined Behavior.

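This explains the strange behaviour in the loop without `inline`: `newNode` is a local of the loop body, so in practice every iteration reuses the same stack storage and `&newNode` yields the same address each time. Each new node overwrites the previous one, so all pointers stored in the maps end up referring to the last node written (hence `a:3`), and dereferencing them is Undefined Behaviour.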

By adding `inline` the loop is unrolled at compile time, so each iteration gets its own `newNode` and no storage is reused, hence the correct output.

The correct way of dealing with this is to allocate each struct explicitly (e.g. with `allocator.create`) and pass around the pointer to the struct, as shown in [2].

Once that is sorted it all makes sense.

[1] https://ziglang.org/documentation/master/#Where-are-the-bytes

[2] https://www.reddit.com/r/Zig/comments/s6v8t3/idiomatic_zig_for_initializing_an_allocated/

For reference, the working code without 'inline' below:

const std = @import("std");
const Allocator = std.mem.Allocator;
const print = std.debug.print;
const expect = std.testing.expect;

/// One node of the trie: a payload byte plus the outgoing edges to its
/// children, looked up by string key.
const HashMap = struct {
    const Node = @This();

    /// Payload stored at this node.
    value: u8,
    /// Child nodes keyed by path segment. The map holds pointers to nodes,
    /// not copies of them.
    children: std.StringHashMap(*Node),
};

/// Allocates a node with `allocator`, stores `value` in it, gives it an
/// empty child map backed by the same allocator, and returns the pointer.
/// Fails only if the allocation itself fails.
fn newHashMap(allocator: Allocator, value: u8) !*HashMap {
    const fresh = try allocator.create(HashMap);
    // `create` returns uninitialized memory; fill in every field before use.
    fresh.value = value;
    fresh.children = std.StringHashMap(*HashMap).init(allocator);
    return fresh;
}

/// Walks the trie from `root`, following `keys` one level at a time, and
/// prints each lookup and its outcome. The walk stops at the first key that
/// has no entry at the current level.
fn showTree(root: *std.StringHashMap(*HashMap), keys: [3][]const u8) void {
    var hashMap = root;
    for (keys) |key| {
        print("get key {s}\n", .{key});
        // The lookup result is never reassigned, so bind it as a constant and
        // handle the miss right here instead of in a trailing `else`.
        const node = hashMap.get(key) orelse {
            print("no value for {s}\n", .{key});
            break;
        };
        print("we got a value for {s}:{}\n", .{ key, node.value });
        // Descend: the next key is looked up among this node's children.
        hashMap = &node.children;
    }
}

// Builds the chain "a" -> "b" -> "c" out of individually allocated nodes and
// prints it with showTree. Works with a plain (non-inline) loop because every
// node pointer stored in a map refers to its own allocation.
test "HashMap" {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const gpaAllocator = gpa.allocator();
    // Nodes and their child maps allocate from the arena, which is released
    // in one go before the leak check below.
    var arena = std.heap.ArenaAllocator.init(gpaAllocator);
    defer {
        arena.deinit();
        const leaked = gpa.deinit();
        if (leaked) expect(false) catch @panic("TEST FAIL"); //fail test; can't try in defer as defer is executed after we return
    }

    const allocator = arena.allocator();
    // Keep the root map in a named local rather than taking the address of
    // the temporary returned by `init`, so its storage and mutability are
    // explicit.
    var root = std.StringHashMap(*HashMap).init(allocator);
    // `hashMap` always points at the map the next key is inserted into.
    var hashMap = &root;
    const keys = [_][]const u8{ "a", "b", "c" };
    const values: [3]u8 = .{ 1, 2, 3 };

    // create tree
    for (keys) |key, i| {
        print("put key {s}:{}\n", .{ key, values[i] });
        // The pointer itself is never reassigned, so bind it as a constant.
        const newNode = try newHashMap(allocator, values[i]);
        try hashMap.put(key, newNode);
        // Descend so the next key is inserted below the node just added.
        hashMap = &newNode.children;
    }

    showTree(&root, keys);
}
seriousme
  • 11
  • 2
  • 1
    I don't wanna be that guy but Rust would've caught that – mitiko Nov 14 '22 at 21:42
  • 1
    Probably; the aim of the exercise for me, however, was to get a better understanding of Zig ;-) I have a similar structure in TypeScript, and that is less code and I don't need to worry about pointers at all ;-) – seriousme Nov 16 '22 at 17:21