2

There is a bug of my 6.824 raft implementation, it fails to pass TestSnapshotAllCrash2D, but passed all before.

the programe stucked while trying to apply, just like:

...
[Term 1 Server 2] starting new command in 11 for cmd{2942042800837528893}.
[Term 1 Server 2] replicate to 1 success, match index is 10
[Term 1 Server 2] commit index update to 10
[Term 1 Server 2] replicate to 0 success, match index is 10
[Term 1 Server 1] update commit index to 10 from leader.
[Term 1 Server 2] replicate to 1 success, match index is 11
[Term 1 Server 0] update commit index to 10 from leader.
[Term 1 Server 2] commit index update to 11
[Term 1 Server 2] replicate to 0 success, match index is 11
[Term 1 Server 1] updating last applied from 1 to 10
[Term 1 Server 0] updating last applied from 1 to 10
[Term 1 Server 2] updating last applied from 1 to 11

The msg was sent to applyCh but not be received, i guess.

But i don't know why.Theoretically, each call of Start(cmd), there will be an operation for receiving the msg from applyCh.

Part of my implementation below.

// go applier() in Make()
func (rf *Raft) applier() {
    // update last applied
    for rf.killed() == false {
        time.Sleep(cpuGap)
        rf.lock("applier")
        if rf.commitIndex > rf.lastApplied {
            ColorPrintf("[Term %d Server %d] updating last applied from %d to %d", rf.currentTerm, rf.me, rf.lastApplied, rf.commitIndex)
        }
        for rf.commitIndex > rf.lastApplied {
            rf.lastApplied++
            rf.applyCh <- ApplyMsg{
                CommandValid: true,
                Command:      rf.accessLog(rf.lastApplied).Command,
                CommandIndex: rf.accessLog(rf.lastApplied).Index,
            }
        }
        rf.unlock("applier")
    }
}
// update commitIndex if ok.

// append entries rpc sender
func (rf *Raft) replicateOneRound(server int) {
    rf.lock("replicateOneRound")
    if rf.state != LEADER {
        rf.unlock("replicateOneRound")
        return
    }

    prevLogIndex := rf.nextIndex[server] - 1

    // if last log index >= nextIndex for a follower: send rpc with log entries starting at nextIndex
    var entries []Entry
    if rf.lastLog().Index >= rf.nextIndex[server] {
        entries = rf.log[rf.nextIndex[server]-rf.SnapshotIndex:]
    } else {
        entries = make([]Entry, 0)
    }

    request := AppendEntriesArgs{
        Term:         rf.currentTerm,
        LeaderId:     rf.me,
        PrevLogIndex: prevLogIndex,
        PrevLogTerm:  rf.accessLog(prevLogIndex).Term,
        Entries:      entries,
        LeaderCommit: rf.commitIndex,
    }
    rf.unlock("replicateOneRound")

    reply := AppendEntriesReply{}
    if rf.sendAppendEntries(server, &request, &reply) {
        rf.lock("sendAppendEntries")

        if rf.currentTerm < reply.Term {
            ColorPrintf("[Term %d Server %d] replicate to %d fail due to a bigger term %d", rf.currentTerm, rf.me, server, reply.Term)
            rf.setState(FOLLOWER, reply.Term)
        }

        if rf.currentTerm == request.Term && rf.state == LEADER {
            if len(request.Entries) != 0 {
                if reply.Success {
                    // append rpc ok
                    rf.matchIndex[server] = request.Entries[len(request.Entries)-1].Index
                    rf.nextIndex[server] = rf.matchIndex[server] + 1
                    ColorPrintf("[Term %d Server %d] replicate to %d success, match index is %d", rf.currentTerm, rf.me, server, rf.matchIndex[server])

                    // update commitIndex
                    rf.tryUpdateCommitIndex()

                } else {
                    // decrement
                    rf.nextIndex[server] = reply.ConflictIndex
                    ColorPrintf("[Term %d Server %d] replicate to %d fail, decrement next index to %d", rf.currentTerm, rf.me, server, rf.nextIndex[server])
                }
            }
        }
        rf.unlock("sendAppendEntries")
    }

}

func (rf *Raft) tryUpdateCommitIndex() {
    match := make([]int, len(rf.peers))
    copy(match, rf.matchIndex)
    sort.Sort(sort.Reverse(sort.IntSlice(match)))
    N := match[len(rf.peers)/2]
    for N > rf.commitIndex {
        if rf.accessLog(N).Term == rf.currentTerm {
            ColorPrintf("[Term %d Server %d] commit index update to %d", rf.currentTerm, rf.me, N)
            rf.commitIndex = N
            break
        } else {
            N--
        }
    }
}

If i modify code of applyCh := make(chan ApplyMsg) to applyCh := make(chan ApplyMsg, 1000) in config.go, the test could be passed.

I want to know to make it works without modity code in config.go.

sakamoto
  • 21
  • 1

0 Answers0