There is a bug of my 6.824 raft implementation, it fails to pass TestSnapshotAllCrash2D, but passed all before.
the programe stucked while trying to apply
, just like:
...
[Term 1 Server 2] starting new command in 11 for cmd{2942042800837528893}.
[Term 1 Server 2] replicate to 1 success, match index is 10
[Term 1 Server 2] commit index update to 10
[Term 1 Server 2] replicate to 0 success, match index is 10
[Term 1 Server 1] update commit index to 10 from leader.
[Term 1 Server 2] replicate to 1 success, match index is 11
[Term 1 Server 0] update commit index to 10 from leader.
[Term 1 Server 2] commit index update to 11
[Term 1 Server 2] replicate to 0 success, match index is 11
[Term 1 Server 1] updating last applied from 1 to 10
[Term 1 Server 0] updating last applied from 1 to 10
[Term 1 Server 2] updating last applied from 1 to 11
The msg was sent to applyCh but not be received, i guess.
But i don't know why.Theoretically, each call of Start(cmd)
, there will be an operation for receiving the msg from applyCh.
Part of my implementation below.
// go applier() in Make()
func (rf *Raft) applier() {
// update last applied
for rf.killed() == false {
time.Sleep(cpuGap)
rf.lock("applier")
if rf.commitIndex > rf.lastApplied {
ColorPrintf("[Term %d Server %d] updating last applied from %d to %d", rf.currentTerm, rf.me, rf.lastApplied, rf.commitIndex)
}
for rf.commitIndex > rf.lastApplied {
rf.lastApplied++
rf.applyCh <- ApplyMsg{
CommandValid: true,
Command: rf.accessLog(rf.lastApplied).Command,
CommandIndex: rf.accessLog(rf.lastApplied).Index,
}
}
rf.unlock("applier")
}
}
// update commitIndex if ok.
// append entries rpc sender
func (rf *Raft) replicateOneRound(server int) {
rf.lock("replicateOneRound")
if rf.state != LEADER {
rf.unlock("replicateOneRound")
return
}
prevLogIndex := rf.nextIndex[server] - 1
// if last log index >= nextIndex for a follower: send rpc with log entries starting at nextIndex
var entries []Entry
if rf.lastLog().Index >= rf.nextIndex[server] {
entries = rf.log[rf.nextIndex[server]-rf.SnapshotIndex:]
} else {
entries = make([]Entry, 0)
}
request := AppendEntriesArgs{
Term: rf.currentTerm,
LeaderId: rf.me,
PrevLogIndex: prevLogIndex,
PrevLogTerm: rf.accessLog(prevLogIndex).Term,
Entries: entries,
LeaderCommit: rf.commitIndex,
}
rf.unlock("replicateOneRound")
reply := AppendEntriesReply{}
if rf.sendAppendEntries(server, &request, &reply) {
rf.lock("sendAppendEntries")
if rf.currentTerm < reply.Term {
ColorPrintf("[Term %d Server %d] replicate to %d fail due to a bigger term %d", rf.currentTerm, rf.me, server, reply.Term)
rf.setState(FOLLOWER, reply.Term)
}
if rf.currentTerm == request.Term && rf.state == LEADER {
if len(request.Entries) != 0 {
if reply.Success {
// append rpc ok
rf.matchIndex[server] = request.Entries[len(request.Entries)-1].Index
rf.nextIndex[server] = rf.matchIndex[server] + 1
ColorPrintf("[Term %d Server %d] replicate to %d success, match index is %d", rf.currentTerm, rf.me, server, rf.matchIndex[server])
// update commitIndex
rf.tryUpdateCommitIndex()
} else {
// decrement
rf.nextIndex[server] = reply.ConflictIndex
ColorPrintf("[Term %d Server %d] replicate to %d fail, decrement next index to %d", rf.currentTerm, rf.me, server, rf.nextIndex[server])
}
}
}
rf.unlock("sendAppendEntries")
}
}
func (rf *Raft) tryUpdateCommitIndex() {
match := make([]int, len(rf.peers))
copy(match, rf.matchIndex)
sort.Sort(sort.Reverse(sort.IntSlice(match)))
N := match[len(rf.peers)/2]
for N > rf.commitIndex {
if rf.accessLog(N).Term == rf.currentTerm {
ColorPrintf("[Term %d Server %d] commit index update to %d", rf.currentTerm, rf.me, N)
rf.commitIndex = N
break
} else {
N--
}
}
}
If i modify code of applyCh := make(chan ApplyMsg)
to applyCh := make(chan ApplyMsg, 1000)
in config.go
, the test could be passed.
I want to know to make it works without modity code in config.go
.