0

Run the below program and run CTRL + C, the handle routine gets blocked as it is trying to send to a channel but the process routine has shutdown. What is a better concurrency design to solve this?

Edited the program to describe the problem applying the rules suggested here https://stackoverflow.com/a/66708290/4106031

package main

import (
    "context"
    "fmt"
    "os"
    "os/signal"
    "sync"
    "syscall"
    "time"
)

func process(ctx context.Context, c chan string) {
    fmt.Println("process: processing (select)")
    for {
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case i := <-c:
            fmt.Printf("process: received i: %v\n", i)
        }
    }
}

func handle(ctx context.Context, readChan <-chan string) {
    c := make(chan string, 1)
    wg := &sync.WaitGroup{}
    wg.Add(1)
    go func() {
        process(ctx, c)
        wg.Done()
    }()
    defer wg.Wait()

    for i := 0; ; i++ {
        select {
        case <-ctx.Done():
            fmt.Printf("handle: ctx done bye\n")
            return
        case i := <-readChan:
            fmt.Printf("handle: received: %v\n", i)
            fmt.Printf("handle: sending for processing: %v\n", i)
            // suppose huge time passes here
            // to cause the issue we want to happen
            // we want the process() to exit due to ctx
            // cancellation before send to it happens, this creates deadlock
            time.Sleep(5 * time.Second)
            // deadlock
            c <- i
        }
    }
}

func main() {
    wg := &sync.WaitGroup{}
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    readChan := make(chan string, 10)
    wg.Add(1)
    go func() {
        defer wg.Done()
        for i := 0; ; i++ {
            select {
            case <-ctx.Done():
                fmt.Printf("read: ctx done bye\n")
                return
            case readChan <- fmt.Sprintf("%d", i):
                fmt.Printf("read: sent msg: %v\n", i)
            }
        }
    }()

    wg.Add(1)
    go func() {
        handle(ctx, readChan)
        wg.Done()
    }()

    go func() {
        sigterm := make(chan os.Signal, 1)
        signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)
        select {
        case <-sigterm:
            fmt.Printf("SIGTERM signal received\n")
            cancel()
        }
    }()

    wg.Wait()
}

Output

$ go run chan-shared.go
read: sent msg: 0
read: sent msg: 1
read: sent msg: 2
read: sent msg: 3
process: processing (select)
read: sent msg: 4
read: sent msg: 5
read: sent msg: 6
handle: received: 0
handle: sending for processing: 0
read: sent msg: 7
read: sent msg: 8
read: sent msg: 9
read: sent msg: 10
handle: received: 1
handle: sending for processing: 1
read: sent msg: 11
process: received i: 0
process: received i: 1
read: sent msg: 12
handle: received: 2
handle: sending for processing: 2
^CSIGTERM signal received
process: ctx done bye
read: ctx done bye
handle: received: 3
handle: sending for processing: 3


Killed: 9
Alok Kumar Singh
  • 2,331
  • 3
  • 18
  • 37
  • cool will wait. – Alok Kumar Singh Mar 19 '21 at 12:26
  • are you done need to update the program? – Alok Kumar Singh Mar 19 '21 at 12:35
  • 1
    you deadlock because writes on c are not read by process if it has exited. That is why you should try write on c using a select with cases on both ctx and the write. –  Mar 19 '21 at 13:16
  • a write to a channel must have a corresponding read, or a default case to keep going. this is not a universal rule, but it is common pitfall for bginners. –  Mar 19 '21 at 13:19
  • I knew why it is happening and wanted to avoid context check on every write. I am writing thousands of data continuously to a channel. Won't this select cause slowness? Wanted to know if there is a better way around this? Some better concurreny design for this – Alok Kumar Singh Mar 19 '21 at 13:21
  • measure it. It is not serious to expect understanding the runtime behavior by reading the code. Too much optimizations are at stake. might be true, might be wrong, might depend subtle escape analysis, might depend runtime version –  Mar 19 '21 at 13:23
  • Cool thanks for the help – Alok Kumar Singh Mar 19 '21 at 13:26

2 Answers2

1

the step by step review

  • Always cancel context, whatever you think.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
  • Dont wd.Add after starting a routine
    wg.Add(1)
    go handle(ctx, wg)
  • Dont sparsely consume waitgroups
    wg.Add(1)
    go func() {
        handle(ctx)
        wg.Done()
    }()
  • dont for loop on a channel with a default case. Just read from it and let it unblocks
    <-sigterm
    fmt.Printf("SIGTERM signal received\n")
  • main never block on signals, main blocks on the processing routines. Signaling should just do signaling, ie cancel the context.
    go func() {
        sigterm := make(chan os.Signal, 1)
        signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)
        <-sigterm
        fmt.Printf("SIGTERM signal received\n")
        cancel()
    }()
  • It is possible to check for context cancellation on channel writes.
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case c <- fmt.Sprintf("%d", i):
            fmt.Printf("handled: sent to channel: %v\n", i)
        }
  • Dont time.Sleep, you can t test for context cancellation with it.
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case <-time.After(time.Second * 5):
        }

So a complete revised version of the code with those various rules applied gives us

package main

import (
    "context"
    "fmt"
    "os"
    "os/signal"
    "sync"
    "syscall"
    "time"
)

func process(ctx context.Context, c chan string) {
    fmt.Println("process: processing (select)")
    for {
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case msg := <-c:
            fmt.Printf("process: got msg: %v\n", msg)
        }
    }
}

func handle(ctx context.Context) {
    c := make(chan string, 3)
    wg := &sync.WaitGroup{}
    wg.Add(1)
    go func() {
        process(ctx, c)
        wg.Done()
    }()
    defer wg.Wait()

    for i := 0; ; i++ {
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case <-time.After(time.Second * 5):
        }
        select {
        case <-ctx.Done():
            fmt.Printf("process: ctx done bye\n")
            return
        case c <- fmt.Sprintf("%d", i):
            fmt.Printf("handled: sent to channel: %v\n", i)
        }
    }
}

func main() {
    wg := &sync.WaitGroup{}
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    wg.Add(1)
    go func() {
        handle(ctx)
        wg.Done()
    }()

    go func() {
        sigterm := make(chan os.Signal, 1)
        signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)
        <-sigterm
        fmt.Printf("SIGTERM signal received\n")
        cancel()
    }()
    wg.Wait()
}

There is more to tell about exit conditions, but this is dependent on the requirements.

  • 1
    Hey thanks for the rules. But I failed to explain my problem, that is why i was editing. Actually the `handler()` was reading from a channel and writing to other channel which process() was using. The deadlock happens when during this operation the process() exits. – Alok Kumar Singh Mar 19 '21 at 12:47
  • editing and updating applying the rules you suggested. or should i open a fresh question? – Alok Kumar Singh Mar 19 '21 at 12:49
  • 1
    just edit the question now. yes it does not explain the whole set of events that lead to your program not exiting. But there was too much to adjust that i did not deemed necessary to make that analysis. My suggestion is to first learn and apply idioms, then see if you still run into troubles, explain them. –  Mar 19 '21 at 12:54
  • Got your point, your suggestions are helpful. Editing right away. – Alok Kumar Singh Mar 19 '21 at 12:55
  • Edited, please check now. – Alok Kumar Singh Mar 19 '21 at 13:10
0

enter image description here

As mentioned https://stackoverflow.com/a/66708290/4106031 this change has fixed the issue for me. Thanks mh-cbon for the rules too!

Alok Kumar Singh
  • 2,331
  • 3
  • 18
  • 37