0

I am converting our code to use IOCP and I got the communication relatively stable, but the memory usage of the application is increasing. Looks like I am getting back (on completion function calls) much fewer objects of OverlappedEx than I create. My code is below. What am I doing wrong?

#ifndef NETWORK_DATA
#define NETWORK_DATA

#include <afxwin.h>
#include <vector>
#include <string>
#include "CriticalSectionLocker.h"

using namespace std;

DWORD NetworkManager::NetworkThread(void* param)
{
    bool bRun = true;


    while (bRun)
    {
        DWORD wait = ::WaitForSingleObject(CCommunicationManager::s_hShutdownEvent, 0);
        if (WAIT_OBJECT_0 == wait)
        {
            bRun = false;
            DEBUG_LOG0("Shutdown event was signalled thread");
        }
        else
        {
            DWORD dwBytesTransfered = 0;
            void* lpContext = nullptr;
            OVERLAPPED* pOverlapped = nullptr;

            BOOL bReturn = GetQueuedCompletionStatus(s_IOCompletionPort,
                                                    &dwBytesTransfered,
                                                    (LPDWORD)&lpContext,
                                                    &pOverlapped,
                                                    INFINITE);
            if (nullptr == lpContext)
            {
                DEBUG_LOG0("invalid context");
                /*continue;*/
            }
            else
            {
                if (bReturn && dwBytesTransfered > 0)
                {
                    OverlappedEx* data = reinterpret_cast<OverlappedEx*>(pOverlapped);
                    ServerData* networkData = reinterpret_cast<ServerData*>(lpContext);

                    if (networkData && data)
                    {                       
                        switch(data->m_opType)
                        {
                        case OverlappedEx::OP_READ:
                                /*DEBUG_LOG4("device name: %s bytes received: %d socket: %d handle: %d", 
                                    networkData->Name().c_str(), dwBytesTransfered, networkData->Socket(), networkData->Handle());*/
                                networkData->CompleteReceive(dwBytesTransfered, data);                              
                                break;
                            case OverlappedEx::OP_WRITE:
                                /*DEBUG_LOG4("device name: %s bytes sent: %d socket: %d handle: %d",
                                    networkData->Name().c_str(), dwBytesTransfered, networkData->Socket(), networkData->Handle());*/
                                networkData->CompleteSend(dwBytesTransfered, data);
                                break;
                        }
                    }
                }
                else
                {
                    /*DEBUG_LOG2("GetQueuedCompletionStatus failed: bReturn: %d dwBytesTransferred: %u", bReturn, dwBytesTransfered);*/
                }
            }
        }
    }
    return 0;
}


enum NetworkType
{
    UDP,
    TCP
};

struct OverlappedEx : public OVERLAPPED
{
    enum OperationType
    {
        OP_READ,
        OP_WRITE
    };  

    const static int MAX_PACKET_SIZE = 2048;
    WSABUF m_wBuf;
    char m_buffer[MAX_PACKET_SIZE];
    OperationType m_opType; 

    OverlappedEx()
    {       
        Clear();
        m_refCount = 1;
    }

    void AddRef()
    {
        ::InterlockedIncrement(&m_refCount);
    }

    void Release()
    {
        ::InterlockedDecrement(&m_refCount);
    }

    int Refcount() const
    {       
        return InterlockedExchangeAdd((unsigned long*)&m_refCount, 0UL);
    }

    ~OverlappedEx()
    {       
        Clear();        
    }

    void Clear()
    {
        memset(m_buffer, 0, MAX_PACKET_SIZE);
        m_wBuf.buf = m_buffer;
        m_wBuf.len = MAX_PACKET_SIZE;
        Internal = 0;
        InternalHigh = 0;
        Offset = 0;
        OffsetHigh = 0;
        hEvent = nullptr;
        m_opType = OP_READ;
    }

private:
    volatile LONG m_refCount;
};


class ServerData
{       
public:
    const static int MAX_REVEIVE_QUEUE_SIZE = 100;
    const static int MAX_PACKET_SIZE = 2048;
    const static int MAX_SEND_QUEUE_SIZE = 10;
    const static int MAX_RECEIVE_QUEUE_SIZE = 100;  
    const static int MAX_OVERLAPPED_STRUCTS = 20;   


    ServerData(NetworkType netType, const string& sName, CCommunicationManager::CommHandle handle,
                            SOCKET sock, HANDLE IOPort) :   
        m_sName(sName)
    {
        InitializeCriticalSection(&m_receiveQueLock);
        InitializeCriticalSection(&m_objectLock);
        m_Handle = handle;
        m_Socket = sock;
        m_nIPAddress = 0;           
        m_netType = netType;
        m_bEnabled = true;
        m_ovlpIndex = 0;

        for (int i = 0; i < MAX_OVERLAPPED_STRUCTS; ++i)
        {
            m_olps.push_back(new OverlappedEx);
        }

        /* Associate socket with completion handle */
        if (m_Socket != 0)
        {
            CreateIoCompletionPort( reinterpret_cast<HANDLE>(m_Socket), IOPort, reinterpret_cast<ULONG_PTR>(this), 0 );
        }
    }       

    ~ServerData()
    {           
        CriticalSectionLocker lock(&m_receiveQueLock);
        DeleteCriticalSection(&m_receiveQueLock);

        DeleteCriticalSection(&m_objectLock);
        closesocket(m_Socket);  
    }

    const string& Name() const { return m_sName; }
    bool Enabled() const { return m_bEnabled; }
    void SetEnabled(bool bEnabled)
    {
        m_bEnabled = bEnabled;
    }

    int Handle() const { return m_Handle; }
    void SetHandle(int handle)
    {
        m_Handle = handle;
    }

    unsigned long IPAddress() const { return m_nIPAddress; }

    SOCKET Socket() const
    {
        return m_Socket;
    }

    void SetSocket(SOCKET sock)
    {
        m_Socket = sock;
    }

    void SetIPAddress(unsigned long nIP)
    {
        m_nIPAddress = nIP;
    }       

    bool ValidTelegram(const vector<char>& telegram) const
    {
        return false;
    }

    OverlappedEx* GetBuffer()
    {
        OverlappedEx* ret = nullptr;

        if (!m_olps.empty())
        {
            ret = m_olps.front();
            m_olps.pop_front();
        }

        return ret;
    }


    void CompleteReceive(size_t numBytes, OverlappedEx* data)
    {       
        //DEBUG_LOG1("%d buffers are available", AvailableBufferCount());

        if (numBytes > 0)
        {   
            vector<char> v(data->m_buffer, data->m_buffer + numBytes);          
            ReceivedData rd;
            rd.SetData(v);
            EnqueReceiveMessage(rd);    
        }   

        data->Release();
        {
            CriticalSectionLocker lock(&m_objectLock);
            m_olps.push_back(data);
//          DEBUG_LOG1("Queue size: %d", m_olps.size());
        }

        StartReceiving();

    }

    void CompleteSend(size_t numBytes, OverlappedEx* data)
    {

        data->Release();    
        {
            CriticalSectionLocker lock(&m_objectLock);
            m_olps.push_back(data);
            //DEBUG_LOG1("Queue size: %d", m_olps.size());
        }

        //DEBUG_LOG2("Object: %s num sent: %d", Name().c_str(), numBytes);
    }

    void StartReceiving()
    {
        DWORD bytesRecv = 0;
        sockaddr_in senderAddr;
        DWORD flags = 0;
        int senderAddrSize = sizeof(senderAddr);
        int rc = 0;

        CriticalSectionLocker lock(&m_objectLock);
        auto olp = GetBuffer();
        if (!olp)
        {       
            if (...)
            {
                m_olps.push_back(new OverlappedEx);
                olp = GetBuffer();
            }
            else
            {
                if (...)
                {
                    DEBUG_LOG1("Name: %s ************* NO AVAILABLE BUFFERS - bailing ***************", Name().c_str());
                }
                return;
            }
        }

        olp->Clear();
        olp->m_opType = OverlappedEx::OP_READ;
        olp->AddRef();

        switch(GetNetworkType())
        {
        case UDP:
            {
                rc = WSARecvFrom(Socket(),
                                &olp->m_wBuf,
                                1,
                                &bytesRecv,
                                &flags,
                                (SOCKADDR *)&senderAddr,
                                &senderAddrSize, (OVERLAPPED*)olp, NULL);
            }
            break;
        case TCP:
            {
                rc = WSARecv(Socket(), 
                            &olp->m_wBuf,
                            1, 
                            &bytesRecv, 
                            &flags, 
                            (OVERLAPPED*)olp, NULL);
            }
            break;
        }

        if (SOCKET_ERROR == rc)
        {
            DWORD err = WSAGetLastError();
            if (err != WSA_IO_PENDING)
            {
                olp->Release();
                m_olps.push_back(olp);
            }
        }
    }


    void SetWriteBuf(const SendData& msg, OverlappedEx* data)
    {
        int len = min(msg.Data().size(), MAX_PACKET_SIZE);
        memcpy(data->m_buffer, &msg.Data()[0], len);
        data->m_wBuf.buf = data->m_buffer;
        data->m_wBuf.len = len;
    }

    void StartSending(const SendData& msg)
    {

        DEBUG_LOG1("device name: %s", Name().c_str());

        int rc = 0;
        DWORD bytesSent = 0;
        DWORD flags = 0;    
        SOCKET sock = Socket();

        int addrSize = sizeof(sockaddr_in);     

        CriticalSectionLocker lock(&m_objectLock);
        //UpdateOverlapped(OverlappedEx::OP_WRITE);

        auto olp = GetBuffer();
        if (!olp)
        {
            if (...)
            {
                m_olps.push_back(new OverlappedEx);
                olp = GetBuffer();
                DEBUG_LOG2("name: %s ************* NO AVAILABLE BUFFERS new size: %d ***************", Name().c_str(), m_olps.size());
            }
            else
            {
                if (...)
                {
                    DEBUG_LOG1("Name: %s ************* NO AVAILABLE BUFFERS - bailing ***************", Name().c_str());
                }
                return;
            }
        }

        olp->Clear();
        olp->m_opType = OverlappedEx::OP_WRITE;
        olp->AddRef();
        SetWriteBuf(msg, olp);

        switch(GetNetworkType())
        {
        case UDP:

            rc = WSASendTo(Socket(), &olp->m_wBuf, 1,
                            &bytesSent, flags, (sockaddr*)&msg.SendAddress(),
                            addrSize, (OVERLAPPED*)olp, NULL);
            break;
        case TCP:

            rc = WSASend(Socket(), &olp->m_wBuf, 1,
                        &bytesSent, flags, (OVERLAPPED*)olp, NULL);
            break;
        }

        if (SOCKET_ERROR == rc)
        {
            DWORD err = WSAGetLastError();
            if (err != WSA_IO_PENDING)
            {
                olp->Release();
                m_olps.push_back(olp);
            }
        }
    }

    size_t ReceiveQueueSize()
    {
        CriticalSectionLocker lock(&m_receiveQueLock);
        return m_receiveDataQueue.size();
    }

    void GetAllData(vector <ReceivedData> & data)
    {
        CriticalSectionLocker lock(&m_receiveQueLock);
        while (m_receiveDataQueue.size() > 0)
        {
            data.push_back(m_receiveDataQueue.front());
            m_receiveDataQueue.pop_front();
        }
    }

    void DequeReceiveMessage(ReceivedData& msg)
    {
        CriticalSectionLocker lock(&m_receiveQueLock);
        if (m_receiveDataQueue.size() > 0)
        {
            msg = m_receiveDataQueue.front();
            m_receiveDataQueue.pop_front();
        }
    }

    template <class T>
    void EnqueReceiveMessage(T&& data)
    {
        CriticalSectionLocker lock(&m_receiveQueLock);
        if (m_receiveDataQueue.size() <= MAX_RECEIVE_QUEUE_SIZE)
        {
            m_receiveDataQueue.push_back(data);
        }
        else
        {
            static int s_nLogCount = 0;
            if (s_nLogCount % 100 == 0)
            {
                DEBUG_LOG2("Max queue size was reached handle id: %d in %s", Handle(), Name().c_str());
            }
            s_nLogCount++;
        }
    }           

    NetworkType GetNetworkType() const
    {
        return m_netType;
    }   

private:        
    ServerData(const ServerData&);
    ServerData& operator=(const ServerData&);
private:
    bool m_bEnabled;                                                            //!< This member flags if this reciever is enabled for receiving incoming connections.
    int m_Handle;                                                   //!< This member holds the handle for this receiver.
    SOCKET m_Socket;                                                        //!< This member holds the socket information for this receiver.        
    unsigned long m_nIPAddress;                                             //!< This member holds an IP address the socket is bound to.
    deque < ReceivedData > m_receiveDataQueue;
    CRITICAL_SECTION m_receiveQueLock;  
    CRITICAL_SECTION m_objectLock;
    string m_sName; 
    NetworkType m_netType;
    deque<OverlappedEx*> m_olps;
    size_t m_ovlpIndex;
};


#endif
user4581301
  • 33,082
  • 7
  • 33
  • 54
MUXCAH
  • 205
  • 1
  • 6
  • You should first debug your code. Locate leak by instrumentating your code or use [UMDH] (https://learn.microsoft.com/en-us/windows-hardware/drivers/debugger/using-umdh-to-find-a-user-mode-memory-leak ). – lsalamon Jun 28 '17 at 18:45
  • Isalamon: I know what's going on roughly, every time that I am about to do an async send or receive I check whether the deque that contains my OverlappedEx objects has anything. If not I create a new object on the heap. When the completion routine comes back / or error occurs I reuse the object by sticking it back in that deque. – MUXCAH Jun 28 '17 at 18:49
  • Huge amount of code and still not a complete [mcve]. Off topic: `using namespace std;` is a bad, bad thing to do to people by putting it in a header. – user4581301 Jun 28 '17 at 19:01
  • @user4581301: It's still at the prototyping stage. – MUXCAH Jun 28 '17 at 19:06
  • also i can advice to you use rio (if you use win8+ only) - this give great advantage for udp – RbMm Jun 28 '17 at 23:05
  • @RbMm: Thanks, but our machines are Win7. We might be migrating to Win10 later this year. – MUXCAH Jun 28 '17 at 23:22
  • The code as posted silently leaks the `OverlappedEx` object whenever the dequeued packet reports that the I/O failed or processed zero bytes. Are you sure that isn't happening? I notice that when a receive operation completes successfully, a new one is *always* started - there doesn't seem to be any provision for a connection to ever be closed? – Harry Johnston Jun 29 '17 at 00:29
  • @HarryJohnston: Yes, I should handle the 0 bytes case as well. Regarding the connection being closed, I am still at the prototyping stage. On top of leaking memory in Release mode I get random crashes at address 0x00000000. I am tempted to try boost::asio :) – MUXCAH Jun 29 '17 at 14:53
  • @HarryJohnston We have mixed UDP and TCP comms, I am mainly interested in UDP at this point – MUXCAH Jun 29 '17 at 14:56
  • But are you sure that `NetworkThread` silently discarding completion packets isn't the cause of the memory leak? It is very unlikely that the IOCP packets are just disappearing. – Harry Johnston Jun 29 '17 at 21:31
  • I do see one pretty serious bug, resulting in undefined behavior: when you call WSARecvFrom, `&senderAddr` and `&senderAddrSize` point to local variables. When the I/O completes, the part of the stack where they used to live will get clobbered. At that point, anything could happen. – Harry Johnston Jun 29 '17 at 21:35
  • @HarryJohnston Good catch! I wouldn't even suspect that. (&senderAddr and &senderAddrSize) – MUXCAH Jun 30 '17 at 14:35
  • I decided to go with a circular buffer + reference counting. But I do need from time to time to reset the reference so that I can use the OverlappedEx object because by far not all objects are released (ref count decremented). And I think that I covered all the cases where I need to decrement the ref count. – MUXCAH Jun 30 '17 at 21:51

1 Answers1

1

your implementation of void Release() have no sense - you decrement m_refCount and so what ? must be

void Release()
{
    if (!InterlockedDecrement(&m_refCount)) delete this;
}

as result you never free OverlappedEx* data - this what i just view and this give memory leak.

also can advice - use WaitForSingleObject(CCommunicationManager::s_hShutdownEvent, 0); this is bad idea for detect shutdown. call only GetQueuedCompletionStatus and for shutdown call PostQueuedCompletionStatus(s_IOCompletionPort, 0, 0, 0) several times(number or threads listen on s_IOCompletionPort) and if thread view pOverlapped==0 - just exit.

use

OverlappedEx* data = static_cast<OverlappedEx*>(pOverlapped);

instead of reinterpret_cast

make ~OverlappedEx() private - it must not be direct called, only via Release

            olp->Release();
            m_olps.push_back(olp);

after you call Release() on object you must not it more access here, so or olp->Release() or m_olps.push_back(olp); but not both. this kill all logic of Release may be you need overwrite operator delete of OverlappedEx and inside it call m_olps.push_back(olp); and of course overwrite operator new too

again (OVERLAPPED*)olp - for what reinterpret_cast here ? because you inherit own struct from OVERLAPPED compiler auto do type cast here

RbMm
  • 31,280
  • 3
  • 35
  • 56
  • RbMm: Thanks a lot.Every time I get the object back when a completion routine is called I put it back in the deque. That's how I reuse them. – MUXCAH Jun 28 '17 at 18:51
  • RbMm: Why static cast? OverlappedEx derives from OVERLAPPED. – MUXCAH Jun 28 '17 at 18:53
  • 1
    @MUXCAH - exactly because this - and must be `static_cast` – RbMm Jun 28 '17 at 18:55
  • Isn't dynamic_cast used when downcasting? – MUXCAH Jun 28 '17 at 19:00
  • 2
    @MUXCAH - but for what `dynamic_cast` here ?! exactly `static_cast` is need and correct. in your current implementation address of `OVERLAPPED` and `OverlappedEx` is the same - so `reinterpret_cast` and `static_cast` give the same result. however if you change definition of `OverlappedEx` (say add another base class before `OVERLAPPED` or virtual methods) this will be breaked (`reinterpret_cast`) while `static_cast` will be correct worked – RbMm Jun 28 '17 at 19:04
  • 1
    @MUXCAH - so for clear say - in current binary implementation `OverlappedEx* data = reinterpret_cast(pOverlapped);` is wok correct. but from c++ rules - here need exactly `static_cast` (not reinterpret or dynamic) – RbMm Jun 28 '17 at 19:06