4

I would like to stream video from an ESP32-CAM to a web browser. To do so, I use a nodejs server (to broadcast the video and serve the HTML) and SocketIO for communication (ESP32-CAM -> nodejs and nodejs -> web browser). This avoids having multiple clients connect directly to the ESP32-CAM and avoids having to deal with NAT/router configuration. The server acts as a relay/repeater, not as a proxy.

I have succeeded in sending the video data (as base64-encoded JPEG) to nodejs and viewing it in the web browser.

Here is the code :

ESP32-CAM :


#include "WiFi.h"
#include "esp_camera.h"
#include "base64.h"

#include <ArduinoJson.h>
#include <WebSocketsClient.h>
#include <SocketIOclient.h>

// Pin definition for CAMERA_MODEL_AI_THINKER
// These GPIO numbers are copied into the camera_config_t fields in setupCamera().
// Power-down, reset, clock and control-bus pins (pin_pwdn, pin_reset, pin_xclk,
// pin_sscb_sda, pin_sscb_scl). NOTE(review): RESET is -1, presumably meaning
// "no reset line wired" on this board -- confirm against the camera driver docs.
#define PWDN_GPIO_NUM     32
#define RESET_GPIO_NUM    -1
#define XCLK_GPIO_NUM      0
#define SIOD_GPIO_NUM     26
#define SIOC_GPIO_NUM     27

// Data pins: setupCamera() maps Y2..Y9 onto pin_d0..pin_d7 respectively.
#define Y9_GPIO_NUM       35
#define Y8_GPIO_NUM       34
#define Y7_GPIO_NUM       39
#define Y6_GPIO_NUM       36
#define Y5_GPIO_NUM       21
#define Y4_GPIO_NUM       19
#define Y3_GPIO_NUM       18
#define Y2_GPIO_NUM        5
// Sync and pixel-clock pins (pin_vsync, pin_href, pin_pclk).
#define VSYNC_GPIO_NUM    25
#define HREF_GPIO_NUM     23
#define PCLK_GPIO_NUM     22


// Replace with your network credentials
// hostname is sent with every frame as the "hostname" field of the
// "jpgstream_server" event; ssid/password are passed to WiFi.begin() in setup().
const char* hostname = "ESP32CAM";
const char* ssid = "ssid";
const char* password = "pass";
// Socket.IO client used to push frames to the relay server (started in setup(),
// serviced in loop()).
SocketIOclient socketIO;


/**
 * Socket.IO event callback registered in setup().
 *
 * Logs every message type to Serial. On connect it additionally joins the
 * default namespace "/". Message types not listed here are ignored.
 *
 * @param type    kind of Socket.IO message received
 * @param payload message bytes (printed as a string for CONNECT and EVENT)
 * @param length  payload length in bytes (printed for the remaining types)
 */
void socketIOEvent(socketIOmessageType_t type, uint8_t * payload, size_t length) {
    if (type == sIOtype_DISCONNECT) {
        Serial.printf("[IOc] Disconnected!\n");
        return;
    }

    if (type == sIOtype_CONNECT) {
        Serial.printf("[IOc] Connected to url: %s\n", payload);

        // join default namespace (no auto join in Socket.IO V3)
        socketIO.send(sIOtype_CONNECT, "/");
        return;
    }

    if (type == sIOtype_EVENT) {
        Serial.printf("[IOc] get event: %s\n", payload);
        return;
    }

    // The remaining message types only report how many bytes arrived.
    if (type == sIOtype_ACK) {
        Serial.printf("[IOc] get ack: %u\n", length);
    } else if (type == sIOtype_ERROR) {
        Serial.printf("[IOc] get error: %u\n", length);
    } else if (type == sIOtype_BINARY_EVENT) {
        Serial.printf("[IOc] get binary: %u\n", length);
    } else if (type == sIOtype_BINARY_ACK) {
        Serial.printf("[IOc] get binary ack: %u\n", length);
    }
}

void setupCamera()
{

    camera_config_t config;
    config.ledc_channel = LEDC_CHANNEL_0;
    config.ledc_timer = LEDC_TIMER_0;
    config.pin_d0 = Y2_GPIO_NUM;
    config.pin_d1 = Y3_GPIO_NUM;
    config.pin_d2 = Y4_GPIO_NUM;
    config.pin_d3 = Y5_GPIO_NUM;
    config.pin_d4 = Y6_GPIO_NUM;
    config.pin_d5 = Y7_GPIO_NUM;
    config.pin_d6 = Y8_GPIO_NUM;
    config.pin_d7 = Y9_GPIO_NUM;
    config.pin_xclk = XCLK_GPIO_NUM;
    config.pin_pclk = PCLK_GPIO_NUM;
    config.pin_vsync = VSYNC_GPIO_NUM;
    config.pin_href = HREF_GPIO_NUM;
    config.pin_sscb_sda = SIOD_GPIO_NUM;
    config.pin_sscb_scl = SIOC_GPIO_NUM;
    config.pin_pwdn = PWDN_GPIO_NUM;
    config.pin_reset = RESET_GPIO_NUM;
    config.xclk_freq_hz = 20000000;
    config.pixel_format = PIXFORMAT_JPEG;
    
    config.frame_size = FRAMESIZE_CIF; // FRAMESIZE_ + QVGA|CIF|VGA|SVGA|XGA|SXGA|UXGA
    config.jpeg_quality = 10;
    config.fb_count = 2;
  
    // Init Camera
    esp_err_t err = esp_camera_init(&config);
    if (err != ESP_OK) {
      Serial.printf("Camera init failed with error 0x%x", err);
      return;
    }
  
  
}

/**
 * One-time start-up: opens the serial console, joins the Wi-Fi network,
 * initialises the camera and starts the Socket.IO client.
 */
void setup(){
  Serial.begin(115200);

  // Join the access point and poll once per second until associated.
  WiFi.begin(ssid, password);
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    delay(1000);
    Serial.println("Connecting to WiFi..");
  }

  // Report the address that was assigned to the board.
  Serial.println(WiFi.localIP());

  setupCamera();

  // Relay server address, port and Socket.IO endpoint.
  // Plain (non-SSL) connection for speed testing; may change later.
  socketIO.begin("server", port,"/socket.io/?EIO=4");

  // Route incoming Socket.IO messages to the handler.
  socketIO.onEvent(socketIOEvent);
}


unsigned long messageTimestamp = 0;
void loop() {
    socketIO.loop();

    uint64_t now = millis();

    if(now - messageTimestamp > 10) {
        messageTimestamp = now;

        camera_fb_t * fb = NULL;

        // Take Picture with Camera
        fb = esp_camera_fb_get();  
        if(!fb) {
          Serial.println("Camera capture failed");
          return;
        }
        
        //Slow
        String picture_encoded = base64::encode(fb->buf,fb->len);

        // create JSON message for Socket.IO (event)
        DynamicJsonDocument doc(15000);
        JsonArray array = doc.to<JsonArray>();
        
        // add event name
        // Hint: socket.on('event_name', ....
        array.add("jpgstream_server");

        // add payload (parameters) for the event
        JsonObject param1 = array.createNestedObject();
        param1["hostname"] = hostname;
        param1["picture"] = String((char *)fb->buf);

        // JSON to String (serializion)
        String output;
        serializeJson(doc, output);

        // Send event        
        socketIO.sendEVENT(output);
        Serial.println("Image sent");
        Serial.println(output);
        esp_camera_fb_return(fb); 
    }
}

nodejs :

// Relay server: serves the viewer page and forwards every camera frame it
// receives over Socket.IO to all sockets that joined the 'webusers' room.
const express = require('express');
const app = express();
const http = require('http').Server(app);
const io = require('socket.io')(http);
const port = 3000;

const express_config= require('./config/express.js');

express_config.init(app);

var cameraArray={};

// Viewer page.
function renderIndex(req, res) {
    res.render('index', {});
}

// Wire up the two events a connected socket may send.
function onConnection(socket) {
  // A camera pushed a frame: fan it out to every registered viewer.
  socket.on('jpgstream_server', function (frame) {
    io.to('webusers').emit('jpgstream_client', frame);
  });

  // A browser registered itself as a viewer.
  socket.on('webuser', function () {
      socket.join('webusers');
  });
}

app.get('/', renderIndex);

io.on('connection', onConnection);

http.listen(port, function () {
      console.log(`App listening at http://localhost:${port}`)
})

Web browser :

<!DOCTYPE html>
<html>
<%- include('./partials/head.ejs') %>
<body class="page_display">
    <div class="main_content">
        <div class="page_title"><h1 class="tcenter">Camera relay</h1></div>
        <div class="tcenter">
            <!-- Target of the stream: its src is replaced on every received frame. -->
            <img id="jpgstream" class="jpgstream" src="" />
        </div>
    </div>
    
    <script src="/socket.io/socket.io.js"></script>
    <script>
    var socket = io();
    
    // Register with the relay as a viewer so this socket receives frames.
    socket.emit("webuser",{});

    // Look the image up once, by id, instead of searching by tag on every frame.
    var streamImage = document.getElementById("jpgstream");

    // Each message carries one base64-encoded JPEG in msg.picture.
    // The frame is not logged: printing the whole base64 string to the
    // console for every frame slows the page down.
    socket.on('jpgstream_client', function(msg) {
        // "image/jpeg" is the registered MIME type for JPEG data.
        streamImage.setAttribute("src", 'data:image/jpeg;base64,'+msg.picture);
    });
    </script>
</body>
</html>

I don't expect the video to be smooth and clear because of the hardware limitations, but I don't even get 10 fps at a ridiculously low resolution. The bottleneck seems to be the base64 encoding. The ESP32-CAM webserver example is faster ( https://github.com/espressif/arduino-esp32/blob/master/libraries/ESP32/examples/Camera/CameraWebServer/CameraWebServer.ino ) but requires direct access to the ESP32-CAM.

Is there a way to optimise the base64 encoding, or another way to send the data through socketIO, to improve speed?

Inglebard
  • 427
  • 5
  • 14
  • JSON is not a good solution for streaming video. JSON is designed to transmit chunks of structured data. As you pointed out, encoding in base64 is a bit of a performance hit, so ... have you tried just not doing that and not embedding the frames in JSON? – romkey Mar 09 '21 at 17:07
  • @romkey Can I use socketIO without JSON ? Do you have examples ? – Inglebard Mar 09 '21 at 19:09
  • A great thing to do when you have a question like that is - write a simple program that tries to publish something that's not JSON through socketIO and see if it works. Or read the SocketIO documentation. – romkey Mar 09 '21 at 19:34

3 Answers3

2

I'm aware it's a little late, but I still want to share my solution to this problem: It is indeed possible to send images from an ESP32 camera via socketIO with a decent -- though not perfect -- performance.

I will focus on the OV2640 camera module, as it is used on the AI-Thinker board and is the most common one to find. When this camera takes an image, the image is processed according to the settings within the camera module (without wasting ESP32 processing time) and then written to the ESP32 buffer. At this point it is completely encoded (jpeg, uint8) and ready to go; all that is left to do is to pass that buffer to socketIO.

The socketIO protocol already offers the possibility to send binary data:

BINARY_EVENT packet

{ "type": 5, "nsp": "/", "data": ["hello", <Buffer 01 02 03>] }

is encoded to 51-["hello",{"_placeholder":true,"num":0}] + <Buffer 01 02 03>

However, the ESP32 implementation of the socketIO class in the chosen websocket library lacks this method. Luckily, it can be implemented easily. All you need to do is to send a properly formatted websocket text frame with a placeholder (which will be replaced by the binary data when parsed by the server), followed by the actual binary data in a second, separate frame:

// modified client for sending binaries
class SocketIOclientMod : public SocketIOclient {
    public:
    bool sendBIN(const uint8_t * payload, size_t length, bool headerToPayload = false);
};

// Text ("lead") frame that announces the binary attachment. It carries the
// event name "image", the hostname, and a placeholder under the key "pic".
// With hostname "ESP32_cam" the statements below build:
// 451-["image",{"hostname":"ESP32_cam","pic":{"_placeholder":true,"num":0}}]
// The fixed text is 65 characters, so the 100-byte buffer holds hostnames of
// up to 34 characters (plus the terminating NUL); strcpy/strcat do not check
// bounds, so a longer hostname would overflow it.
char binaryLeadFrame[100];
strcpy(binaryLeadFrame, "451-[\"image\",{\"hostname\":\"");
strcat(binaryLeadFrame, hostname);
strcat(binaryLeadFrame,"\",\"pic\":{\"_placeholder\":true,\"num\":0}}]");

// send text frame followed by binary frame
/**
 * Emits a Socket.IO binary event as two websocket frames: the text lead
 * frame held in binaryLeadFrame, then `payload` as a binary frame.
 *
 * @param payload         data for the binary frame; a null pointer is rejected
 * @param length          number of payload bytes; 0 falls back to strlen(),
 *                        which is only meaningful for NUL-terminated data
 * @param headerToPayload forwarded to sendFrame() for the binary frame only
 * @return true if both frames were sent, false otherwise
 */
bool SocketIOclientMod::sendBIN(uint8_t * payload, size_t length, bool headerToPayload) {
    // strlen() and sendFrame() would both dereference a null payload.
    if (payload == NULL) {
        return false;
    }
    if (length == 0) {
        length = strlen((const char *) payload);
    }

    // The lead frame is always sent with headerToPayload = false. That flag
    // tells sendFrame() the buffer starts with reserved header space, which
    // binaryLeadFrame does not have; forwarding `true` would corrupt the
    // start of the JSON text.
    if (!sendFrame(&_client, WSop_text, (uint8_t *) binaryLeadFrame,
                   strlen(binaryLeadFrame), true, false)) {
        return false;
    }

    // Only send the attachment if the announcement went out.
    return sendFrame(&_client, WSop_binary, payload, length, true, headerToPayload);
}

The server will parse this as an event "image"

{
  "hostname":"yourHostname",
  "pic":{
    "type":"Buffer",
    "data": [image data encoded as jpeg, uint8]
    }
}

On your server simply relay the data to your webusers and display the image for the webusers when received.

Now all that is left to do is to send the buffer:

SocketIOclientMod socketIO;

// use this whenever you want to send an image:
void sendImage(){
    camera_fb_t *fb = esp_camera_fb_get();
    socketIO.sendBIN(fb->buf,fb->len);
    esp_camera_fb_return(fb);
}

Just two more notes on the camera itself. The onboard antenna of the ESP32 is not ideal; if available, an external antenna should be used. It is also beneficial to avoid sending unnecessary pixels by using a window function if only parts of the image are of interest. Overall, the camera module is limited to 15 fps at the highest resolution (UXGA/SXGA), 30 fps at SVGA and 60 fps at CIF (see the specs of the chip). The rate that can be achieved in practice will be lower due to the limited WiFi rate and other processes running on the hardware, but 10 fps in okay-ish quality is certainly possible.

I'm aware that it's easier and less overhead to do the same without socketIO, but that's a choice not everyone has. So if someone else like me reads this, who has no choice but to use socketIO:

I hope this is of some help.

doomke
  • 36
  • 2
1

To be honest, none of the data transformations in the ESP32 (raw->base64->JSON->WebSockets) are an excellent choice for performance. But assuming you're correct in your diagnosis and using this base64 library, the problem probably comes from the fact that while the ESP32 core runs quite quickly (240MHz), all of its code and data come from the external SPI-connected Flash. As you can guess, fetching anything from there is slow. It has a Flash cache of 32KB, but the base64 encoding stuff very likely expires between consecutive frames.

First thing is to make sure that your cores and the SPI bus to Flash are running at maximum frequency (240MHz, 80MHz). No idea how it's done in Arduino-land, sorry. Under ESP-IDF it's done through idf.py menuconfig.

Secondly you can tweak the base64 library by moving the code and data from Flash to RAM. Move the encoding functions to instruction RAM by adding IRAM_ATTR. The character table is marked constexpr which makes the compiler place it into Flash. I suspect removing constexpr will load it into data RAM.

Tarmo
  • 3,728
  • 1
  • 8
  • 25
0

Based on @Tarmo's answer, SocketIO doesn't seem to be a good choice for performance.

Even though socketIO has binary support, a transformation still seems to be required.

I switched to a binary websocket and performance is a lot better.

Project example available here : https://github.com/Inglebard/esp32cam-relay

Inglebard
  • 427
  • 5
  • 14
  • How many fps you got with this solution? I actually want to do the same thing, just thinking about the right solution. – matez Jul 26 '22 at 17:56