4

I'm trying to make a face recognition app in Flutter. Most of the code is taken from here. That project used Firebase ML Vision (which is now deprecated), so I followed the migration guide to Google ML Kit and changed the face detection part of the code accordingly.

Following is the code for detect function:

// Runs ML Kit face detection on a single camera frame.
//
// NOTE(review): this is the version that produces the error described below.
// It passes only the first plane's bytes (`image.planes[0].bytes`) to
// `InputImage.fromBytes`, while the Android camera delivers the frame split
// across several planes whose bytes must be combined (see the fixed version
// later in this post).
// NOTE(review): a new FaceDetector is also allocated on every call and never
// closed, which leaks native resources when invoked per frame.
Future<List<Face>> detect(CameraImage image, InputImageRotation rotation) {

    final faceDetector = GoogleMlKit.vision.faceDetector(
      const FaceDetectorOptions(
        mode: FaceDetectorMode.accurate,
        enableLandmarks: true,
      ), 
    );
    // `rotation` is derived from the camera sensor orientation so the
    // detector sees the image upright.
    return  faceDetector.processImage(
      InputImage.fromBytes(
        // BUG: only plane 0 is passed — see note above.
        bytes: image.planes[0].bytes,
        inputImageData:InputImageData(
          inputImageFormat:InputImageFormatMethods.fromRawValue(image.format.raw)!,
          size: Size(image.width.toDouble(), image.height.toDouble()),
          imageRotation: rotation,
          // Per-plane stride/size metadata required by ML Kit on Android.
          planeData: image.planes.map(
            (Plane plane) {
              return InputImagePlaneMetadata(
                bytesPerRow: plane.bytesPerRow,
                height: plane.height,
                width: plane.width,
              );
            },
          ).toList(),
        ),
      ),
    );
  }

When I call this function, I get the following error (the error screenshot/link was lost from the original post). I'm unable to figure out where I'm doing something wrong. Here's the `_initializeCamera` function (the detect function is called inside it):

// Sets up the camera, loads the recognition model and any stored face
// embeddings, then starts the image stream that feeds frames into
// face detection.
void _initializeCamera() async {
    
    CameraDescription description = await getCamera(_direction);

    // Convert the sensor orientation (an int, in degrees) into the
    // rotation enum ML Kit expects.
    InputImageRotation rotation = rotationIntToImageRotation(
      description.sensorOrientation,
    );


      _camera =
        CameraController(description, ResolutionPreset.ultraHigh, enableAudio: false);
  
    await _camera!.initialize();
    await loadModel();
    //await Future.delayed(const Duration(milliseconds: 500));
    // Previously saved embeddings live in emb.json under the app's
    // documents directory; load them if the file exists.
    tempDir = await getApplicationDocumentsDirectory();
    String _embPath = tempDir!.path + '/emb.json';
    jsonFile =  File(_embPath);
    if (jsonFile!.existsSync()) data = json.decode(jsonFile!.readAsStringSync());

    _camera!.startImageStream((CameraImage image)async {
      if (_camera != null) {
        // _isDetecting is a re-entrancy guard: frames that arrive while the
        // previous one is still being processed are dropped.
        if (_isDetecting) {
          return;
        }
        _isDetecting = true; 
        String res;
        dynamic finalResult = Multimap<String, Face>();
        List<Face> faces = await detect(image, rotation);  <------------------ Detect Function

        if (faces.isEmpty) {
          _faceFound = false;
        } else {
          _faceFound = true;
        }
        Face _face;
        imglib.Image convertedImage =
            _convertCameraImage(image, _direction);
        for (_face in faces) {
          // Expand the detected bounding box before cropping.
          // NOTE(review): the origin shifts by -10 but width/height grow by
          // only +10, so the padding is asymmetric — confirm this is
          // intentional.
          double x, y, w, h;
          x = (_face.boundingBox.left - 10);
          y = (_face.boundingBox.top - 10);
          w = (_face.boundingBox.width + 10);
          h = (_face.boundingBox.height + 10);
          imglib.Image croppedImage = imglib.copyCrop(
              convertedImage, x.round(), y.round(), w.round(), h.round());
          // Resize the crop to a 112x112 square before recognition.
          croppedImage = imglib.copyResizeCropSquare(croppedImage, 112);
          // int startTime = new DateTime.now().millisecondsSinceEpoch;
          res = _recog(croppedImage);
          // int endTime = new DateTime.now().millisecondsSinceEpoch;
          // print("Inference took ${endTime - startTime}ms");
          finalResult.add(res, _face);
        }
        setState(() {
          _scanResults = finalResult;
        });
        _isDetecting = false;
      }
    });
  }

EDIT: I finally got the solution

The following "detect" function solved the problem for me:

// Runs ML Kit face detection on a single camera frame.
//
// [image] is a raw frame from the camera image stream; [rotation] is the
// sensor orientation converted to ML Kit's rotation enum. Returns the list
// of detected faces (empty when none are found).
Future<List<Face>> detect(CameraImage image, InputImageRotation rotation) async {

  final faceDetector = GoogleMlKit.vision.faceDetector(
    const FaceDetectorOptions(
      mode: FaceDetectorMode.accurate,
      enableLandmarks: true,
    ), 
  );

  // ML Kit expects the bytes of *all* planes concatenated into one buffer
  // (on Android the camera splits the frame across several planes);
  // passing only the first plane is what caused the original error.
  final WriteBuffer allBytes = WriteBuffer();
  for (final Plane plane in image.planes) {
    allBytes.putUint8List(plane.bytes);
  }
  final bytes = allBytes.done().buffer.asUint8List();

  final Size imageSize =
      Size(image.width.toDouble(), image.height.toDouble());

  // Fall back to NV21 (the common Android camera format) when the raw
  // format value is not recognised, instead of crashing on a null.
  final inputImageFormat =
      InputImageFormatMethods.fromRawValue(image.format.raw) ??
          InputImageFormat.NV21;

  // Per-plane stride/size metadata required by ML Kit on Android.
  final planeData = image.planes.map(
    (Plane plane) {
      return InputImagePlaneMetadata(
        bytesPerRow: plane.bytesPerRow,
        height: plane.height,
        width: plane.width,
      );
    },
  ).toList();

  final inputImageData = InputImageData(
    size: imageSize,
    imageRotation: rotation,
    inputImageFormat: inputImageFormat,
    planeData: planeData,
  );

  try {
    return await faceDetector.processImage(
      InputImage.fromBytes(
        bytes: bytes,
        inputImageData: inputImageData,
      ),
    );
  } finally {
    // A detector is created on every call; close it so the underlying
    // native resources are released instead of leaking one per frame.
    await faceDetector.close();
  }
}

Shah Raza
  • 118
  • 9

3 Answers

0

The problem is in this function

faceDetector.processImage(
      InputImage.fromBytes(
        bytes: image.planes[0].bytes,
        inputImageData:InputImageData(
          inputImageFormat:InputImageFormatMethods.fromRawValue(image.format.raw)!,
          size: Size(image.width.toDouble(), image.height.toDouble()),
          imageRotation: rotation,
          planeData: image.planes.map(
            (Plane plane) {
              return InputImagePlaneMetadata(
                bytesPerRow: plane.bytesPerRow,
                height: plane.height,
                width: plane.width,
              );
            },
          ).toList(),
        ),
      ),

The solution is, instead of taking the bytes of only the first plane (`image.planes[0].bytes`), to combine the bytes from all planes, like this:

faceDetector.processImage(
      InputImage.fromBytes(
        bytes: Uint8List.fromList(
        image.planes.fold(
            <int>[],
            (List<int> previousValue, element) =>
                previousValue..addAll(element.bytes)),
        ),
        inputImageData:InputImageData(
          inputImageFormat:InputImageFormatMethods.fromRawValue(image.format.raw)!,
          size: Size(image.width.toDouble(), image.height.toDouble()),
          imageRotation: rotation,
          planeData: image.planes.map(
            (Plane plane) {
              return InputImagePlaneMetadata(
                bytesPerRow: plane.bytesPerRow,
                height: plane.height,
                width: plane.width,
              );
            },
          ).toList(),
        ),
      ),

I think this is because of the difference in how iOS and Android format `CameraImage`. On Android, a `CameraImage` has multiple planes, all of which carry byte data, so we have to combine them all. I am not sure how it works on iOS.

0

The answer from @mumboFromAvnotaklu worked for me and should be accepted as the answer. Below I have just updated the code to work with the latest versions of the Google ML Kit.

// Fragment (excerpted from a larger function) updated for newer
// google_ml_kit versions, where InputImageFormatValue replaces
// InputImageFormatMethods.
if (image.planes.isNotEmpty) {
  // There are usually a few planes per image, potentially worth looking
  // at some sort of best from provided planes solution

  // Frame metadata ML Kit needs to interpret the raw bytes.
  // NOTE(review): the rotation is hard-coded to rotation90deg here rather
  // than derived from the camera sensor orientation — confirm this matches
  // the target device.
  InputImageData iid = InputImageData(
    inputImageFormat: InputImageFormatValue.fromRawValue(image.format.raw)!,
    size: Size(image.width.toDouble(), image.height.toDouble()),
    imageRotation: InputImageRotation.rotation90deg,
    planeData: image.planes
        .map((Plane plane) => InputImagePlaneMetadata(
              bytesPerRow: plane.bytesPerRow,
              height: plane.height,
              width: plane.width,
            ))
        .toList(),
  );

  // Concatenate every plane's bytes into a single buffer, as ML Kit
  // expects on Android.
  Uint8List bytes = Uint8List.fromList(
    image.planes.fold(<int>[], (List<int> previousValue, element) => previousValue..addAll(element.bytes)),
  );

  return InputImage.fromBytes(
    bytes: bytes,
    inputImageData: iid,
  );
}
0

Even the OP's solution didn't work for me; I finally found a different solution.

First, change the dependency from `google_ml_kit` to the face-detection-specific library so that this works:

google_mlkit_face_detection: ^0.0.1

I am only including what code needs to be changed.

// Builds the ML Kit InputImage for one camera frame and runs face detection.
// NOTE(review): the original snippet constructed `_inputImage` and then
// ignored it, calling processImage with an undefined `inputImageData`
// identifier — the return at the bottom now uses the prepared `_inputImage`.
InputImageData _inputImageData = InputImageData(
  imageRotation:
      _cameraService.cameraRotation ?? InputImageRotation.Rotation_0deg,
  // Fall back to NV21 (the common Android camera format) when the raw
  // format value is not recognised.
  inputImageFormat:
      InputImageFormatMethods.fromRawValue(image.format.raw) ??
          InputImageFormat.NV21,
  // Width is taken from bytesPerRow (the row stride) rather than
  // image.width, to account for padded rows in the camera buffer.
  size:
      Size(image.planes[0].bytesPerRow.toDouble(), image.height.toDouble()),
  planeData: image.planes.map(
    (Plane plane) {
      return InputImagePlaneMetadata(
        bytesPerRow: plane.bytesPerRow,
        height: image.height,
        width: image.width,
      );
    },
  ).toList(),
);

// Concatenate every plane's bytes into a single buffer, as ML Kit
// expects on Android.
final WriteBuffer allBytes = WriteBuffer();
for (Plane plane in image.planes) {
    allBytes.putUint8List(plane.bytes);
}
final bytes = allBytes.done().buffer.asUint8List();

InputImage _inputImage = InputImage.fromBytes(
    bytes: bytes,
    inputImageData: _inputImageData,
);

return faceDetector.processImage(_inputImage);

For more information, here is a link to the forum thread that gave me this solution: Click here