Prev Tutorial: Android Development with OpenCV

Next Tutorial: Use OpenCL in Android camera preview based CV application

See also: Deep Neural Networks (dnn module)


Original author	Dmitry Kurtaev
Compatibility	OpenCV >= 4.9

Introduction

In this tutorial you'll learn how to run deep learning networks on an Android device using the OpenCV deep learning module. This tutorial was written for Android Studio 2022.2.1.

Requirements

Download and install Android Studio from https://developer.android.com/studio.
Get the latest pre-built OpenCV for Android release from https://github.com/opencv/opencv/releases and unpack it (for example, opencv-4.X.Y-android-sdk.zip, minimum version 4.9 is required).
Download the MobileNet object detection model from https://github.com/chuanqi305/MobileNet-SSD. The configuration file deploy.prototxt and the model weights mobilenet_iter_73000.caffemodel are required.

Create an empty Android Studio project and add OpenCV dependency

Use Android Development with OpenCV tutorial to initialize your project and add OpenCV.

Make an app

Our sample takes pictures from a camera, forwards them into a deep network and receives a set of rectangles, class identifiers and confidence values in range [0, 1].

First of all, we need to add a necessary widget which displays processed frames. Modify app/src/main/res/layout/activity_main.xml:
<?xml version="1.0" encoding="utf-8"?>
<!-- Main activity layout: a single container that matches its parent's size. -->
<FrameLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context="org.opencv.samples.opencv_mobilenet.MainActivity">
<!-- OpenCV camera view; MainActivity looks it up by the id CameraView. -->
<org.opencv.android.JavaCameraView
android:id="@+id/CameraView"
android:layout_width="match_parent"
android:layout_height="match_parent"
android:visibility="visible" />
</FrameLayout>
Modify app/src/main/AndroidManifest.xml to enable full-screen mode, set up the correct screen orientation and allow the application to use a camera:
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<application
android:label="@string/app_name">

<!-- Launcher activity of the sample, fixed to landscape orientation. -->
<activity
android:exported="true"
android:name=".MainActivity"
android:screenOrientation="landscape"> 
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>

<!-- Permission needed to access the camera. -->
<uses-permission android:name="android.permission.CAMERA"/>
<!-- Camera hardware features are declared as optional (required="false"). -->
<uses-feature android:name="android.hardware.camera" android:required="false"/>
<uses-feature android:name="android.hardware.camera.autofocus" android:required="false"/>
<uses-feature android:name="android.hardware.camera.front" android:required="false"/>
<uses-feature android:name="android.hardware.camera.front.autofocus" android:required="false"/>
</manifest>
Replace content of app/src/main/java/com/example/myapplication/MainActivity.java and set a custom package name if necessary:

package com.example.myapplication;

import android.content.Context;
import android.content.res.AssetManager;
import android.os.Bundle;
import android.util.Log;
import android.widget.Toast;
import org.opencv.android.CameraActivity;
import org.opencv.android.CameraBridgeViewBase;
import org.opencv.android.CameraBridgeViewBase.CvCameraViewFrame;
import org.opencv.android.CameraBridgeViewBase.CvCameraViewListener2;
import org.opencv.android.OpenCVLoader;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfByte;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.dnn.Net;
import org.opencv.dnn.Dnn;
import org.opencv.imgproc.Imgproc;
import java.io.InputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
/**
 * Sample activity that feeds camera frames to a Caffe network through the
 * OpenCV DNN module and draws the reported detections (box, class name and
 * confidence) on top of each frame.
 */
public class MainActivity extends CameraActivity implements CvCameraViewListener2 {
    private static final String TAG = "OpenCV-MobileNet";

    /** Class labels, indexed by the class id reported by the network. */
    private static final String[] classNames = {"background",
            "aeroplane", "bicycle", "bird", "boat",
            "bottle", "bus", "car", "cat", "chair",
            "cow", "diningtable", "dog", "horse",
            "motorbike", "person", "pottedplant",
            "sheep", "sofa", "train", "tvmonitor"};

    // Size of the network input and the normalization applied by blobFromImage.
    private static final int IN_WIDTH = 300;
    private static final int IN_HEIGHT = 300;
    private static final double IN_SCALE_FACTOR = 0.007843;
    private static final double MEAN_VAL = 127.5;
    /** Detections whose confidence does not exceed this value are not drawn. */
    private static final double THRESHOLD = 0.2;
    /** Number of values the network reports per detection. */
    private static final int VALUES_PER_DETECTION = 7;

    private MatOfByte            mConfigBuffer;
    private MatOfByte            mModelBuffer;
    private Net                  net;
    private CameraBridgeViewBase mOpenCvCameraView;

    /**
     * Initializes OpenCV, loads the network from the raw resources and sets up
     * the camera view. If OpenCV or the model files cannot be loaded, the
     * method returns early and the camera view is never created.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        if (OpenCVLoader.initLocal()) {
            Log.i(TAG, "OpenCV loaded successfully");
        } else {
            Log.e(TAG, "OpenCV initialization failed!");
            Toast.makeText(this, "OpenCV initialization failed!", Toast.LENGTH_LONG).show();
            return;
        }

        // Load a network.
        mModelBuffer = loadFileFromResource(R.raw.mobilenet_iter_73000);
        mConfigBuffer = loadFileFromResource(R.raw.deploy);
        if (mModelBuffer == null || mConfigBuffer == null) {
            Log.e(TAG, "Failed to load model from resources");
            // Without both buffers the network cannot be created, so stop here
            // instead of passing null to Dnn.readNet.
            return;
        }
        Log.i(TAG, "Model files loaded successfully");
        net = Dnn.readNet("caffe", mModelBuffer, mConfigBuffer);
        Log.i(TAG, "Network loaded successfully");

        setContentView(R.layout.activity_main);

        // Set up camera listener.
        mOpenCvCameraView = (CameraBridgeViewBase)findViewById(R.id.CameraView);
        mOpenCvCameraView.setVisibility(CameraBridgeViewBase.VISIBLE);
        mOpenCvCameraView.setCvCameraViewListener(this);
    }

    /** Enables the camera view, if it has been created. */
    @Override
    public void onResume() {
        super.onResume();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.enableView();
        }
    }

    /** Disables the camera view, if it has been created. */
    @Override
    public void onPause() {
        super.onPause();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.disableView();
        }
    }

    /** Returns the single camera view used by this activity (may hold null before setup). */
    @Override
    protected List<? extends CameraBridgeViewBase> getCameraViewList() {
        return Collections.singletonList(mOpenCvCameraView);
    }

    /** Disables the camera view and releases the buffers holding the model files. */
    @Override
    public void onDestroy() {
        super.onDestroy();
        if (mOpenCvCameraView != null) {
            mOpenCvCameraView.disableView();
        }
        // The buffers stay null when onCreate returned early, so guard the release.
        if (mModelBuffer != null) {
            mModelBuffer.release();
        }
        if (mConfigBuffer != null) {
            mConfigBuffer.release();
        }
    }

    @Override
    public void onCameraViewStarted(int width, int height) {
    }

    /**
     * Runs the network on one camera frame and draws every detection whose
     * confidence exceeds {@link #THRESHOLD}.
     *
     * @param inputFrame frame delivered by the camera view
     * @return the frame converted to RGB with the detections drawn on it
     */
    @Override
    public Mat onCameraFrame(CvCameraViewFrame inputFrame) {
        // Get a new frame
        Log.d(TAG, "handle new frame!");
        Mat frame = inputFrame.rgba();
        Imgproc.cvtColor(frame, frame, Imgproc.COLOR_RGBA2RGB);

        // Forward image through network.
        Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                new Size(IN_WIDTH, IN_HEIGHT),
                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
        net.setInput(blob);
        Mat rawDetections = net.forward();

        int cols = frame.cols();
        int rows = frame.rows();
        // View the output as one row of VALUES_PER_DETECTION values per detection.
        Mat detections = rawDetections.reshape(1,
                (int)rawDetections.total() / VALUES_PER_DETECTION);
        for (int i = 0; i < detections.rows(); ++i) {
            double confidence = detections.get(i, 2)[0];
            if (confidence > THRESHOLD) {
                int classId = (int)detections.get(i, 1)[0];
                // Box coordinates are multiplied by the frame size to get pixels.
                int left   = (int)(detections.get(i, 3)[0] * cols);
                int top    = (int)(detections.get(i, 4)[0] * rows);
                int right  = (int)(detections.get(i, 5)[0] * cols);
                int bottom = (int)(detections.get(i, 6)[0] * rows);
                // Draw rectangle around detected object.
                Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
                                  new Scalar(0, 255, 0));
                // Guard the lookup: an id outside the table must not crash the camera thread.
                String className = (classId >= 0 && classId < classNames.length)
                        ? classNames[classId] : "class " + classId;
                String label = className + ": " + confidence;
                int[] baseLine = new int[1];
                Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
                // Draw background for label.
                Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
                                  new Point(left + labelSize.width, top + baseLine[0]),
                                  new Scalar(255, 255, 255), Imgproc.FILLED);
                // Write class name and confidence.
                Imgproc.putText(frame, label, new Point(left, top),
                        Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
            }
        }

        // Release the per-frame Mats explicitly instead of waiting for the garbage collector.
        detections.release();
        rawDetections.release();
        blob.release();
        return frame;
    }

    @Override
    public void onCameraViewStopped() {}

    /**
     * Reads a raw resource completely into memory.
     *
     * @param id identifier of the raw resource to read (for example {@code R.raw.deploy})
     * @return the resource content wrapped in a {@link MatOfByte}, or {@code null}
     *         if reading failed (the error is logged and shown in a toast)
     */
    private MatOfByte loadFileFromResource(int id) {
        byte[] buffer;
        // try-with-resources closes the stream even when reading fails.
        try (InputStream is = getResources().openRawResource(id)) {
            // available() is taken as the resource size, as the sample always did.
            // NOTE(review): assumes it reports the full length for raw resources - confirm.
            buffer = new byte[is.available()];
            int offset = 0;
            // A single read() may deliver fewer bytes than requested, so read until full.
            while (offset < buffer.length) {
                int bytesRead = is.read(buffer, offset, buffer.length - offset);
                if (bytesRead < 0) {
                    throw new IOException("Unexpected end of resource after " + offset
                            + " of " + buffer.length + " bytes");
                }
                offset += bytesRead;
            }
        } catch (IOException e) {
            Log.e(TAG, "Failed to load model file from resources! Exception thrown: " + e, e);
            Toast.makeText(this, "Failed to load model file from resources!", Toast.LENGTH_LONG).show();
            return null;
        }
        return new MatOfByte(buffer);
    }
}

Put the downloaded deploy.prototxt and mobilenet_iter_73000.caffemodel into the app/src/main/res/raw folder. The OpenCV DNN module is mainly designed to load ML and DNN models from files. Modern Android does not allow this without extra permissions, but provides a Java API to load bytes from resources. The sample uses an alternative DNN API that initializes a model from an in-memory buffer rather than a file. The following function reads a model file from resources and converts it to a MatOfByte object (an analog of std::vector<char> in the C++ world) suitable for the OpenCV Java API:

    /**
     * Reads a raw resource completely into memory.
     *
     * @param id identifier of the raw resource to read (for example {@code R.raw.deploy})
     * @return the resource content wrapped in a {@link MatOfByte}, or {@code null}
     *         if reading failed (the error is logged and shown in a toast)
     */
    private MatOfByte loadFileFromResource(int id) {
        byte[] buffer;
        // try-with-resources closes the stream even when reading fails.
        try (InputStream is = getResources().openRawResource(id)) {
            // available() is taken as the resource size, as the sample always did.
            // NOTE(review): assumes it reports the full length for raw resources - confirm.
            buffer = new byte[is.available()];
            int offset = 0;
            // A single read() may deliver fewer bytes than requested, so read until full.
            while (offset < buffer.length) {
                int bytesRead = is.read(buffer, offset, buffer.length - offset);
                if (bytesRead < 0) {
                    throw new IOException("Unexpected end of resource after " + offset
                            + " of " + buffer.length + " bytes");
                }
                offset += bytesRead;
            }
        } catch (IOException e) {
            Log.e(TAG, "Failed to load model file from resources! Exception thrown: " + e, e);
            Toast.makeText(this, "Failed to load model file from resources!", Toast.LENGTH_LONG).show();
            return null;
        }
        return new MatOfByte(buffer);
    }

And then the network initialization is done with the following lines:

        // Read the model weights and the network configuration from res/raw.
        mModelBuffer = loadFileFromResource(R.raw.mobilenet_iter_73000);
        mConfigBuffer = loadFileFromResource(R.raw.deploy);
        if (mModelBuffer == null || mConfigBuffer == null) {
            Log.e(TAG, "Failed to load model from resources");
            // Without both buffers the network cannot be created, so stop here
            // instead of passing null to Dnn.readNet.
            return;
        }
        Log.i(TAG, "Model files loaded successfully");
        // Build the network from the in-memory Caffe model and configuration.
        net = Dnn.readNet("caffe", mModelBuffer, mConfigBuffer);
        Log.i(TAG, "Network loaded successfully");

Take a look at how the DNN model input is prepared and how the inference result is interpreted:

        // Convert the frame to the network input tensor and run inference.
        Mat blob = Dnn.blobFromImage(frame, IN_SCALE_FACTOR,
                new Size(IN_WIDTH, IN_HEIGHT),
                new Scalar(MEAN_VAL, MEAN_VAL, MEAN_VAL), /*swapRB*/false, /*crop*/false);
        net.setInput(blob);
        Mat rawDetections = net.forward();
        int cols = frame.cols();
        int rows = frame.rows();
        // View the output as one row of 7 values per detection.
        Mat detections = rawDetections.reshape(1, (int)rawDetections.total() / 7);
        for (int i = 0; i < detections.rows(); ++i) {
            double confidence = detections.get(i, 2)[0];
            if (confidence > THRESHOLD) {
                int classId = (int)detections.get(i, 1)[0];
                // Box coordinates are multiplied by the frame size to get pixels.
                int left   = (int)(detections.get(i, 3)[0] * cols);
                int top    = (int)(detections.get(i, 4)[0] * rows);
                int right  = (int)(detections.get(i, 5)[0] * cols);
                int bottom = (int)(detections.get(i, 6)[0] * rows);
                // Draw rectangle around detected object.
                Imgproc.rectangle(frame, new Point(left, top), new Point(right, bottom),
                                  new Scalar(0, 255, 0));
                // Guard the lookup: an id outside the table must not crash the camera thread.
                String className = (classId >= 0 && classId < classNames.length)
                        ? classNames[classId] : "class " + classId;
                String label = className + ": " + confidence;
                int[] baseLine = new int[1];
                Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
                // Draw background for label.
                Imgproc.rectangle(frame, new Point(left, top - labelSize.height),
                                  new Point(left + labelSize.width, top + baseLine[0]),
                                  new Scalar(255, 255, 255), Imgproc.FILLED);
                // Write class name and confidence.
                Imgproc.putText(frame, label, new Point(left, top),
                        Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, new Scalar(0, 0, 0));
            }
        }
        // Release the per-frame Mats explicitly instead of waiting for the garbage collector.
        detections.release();
        rawDetections.release();
        blob.release();

Dnn.blobFromImage converts the camera frame to the neural network input tensor. Resizing and statistical normalization are applied. Each row of the network output tensor contains seven values describing one detected object, in the following order: an image id (not used by the sample), class id, confidence in range [0, 1], then left, top, right, bottom box coordinates. All coordinates are in range [0, 1] and should be scaled to the image size before rendering.

Launch the application and have fun!