#define DEBUG_MODE
using OpenCVForUnity.CoreModule;
using OpenCVForUnity.DnnModule;
using OpenCVForUnity.ImgprocModule;
using OpenCVForUnity.ObjdetectModule;
using OpenCVForUnity.UnityUtils;
using OpenCVForUnity.UnityUtils.Helper;
using OpenCVForUnityExample;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using UnityEngine;
using UnityEngine.Video;
namespace Yoga
/// <summary>
/// Singleton component that owns the webcam helper, the YOLOv7 object detector and the
/// OpenPose keypoints model, and draws the latest detection box and pose skeleton onto
/// the camera frame shown on this GameObject's renderer.
/// </summary>
public class MotionCaptureManager : MonoSingleton<MotionCaptureManager>
// Source of camera frames; fetched from this GameObject in Init().
private WebCamTextureToMatHelper _webCamTextureToMatHelper;
private bool isInited = false;
// Display texture; created in OnWebCamTextureToMatHelperInitialized() and refreshed in Update().
private Texture2D texture;
// DNN network read from the OpenPose model path in LoadModels(); Update() treats null as "model not loaded".
private Net _net;
private Dictionary<ModelType, KeypointsModel> _models = new Dictionary<ModelType, KeypointsModel>();
// Keypoints model wrapping _net; used by GetEstimationBodyPoints().
private KeypointsModel _openPoseModel;
// Input size handed to the YOLOv7 detector constructor.
public int inpWidth = 416;
public int inpHeight = 416;
// Confidence and NMS thresholds handed to the YOLOv7 detector constructor.
public float confThreshold = 0.35f;
public float nmsThreshold = 0.6f;
// Not passed to the detector at the moment (the argument is commented out in LoadModels()).
public int topK = 1000;
public float SamplingRate = 0.3f;
// Latest detection; Update() reads entries 0, 1 and 2 as box, confidence and class.
private List<float[]> _voloResult = new List<float[]>();
// Latest pose keypoints; drawn as a skeleton in Update() and evaluated in EstimateAction().
private List<Point> _currPersonPoints = new List<Point>();
private YOLOv7ObjectDetector _objectDetector;
// Allocated with the camera frame size in OnWebCamTextureToMatHelperInitialized().
private Mat _bgrMat;
// Threshold passed to the keypoints model's estimate() call.
private double threshold = 0.5;
// Capture flag toggled by the Start/StopMotionCapture event handlers and checked in Update().
private bool _isOnCamCapture = false;
public bool IsOnCamCapture { get => _isOnCamCapture; internal set => _isOnCamCapture = value; }
public WebCamTextureToMatHelper WebCamTextureToMatHelper => _webCamTextureToMatHelper;
public List<Point> CurrPersonPoints { get => _currPersonPoints; set => _currPersonPoints = value; }
public List<float[]> VoloResult { get => _voloResult; set => _voloResult = value; }
// Latched result of the action check; set in EstimateAction() and reset in ScoreUpdate().
private bool _isCorrectAction = false;
// Clone of the most recent camera frame, taken in Update() before overlays are drawn.
private Mat _rgbaMat;
public Mat RgbaMat { get => _rgbaMat; set => _rgbaMat = value; }
/// <summary>
/// Registers this component's handlers for the motion-capture related events.
/// Each registration here has a matching removal in OnDisable().
/// </summary>
private void OnEnable()
EventManager.Instance.AddEventListener(YogaEventType.StartMotionCapture, OnStartMotionCapture);
EventManager.Instance.AddEventListener(YogaEventType.StopMotionCapture, OnStopMotionCapture);
EventManager.Instance.AddEventListener(YogaEventType.EstimateAction, EstimateAction);
EventManager.Instance.AddEventListener(YogaEventType.ScoreUpdate, ScoreUpdate);
EventManager.Instance.AddEventListener(YogaEventType.GetActionBasePoint, GetActionBasePoint);
/// <summary>
/// Removes every event handler that OnEnable() registered.
/// </summary>
// NOTE(review): this also removes the StartMotionCapture handler, so while the component
// is disabled that event no longer reaches OnStartMotionCapture() — confirm this is intended.
private void OnDisable()
EventManager.Instance.RemoveEventListener(YogaEventType.StartMotionCapture, OnStartMotionCapture);
EventManager.Instance.RemoveEventListener(YogaEventType.StopMotionCapture, OnStopMotionCapture);
EventManager.Instance.RemoveEventListener(YogaEventType.EstimateAction, EstimateAction);
EventManager.Instance.RemoveEventListener(YogaEventType.ScoreUpdate, ScoreUpdate);
EventManager.Instance.RemoveEventListener(YogaEventType.GetActionBasePoint, GetActionBasePoint);
/// <summary>
/// Fetches the webcam helper from this GameObject, shows the guide video for the current
/// yoga action, and hands the detector and pose model to the CV estimator.
/// </summary>
// NOTE(review): no call to LoadModels() is visible before _objectDetector and
// _openPoseModel are passed on below — confirm they are populated at this point.
public override void Init()
_webCamTextureToMatHelper = gameObject.GetComponent<WebCamTextureToMatHelper>();
// The guide clip is looked up in Resources by the current action's VideoPath.
var video = Resources.Load<VideoClip>(YogaManager.Instance.Action.VideoPath);
UIManager.Instance.ShowPanel<ActionGuideVideoPanel>(false, video);
CVEstimator.Instance.Init(_objectDetector, _openPoseModel);// initialize pose estimation
/// <summary>
/// Creates the YOLOv7 object detector from the public tuning fields and loads the OpenPose
/// network into a keypoints model configured from YogaConfig.
/// </summary>
private void LoadModels()
Utils.setDebugMode(true); // enable debug logging
_net = null;
// topK is currently not forwarded to the detector (argument commented out).
_objectDetector = new YOLOv7ObjectDetector(
new Size(inpWidth, inpHeight), confThreshold, nmsThreshold/*, topK*/);
var modelFilePath = Utils.getFilePath(YogaConfig.MODEL_PATHS[ModelType.OpenPose]);
// NOTE(review): verify that the network is not read when the path is empty.
if (string.IsNullOrEmpty(modelFilePath))
Debug.LogError("modelFilePath is empty. Please copy from “OpenCVForUnity/StreamingAssets/” to “Assets/StreamingAssets/” folder. ");
_net = Dnn.readNet(modelFilePath);
_openPoseModel = new KeypointsModel(_net);
// Input pre-processing parameters (scale, size, mean) come from YogaConfig.
_openPoseModel.setInputScale(new Scalar(YogaConfig.InScale));
_openPoseModel.setInputSize(new Size(YogaConfig.InWidth, YogaConfig.InHeight));
_openPoseModel.setInputMean(new Scalar(YogaConfig.InMean));
/// <summary>
/// Unity per-frame callback. When the webcam helper has delivered a new frame, converts it,
/// keeps a copy in <see cref="RgbaMat"/>, overlays the latest detection box and pose
/// skeleton, and uploads the result to the display texture.
/// </summary>
private void Update()
// Guards on the capture flag and on this GameObject's active state.
if (!_isOnCamCapture)
if (!transform.gameObject.activeSelf)
// Only process when the camera is playing and produced a new frame this Unity frame.
if (_webCamTextureToMatHelper.IsPlaying() && _webCamTextureToMatHelper.DidUpdateThisFrame())
Mat img = _webCamTextureToMatHelper.GetMat();
Imgproc.cvtColor(img, img, Imgproc.COLOR_BGR2RGB);
// Snapshot of the frame before any overlay is drawn; read by GetEstimationBodyPoints().
_rgbaMat = img.clone();
// A null network means the model was not loaded; tell the user on screen.
if (_net == null)
Imgproc.putText(img, "model file is not loaded.", new Point(5, img.rows() - 30), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255), 2, Imgproc.LINE_AA, false);
Imgproc.putText(img, "Please read console message.", new Point(5, img.rows() - 10), Imgproc.FONT_HERSHEY_SIMPLEX, 0.7, new Scalar(255, 255, 255), 2, Imgproc.LINE_AA, false);
// Entries 0, 1 and 2 (box, confidence, class) are all read below, so the list must hold
// at least three elements; the list can also be replaced through the public VoloResult
// setter, hence the null check (mirrors the _currPersonPoints guard).
if (_voloResult != null && _voloResult.Count >= 3)
DebugPrintObjectLayout(img, _voloResult[0], _voloResult[1], _voloResult[2]);
// Draw the skeleton: one line plus two joint markers per configured pose pair.
if (_currPersonPoints != null && _currPersonPoints.Count > 0)
List<Point> points = _currPersonPoints;
for (int i = 0; i < YogaConfig.POSE_PAIRS.GetLength(0); i++)
string partFrom = YogaConfig.POSE_PAIRS[i, 0];
string partTo = YogaConfig.POSE_PAIRS[i, 1];
int idFrom = YogaConfig.BODY_PARTS[partFrom];
int idTo = YogaConfig.BODY_PARTS[partTo];
// (-1, -1) is the marker for a keypoint that was not found.
if (points[idFrom] == new Point(-1, -1) || points[idTo] == new Point(-1, -1))
if (points[idFrom] != null && points[idTo] != null)
Imgproc.line(img, points[idFrom], points[idTo], new Scalar(0, 255, 0), 3);
Imgproc.ellipse(img, points[idFrom], new Size(3, 3), 0, 0, 360, new Scalar(0, 0, 255), Core.FILLED);
Imgproc.ellipse(img, points[idTo], new Size(3, 3), 0, 0, 360, new Scalar(0, 0, 255), Core.FILLED);
// Push the annotated frame to the texture shown by the renderer.
Utils.matToTexture2D(img, texture);
#region Event Func
/// <summary>
/// StartMotionCapture event handler: enables this component and sets the capture flag
/// that Update() checks.
/// </summary>
private void OnStartMotionCapture()
this.enabled = true;
_isOnCamCapture = true;
/// <summary>
/// StopMotionCapture event handler: disables this component and clears the capture flag.
/// </summary>
// NOTE(review): disabling the behaviour makes Unity call OnDisable(), which removes the
// StartMotionCapture listener as well — confirm capture can still be restarted via the event.
private void OnStopMotionCapture()
this.enabled = false;
_isOnCamCapture = false;
/// <summary>
/// GetActionBasePoint event handler: publishes the current person keypoints to
/// YogaManager.Instance.Points.
/// </summary>
// NOTE(review): declared async, but the only await is in the commented-out code below,
// so the visible body runs synchronously.
private async void GetActionBasePoint()
YogaManager.Instance.Points = _currPersonPoints;
//await Task.Run(() =>
// int i = 0;
// // make sure all keypoints required for this detection have been captured
// while (true)
// {
// YogaManager.Instance.Points = GetEstimationBodyPoints();
// _currPersonPoints = YogaManager.Instance.Points;
// if (YogaManager.Instance.ActionCheckPoints(YogaManager.Instance.Points))
// break;
// i++;
// Debug.LogWarning($"executed {i} times");
// }
/// <summary>
/// EstimateAction event handler: checks the current person keypoints against the requested
/// action and latches a positive result in _isCorrectAction.
/// </summary>
/// <param name="args">Event arguments; the first element is cast to AvatarAction.</param>
// NOTE(review): declared async, but the only await is in the commented-out code below,
// so the visible body runs synchronously.
private async void EstimateAction(params object[] args)
var type = args.FirstOrDefault();
if (type == null)
Debug.LogError("EstimateAction type is null");
AvatarAction actionType = (AvatarAction)args.FirstOrDefault();
//await Task.Run(() =>
// _personPoints?.Clear();
// int i = 0;
// while (true)
// {
// // action detection
// _personPoints = GetEstimationBodyPoints();
// if (YogaManager.Instance.ActionCheckPoints(_personPoints))
// break;
// i++;
// Debug.LogWarning($"executed {i} times");
// }
//if (_personPoints == null || _personPoints.Count == 0)
// Debug.LogWarning("EstimateAction _personPoints is null");
// return;
// Once the flag is true it stays true (and IsCorrectAction is short-circuited) until
// ScoreUpdate() resets it.
_isCorrectAction = (_isCorrectAction || YogaManager.Instance.IsCorrectAction(_currPersonPoints, actionType));
/// <summary>
/// Runs object detection on the most recent camera frame and then pose estimation on the
/// detected person region.
/// </summary>
/// <returns>
/// The keypoints produced by the pose model, with found points offset by the region's
/// origin; null when no frame, no detector, no valid detection, or no usable region exists.
/// </returns>
// NOTE(review): bgrMat, results and personRectImg are not disposed in the visible code.
private List<Point> GetEstimationBodyPoints()
Mat rgbaMat = _rgbaMat;
Mat bgrMat = new Mat();
if (rgbaMat == null)
Debug.LogWarning("WebCamTexture is null. ");
return null;
if (_objectDetector == null)
Debug.LogWarning("ObjectDetector is not ready. ");
return null;
// The detector is fed a BGR copy of the stored frame.
Imgproc.cvtColor(rgbaMat, bgrMat, Imgproc.COLOR_RGBA2BGR);
Mat results = _objectDetector.infer(bgrMat);
var voloResultBox = new List<float[]>();
bool hasValidObject = false;
// Walk the result rows from last to first; columns 0-3 hold the box, 4 the confidence, 5 the class.
for (int i = results.rows() - 1; i >= 0; --i)
float[] box = new float[4];
results.get(i, 0, box); // bounding box
float[] conf = new float[1];
results.get(i, 4, conf); // detection confidence
float[] cls = new float[1];
results.get(i, 5, cls); // class
if (!IsObjectValid(box, conf, cls, rgbaMat))
hasValidObject = true;
if (!hasValidObject) // no person detected
return null;
_voloResult = voloResultBox;
// Region of interest derived from the first stored box; the width and height are the
// absolute differences of its corner coordinates.
// NOTE(review): only the width and height expressions are visible here — confirm the x/y arguments.
OpenCVForUnity.CoreModule.Rect roiRect = new OpenCVForUnity.CoreModule.Rect(
Math.Abs((int)(voloResultBox[0][2] - voloResultBox[0][0])),
Math.Abs((int)(voloResultBox[0][3] - voloResultBox[0][1])));
// Reject regions whose row range would fall outside the frame.
if (roiRect.y < 0 || //0 <= _rowRange.start
(roiRect.y + roiRect.height) < roiRect.y || // _rowRange.start <= _rowRange.end
bgrMat.rows() < (roiRect.y + roiRect.height)) //_rowRange.end <= m.rows
return null;
List<Point> points = null;
Mat personRectImg = new Mat(bgrMat, roiRect);// extract the person region
points = _openPoseModel.estimate(personRectImg, (float)threshold).toList();
// Translate keypoints from region coordinates back to full-frame coordinates.
for (int j = 0; j < points.Count; j++)
if (points[j] == null ||
(points[j].x == -1 && points[j].y == -1)) // point not found; skip
points[j].x += roiRect.x;
points[j].y += roiRect.y;
// On failure the frame height and the region are logged; the exception itself is not.
catch (Exception e)
Debug.LogWarning("bgrMat:" + bgrMat.rows());
Debug.LogError("rect:" + roiRect);
return points;
/// <summary>
/// Decides whether a detection should be used for pose estimation.
/// </summary>
/// <param name="box">Detection box; elements 0 and 2 are read as horizontal coordinates.</param>
/// <param name="confidence">Single-element array holding the detection confidence.</param>
/// <param name="classID">Single-element array holding the detected class id.</param>
/// <param name="rgbaMat">Frame the detection was made on; only its width is read.</param>
/// <returns>False when the class id is not 0 or the confidence is below 0.8; otherwise true.</returns>
private bool IsObjectValid(float[] box, float[] confidence, float[] classID, Mat rgbaMat)
if ((int)classID[0] != 0 || confidence[0] < 0.8f) // only accept persons (class id 0) with confidence of at least 80%
return false;
// NOTE(review): width and centerX are computed but not used by the visible code.
float width = rgbaMat.width();
float centerX = (box[0] + box[2]) / 2;
return true;
/// <summary>
/// Draws one detection onto the image: a rectangle for the box and a filled label showing
/// the class name (when known) and the confidence.
/// </summary>
/// <param name="image">Image to draw on.</param>
/// <param name="box">Box as [left, top, right, bottom].</param>
/// <param name="conf">Single-element array holding the confidence shown in the label.</param>
/// <param name="cls">Single-element array holding the class id.</param>
/// <param name="isRGB">When false, channels 0 and 2 of the palette color are swapped before drawing.</param>
private void DebugPrintObjectLayout(Mat image, float[] box, float[] conf, float[] cls, bool isRGB = false)
float left = box[0];
float top = box[1];
float right = box[2];
float bottom = box[3];
int classId = (int)cls[0];
// The palette is indexed modulo its size, so any class id maps to a color.
Scalar c = _objectDetector.palette[classId % _objectDetector.palette.Count];
Scalar color = isRGB ? c : new Scalar(c.val[2], c.val[1], c.val[0], c.val[3]);
Imgproc.rectangle(image, new Point(left, top), new Point(right, bottom), color, 2);
// Label text: confidence with two decimals, prefixed by the class name when one exists.
string label = String.Format("{0:0.00}", conf[0]);
if (_objectDetector.classNames != null && _objectDetector.classNames.Count != 0)
if (classId < (int)_objectDetector.classNames.Count)
label = _objectDetector.classNames[classId] + " " + label;
int[] baseLine = new int[1];
Size labelSize = Imgproc.getTextSize(label, Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, 1, baseLine);
// Keep the label inside the image when the box touches the top edge.
top = Mathf.Max((float)top, (float)labelSize.height);
Imgproc.rectangle(image, new Point(left, top - labelSize.height),
new Point(left + labelSize.width, top + baseLine[0]), color, Core.FILLED);
Imgproc.putText(image, label, new Point(left, top), Imgproc.FONT_HERSHEY_SIMPLEX, 0.5, Scalar.all(255), 1, Imgproc.LINE_AA);
/// <summary>
/// Unity destroy callback; checks whether a DNN network is held in _net.
/// </summary>
// NOTE(review): confirm the network (and the other native resources this class owns)
// are released here.
void OnDestroy()
if (_net != null)
/// <summary>
/// Callback for when the webcam helper is ready: creates the display texture from the first
/// frame, assigns it to this GameObject's renderer, scales the object, adjusts the main
/// camera's orthographic size, and allocates the BGR working matrix.
/// </summary>
public void OnWebCamTextureToMatHelperInitialized()
Mat webCamTextureMat = _webCamTextureToMatHelper.GetMat();
texture = new Texture2D(webCamTextureMat.cols(), webCamTextureMat.rows(), TextureFormat.RGB24, false);
Utils.matToTexture2D(webCamTextureMat, texture);
this.gameObject.GetComponent<Renderer>().material.mainTexture = texture;
// NOTE(review): cols()/10 and rows()/10 are integer divisions, so any remainder is dropped
// before the values become floats.
gameObject.transform.localScale = new Vector3(webCamTextureMat.cols() / 10, webCamTextureMat.rows() / 10, 1);
Debug.Log("Screen.width " + Screen.width + " Screen.height " + Screen.height + " Screen.orientation " + Screen.orientation);
float width = webCamTextureMat.width();
float height = webCamTextureMat.height();
// Compare how much the frame must be scaled horizontally vs. vertically to fill the screen.
float widthScale = (float)Screen.width / width;
float heightScale = (float)Screen.height / height;
if (widthScale < heightScale)
Camera.main.orthographicSize = (width * (float)Screen.height / (float)Screen.width) / 2;
Camera.main.orthographicSize = height / 2;
// Three-channel 8-bit matrix with the same dimensions as the camera frame.
_bgrMat = new Mat(webCamTextureMat.rows(), webCamTextureMat.cols(), CvType.CV_8UC3);
//event call
/// <summary>
/// Callback for when the webcam helper is disposed: drops the reference to the display texture.
/// </summary>
// NOTE(review): confirm _bgrMat and the texture object are actually released, not just
// dereferenced.
public void OnWebCamTextureToMatHelperDisposed()
if (_bgrMat != null)
if (texture != null)
texture = null;
/// <summary>
/// ScoreUpdate event handler: consumes the latched action-check result and clears it so the
/// next scoring round starts from false.
/// </summary>
public void ScoreUpdate()
if (_isCorrectAction)
_isCorrectAction = false;// reset