diff --git a/Assets/Images/SignLanguage.mp4 b/Assets/Images/SignLanguage.mp4 deleted file mode 100644 index 95f33aa..0000000 --- a/Assets/Images/SignLanguage.mp4 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6fec61cbe98e8a8612e67d7d7653df1030095ad7da0a26e4dbee1c672e350757 -size 10142038 diff --git a/Assets/Images/sample.mp4 b/Assets/Images/sample.mp4 new file mode 100644 index 0000000..4c34fda --- /dev/null +++ b/Assets/Images/sample.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49273e2e6cad7dcd4e94c591fdb9cc1900734bd50e80c402800336a149f4368 +size 4118908 diff --git a/Assets/Images/SignLanguage.mp4.meta b/Assets/Images/sample.mp4.meta similarity index 88% rename from Assets/Images/SignLanguage.mp4.meta rename to Assets/Images/sample.mp4.meta index bf64e07..2e022b7 100644 --- a/Assets/Images/SignLanguage.mp4.meta +++ b/Assets/Images/sample.mp4.meta @@ -1,5 +1,5 @@ fileFormatVersion: 2 -guid: fe097d08b4bc3d547819e723bcc49b71 +guid: c15773ede1129c044a1cdcd7fe466cc7 VideoClipImporter: externalObjects: {} serializedVersion: 2 diff --git a/Assets/Scenes/Sample.unity b/Assets/Scenes/Sample.unity index a4053ff..5546fd7 100644 --- a/Assets/Scenes/Sample.unity +++ b/Assets/Scenes/Sample.unity @@ -409,7 +409,6 @@ MonoBehaviour: faceLineTemplateMesh: {fileID: 4300000, guid: 4d1b450bb6d281641af6c72e8b37cfcb, type: 2} handShader: {fileID: 4800000, guid: 1b792b8e71d565242ae238f771a1db59, type: 3} handScoreThreshold: 0.5 - holisticResource: {fileID: 11400000, guid: 3d7456c7320edba4caea63df7db64fb7, type: 2} holisticInferenceType: 0 --- !u!114 &1716695272 MonoBehaviour: @@ -448,7 +447,7 @@ VideoPlayer: m_PrefabAsset: {fileID: 0} m_GameObject: {fileID: 1716695270} m_Enabled: 1 - m_VideoClip: {fileID: 32900000, guid: fe097d08b4bc3d547819e723bcc49b71, type: 3} + m_VideoClip: {fileID: 32900000, guid: c15773ede1129c044a1cdcd7fe466cc7, type: 3} m_TargetCameraAlpha: 1 m_TargetCamera3DLayout: 0 m_TargetCamera: {fileID: 
0} diff --git a/Assets/Scripts/Visuallizer.cs b/Assets/Scripts/Visuallizer.cs index 1e67f89..051631f 100644 --- a/Assets/Scripts/Visuallizer.cs +++ b/Assets/Scripts/Visuallizer.cs @@ -15,8 +15,6 @@ public class Visuallizer : MonoBehaviour [SerializeField] Mesh faceLineTemplateMesh; [SerializeField] Shader handShader; [SerializeField, Range(0, 1)] float handScoreThreshold = 0.5f; - // Set "Packages/HolisticBarracuda/ResourceSet/Holistic.asset" on the Unity Editor. - [SerializeField] HolisticResource holisticResource; // Select inference type with pull down on the Unity Editor. [SerializeField] HolisticInferenceType holisticInferenceType = HolisticInferenceType.full; @@ -42,7 +40,7 @@ public class Visuallizer : MonoBehaviour void Start() { // Make instance of HolisticPipeline - holisticPipeline = new HolisticPipeline(holisticResource); + holisticPipeline = new HolisticPipeline(); poseMaterial = new Material(poseShader); faceMeshMaterial = new Material(faceShader); diff --git a/Packages/HolisticBarracuda/CHANGELOG.md b/Packages/HolisticBarracuda/CHANGELOG.md index 09c9b33..1482ae2 100644 --- a/Packages/HolisticBarracuda/CHANGELOG.md +++ b/Packages/HolisticBarracuda/CHANGELOG.md @@ -1,2 +1,10 @@ +### [1.1.0] - 2022-09-25 +- Update BlazePose version +- Fixed an issue where estimation was not performed correctly when the color space was Linear. +- Automatically load `HolisticResource` asset data. The constructor arguments are not required. +- Add new methods (`GetPoseLandmark`, `GetPoseWorldLandmark`, `GetFaceLandmark`, `GetLeftEyeLandmark`, `GetRightEyeLandmark`, `GetLeftHandLandmark` and `GetRightHandLandmark`) for accessing data with CPU (C#). +- Add detection score variables (`faceDetectionScore`, `leftHandDetectionScore` and `rightHandDetectionScore`) +- Improve the stability of hand estimation. + ### [1.0.1] - 2021-10-06 This is the first release of `creativeIKEP/HolisticBarracuda`(`jp.ikep.mediapipe.holistic`). 
\ No newline at end of file diff --git a/Packages/HolisticBarracuda/ComputeShader/CommonProcess.compute b/Packages/HolisticBarracuda/ComputeShader/CommonProcess.compute index 2444feb..5819f9b 100644 --- a/Packages/HolisticBarracuda/ComputeShader/CommonProcess.compute +++ b/Packages/HolisticBarracuda/ComputeShader/CommonProcess.compute @@ -1,6 +1,8 @@ #pragma kernel LetterBoxImage +#include "UnityCG.cginc" +uint _isLinerColorSpace; float2 _spadScale; uint _letterboxWidth; sampler2D _letterboxInput; @@ -28,5 +30,7 @@ void LetterBoxImage(uint2 id : SV_DispatchThreadID) // Bounding rgb *= all(uv > 0) && all (uv < 1); + if(_isLinerColorSpace) rgb = LinearToGammaSpace(rgb); + _letterboxTexture[id] = float4(rgb, 1); } diff --git a/Packages/HolisticBarracuda/ComputeShader/HandProcess.compute b/Packages/HolisticBarracuda/ComputeShader/HandProcess.compute index 6ce10dc..862b0ed 100644 --- a/Packages/HolisticBarracuda/ComputeShader/HandProcess.compute +++ b/Packages/HolisticBarracuda/ComputeShader/HandProcess.compute @@ -11,24 +11,35 @@ #define HAND_KEYPOINT_COUNT 21 -uint _detectionCount; +ByteAddressBuffer _detectionCount; float _regionDetectDt; +StructuredBuffer _poseInput; StructuredBuffer _palmDetections; RWStructuredBuffer _handsRegionFromPalm; [numthreads(1, 1, 1)] void HandRegionDetectFromPalm(uint id : SV_DispatchThreadID) { - HandRegion pRegion0 = _handsRegionFromPalm[0]; - HandRegion pRegion1 = _handsRegionFromPalm[1]; + uint detectionCount = _detectionCount.Load(0); + if(detectionCount == 0) return; + + HandRegion pRegionL = _handsRegionFromPalm[0]; + HandRegion pRegionR = _handsRegionFromPalm[1]; - for(uint i = 0; i<_detectionCount; i++){ + float4 index_l = _poseInput[19]; + float4 pinky_l = _poseInput[17]; + float2 boundingBoxCenter_l = (2.0f * index_l.xy + pinky_l.xy) / 3.0f; + float4 index_r = _poseInput[20]; + float4 pinky_r = _poseInput[18]; + float2 boundingBoxCenter_r = (2.0f * index_r.xy + pinky_r.xy) / 3.0f; + + for(uint i = 0; i < detectionCount; i++){ 
// Input from the palm detection model const PalmDetection palm = _palmDetections[i]; // Palm region size (squarified and enlarged bounding box) - float size = max(palm.extent.x, palm.extent.y) * 3; + float size = max(palm.extent.x, palm.extent.y) * 3.05f; // Palm angle const float2 up = palm.keyPoints[3] - palm.keyPoints[0]; @@ -36,11 +47,12 @@ void HandRegionDetectFromPalm(uint id : SV_DispatchThreadID) const float4 box = float4(palm.center, size, angle); - float distance0 = distance(box.xy, pRegion0.box.xy); - float distance1 = distance(box.xy, pRegion1.box.xy); + float distanceFromL = distance(box.xy, boundingBoxCenter_l.xy); + float distanceFromR = distance(box.xy, boundingBoxCenter_r.xy); - HandRegion region = pRegion0; - if(distance0 > distance1) region = pRegion1; + uint isRight = distanceFromL > distanceFromR; + HandRegion region = pRegionL; + if(isRight) region = pRegionR; // Low pass filter parameters and input vector const float3 lpf_params = float3(2, 1.5f, _regionDetectDt); @@ -52,18 +64,17 @@ void HandRegionDetectFromPalm(uint id : SV_DispatchThreadID) float4x4 m2 = makeScalingMatrix(region.box.z); float4x4 m3 = makeTranslationMatrix(0.5); float4x4 m4 = makeRotationMatrix(region.box.w); - float4x4 m5 = makeTranslationMatrix(-0.5 + float2(0, 0.15)); + float4x4 m5 = makeTranslationMatrix(-0.5 + float2(0, 0.05f)); region.cropMatrix = mul(mul(mul(mul(m1, m2), m3), m4), m5); // Compute buffer update - _handsRegionFromPalm[i] = region; + _handsRegionFromPalm[isRight] = region; } } int _isRight; float _bboxDt; -StructuredBuffer _poseInput; RWStructuredBuffer _bboxRegion; [numthreads(1,1,1)] @@ -79,7 +90,7 @@ void HandRegionDetectFromPose(uint3 id : SV_DispatchThreadID) // bounding box float2 boundingBoxCenter = middle; - float boundingBoxSize = boxSize * 2.0f; + float boundingBoxSize = boxSize * 1.5f; // bounding box angle float target = PI * 0.5; diff --git a/Packages/HolisticBarracuda/Documentation~/HolisticBarracuda.md 
b/Packages/HolisticBarracuda/Documentation~/HolisticBarracuda.md index 8c2a461..7cb1217 100644 --- a/Packages/HolisticBarracuda/Documentation~/HolisticBarracuda.md +++ b/Packages/HolisticBarracuda/Documentation~/HolisticBarracuda.md @@ -26,7 +26,7 @@ To the `scopedRegistries` section: ``` To the `dependencies` section: ``` -"jp.ikep.mediapipe.holistic": "1.0.1" +"jp.ikep.mediapipe.holistic": "1.1.0" ``` Finally, the manifest file looks like below: ``` @@ -44,7 +44,7 @@ Finally, the manifest file looks like below: } ], "dependencies": { - "jp.ikep.mediapipe.holistic": "1.0.1", + "jp.ikep.mediapipe.holistic": "1.1.0", ... } } diff --git a/Packages/HolisticBarracuda/README.md b/Packages/HolisticBarracuda/README.md index eaaf5f5..220fd81 100644 --- a/Packages/HolisticBarracuda/README.md +++ b/Packages/HolisticBarracuda/README.md @@ -1,5 +1,23 @@ # HolisticBarracuda -![demo](https://user-images.githubusercontent.com/34697515/136178988-9a6c37cb-09a2-43e4-9f05-f8c4908b8665.gif) +`full` + +![full](https://user-images.githubusercontent.com/34697515/192131544-97d0aedb-bd4b-477c-a367-4c7f22f8f7cb.gif) + +`pose_and_face` + +![pose_and_face](https://user-images.githubusercontent.com/34697515/192131548-66a26715-cc9d-4a1c-a391-3ecd0d648f02.gif) + +`pose_and_hand` + +![pose_and_hand](https://user-images.githubusercontent.com/34697515/192131549-b5929bd0-de56-4938-9cb2-a816987a639b.gif) + +`pose_only` + +![pose_only](https://user-images.githubusercontent.com/34697515/192131552-6b2948a5-93f2-47b4-bd45-d11bffe5a58c.gif) + +`face_only` + +![face_only](https://user-images.githubusercontent.com/34697515/192131531-2b46cfb9-d6b8-4668-81a6-93d6e4595b3f.gif) **HolisticBarracuda** is the Unity Package that simultaneously estimates 33 pose, 21 per-hand, and 468 facial landmarks with a monocular color camera only. 
@@ -37,7 +55,7 @@ To the `scopedRegistries` section: ``` To the `dependencies` section: ``` -"jp.ikep.mediapipe.holistic": "1.0.1" +"jp.ikep.mediapipe.holistic": "1.1.0" ``` Finally, the manifest file looks like below: ``` @@ -55,7 +73,7 @@ Finally, the manifest file looks like below: } ], "dependencies": { - "jp.ikep.mediapipe.holistic": "1.0.1", + "jp.ikep.mediapipe.holistic": "1.1.0", ... } } @@ -67,7 +85,12 @@ This repository has the demo that inference pose, face and hands landmarks, and Check a Unity [scene](https://github.com/creativeIKEP/HolisticBarracuda/blob/main/Assets/Scenes/Sample.unity), [scripts](https://github.com/creativeIKEP/HolisticBarracuda/tree/main/Assets/Scripts) and [shaders](https://github.com/creativeIKEP/HolisticBarracuda/tree/main/Assets/Shaders) in the ["/Assets"](https://github.com/creativeIKEP/HolisticBarracuda/tree/main/Assets) directory. ## Demo image -Videos for demoe scene (["/Assets/Scenes/Sample.unity"](https://github.com/creativeIKEP/HolisticBarracuda/blob/main/Assets/Scenes/Sample.unity)) was downloaded from [here](https://www.pexels.com/ja-jp/video/7559286/). +Videos for the demo were downloaded from [Pexels](https://www.pexels.com/ja-jp/). 
+- https://www.pexels.com/ja-jp/video/5089491/ +- https://www.pexels.com/ja-jp/video/4492700/ +- https://www.pexels.com/ja-jp/video/8627747/ +- https://www.pexels.com/ja-jp/video/2795750/ +- https://www.pexels.com/ja-jp/video/6985340/ ## Author [IKEP](https://ikep.jp) diff --git a/Packages/HolisticBarracuda/ResourceSet.meta b/Packages/HolisticBarracuda/Resources.meta similarity index 100% rename from Packages/HolisticBarracuda/ResourceSet.meta rename to Packages/HolisticBarracuda/Resources.meta diff --git a/Packages/HolisticBarracuda/ResourceSet/Holistic.asset b/Packages/HolisticBarracuda/Resources/Holistic.asset similarity index 100% rename from Packages/HolisticBarracuda/ResourceSet/Holistic.asset rename to Packages/HolisticBarracuda/Resources/Holistic.asset diff --git a/Packages/HolisticBarracuda/ResourceSet/Holistic.asset.meta b/Packages/HolisticBarracuda/Resources/Holistic.asset.meta similarity index 100% rename from Packages/HolisticBarracuda/ResourceSet/Holistic.asset.meta rename to Packages/HolisticBarracuda/Resources/Holistic.asset.meta diff --git a/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Process.cs b/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Process.cs index 3af8476..3de6b52 100644 --- a/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Process.cs +++ b/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Process.cs @@ -14,6 +14,7 @@ partial class FacePipeline FaceRegion _faceRegion = new FaceRegion(); EyeRegion _leyeRegion = new EyeRegion(); EyeRegion _reyeRegion = new EyeRegion(true); + float _faceDetectionScore; // Vertex retrieval from the face landmark detector float4 GetFaceVertex(int index) @@ -26,7 +27,8 @@ void RunPipeline(Texture input) // Cancel if the face detection score is too low. 
var face = _faceDetector.Detections.FirstOrDefault(); - if (face.score < 0.5f) return; + _faceDetectionScore = face.score; + if (_faceDetectionScore < 0.5f) return; // Try updating the face region with the detection result. It's // actually updated only when there is a noticeable jump from the last diff --git a/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Public.cs b/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Public.cs index d2125bd..71ec3b9 100644 --- a/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Public.cs +++ b/Packages/HolisticBarracuda/Script/FaceMesh/Script/FacePipeline_Public.cs @@ -51,6 +51,9 @@ public float4x4 RightEyeCropMatrix #endregion + public float FaceDetectionScore + => _faceDetectionScore; + #region Public methods public FacePipeline(ResourceSet resources) diff --git a/Packages/HolisticBarracuda/Script/FaceMesh/Shader/Preprocess.shader b/Packages/HolisticBarracuda/Script/FaceMesh/Shader/Preprocess.shader index 4efb957..c4cf618 100644 --- a/Packages/HolisticBarracuda/Script/FaceMesh/Shader/Preprocess.shader +++ b/Packages/HolisticBarracuda/Script/FaceMesh/Shader/Preprocess.shader @@ -16,7 +16,15 @@ Shader "Hidden/MediaPipe/FaceMesh/Preprocess" float2 uv : TEXCOORD0) : SV_Target { uv = mul(_Xform, float4(uv, 0, 1)).xy; - return tex2D(_MainTex, uv); + float4 color = tex2D(_MainTex, uv); + + // The Shader will display the appropriate colors even in the linear color space, + // so the color representation will be wrong, but we will convert it for better estimation accuracy. 
+ #if !UNITY_COLORSPACE_GAMMA + color.rgb = LinearToGammaSpace(color.rgb); + #endif + + return color; } ENDCG diff --git a/Packages/HolisticBarracuda/Script/HolisticPipeline.cs b/Packages/HolisticBarracuda/Script/HolisticPipeline.cs index 160a156..1151291 100644 --- a/Packages/HolisticBarracuda/Script/HolisticPipeline.cs +++ b/Packages/HolisticBarracuda/Script/HolisticPipeline.cs @@ -1,4 +1,5 @@ using UnityEngine; +using UnityEngine.Rendering; using Mediapipe.BlazePose; using MediaPipe.FaceMesh; using MediaPipe.FaceLandmark; @@ -44,6 +45,7 @@ 33 index data is the score whether human pose is visible ([0, 1]). This data is // Count of face landmarks vertices. public int faceVertexCount => FaceLandmarkDetector.VertexCount; + public float faceDetectionScore => facePipeline.FaceDetectionScore; /* Face landmark result buffer. 'faceVertexBuffer' is array of float4 type. @@ -72,6 +74,8 @@ Eye landmark result buffer. // Count of hand landmarks vertices. public int handVertexCount => HandLandmarkDetector.VertexCount; + public float leftHandDetectionScore; + public float rightHandDetectionScore; /* Hand landmark result buffer. 0~20 index datas are hand landmark. @@ -104,22 +108,27 @@ This data is (score, handedness, 0, 0). PalmDetector palmDetector; HandLandmarkDetector handLandmarkDetector; RenderTexture letterBoxTexture; + const float handFallbackThreshold = 0.1f; ComputeBuffer handsRegionFromPalm; ComputeBuffer leftHandRegionFromPose; ComputeBuffer rightHandRegionFromPose; ComputeBuffer handCropBuffer; ComputeBuffer deltaLeftHandVertexBuffer; ComputeBuffer deltaRightHandVertexBuffer; + // Array of landmarks for accessing data with CPU (C#). 
+ Vector4[] faceLandmarks, leftEyeLandmarks, rightEyeLandmarks, leftHandLandmarks, rightHandLandmarks; #endregion #region public methods - public HolisticPipeline(HolisticResource resource, BlazePoseModel blazePoseModel = BlazePoseModel.full){ + public HolisticPipeline(BlazePoseModel blazePoseModel = BlazePoseModel.full){ + var resource = Resources.Load("Holistic"); + commonCs = resource.commonCs; faceCs = resource.faceCs; handCs = resource.handCs; - blazePoseDetecter = new BlazePoseDetecter(resource.blazePoseResource, blazePoseModel); + blazePoseDetecter = new BlazePoseDetecter(blazePoseModel); facePipeline = new FacePipeline(resource.faceMeshResource); palmDetector = new PalmDetector(resource.blazePalmResource); handLandmarkDetector = new HandLandmarkDetector(resource.handLandmarkResource); @@ -142,6 +151,12 @@ public HolisticPipeline(HolisticResource resource, BlazePoseModel blazePoseModel handCropBuffer = new ComputeBuffer(handCropImageSize * handCropImageSize * 3, sizeof(float)); deltaLeftHandVertexBuffer = new ComputeBuffer(handVertexCount, sizeof(float) * 4); deltaRightHandVertexBuffer = new ComputeBuffer(handVertexCount, sizeof(float) * 4); + + faceLandmarks = new Vector4[faceVertexCount]; + leftEyeLandmarks = new Vector4[eyeVertexCount]; + rightEyeLandmarks = new Vector4[eyeVertexCount]; + leftHandLandmarks = new Vector4[handVertexCount + 1]; + rightHandLandmarks = new Vector4[handVertexCount + 1]; } public void Dispose(){ @@ -168,6 +183,15 @@ public void Dispose(){ deltaRightHandVertexBuffer.Dispose(); } + // Provide cached landmarks. 
+ public Vector4 GetPoseLandmark(int index) => blazePoseDetecter.GetPoseLandmark(index); + public Vector4 GetPoseWorldLandmark(int index) => blazePoseDetecter.GetPoseWorldLandmark(index); + public Vector4 GetFaceLandmark(int index) => faceLandmarks[index]; + public Vector4 GetLeftEyeLandmark(int index) => leftEyeLandmarks[index]; + public Vector4 GetRightEyeLandmark(int index) => rightEyeLandmarks[index]; + public Vector4 GetLeftHandLandmark(int index) => leftHandLandmarks[index]; + public Vector4 GetRightHandLandmark(int index) => rightHandLandmarks[index]; + public void ProcessImage( Texture inputTexture, HolisticInferenceType inferenceType = HolisticInferenceType.full, @@ -190,6 +214,7 @@ public void ProcessImage( // Image scaling and padding // Output image is letter-box image. // For example, top and bottom pixels of `letterboxTexture` are black if `inputTexture` size is 1920(width)*1080(height) + commonCs.SetInt("_isLinerColorSpace", QualitySettings.activeColorSpace == ColorSpace.Linear ? 1 : 0); commonCs.SetVector("_spadScale", scale); commonCs.SetInt("_letterboxWidth", letterboxWidth); commonCs.SetTexture(0, "_letterboxInput", inputTexture); @@ -236,32 +261,33 @@ void FaceProcess(Texture letterBoxTexture, Vector2 spadScale){ // The output of `facePipeline` is flipped horizontally. faceCs.SetBuffer(1, "_irisReconVertices", leftEyeVertexBuffer); faceCs.Dispatch(1, eyeVertexCount, 1, 1); + + // Cache landmarks to array for accessing data with CPU (C#). + AsyncGPUReadback.Request(faceVertexBuffer, request => { + request.GetData().CopyTo(faceLandmarks); + }); + AsyncGPUReadback.Request(leftEyeVertexBuffer, request => { + request.GetData().CopyTo(leftEyeLandmarks); + }); + AsyncGPUReadback.Request(rightEyeVertexBuffer, request => { + request.GetData().CopyTo(rightEyeLandmarks); + }); } void HandProcess(Texture inputTexture, Texture letterBoxTexture, Vector2 spadScale){ // Inference palm detection. 
palmDetector.ProcessImage(letterBoxTexture); - int[] countReadCache = new int[1]; - palmDetector.CountBuffer.GetData(countReadCache, 0, 0, 1); - var handDetectionCount = countReadCache[0]; - handDetectionCount = (int)Mathf.Min(handDetectionCount, 2); - - bool isNeedLeftFallback = (handDetectionCount == 0); - bool isNeedRightFallback = (handDetectionCount == 0); - - if(handDetectionCount > 0){ - // Hand region bounding box update - handCs.SetInt("_detectionCount", handDetectionCount); - handCs.SetFloat("_regionDetectDt", Time.deltaTime); - handCs.SetBuffer(0, "_palmDetections", palmDetector.DetectionBuffer); - handCs.SetBuffer(0, "_handsRegionFromPalm", handsRegionFromPalm); - handCs.Dispatch(0, 1, 1, 1); - } + handCs.SetBuffer(0, "_detectionCount", palmDetector.CountBuffer); + handCs.SetFloat("_regionDetectDt", Time.unscaledDeltaTime); + handCs.SetBuffer(0, "_poseInput", blazePoseDetecter.outputBuffer); + handCs.SetBuffer(0, "_palmDetections", palmDetector.DetectionBuffer); + handCs.SetBuffer(0, "_handsRegionFromPalm", handsRegionFromPalm); + handCs.Dispatch(0, 1, 1, 1); handCs.SetVector("_spadScale", spadScale); handCs.SetInt("_isVerticalFlip", 1); - for(int i=0; i 0.5f; - if(score < 0.5f){ - if(isRight) isNeedRightFallback = true; - else isNeedLeftFallback = true; - continue; + float score = handLandmarkDetector.Score; + bool isRight = (i==1); + if(isRight){ + rightHandDetectionScore = score; } + else{ + leftHandDetectionScore = score; + } + + if(score < handFallbackThreshold) continue; // Key point postprocess - handCs.SetFloat("_handPostDt", Time.deltaTime); + handCs.SetFloat("_handPostDt", Time.unscaledDeltaTime); handCs.SetBuffer(3, "_handPostInput", handLandmarkDetector.OutputBuffer); handCs.SetBuffer(3, "_handPostRegion", handsRegionFromPalm); handCs.SetBuffer(3, "_handPostOutput", isRight ? 
rightHandVertexBuffer : leftHandVertexBuffer); @@ -295,14 +321,22 @@ void HandProcess(Texture inputTexture, Texture letterBoxTexture, Vector2 spadSca } // Hand Re-track with pose landmark if hand is not detected or landmark's score is too low. - if(isNeedRightFallback) HandProcessFromPose(inputTexture, true); - if(isNeedLeftFallback) HandProcessFromPose(inputTexture, false); + if(rightHandDetectionScore < handFallbackThreshold) HandProcessFromPose(inputTexture, true); + if(leftHandDetectionScore < handFallbackThreshold) HandProcessFromPose(inputTexture, false); + + // Cache landmarks to array for accessing data with CPU (C#). + AsyncGPUReadback.Request(leftHandVertexBuffer, request => { + request.GetData().CopyTo(leftHandLandmarks); + }); + AsyncGPUReadback.Request(rightHandVertexBuffer, request => { + request.GetData().CopyTo(rightHandLandmarks); + }); } void HandProcessFromPose(Texture inputTexture, bool isRight){ // Calculate hand region with pose landmark handCs.SetInt("_isRight", isRight?1:0); - handCs.SetFloat("_bboxDt", Time.deltaTime); + handCs.SetFloat("_bboxDt", Time.unscaledDeltaTime); handCs.SetBuffer(1, "_poseInput", blazePoseDetecter.outputBuffer); handCs.SetBuffer(1, "_bboxRegion", isRight ? rightHandRegionFromPose : leftHandRegionFromPose); handCs.Dispatch(1, 1, 1, 1); @@ -323,12 +357,19 @@ void HandProcessFromPose(Texture inputTexture, bool isRight){ handLandmarkDetector.ProcessImage(handCropBuffer); // Key point postprocess - handCs.SetFloat("_handPostDt", Time.deltaTime); + handCs.SetFloat("_handPostDt", Time.unscaledDeltaTime); handCs.SetBuffer(3, "_handPostInput", handLandmarkDetector.OutputBuffer); handCs.SetBuffer(3, "_handPostRegion", isRight ? rightHandRegionFromPose : leftHandRegionFromPose); handCs.SetBuffer(3, "_handPostOutput", isRight ? rightHandVertexBuffer : leftHandVertexBuffer); handCs.SetBuffer(3, "_handPostDeltaOutput", isRight ? 
deltaRightHandVertexBuffer : deltaLeftHandVertexBuffer); handCs.Dispatch(3, 1, 1, 1); + + if(isRight){ + rightHandDetectionScore = blazePoseDetecter.GetPoseLandmark(16).w; + } + else{ + leftHandDetectionScore = blazePoseDetecter.GetPoseLandmark(15).w; + } } #endregion } diff --git a/Packages/HolisticBarracuda/package.json b/Packages/HolisticBarracuda/package.json index a324304..04420c9 100644 --- a/Packages/HolisticBarracuda/package.json +++ b/Packages/HolisticBarracuda/package.json @@ -6,7 +6,7 @@ "name": "jp.ikep.mediapipe.holistic", "displayName": "HolisticBarracuda", "description": "Mediapipe Holistic for Unity Barracuda.", - "version": "1.0.1", + "version": "1.1.0", "unity": "2020.3", "unityRelease": "11f1", "keywords": [ @@ -26,8 +26,8 @@ "jp.keijiro.mediapipe.iris": "1.1.0", "jp.keijiro.mediapipe.blazepalm": "1.0.2", "jp.keijiro.mediapipe.handlandmark": "1.0.1", - "jp.ikep.mediapipe.blazepose": "1.1.1" + "jp.ikep.mediapipe.blazepose": "1.3.0" }, "license": "Apache-2.0", "repository": "github:creativeIKEP/HolisticBarracuda" -} \ No newline at end of file +} diff --git a/Packages/packages-lock.json b/Packages/packages-lock.json index e9396a1..35b64e4 100644 --- a/Packages/packages-lock.json +++ b/Packages/packages-lock.json @@ -71,12 +71,12 @@ } }, "jp.ikep.mediapipe.blazepose": { - "version": "1.1.1", + "version": "1.3.0", "depth": 1, "source": "registry", "dependencies": { - "jp.ikep.mediapipe.posedetection": "1.0.0", - "jp.ikep.mediapipe.poselandmark": "1.1.0" + "jp.ikep.mediapipe.posedetection": "1.0.1", + "jp.ikep.mediapipe.poselandmark": "1.1.1" }, "url": "https://registry.npmjs.com" }, @@ -91,11 +91,11 @@ "jp.keijiro.mediapipe.iris": "1.1.0", "jp.keijiro.mediapipe.blazepalm": "1.0.2", "jp.keijiro.mediapipe.handlandmark": "1.0.1", - "jp.ikep.mediapipe.blazepose": "1.1.1" + "jp.ikep.mediapipe.blazepose": "1.3.0" } }, "jp.ikep.mediapipe.posedetection": { - "version": "1.0.0", + "version": "1.0.1", "depth": 2, "source": "registry", "dependencies": { @@ 
-104,7 +104,7 @@ "url": "https://registry.npmjs.com" }, "jp.ikep.mediapipe.poselandmark": { - "version": "1.1.0", + "version": "1.1.1", "depth": 2, "source": "registry", "dependencies": { diff --git a/ProjectSettings/ProjectSettings.asset b/ProjectSettings/ProjectSettings.asset index 64852be..77e9c3f 100644 --- a/ProjectSettings/ProjectSettings.asset +++ b/ProjectSettings/ProjectSettings.asset @@ -145,7 +145,8 @@ PlayerSettings: resolutionScalingMode: 0 androidSupportedAspectRatio: 1 androidMaxAspectRatio: 2.1 - applicationIdentifier: {} + applicationIdentifier: + Standalone: com.DefaultCompany.HolisticBarracuda buildNumber: Standalone: 0 iPhone: 0 diff --git a/README.md b/README.md index 07fa335..b1f7e94 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,23 @@ # HolisticBarracuda -![demo](https://user-images.githubusercontent.com/34697515/136178988-9a6c37cb-09a2-43e4-9f05-f8c4908b8665.gif) +`full` + +![full](https://user-images.githubusercontent.com/34697515/192131544-97d0aedb-bd4b-477c-a367-4c7f22f8f7cb.gif) + +`pose_and_face` + +![pose_and_face](https://user-images.githubusercontent.com/34697515/192131548-66a26715-cc9d-4a1c-a391-3ecd0d648f02.gif) + +`pose_and_hand` + +![pose_and_hand](https://user-images.githubusercontent.com/34697515/192131549-b5929bd0-de56-4938-9cb2-a816987a639b.gif) + +`pose_only` + +![pose_only](https://user-images.githubusercontent.com/34697515/192131552-6b2948a5-93f2-47b4-bd45-d11bffe5a58c.gif) + +`face_only` + +![face_only](https://user-images.githubusercontent.com/34697515/192131531-2b46cfb9-d6b8-4668-81a6-93d6e4595b3f.gif) **HolisticBarracuda** is the Unity Package that simultaneously estimates 33 pose, 21 per-hand, and 468 facial landmarks with a monocular color camera only. 
@@ -37,7 +55,7 @@ To the `scopedRegistries` section: ``` To the `dependencies` section: ``` -"jp.ikep.mediapipe.holistic": "1.0.1" +"jp.ikep.mediapipe.holistic": "1.1.0" ``` Finally, the manifest file looks like below: ``` @@ -55,7 +73,7 @@ Finally, the manifest file looks like below: } ], "dependencies": { - "jp.ikep.mediapipe.holistic": "1.0.1", + "jp.ikep.mediapipe.holistic": "1.1.0", ... } } @@ -67,7 +85,12 @@ This repository has the demo that inference pose, face and hands landmarks, and Check a Unity [scene](/Assets/Scenes/Sample.unity), [scripts](/Assets/Scripts) and [shaders](/Assets/Shaders) in the ["/Assets"](/Assets) directory. ## Demo image -Videos for demoe scene (["/Assets/Scenes/Sample.unity"](/Assets/Scenes/Sample.unity)) was downloaded from [here](https://www.pexels.com/ja-jp/video/7559286/). +Videos for the demo were downloaded from [Pexels](https://www.pexels.com/ja-jp/). +- https://www.pexels.com/ja-jp/video/5089491/ +- https://www.pexels.com/ja-jp/video/4492700/ +- https://www.pexels.com/ja-jp/video/8627747/ +- https://www.pexels.com/ja-jp/video/2795750/ +- https://www.pexels.com/ja-jp/video/6985340/ ## Author [IKEP](https://ikep.jp)