-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathposenet.py
141 lines (117 loc) · 4.14 KB
/
posenet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import math
import time
from enum import Enum
import numpy as np
import tensorflow as tf
from PIL import Image
class BodyPart(Enum):
    """Body key points reported by the pose estimator.

    Each member's value is a plain integer (0-16).  The definition order
    matters: ``PoseNet.estimate_pose`` pairs members with the model's
    key-point channels by enumerating this class, so member ``i`` labels
    channel ``i``.
    """

    # NOTE: no trailing commas here -- ``NOSE = 0,`` would make the value the
    # tuple ``(0,)`` instead of the integer ``0``.
    NOSE = 0
    LEFT_EYE = 1
    RIGHT_EYE = 2
    LEFT_EAR = 3
    RIGHT_EAR = 4
    LEFT_SHOULDER = 5
    RIGHT_SHOULDER = 6
    LEFT_ELBOW = 7
    RIGHT_ELBOW = 8
    LEFT_WRIST = 9
    RIGHT_WRIST = 10
    LEFT_HIP = 11
    RIGHT_HIP = 12
    LEFT_KNEE = 13
    RIGHT_KNEE = 14
    LEFT_ANKLE = 15
    RIGHT_ANKLE = 16
class Position:
    """Mutable 2-D coordinate holder; both axes start at zero."""

    def __init__(self):
        # x is the horizontal coordinate, y the vertical one.
        self.x, self.y = 0, 0
class KeyPoint:
    """One detected body key point: which part, where, and how confident."""

    def __init__(self):
        # Defaults: labelled as the nose, located at the origin, zero score.
        self.bodyPart, self.position, self.score = (
            BodyPart.NOSE,
            Position(),
            0.0,
        )
class Person:
    """Pose result for one person: a list of key points plus an overall score."""

    def __init__(self):
        # Starts empty; the estimator fills both attributes in.
        self.keyPoints, self.score = [], 0.0
class PoseNet:
    """Single-person pose estimator backed by a TensorFlow Lite model.

    The interpreter is created and its tensors are allocated once in the
    constructor.  ``estimate_pose`` then loads ``image_path``, runs one
    inference and decodes the heat-map / offset outputs into a ``Person``.
    """

    def __init__(self, model_path, image_path):
        """Load the TFLite model and cache its input/output tensor details.

        Args:
            model_path: Path of the ``.tflite`` model file.
            image_path: Path of the image that ``estimate_pose`` analyses.
        """
        # Normalisation applied to float inputs: (pixel - mean) / std.
        self.input_mean = 127.5
        self.input_std = 127.5
        self.image_path = image_path
        # Size of the source image; filled in by load_input_image().
        self.image_width = 0
        self.image_height = 0
        self.interpreter = tf.lite.Interpreter(model_path=model_path)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        print('input_details : ', self.input_details)
        print('output_details : ', self.output_details)

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + e**-x)`` of scalar *x*.

        The branch on the sign of *x* keeps the exponent non-positive, so
        ``math.exp`` can only underflow to 0.0 and never raises
        ``OverflowError`` for large-magnitude inputs.
        """
        if x >= 0:
            return 1. / (1. + math.exp(-x))
        exp_x = math.exp(x)
        return exp_x / (1. + exp_x)

    def load_input_image(self):
        """Open ``image_path`` and resize it to the model's input size.

        Side effects:
            Stores the source image size in ``image_width`` / ``image_height``
            and prints it.

        Returns:
            The resized image as an array with a leading batch axis of 1.
        """
        input_shape = self.input_details[0]['shape']
        height, width = input_shape[1], input_shape[2]
        # The context manager closes the underlying file; resize() returns an
        # independent in-memory copy, so it stays valid afterwards.
        with Image.open(self.image_path) as input_image:
            self.image_width, self.image_height = input_image.size
            print('width, height = (', self.image_width, ',', self.image_height, ')')
            resize_image = input_image.resize((width, height))
        return np.expand_dims(resize_image, axis=0)

    @staticmethod
    def _find_peak_cells(heat_maps):
        """Return ``(rows, cols)`` of the highest-valued cell per key point.

        *heat_maps* is indexed ``[0][row][col][key_point]``.  Ties resolve to
        the first cell in row-major order.
        """
        _, height, width, num_key_points = heat_maps.shape
        per_cell = np.asarray(heat_maps[0]).reshape(height * width, num_key_points)
        return np.unravel_index(per_cell.argmax(axis=0), (height, width))

    def estimate_pose(self):
        """Run the model on ``image_path`` and decode a single pose.

        Returns:
            A ``Person`` whose ``keyPoints`` holds one ``KeyPoint`` per model
            channel (position in source-image pixels, score in [0, 1]) and
            whose ``score`` is the sum of the labelled key-point scores
            divided by the number of channels.
        """
        input_details = self.input_details[0]
        input_data = self.load_input_image()
        if input_details['dtype'] == np.float32:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std
        self.interpreter.set_tensor(input_details['index'], input_data)

        start_time = time.time()
        self.interpreter.invoke()
        end_time = time.time()
        print("time spent:", ((end_time - start_time) * 1000))

        heat_maps = self.interpreter.get_tensor(self.output_details[0]['index'])
        offset_maps = self.interpreter.get_tensor(self.output_details[1]['index'])
        print('heat_maps shape=', heat_maps.shape)
        print('offset_maps shape=', offset_maps.shape)

        height, width, num_key_points = heat_maps.shape[1:4]

        # Extract the most confident heat-map cell for every key point.
        # The search runs on the raw values: sigmoid is monotonic, so the
        # peak is the same, and raw and squashed values are never compared
        # with each other.
        peak_rows, peak_cols = self._find_peak_cells(heat_maps)

        # Compute the final coordinates by adding the offsets.
        # NOTE(review): offsets are taken to be in model-input pixels (PoseNet
        # convention), so they are rescaled to source-image pixels like the
        # grid position is -- confirm against the model in use.
        input_height = input_details['shape'][1]
        input_width = input_details['shape'][2]
        y_scale = self.image_height / float(input_height)
        x_scale = self.image_width / float(input_width)
        # max(..., 1) avoids dividing by zero for a 1-cell-high/wide heat map.
        row_span = float(max(height - 1, 1))
        col_span = float(max(width - 1, 1))

        x_coords = []
        y_coords = []
        confidence_scores = []
        for i in range(num_key_points):
            row = int(peak_rows[i])
            col = int(peak_cols[i])
            # Channel i holds the y offset, channel i + num_key_points the x one.
            offset_y = float(offset_maps[0][row][col][i])
            offset_x = float(offset_maps[0][row][col][i + num_key_points])
            y_coords.append(row / row_span * self.image_height + offset_y * y_scale)
            x_coords.append(col / col_span * self.image_width + offset_x * x_scale)
            score = self.sigmoid(float(heat_maps[0][row][col][i]))
            confidence_scores.append(score)
            print("confidenceScores[", i, "] = ", score)

        person = Person()
        key_point_list = [KeyPoint() for _ in range(num_key_points)]
        total_score = 0
        # zip() stops at the shorter sequence, so a model with fewer channels
        # than BodyPart members no longer raises IndexError; surplus channels
        # keep their default KeyPoint values.
        labelled = zip(key_point_list, BodyPart, x_coords, y_coords, confidence_scores)
        for key_point, body_part, x, y, score in labelled:
            key_point.bodyPart = body_part
            key_point.position.x = x
            key_point.position.y = y
            key_point.score = score
            total_score += score
        person.keyPoints = key_point_list
        person.score = total_score / num_key_points
        return person