-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathLUNA_segment_lung_ROI.py
175 lines (168 loc) · 6.89 KB
/
LUNA_segment_lung_ROI.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import numpy as np
from skimage import morphology
from skimage import measure
from sklearn.cluster import KMeans
from skimage.transform import resize
from glob import glob
working_path = "./LUNA/tutorial/"
file_list = glob(working_path + "images_*.npy")
for img_file in file_list:
# I ran into an error when using Kmean on np.float16, so I'm using np.float64 here
imgs_to_process = np.load(img_file).astype(np.float64)
print("on image", img_file)
for i in range(len(imgs_to_process)):
img = imgs_to_process[i]
# Standardize the pixel values
mean = np.mean(img)
std = np.std(img)
img = img - mean
img = img / std
# Find the average pixel value near the lungs
# to renormalize washed out images
middle = img[100:400, 100:400]
mean = np.mean(middle)
max = np.max(img)
min = np.min(img)
# To improve threshold finding, I'm moving the
# underflow and overflow on the pixel spectrum
img[img == max] = mean
img[img == min] = mean
#
# Using Kmeans to separate foreground (radio-opaque tissue)
# and background (radio transparent tissue ie lungs)
# Doing this only on the center of the image to avoid
# the non-tissue parts of the image as much as possible
#
kmeans = KMeans(n_clusters=2).fit(np.reshape(middle, [np.prod(middle.shape), 1]))
centers = sorted(kmeans.cluster_centers_.flatten())
threshold = np.mean(centers)
thresh_img = np.where(img < threshold, 1.0, 0.0) # threshold the image
#
# I found an initial erosion helful for removing graininess from some of the regions
# and then large dialation is used to make the lung region
# engulf the vessels and incursions into the lung cavity by
# radio opaque tissue
#
eroded = morphology.erosion(thresh_img, np.ones([4, 4]))
dilation = morphology.dilation(eroded, np.ones([10, 10]))
#
# Label each region and obtain the region properties
# The background region is removed by removing regions
# with a bbox that is to large in either dimnsion
# Also, the lungs are generally far away from the top
# and bottom of the image, so any regions that are too
# close to the top and bottom are removed
# This does not produce a perfect segmentation of the lungs
# from the image, but it is surprisingly good considering its
# simplicity.
#
labels = measure.label(dilation)
label_vals = np.unique(labels)
regions = measure.regionprops(labels)
good_labels = []
for prop in regions:
B = prop.bbox
if B[2] - B[0] < 475 and B[3] - B[1] < 475 and B[0] > 40 and B[2] < 472:
good_labels.append(prop.label)
mask = np.ndarray([512, 512], dtype=np.int8)
mask[:] = 0
#
# The mask here is the mask for the lungs--not the nodes
# After just the lungs are left, we do another large dilation
# in order to fill in and out the lung mask
#
for N in good_labels:
mask = mask + np.where(labels == N, 1, 0)
mask = morphology.dilation(mask, np.ones([10, 10])) # one last dilation
imgs_to_process[i] = mask
np.save(img_file.replace("images", "lungmask"), imgs_to_process)
#
# Here we're applying the masks and cropping and resizing the image
#
file_list = glob(working_path + "lungmask_*.npy")
out_images = [] # final set of images
out_nodemasks = [] # final set of nodemasks
for fname in file_list:
print("working on file ", fname)
imgs_to_process = np.load(fname.replace("lungmask", "images"))
masks = np.load(fname)
node_masks = np.load(fname.replace("lungmask", "masks"))
for i in range(len(imgs_to_process)):
mask = masks[i]
node_mask = node_masks[i]
img = imgs_to_process[i]
new_size = [512, 512] # we're scaling back up to the original size of the image
img = mask * img # apply lung mask
#
# renormalizing the masked image (in the mask region)
#
new_mean = np.mean(img[mask > 0])
new_std = np.std(img[mask > 0])
#
# Pulling the background color up to the lower end
# of the pixel range for the lungs
#
old_min = np.min(img) # background color
img[img == old_min] = new_mean - 1.2 * new_std # resetting backgound color
img = img - new_mean
img = img / new_std
# make image bounding box (min row, min col, max row, max col)
labels = measure.label(mask)
regions = measure.regionprops(labels)
#
# Finding the global min and max row over all regions
#
min_row = 512
max_row = 0
min_col = 512
max_col = 0
for prop in regions:
B = prop.bbox
if min_row > B[0]:
min_row = B[0]
if min_col > B[1]:
min_col = B[1]
if max_row < B[2]:
max_row = B[2]
if max_col < B[3]:
max_col = B[3]
width = max_col - min_col
height = max_row - min_row
if width > height:
max_row = min_row + width
else:
max_col = min_col + height
#
# cropping the image down to the bounding box for all regions
# (there's probably an skimage command that can do this in one line)
#
img = img[min_row:max_row, min_col:max_col]
mask = mask[min_row:max_row, min_col:max_col]
if max_row - min_row < 5 or max_col - min_col < 5: # skipping all images with no god regions
pass
else:
# moving range to -1 to 1 to accomodate the resize function
mean = np.mean(img)
img = img - mean
min = np.min(img)
max = np.max(img)
img = img / (max - min)
new_img = resize(img, [512, 512])
new_node_mask = resize(node_mask[min_row:max_row, min_col:max_col], [512, 512])
out_images.append(new_img)
out_nodemasks.append(new_node_mask)
num_images = len(out_images)
#
# Writing out images and masks as 1 channel arrays for input into network
#
final_images = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
final_masks = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
for i in range(num_images):
final_images[i, 0] = out_images[i]
final_masks[i, 0] = out_nodemasks[i]
rand_i = np.random.choice(range(num_images), size=num_images, replace=False)
test_i = int(0.2 * num_images)
np.save(working_path + "trainImages.npy", final_images[rand_i[test_i:]])
np.save(working_path + "trainMasks.npy", final_masks[rand_i[test_i:]])
np.save(working_path + "testImages.npy", final_images[rand_i[:test_i]])
np.save(working_path + "testMasks.npy", final_masks[rand_i[:test_i]])