RGB
img_path = "test.png"
image = Image.open(img_path)
image = cvtColor(image)
其中 cvtColor 函数的实现如下:
#---------------------------------------------------------#
# 将图像转换成RGB图像,防止灰度图在预测时报错。
# 代码仅仅支持RGB图像的预测,所有其它类型的图像都会转化成RGB
#---------------------------------------------------------#
def cvtColor(image):
    """Return *image* unchanged if it already has three channels, else convert it.

    An input whose array shape is ``(H, W, 3)`` is handed back as-is. Any
    other input is converted by calling its ``convert('RGB')`` method.
    """
    dims = np.shape(image)
    has_three_channels = len(dims) == 3 and dims[2] == 3
    if not has_three_channels:
        # Anything that is not (H, W, 3) goes through convert('RGB').
        return image.convert('RGB')
    return image
对原始图像进行备份,用于后续的可视化绘图:
old_img = copy.deepcopy(image)
orininal_h = np.array(image).shape[0]
orininal_w = np.array(image).shape[1]
letterbox的作用是: 给图像增加灰条,实现不失真的resize。经过letterbox后,可获得指定 input_size 大小的不失真图:
image_data, nw, nh = resize_image(image, (self.input_shape[1],self.input_shape[0]))
self.input_shape 为模型输入指定的图片大小。其中通过 resize_image 实现letterbox功能, resize_image 代码如下:
def resize_image(image, size):
    """Letterbox *image* onto a gray canvas of *size*, keeping its aspect ratio.

    Args:
        image: Image object exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image in this file).
        size: Target canvas size as ``(width, height)``.

    Returns:
        Tuple ``(new_image, nw, nh)``: the padded RGB canvas and the width and
        height of the scaled image that was pasted into its centre.
    """
    iw, ih = image.size
    w, h = size

    # The smaller of the two ratios makes the scaled image fit in both directions.
    scale = min(w / iw, h / ih)

    # Truncation can produce 0 for extreme aspect ratios (e.g. a 1-pixel-high
    # strip); clamp to one pixel so the resize target is never zero-sized.
    nw = max(1, int(iw * scale))
    nh = max(1, int(ih * scale))

    resized = image.resize((nw, nh), Image.BICUBIC)

    # Gray (128, 128, 128) canvas; the scaled image is centred on it.
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(resized, ((w - nw) // 2, (h - nh) // 2))
    return canvas, nw, nh
取宽、高两个方向缩放比例中较小的一个作为缩放系数(当 input_size 为正方形时,即以长边为准,将长边缩放到 input_size 大小),另一边按同一比例进行等比缩放:
scale = min(w/iw, h/ih)
nw = int(iw*scale)
nh = int(ih*scale)
image = image.resize((nw,nh), Image.BICUBIC)
nw、nh 为原图经过缩放后的宽和高。然后将缩放后的图像居中对齐,在上下左右进行填充:
new_image = Image.new('RGB', size, (128,128,128))
new_image.paste(image, ((w-nw)//2, (h-nh)//2))
首先创建一张 input_size(size) 大小的空图片,图片填充像素值为 (128,128,128)。然后将缩放后的 image 粘贴到 new_image 上,粘贴位置的左上角坐标为 ((w-nw)//2, (h-nh)//2)。这就相当于对缩放后的图片进行上下左右padding, padding 的像素值为 (128,128,128),从而得到padding后的 new_image。函数最终返回 new_image, 以及原图 image 经缩放后的尺寸: nw, nh。
image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)
def preprocess_input(image):
    """Return *image* divided by 255.0.

    The division is not done in place, so the caller's array is left
    untouched and integer-typed inputs are accepted (in-place true division
    of an integer array raises in NumPy).

    Args:
        image: Array-like of pixel values.

    Returns:
        A new ndarray holding ``image / 255.0``. Floating-point inputs keep
        their dtype.
    """
    return np.asarray(image) / 255.0
np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)) 将归一化后的图像由 (H,W,C) 转换为 (C,H,W)。由于模型的输入为 [N,C,H,W], 因此利用 np.expand_dims 在第0维度扩充一个batch维度:
image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)
with torch.no_grad():
images = torch.from_numpy(image_data)
if self.cuda:
images = images.cuda()
#---------------------------------------------------#
# 图片传入网络进行预测
#---------------------------------------------------#
pr = self.net(images)[0]
因为是单张图片预测,所以 self.net(images) 输出的大小为 (1,c,h,w);通过 [0] 去掉batch维度,方便后处理,因此 pr 的大小为 (c,h,w)。
(1) 利用softmax计算像素的类别概率
pr = F.softmax(pr.permute(1,2,0),dim = -1).cpu().numpy()
先将大小为 (c,h,w) 的tensor利用 permute 转换为 (h,w,c),再在最后一个维度(类别维度)上计算softmax。
(2) 将预测的输出resize到原图尺寸
首先将输出的padding灰条部分截取掉
因为输入图片的上下左右四周都进行了padding, 所以预测结果中四周的padding部分需要截取掉。上下padding的大小为 int((self.input_shape[0] - nh) // 2),左右padding的大小为 int((self.input_shape[1] - nw) // 2),截取灰条的代码如下:
pr = pr[int((self.input_shape[0] - nh) // 2) : int((self.input_shape[0] - nh) // 2 + nh), \
int((self.input_shape[1] - nw) // 2) : int((self.input_shape[1] - nw) // 2 + nw)]
将图片resize到原图尺寸
将截取灰条后的 pr resize到原图大小:
pr = cv2.resize(pr, (orininal_w, orininal_h), interpolation = cv2.INTER_LINEAR)
(3) 计算每个像素的类别
利用argmax
获得每个像素的类别预测概率最大的索引,也就是获得对应类别的索引
pr = pr.argmax(axis=-1)
将输出的分割图与原始图进行混合输出
首先为每个类别设定对应的rgb颜色值:
if self.num_classes <= 21:
self.colors = [ (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
(128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
(64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
(128, 64, 12)]
# self.colors = [ (0, 0, 0),(0, 0, 0), (0, 1, 0)]
else:
hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)]
self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors))
通过以下方式(1)和方式(2)都可以实现,其中方式(1)的写法非常简洁,值得借鉴。
方式1:
seg_img = np.reshape(np.array(self.colors, np.uint8)[np.reshape(pr, [-1])], [orininal_h, orininal_w, -1])
pr 的大小为 (h,w), 每个位置的值为该像素的类别索引。np.reshape(pr, [-1]) 将 pr 的shape由 (h,w) reshape为一维 (h*w, ), 每个值为类别索引。np.array(self.colors, np.uint8) 对应的值如下, shape大小为 (22,3):
array([[ 0, 0, 0],
[128, 0, 0],
[ 0, 128, 0],
[128, 128, 0],
[ 0, 0, 128],
[128, 0, 128],
[ 0, 128, 128],
[128, 128, 128],
[ 64, 0, 0],
[192, 0, 0],
[ 64, 128, 0],
[192, 128, 0],
[ 64, 0, 128],
[192, 0, 128],
[ 64, 128, 128],
[192, 128, 128],
[ 0, 64, 0],
[128, 64, 0],
[ 0, 192, 0],
[128, 192, 0],
[ 0, 64, 128],
[128, 64, 12]], dtype=uint8)
利用 np.array(self.colors, np.uint8)[np.reshape(pr, [-1])] 计算得到每个像素的颜色值array, 大小为 (h*w,3)。其中 np.reshape(pr, [-1]) 给出 h*w 个行索引值,根据这些行索引值从 np.array(self.colors, np.uint8) 中取出对应的行;总共有 h*w 个索引, 因此得到 (h*w,3) 大小的array, 存储了每个像素的rgb颜色。然后通过 reshape 将大小为 (h*w,3) 的array reshape为 (h,w,3), 得到分割后的3通道图片 seg_img。
方式2:
seg_img = np.zeros((np.shape(pr)[0], np.shape(pr)[1], 3))
for c in range(self.num_classes):
seg_img[:, :, 0] += ((pr[:, :] == c ) * self.colors[c][0]).astype('uint8')
seg_img[:, :, 1] += ((pr[:, :] == c ) * self.colors[c][1]).astype('uint8')
seg_img[:, :, 2] += ((pr[:, :] == c ) * self.colors[c][2]).astype('uint8')
遍历所有类别,为各个类别位置,赋予对应的像素值。
image = Image.fromarray(np.uint8(seg_img))
image = Image.blend(old_img, image, 0.7)
pr = np.uint8(pr)
image = pr
image = Image.fromarray(pr)
seg_img = (np.expand_dims(pr != 0, -1) * np.array(old_img, np.float32)).astype('uint8')
#------------------------------------------------#
# 将新图片转换成Image的形式
#------------------------------------------------#
image = Image.fromarray(np.uint8(seg_img))
pr != 0 即去掉了背景区域: 非背景区域都为True, 背景区域为False。将其与 old_img 做乘积,就可以扣去背景,仅保留图中的目标。pr 的shape为 (h,w), 而 old_img 的shape为 (h,w,c), 无法直接相乘,所以通过 np.expand_dims(pr != 0, -1) 将其扩展到shape大小为 (h,w,1)。
if count:
classes_nums = np.zeros([self.num_classes])
total_points_num = orininal_h * orininal_w
print('-' * 63)
print("|%25s | %15s | %15s|"%("Key", "Value", "Ratio"))
print('-' * 63)
for i in range(self.num_classes):
num = np.sum(pr == i)
ratio = num / total_points_num * 100
if num > 0:
print("|%25s | %15s | %14.2f%%|"%(str(name_classes[i]), str(num), ratio))
print('-' * 63)
classes_nums[i] = num
print("classes_nums:", classes_nums)
import colorsys
import copy
import time
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torch import nn
from nets.deeplabv3_plus import DeepLab
from utils.utils import cvtColor, preprocess_input, resize_image, show_config
#-----------------------------------------------------------------------------------#
# 使用自己训练好的模型预测需要修改3个参数
# model_path、backbone和num_classes都需要修改!
# 如果出现shape不匹配,一定要注意训练时的model_path、backbone和num_classes的修改
#-----------------------------------------------------------------------------------#
class DeeplabV3(object):
    """Inference wrapper around a DeepLabV3+ segmentation network.

    Every key of ``_defaults`` becomes an instance attribute; keyword
    arguments passed to the constructor override the defaults. When using
    your own trained weights, ``model_path``, ``backbone`` and
    ``num_classes`` must match the values used for training.
    """

    _defaults = {
        # Checkpoint file handed to torch.load() in generate().
        "model_path": 'logs/best_epoch_weights1213_40_10.pth',
        # Number of classes to distinguish, plus one.
        "num_classes": 2,
        # Backbone name forwarded to DeepLab (e.g. mobilenet, xception).
        "backbone": "mobilenet",
        # Network input size as [height, width].
        "input_shape": [192, 320],
        # Downsampling factor, usually 8 or 16; keep it equal to training.
        "downsample_factor": 16,
        # Output mode of detect_image():
        #   0 - colour-coded segmentation blended with the original image
        #   1 - raw class-index map as a single-channel image
        #   2 - original image with background (class 0) pixels zeroed
        "mix_type": 0,
        # Whether to run on CUDA; set to False on machines without a GPU.
        "cuda": True,
    }

    def __init__(self, **kwargs):
        """Apply the configuration, build the colour palette and load the model.

        Args:
            **kwargs: Overrides for any key of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)

        self.colors = self._build_palette(self.num_classes)
        self.generate()

        # Report the configuration actually in effect (defaults + overrides),
        # not just the class-level defaults.
        show_config(**{key: getattr(self, key) for key in self._defaults})

    @staticmethod
    def _build_palette(num_classes):
        """Return one RGB tuple per class index used for visualisation."""
        if num_classes <= 21:
            return [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
                    (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
                    (64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
                    (128, 64, 12)]
        # More classes than fixed colours: spread hues evenly over the HSV circle.
        hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
        rgb_floats = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
        return [(int(r * 255), int(g * 255), int(b * 255)) for r, g, b in rgb_floats]

    def generate(self, onnx=False):
        """Load the network from ``self.model_path`` and put it in eval mode.

        The checkpoint may be either a fully pickled model or a state dict;
        a state dict is loaded into a freshly constructed ``DeepLab``.

        Args:
            onnx: When true, the model is left unwrapped (no ``DataParallel``
                and no explicit ``.cuda()`` call) so it can be exported.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # SECURITY: torch.load unpickles the file; only load checkpoints from
        # a trusted source.
        checkpoint = torch.load(self.model_path, map_location=device)
        if isinstance(checkpoint, dict):
            net = DeepLab(num_classes=self.num_classes, backbone=self.backbone,
                          downsample_factor=self.downsample_factor, pretrained=False)
            net.load_state_dict(checkpoint)
        else:
            net = checkpoint

        # Inference must run with dropout/batch-norm layers in evaluation mode.
        self.net = net.eval()
        print('{} model, and classes loaded.'.format(self.model_path))

        if not onnx:
            if self.cuda:
                self.net = nn.DataParallel(self.net)
                self.net = self.net.cuda()

    def _letterbox_batch(self, image):
        """Letterbox *image* to the network input size and add a batch axis.

        Returns:
            Tuple ``(batch, nw, nh)`` where ``batch`` is a float32 array laid
            out as (1, C, H, W) and ``nw``/``nh`` are the values returned by
            ``resize_image``.
        """
        boxed, nw, nh = resize_image(image, (self.input_shape[1], self.input_shape[0]))
        chw = np.transpose(preprocess_input(np.array(boxed, np.float32)), (2, 0, 1))
        return np.expand_dims(chw, 0), nw, nh

    def _to_device(self, batch):
        """Wrap a numpy batch in a tensor, moved to CUDA when ``self.cuda`` is set."""
        images = torch.from_numpy(batch)
        if self.cuda:
            images = images.cuda()
        return images

    def _class_probabilities(self, images):
        """Run the network on *images* and return softmax scores as an (H, W, C) array."""
        with torch.no_grad():
            # [0] drops the batch axis of the single-image prediction.
            logits = self.net(images)[0]
            return F.softmax(logits.permute(1, 2, 0), dim=-1).cpu().numpy()

    def _strip_padding(self, pr, nw, nh):
        """Cut the letterbox borders off a prediction whose first two axes are (H, W)."""
        top = int((self.input_shape[0] - nh) // 2)
        left = int((self.input_shape[1] - nw) // 2)
        return pr[top:top + nh, left:left + nw]

    def _predict_class_map(self, image, orininal_w, orininal_h):
        """Return the per-pixel class index map of *image* at its original size."""
        batch, nw, nh = self._letterbox_batch(image)
        pr = self._class_probabilities(self._to_device(batch))
        pr = self._strip_padding(pr, nw, nh)
        # Resize the scores (not the labels) so interpolation stays meaningful.
        pr = cv2.resize(pr, (orininal_w, orininal_h), interpolation=cv2.INTER_LINEAR)
        return pr.argmax(axis=-1)

    def _print_class_counts(self, pr, total_points_num, name_classes):
        """Print a table with the pixel count and ratio of every class present in *pr*."""
        classes_nums = np.zeros([self.num_classes])
        print('-' * 63)
        print("|%25s | %15s | %15s|"%("Key", "Value", "Ratio"))
        print('-' * 63)
        for i in range(self.num_classes):
            num = np.sum(pr == i)
            ratio = num / total_points_num * 100
            if num > 0:
                # Fall back to the class index when no names were supplied.
                label = name_classes[i] if name_classes is not None else i
                print("|%25s | %15s | %14.2f%%|"%(str(label), str(num), ratio))
                print('-' * 63)
            classes_nums[i] = num
        print("classes_nums:", classes_nums)

    def detect_image(self, image, count=False, name_classes=None):
        """Segment *image* and return a visualisation chosen by ``self.mix_type``.

        Args:
            image: Input image; converted with ``cvtColor`` before prediction.
            count: When true, print per-class pixel statistics.
            name_classes: Optional sequence of class names indexed by class
                id, used only for the statistics table.

        Returns:
            A PIL image: the blend (``mix_type`` 0), the class-index map
            (``mix_type`` 1) or the background-masked original
            (``mix_type`` 2). For any other ``mix_type`` the converted input
            image is returned unchanged.
        """
        image = cvtColor(image)
        # Keep a copy of the input for the visualisation step.
        old_img = copy.deepcopy(image)
        orininal_h, orininal_w = np.array(image).shape[:2]

        pr = self._predict_class_map(image, orininal_w, orininal_h)

        if count:
            self._print_class_counts(pr, orininal_h * orininal_w, name_classes)

        if self.mix_type == 0:
            # Index the palette with the class map: (H, W) -> (H, W, 3).
            seg_img = np.array(self.colors, np.uint8)[pr]
            image = Image.fromarray(np.uint8(seg_img))
            image = Image.blend(old_img, image, 0.7)
        elif self.mix_type == 1:
            # Raw class indices, no colour mapping.
            image = Image.fromarray(np.uint8(pr))
        elif self.mix_type == 2:
            # Multiply by the foreground mask so background pixels become 0.
            seg_img = (np.expand_dims(pr != 0, -1) * np.array(old_img, np.float32)).astype('uint8')
            image = Image.fromarray(np.uint8(seg_img))
        return image

    def get_FPS(self, image, test_interval):
        """Measure the average time of one forward pass plus post-processing.

        Args:
            image: Input image used for every pass.
            test_interval: Number of timed passes; must be non-zero.

        Returns:
            Mean time in seconds per pass.

        Raises:
            ZeroDivisionError: If ``test_interval`` is 0.
        """
        image = cvtColor(image)
        batch, nw, nh = self._letterbox_batch(image)
        images = self._to_device(batch)

        def one_pass():
            pr = self._class_probabilities(images).argmax(axis=-1)
            return self._strip_padding(pr, nw, nh)

        # One untimed pass before the measurement starts.
        one_pass()

        t1 = time.time()
        for _ in range(test_interval):
            one_pass()
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

    def convert_to_onnx(self, simplify, model_path):
        """Export the network to ONNX at *model_path*.

        Args:
            simplify: When true, run onnx-simplifier on the exported model.
            model_path: Destination path of the ONNX file.
        """
        import onnx
        self.generate(onnx=True)

        # Create the dummy input on the device the weights live on instead of
        # assuming CUDA is present.
        first_param = next(self.net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
        im = torch.zeros(1, 3, *self.input_shape).to(device)  # BCHW
        input_layer_names = ["images"]
        output_layer_names = ["output"]

        # Export the model
        print(f'Starting export with onnx {onnx.__version__}.')
        torch.onnx.export(self.net,
                          im,
                          f=model_path,
                          verbose=False,
                          opset_version=11,
                          training=torch.onnx.TrainingMode.EVAL,
                          do_constant_folding=True,
                          input_names=input_layer_names,
                          output_names=output_layer_names,
                          dynamic_axes=None)

        # Checks
        model_onnx = onnx.load(model_path)
        onnx.checker.check_model(model_onnx)

        # Simplify onnx
        if simplify:
            import onnxsim
            print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
            model_onnx, check = onnxsim.simplify(
                model_onnx,
                dynamic_input_shape=False,
                input_shapes=None)
            assert check, 'assert check failed'
            onnx.save(model_onnx, model_path)

        print('Onnx model save as {}'.format(model_path))

    def get_miou_png(self, image):
        """Return the class-index map of *image* as a single-channel PIL image."""
        image = cvtColor(image)
        orininal_h, orininal_w = np.array(image).shape[:2]
        pr = self._predict_class_map(image, orininal_w, orininal_h)
        image = Image.fromarray(np.uint8(pr))
        return image
代码地址:https://github.com/bubbliiiing/deeplabv3-plus-pytorch/blob/main/deeplab.py