COCO (Common Objects in Context) dataset数据集是一个广泛应用于目标检测、语义分割的数据集,包含330K 图片数据 与 2.5 million 个目标实体。
!wget http://images.cocodataset.org/zips/train2017.zip -O coco_train2017.zip
!wget http://images.cocodataset.org/zips/val2017.zip -O coco_val2017.zip
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O coco_ann2017.zip
from zipfile import ZipFile, BadZipFile
import os
def extract_zip_file(extract_path):
try:
with ZipFile(extract_path+".zip") as zfile:
zfile.extractall(extract_path)
# remove zipfile
zfileTOremove=f"{extract_path}"+".zip"
if os.path.isfile(zfileTOremove):
os.remove(zfileTOremove)
else:
print("Error: %s file not found" % zfileTOremove)
except BadZipFile as e:
print("Error:", e)
extract_train_path = "./coco_train2017"
extract_val_path = "./coco_val2017"
extract_ann_path="./coco_ann2017"
extract_zip_file(extract_train_path)
extract_zip_file(extract_val_path)
extract_zip_file(extract_ann_path)
解压缩后,coco_train2017?与?coco_val2017文件夹下包含子文件夹train2017与val2017,各自包含有图片数据.
而coco_ann2017 子文件夹下有6个JSON 格式annotation 文件位于子文件夹annotations.
????????????????????????????????????????
随意调整一个format格式文件,说明如下:
{
"info":
{ "description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
},
"licenses":
[
{"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/","id": 1,"name": "Attribution-NonCommercial-ShareAlike License"},
{"url": "http://creativecommons.org/licenses/by-nc/2.0/","id": 2,"name": "Attribution-NonCommercial License"},
...,
...
],
"images":
[
{"license": 4,"file_name": "000000397133.jpg","coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg","height": 427,"width": 640,"date_captured": "2013-11-14 17:02:52","flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg","id": 397133},
{"license": 1,"file_name": "000000037777.jpg","coco_url": "http://images.cocodataset.org/val2017/000000037777.jpg","height": 230,"width": 352,"date_captured": "2013-11-14 20:55:31","flickr_url": "http://farm9.staticflickr.com/8429/7839199426_f6d48aa585_z.jpg","id": 37777},
...,
...
],
"annotations":
[
{"segmentation": [[510.66,423.01,511.72,...,...]],"area": 702.1057499999998,"iscrowd": 0,"image_id": 289343,"bbox": [473.07,395.93,38.65,28.67],"category_id": 18,"id": 1768},
{"segmentation": [[289.74,443.39,302.29,...,...]],"area": 27718.476299999995,"iscrowd": 0,"image_id": 61471,"bbox": [272.1,200.23,151.97,279.77],"category_id": 18,"id": 1773}
...,
...
],
"categories":
[
{"supercategory": "person","id": 1,"name": "person"},
{"supercategory": "vehicle","id": 2,"name": "bicycle"},
...,
...
]
}
info:
The “info” component provides metadata about the COCO dataset, including the version number, the date it was created, and the contact information for the creators of the dataset.
"info":
{ "description": "COCO 2017 Dataset",
"url": "http://cocodataset.org",
"version": "1.0",
"year": 2017,
"contributor": "COCO Consortium",
"date_created": "2017/09/01"
}
licenses
"license"
: an integer value indicating the license type of the image. This value corresponds to the license?"id"
?in the?"licenses"
?component."file_name"
: a string containing the name of the image file."coco_url"
: a string containing a URL to the image on the COCO website."height"
: an integer value representing the height of the image in pixels."width"
: an integer value representing the width of the image in pixels."date_captured"
: a string representing the date and time that the image was captured."flickr_url"
: a string containing a URL to the image on Flickr."id"
: a unique identifier for the image, as an integer value.
"licenses":
[
{
"url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
"id": 1,
"name": "Attribution-NonCommercial-ShareAlike License"
},
...,
...,
...
]
images
"license"
: an integer value indicating the license type of the image. This value corresponds to the license?"id"
?in the?"licenses"
?component."file_name"
: a string containing the name of the image file."coco_url"
: a string containing a URL to the image on the COCO website."height"
: an integer value representing the height of the image in pixels."width"
: an integer value representing the width of the image in pixels."date_captured"
: a string representing the date and time that the image was captured."flickr_url"
: a string containing a URL to the image on Flickr."id"
: a unique identifier for the image, as an integer value.
"images":
[
{
"license": 4,
"file_name": "000000397133.jpg",
"coco_url": "http://images.cocodataset.org/val2017/000000397133.jpg",
"height": 427,
"width": 640,
"date_captured": "2013-11-14 17:02:52",
"flickr_url": "http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg",
"id": 397133
},
...,
...,
...
]
annotations
"segmentation"
: The “segmentation” key in an annotation dictionary holds a list of floating point numbers that represent the pixel coordinates of an object’s segmentation mask. They can be used to plot the segmentation mask of the object on an image. To plot the mask, we need to take pairs of numbers (the first and second value, then the third and fourth, etc.) and use them as the x and y coordinates of the pixels."area"
: a floating point value indicating the area of the object in segmentation mask in pixels squared."iscrowd"
: a binary integer value indicating whether the object is part of a crowd (1) or not (0)."image_id"
: an integer value that is a unique identifier for the image in which the object appears. This?"image_id"
?corresponds to the?"id"
?in?"image"
?component."bbox"
: a list of four floating point values representing the bounding box of the object in the format?[x, y, width, height]
."category_id"
: an integer value indicating the category or class of the object."id"
: a unique identifier for the annotation across the entire COCO dataset, as an integer value.
"annotations":
[
{
"segmentation": [[510.66,423.01,511.72,...,...,...]],
"area": 702.1057499999998,
"iscrowd": 0,
"image_id": 289343,
"bbox": [473.07,395.93,38.65,28.67],
"category_id": 18,
"id": 1768
},
...,
...,
...
]
categories
- “supercategory”: a string indicating the supercategory or super class of an object. For example, in the second dictionary, “vehicle” is the supercategory of the bicycle.
- “id”: a unique identifier for identifying the category of an object , as an integer value.
- “name”: a string that represents the name of the category.
"categories":
[
{
"supercategory": "person",
"id": 1,
"name": "person"
},
{
"supercategory": "vehicle",
"id": 2,
"name": "bicycle"
},
...,
...,
...
]
COCO dataset 总共定义了91 类目标,但其中只有 80类用到了.
COCO dataset 以JSON 文件格式存储annotations,借助下属class可完成解析。
from collections import defaultdict
import json
import numpy as np
class COCOParser:
def __init__(self, anns_file, imgs_dir):
with open(anns_file, 'r') as f:
coco = json.load(f)
self.annIm_dict = defaultdict(list)
self.cat_dict = {}
self.annId_dict = {}
self.im_dict = {}
self.licenses_dict = {}
for ann in coco['annotations']:
self.annIm_dict[ann['image_id']].append(ann)
self.annId_dict[ann['id']]=ann
for img in coco['images']:
self.im_dict[img['id']] = img
for cat in coco['categories']:
self.cat_dict[cat['id']] = cat
for license in coco['licenses']:
self.licenses_dict[license['id']] = license
def get_imgIds(self):
return list(self.im_dict.keys())
def get_annIds(self, im_ids):
im_ids=im_ids if isinstance(im_ids, list) else [im_ids]
return [ann['id'] for im_id in im_ids for ann in self.annIm_dict[im_id]]
def load_anns(self, ann_ids):
im_ids=ann_ids if isinstance(ann_ids, list) else [ann_ids]
return [self.annId_dict[ann_id] for ann_id in ann_ids]
def load_cats(self, class_ids):
class_ids=class_ids if isinstance(class_ids, list) else [class_ids]
return [self.cat_dict[class_id] for class_id in class_ids]
def get_imgLicenses(self,im_ids):
im_ids=im_ids if isinstance(im_ids, list) else [im_ids]
lic_ids = [self.im_dict[im_id]["license"] for im_id in im_ids]
return [self.licenses_dict[lic_id] for lic_id in lic_ids]
if __name__ == "__main__"
coco_annotations_file="/content/coco_ann2017/annotations/instances_val2017.json"
coco_images_dir="/content/coco_val2017/val2017"
coco= COCOParser(coco_annotations_file, coco_images_dir)
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 24 14:11:25 2023
@author: scott
"""
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
# 项目根目录下放置data/seaships文件夹,里面分别有annotations、images两个文件夹。
# annotation文件夹中,train.json、val.json, test.json三个JSON文件里面为空,同时有一个xml文件夹存放全部xml文件。
# 转换后生成的json文件会放在根目录下,要把annotations里面的信息存入到三个json文件,
path2 = "."
START_BOUNDING_BOX_ID = 1
def get(root, name):
return root.findall(name)
def get_and_check(root, name, length):
vars = root.findall(name)
if len(vars) == 0:
raise NotImplementedError('Can not find %s in %s.' % (name, root.tag))
if length > 0 and len(vars) != length:
raise NotImplementedError('The size of %s is supposed to be %d, but is %d.' % (name, length, len(vars)))
if length == 1:
vars = vars[0]
return vars
def convert(xml_list, json_file):
json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
categories = pre_define_categories.copy()
bnd_id = START_BOUNDING_BOX_ID
all_categories = {}
for index, line in enumerate(xml_list):
# print("Processing %s"%(line))
if not line.endswith('.xml'):
continue
# xmlname = line.split('.xml')[0]
xml_f = line
tree = ET.parse(xml_f)
root = tree.getroot()
if root.tag != 'annotation':
raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
filename = os.path.basename(xml_f)[:-4] + ".jpg"
image_id = int(os.path.basename(xml_f)[:-4]) # index
size = get_and_check(root, 'size', 1)
width = int(get_and_check(size, 'width', 1).text)
height = int(get_and_check(size, 'height', 1).text)
image = {'file_name': filename, 'height': height, 'width': width, 'id': image_id}
json_dict['images'].append(image)
for obj in get(root, 'object'):
category = get_and_check(obj, 'name', 1).text
if category in all_categories:
all_categories[category] += 1
else:
all_categories[category] = 1
if category not in categories:
if only_care_pre_define_categories:
continue
new_id = len(categories) + 1
print(
"[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(
category, pre_define_categories, new_id))
categories[category] = new_id
category_id = categories[category] # from category get id_num
bndbox = get_and_check(obj, 'bndbox', 1)
xmin = int(float(get_and_check(bndbox, 'xmin', 1).text))
ymin = int(float(get_and_check(bndbox, 'ymin', 1).text))
xmax = int(float(get_and_check(bndbox, 'xmax', 1).text))
ymax = int(float(get_and_check(bndbox, 'ymax', 1).text))
assert (xmax > xmin), "xmax <= xmin, {}".format(line)
assert (ymax > ymin), "ymax <= ymin, {}".format(line)
obj_width = abs(xmax - xmin)
obj_height = abs(ymax - ymin)
ann = {'area': obj_width * obj_height, 'iscrowd': 0, 'image_id':
image_id, 'bbox': [xmin, ymin, obj_width, obj_height],
'category_id': category_id, 'id': bnd_id, 'ignore': 0,
'segmentation': []}
json_dict['annotations'].append(ann)
bnd_id = bnd_id + 1
for cate, cid in categories.items():
cat = {'supercategory': 'none', 'id': cid, 'name': cate}
json_dict['categories'].append(cat)
with open(json_file, 'w', encoding='utf-8') as json_fp:
json.dump(json_dict, json_fp, indent=4, ensure_ascii=False)
# json_fp.write(json_str)
json_fp.close()
print("------------create {} done--------------".format(json_file))
print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories),
all_categories.keys(),
len(pre_define_categories),
pre_define_categories.keys()))
print("category: id --> {}".format(categories))
print(categories.keys())
print(categories.values())
if __name__ == '__main__':
classes = ["ore carrier","fishing boat","bulk cargo carrier","passenger ship","container ship","general cargo ship"] # 需要检测的类别
pre_define_categories = {}
for i, clss in enumerate(classes):
pre_define_categories[clss] = i + 1
# pre_define_categories = {'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, "c5": 5,"c6",6}
only_care_pre_define_categories = True
# only_care_pre_define_categories = False
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 1 - train_ratio - val_ratio
save_json_train = 'train.json'
save_json_val = 'val.json'
save_json_test = "test.json"
xml_dir = "E:/MySoftApply/model_test/CenterNet_train/data/seaships/annotations/xml"
save_json_train = os.path.join(os.path.dirname(xml_dir),save_json_train)
save_json_val = os.path.join(os.path.dirname(xml_dir),save_json_val)
save_json_test = os.path.join(os.path.dirname(xml_dir),save_json_test)
xml_list = glob.glob(xml_dir + "/*.xml")
xml_list = np.sort(xml_list)
np.random.seed(101)
np.random.shuffle(xml_list)
train_num = int(len(xml_list) * train_ratio)
val_num = int(len(xml_list) * val_ratio)
test_num = int(len(xml_list) * test_ratio)
xml_list_train = xml_list[:train_num]
xml_list_val = xml_list[train_num:train_num+val_num]
xml_list_test = xml_list[train_num+val_num:]
convert(xml_list_train, save_json_train)
convert(xml_list_val, save_json_val)
convert(xml_list_val, save_json_test)
# if os.path.exists(path2 + "/annotations"):
# shutil.rmtree(path2 + "/annotations")
# os.makedirs(path2 + "/annotations")
# if os.path.exists(path2 + "/images/train"):
# shutil.rmtree(path2 + "/images/train")
# os.makedirs(path2 + "/images/train")
# if os.path.exists(path2 + "/images/val"):
# shutil.rmtree(path2 + "/images/val")
# os.makedirs(path2 + "/images/val")
# if os.path.exists(path2 + "/images/test"):
# shutil.rmtree(path2 + "/images/test")
# os.makedirs(path2 + "/images/test")
# f1 = open("train.txt", "w")
# for xml in xml_list_train:
# img = xml[:-4] + ".jpg"
# f1.write(os.path.basename(xml)[:-4] + "\n")
# shutil.copyfile(img, path2 + "/data/coco/train2017/" + os.path.basename(img))
# f2 = open("val.txt", "w")
# for xml in xml_list_val:
# img = xml[:-4] + ".jpg"
# f2.write(os.path.basename(xml)[:-4] + "\n")
# shutil.copyfile(img, path2 + "/data/coco/val2017/" + os.path.basename(img))
# f1.close()
# f2.close()
# print("-------------------------------")
# print("train number:", len(xml_list_train))
# print("val number:", len(xml_list_val))
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
# define a list of colors for drawing bounding boxes
color_list = ["pink", "red", "teal", "blue", "orange", "yellow", "black", "magenta","green","aqua"]*10
num_imgs_to_disp = 4
total_images = len(coco.get_imgIds()) # total number of images
sel_im_idxs = np.random.permutation(total_images)[:num_imgs_to_disp]
img_ids = coco.get_imgIds()
selected_img_ids = [img_ids[i] for i in sel_im_idxs]
ann_ids = coco.get_annIds(selected_img_ids)
im_licenses = coco.get_imgLicenses(selected_img_ids)
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15,10))
ax = ax.ravel()
for i, im in enumerate(selected_img_ids):
image = Image.open(f"{coco_images_dir}/{str(im).zfill(12)}.jpg")
ann_ids = coco.get_annIds(im)
annotations = coco.load_anns(ann_ids)
for ann in annotations:
bbox = ann['bbox']
x, y, w, h = [int(b) for b in bbox]
class_id = ann["category_id"]
class_name = coco.load_cats(class_id)[0]["name"]
license = coco.get_imgLicenses(im)[0]["name"]
color_ = color_list[class_id]
rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color_, facecolor='none')
t_box=ax[i].text(x, y, class_name, color='red', fontsize=10)
t_box.set_bbox(dict(boxstyle='square, pad=0',facecolor='white', alpha=0.6, edgecolor='blue'))
ax[i].add_patch(rect)
ax[i].axis('off')
ax[i].imshow(image)
ax[i].set_xlabel('Longitude')
ax[i].set_title(f"License: {license}")
plt.tight_layout()
plt.show()