RIR 使用教程
RIR 使用教程
Dahang Wan, Rongsheng Lu ∗, Ting Xu, Siyuan Shen, Xianli Lang, Zhijie Ren
不使用RIR 方法:多个epoch迭代,每张图片只有一种插值方式
使用RIR 方法:多次迭代,每张图片可以有不同的插值方式
训练阶段 验证阶段
使用RIR 方法 不使用RIR 方法
(采用常规的插值方式)
(YOLOv8,11月之前有两种插值方式,
11月以后全部变成了双线性插值)
interp = random_interpolation_resize(
cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
# ----------------------------------------------------rir start------------------------------------------
if self.use_rir:
if self.val_flag:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
# print("use_rir={},val_flag={}".format(self.use_rir,self.val_flag))
else:
interp = random_interpolation_resize(
使用RIR 方法 cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
# print("use_rir={},val_flag={}".format(self.use_rir, self.val_flag))
else:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
im = cv2.resize(im,
(min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
interpolation=interp)
# ----------------------------------------------------rir end------------------------------------------
3.添加教程
return random.choices(list(cv_resize_flags_with_weights.keys()),
weights=list(cv_resize_flags_with_weights.values()), k=1)[0] # random.choices return a list
# -------------------------------------------------------------------rir start------------------------------
# 1.3. 引入相关参数,可以修改各部分权重,默认全部为1
self.use_rir = use_rir #
self.val_flag = val_flag
self.cv_resize_flags_with_weights = {cv2.INTER_NEAREST: 1,
cv2.INTER_LINEAR: 1,
cv2.INTER_CUBIC: 1,
cv2.INTER_AREA: 1,
cv2.INTER_LANCZOS4: 1,
cv2.INTER_LINEAR_EXACT: 1,
}
# -------------------------------------------------------------------rir end------------------------------
RIR 初始化的部分一定要放在 cache 的前面
3.1 YOLOv8 添加步骤 if rect_mode: # resize long side to imgsz while maintaining aspect ratio
r = self.imgsz / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
1. base.py 内部更改(11月以后)
# source code 注释部分为原YOLOv8 的代码,最新版改为了只用双线性插值
interpolation=cv2.INTER_LINEAR
# w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
1.4. 修改r!=1部分 # im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
class BaseDataset(Dataset):
if r != 1: # if sizes are not equal
#----------------------------------------------------rir start------------------------------------------
def load_image(self, i, rect_mode=True): if self.use_rir:
if self.val_flag:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
# print("use_rir={},val_flag={}".format({self.use_rir},{self.val_flag}))
else:
interp = random_interpolation_resize(
cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
# print("use_rir={},val_flag={}".format({self.use_rir}, {self.val_flag}))
# ----------------------------------------------------rir end------------------------------------------
else:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
im = cv2.resize(im, (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
interpolation=interp)
# ----------------------------------------------------rir end------------------------------------------
3.1 YOLOv8 添加步骤 if rect_mode: # resize long side to imgsz while maintaining aspect ratio
r = self.imgsz / max(h0, w0) # ratio
if r != 1: # if sizes are not equal
1. base.py 内部更改(11月以后的版本)
# source code 注释部分为原YOLOv8 的代码,最新版改为了只用双线性插值
interpolation=cv2.INTER_LINEAR
# w, h = (min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz))
1.4. 修改r!=1部分 # im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
if rect_mode:
1.5. 修改 square imgsz 部分 这一部 # 1.4. 修改r!=1部分
# ----------------------------------------------------rir start------------------------------------------
class BaseDataset(Dataset):
分在YOLOv8 11月以前的版本没有 if self.use_rir:
if self.val_flag:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
def load_image(self, i, rect_mode=True): print("use_rir={},val_flag={}".format(self.use_rir,self.val_flag))
else:
interp = random_interpolation_resize(
cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
print("use_rir={},val_flag={}".format(self.use_rir, self.val_flag))
else:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
im = cv2.resize(im,
(min(math.ceil(w0 * r), self.imgsz), min(math.ceil(h0 * r), self.imgsz)),
interpolation=interp)
# ----------------------------------------------------rir end------------------------------------------
elif not (h0 == w0 == self.imgsz): # resize by stretching image to square imgsz
# 1.5. 修改 square imgsz 部分 这一部分在yolov8 11月以前的版本没有
r = self.imgsz / max(h0, w0) # ratio
# ----------------------------------------------------rir start------------------------------------------
if self.use_rir:
if self.val_flag:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
print("use_rir={},val_flag={}".format(self.use_rir,self.val_flag))
else:
interp = random_interpolation_resize(
cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
print("use_rir={},val_flag={}".format(self.use_rir, self.val_flag))
# ----------------------------------------------------rir end------------------------------------------
else:
interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=interp)
3.1 YOLOv8 添加步骤 ultralytics/data/build.py
2. build.py build_yolo_dataset ultralytics/yolo/data/build.py
# 2.1 修改 use_rir=False
def build_yolo_dataset(cfg, img_path, batch, data, mode='train', rect=False, stride=32, use_rir=False):
    """Construct a YOLODataset for the given mode and forward the RIR switches to it.

    Args:
        cfg: Configuration object; its imgsz, rect, cache, single_cls, task,
            classes and fraction attributes are read here, and it is also
            passed through as ``hyp``.
        img_path: Passed through as ``img_path``.
        batch: Passed through as ``batch_size``.
        data: Passed through as ``data``.
        mode: ``'train'`` turns on augmentation and selects the training values
            for pad/fraction; any other value selects the non-training values.
        rect: OR-ed with ``cfg.rect``.
        stride: Converted with ``int()`` before being passed on.
        use_rir: Forwarded unchanged to the dataset (tutorial step 2.1/2.2).

    Returns:
        The YOLODataset instance.
    """
    training = mode == 'train'
    dataset_kwargs = dict(
        img_path=img_path,
        imgsz=cfg.imgsz,
        batch_size=batch,
        augment=training,  # augmentation
        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
        rect=cfg.rect or rect,  # rectangular batches
        cache=cfg.cache or None,
        single_cls=cfg.single_cls or False,
        stride=int(stride),
        pad=0.0 if training else 0.5,
        prefix=colorstr(f'{mode}: '),
        use_segments=cfg.task == 'segment',
        use_keypoints=cfg.task == 'pose',
        classes=cfg.classes,
        data=data,
        fraction=cfg.fraction if training else 1.0,
        use_rir=use_rir,  # step 2.2: forward use_rir
        val_flag=not training,  # step 2.3: val_flag is set for every mode except 'train'
    )
    return YOLODataset(**dataset_kwargs)
3.1 YOLOv8 添加步骤
3. ultralytics/models/yolo/detect/train.py 传参
class DetectionTrainer(BaseTrainer):
def build_dataset(self, img_path, mode='train', batch=None):
"""
Build YOLO Dataset.
Args:
img_path (str): Path to the folder containing images.
mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
"""
gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == 'val',
stride=gs,use_rir=self.args.use_rir) # 3. 传参
3.1 YOLOv8 添加步骤
4. ultralytics/models/yolo/detect/val.py 传参
def build_dataset(self, img_path, mode='val', batch=None):
"""
Build YOLO Dataset.
Args:
img_path (str): Path to the folder containing images.
mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
batch (int, optional): Size of batches, this is for `rect`. Defaults to None.
"""
(2)更改load_image函数内的插值方式
YOLOv7 项目地址(采用最新版进行演示,2023.12.19)
https://github.com/WongKinYiu/yolov7
3.3 YOLOv7 添加步骤
1.utils/datasets.py (1) 添加 random_interpolation_resize 函数
# 1. 添加 random_interpolation_resize 函数
def random_interpolation_resize(cv_resize_flags_with_weights=None):
    """Draw one OpenCV interpolation flag at random, weighted by the given table.

    Args:
        cv_resize_flags_with_weights: Mapping from an interpolation flag
            (e.g. ``cv2.INTER_LINEAR``) to its relative sampling weight.
            When ``None``, six flags are used with weight 1 each.

    Returns:
        One key of the mapping, suitable for the ``interpolation=`` argument
        of ``cv2.resize``.
    """
    if cv_resize_flags_with_weights is None:
        # Built per call instead of as a default argument, so no mutable dict
        # is shared between calls.
        cv_resize_flags_with_weights = {
            cv2.INTER_NEAREST: 1,
            cv2.INTER_LINEAR: 1,
            cv2.INTER_CUBIC: 1,
            cv2.INTER_AREA: 1,
            cv2.INTER_LANCZOS4: 1,
            cv2.INTER_LINEAR_EXACT: 1,
        }
    flags = list(cv_resize_flags_with_weights)
    weights = list(cv_resize_flags_with_weights.values())
    # random.choices returns a list even for k=1, hence the [0].
    return random.choices(flags, weights=weights, k=1)[0]
3.3 YOLOv7 添加步骤
1.utils/datasets.py (2)更改load_image函数内的插值方式
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    """Load one image from the dataset and resize it by ``img_size / max(h0, w0)``.

    A cached image (``self.imgs[index]`` not ``None``) is returned as stored,
    together with its stored sizes. Otherwise the file is read with
    ``cv2.imread`` and, when the ratio differs from 1, resized.

    The resize interpolation follows the RIR switches: with ``self.use_rir``
    set and ``self.val_flag`` unset, the flag is drawn by
    ``random_interpolation_resize``; in every other case it is
    ``cv2.INTER_LINEAR`` when augmenting or upscaling and ``cv2.INTER_AREA``
    otherwise.

    Args:
        index: Position of the image in ``self.imgs`` / ``self.img_files``.

    Returns:
        Tuple ``(img, hw_original, hw_resized)``.

    Raises:
        AssertionError: If ``cv2.imread`` returns ``None`` for the image path.
    """
    img = self.imgs[index]
    if img is not None:  # cached
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized

    path = self.img_files[index]
    img = cv2.imread(path)  # BGR
    assert img is not None, 'Image Not Found ' + path
    h0, w0 = img.shape[:2]  # orig hw
    r = self.img_size / max(h0, w0)  # resize image to img_size
    if r != 1:  # always resize down, only resize up if training with augmentation
        # ---------------------------------------- rir start ----------------------------------------
        # NOTE(review): assumes use_rir, val_flag and cv_resize_flags_with_weights
        # are set in LoadImagesAndLabels.__init__ - confirm.
        if self.use_rir and not self.val_flag:
            # Training with RIR: pick a random interpolation for this load.
            interp = random_interpolation_resize(
                cv_resize_flags_with_weights=self.cv_resize_flags_with_weights)
        else:
            # Validation, or RIR disabled: conventional interpolation choice.
            interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
        # ---------------------------------------- rir end ------------------------------------------
        img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
    return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
3.3 YOLOv7 添加步骤
1.utils/datasets.py (3)在 LoadImagesAndLabels 的 __init__ 内添加 use_rir 和 val_flag
# 4. use_rir和val_flag
def create_dataloader(path: object, imgsz: object, batch_size: object, stride: object, opt: object, hyp: object = None, augment: object = False,
cache: object = False, pad: object = 0.0,
rect: object = False,
rank: object = -1, world_size: object = 1, workers: object = 8, image_weights: object = False, quad: object = False, prefix: object =
'',use_rir=False,val_flag=False) -> object:
# Make sure only the first process in DDP process the dataset first, and the following others can use the cache
with torch_distributed_zero_first(rank):
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
augment=augment, # augment images
hyp=hyp, # augmentation hyperparameters
rect=rect, # rectangular training
cache_images=cache,
single_cls=opt.single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix,use_rir=use_rir,val_flag=val_flag)
# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
world_size=opt.world_size, workers=opt.workers,
image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: '),use_rir=opt.use_rir,val_flag=False)
utils/datasets.py
LoadImagesAndLabels
__getitem__
4.延伸创新点:(未做试验,欢迎继续探讨)
(1)分patch,每个patch采用不同的插值方式
(2)在训练和测试阶段均采用随机的插值方式
(3)在训练阶段最后n(n=30)个epoch
(4)其他可以类比的方法也可以采用 random,试试效果
加工作量:
(1)搭配超参数进化,提升工作量(已在YOLOv5-6.1版本添加)
(2)搭配其他数据增强方法,组合成特定数据集(比如NEU-DET、GC10等)
或者特定领域(工业检测、遥感领域等)的方法