From 5f6b814090507767003a540b2f5e4c26e37e73f9 Mon Sep 17 00:00:00 2001 From: oatiz Date: Wed, 26 Jun 2024 00:00:36 +0800 Subject: [PATCH] Add an option to adjust the line width for Bbox edges (#22) * feat: add bbox thickness option * feat: add thickness threshold and side option * fix: clippy * Minor adjustments --------- Co-authored-by: jamjamjon --- examples/yolov8/main.rs | 1 + rust-toolchain.toml | 2 +- src/core/annotator.rs | 43 ++++++++++++++++++++------ src/core/logits_sampler.rs | 2 +- src/core/preprocess.cu | 59 ------------------------------------ src/models/depth_anything.rs | 2 +- 6 files changed, 38 insertions(+), 71 deletions(-) delete mode 100644 src/core/preprocess.cu diff --git a/examples/yolov8/main.rs b/examples/yolov8/main.rs index 171b712..579a7d4 100644 --- a/examples/yolov8/main.rs +++ b/examples/yolov8/main.rs @@ -31,6 +31,7 @@ fn main() -> Result<(), Box> { // build annotate let annotator = Annotator::default() .with_skeletons(&coco::SKELETONS_16) + .with_bboxes_thickness(7) .with_saveout("YOLOv8"); // run & annotate diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 6d833ff..c6e4d7d 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,2 @@ [toolchain] -channel = "1.75" +channel = "1.79" diff --git a/src/core/annotator.rs b/src/core/annotator.rs index 20621e4..de6af68 100644 --- a/src/core/annotator.rs +++ b/src/core/annotator.rs @@ -29,6 +29,8 @@ pub struct Annotator { without_bboxes_name: bool, without_bboxes_text_bg: bool, bboxes_text_color: Rgba, + bboxes_thickness: usize, + bboxes_thickness_threshold: f32, // About keypoints without_keypoints: bool, @@ -71,6 +73,8 @@ impl Default for Annotator { without_bboxes_conf: false, without_bboxes_name: false, bboxes_text_color: Rgba([0, 0, 0, 255]), + bboxes_thickness: 1, + bboxes_thickness_threshold: 0.3, without_bboxes_text_bg: false, without_mbrs: false, without_mbrs_conf: false, @@ -136,6 +140,16 @@ impl Annotator { self } + pub fn with_bboxes_thickness(mut self, thickness: usize) -> Self { + self.bboxes_thickness = thickness; + self + } + + pub fn with_bboxes_thickness_threshold(mut self, threshold: f32) -> Self { + self.bboxes_thickness_threshold = threshold; + self + } + pub fn without_keypoints(mut self, x: bool) -> Self { self.without_keypoints = x; self @@ -360,14 +374,25 @@ impl Annotator { /// Plot bounding bboxes and labels pub fn plot_bboxes(&self, img: &mut RgbaImage, bboxes: &[Bbox]) { + // bbox for bbox in bboxes.iter() { - // bbox - imageproc::drawing::draw_hollow_rect_mut( - img, - imageproc::rect::Rect::at(bbox.xmin().round() as i32, bbox.ymin().round() as i32) - .of_size(bbox.width().round() as u32, bbox.height().round() as u32), - image::Rgba(self.get_color(bbox.id() as usize).into()), - ); + let short_side_threshold = + bbox.width().min(bbox.height()) * self.bboxes_thickness_threshold; + let thickness = self.bboxes_thickness.min(short_side_threshold as usize); + for i in 0..thickness { + imageproc::drawing::draw_hollow_rect_mut( + img, + imageproc::rect::Rect::at( + (bbox.xmin().round() as i32) - (i as i32), + (bbox.ymin().round() as i32) - (i as i32), + ) + .of_size( + (bbox.width().round() as u32) + (2 * i as u32), + (bbox.height().round() as u32) + (2 * i as u32), + ), + image::Rgba(self.get_color(bbox.id() as usize).into()), + ); + } // label if !self.without_bboxes_name || !self.without_bboxes_conf { @@ -379,8 +404,8 @@ impl Annotator { self.put_text( img, &label, - bbox.xmin(), - bbox.ymin(), + (bbox.xmin().round() as i32 - (thickness - 1) as i32).max(0) as f32, + (bbox.ymin().round() as i32 - (thickness - 1) as i32).max(0) as f32, image::Rgba(self.get_color(bbox.id() as usize).into()), self.bboxes_text_color, self.without_bboxes_text_bg, diff --git a/src/core/logits_sampler.rs b/src/core/logits_sampler.rs index 3ae33c3..5867fd7 100644 --- a/src/core/logits_sampler.rs +++ b/src/core/logits_sampler.rs @@ -23,7 +23,7 @@ impl LogitsSampler { } pub fn with_topp(mut self, p: f32) -> Self { - self.p = p.max(0.0).min(1.0); + self.p = p.clamp(0.0, 1.0); self } diff --git a/src/core/preprocess.cu b/src/core/preprocess.cu deleted file mode 100644 index bc0e87a..0000000 --- a/src/core/preprocess.cu +++ /dev/null @@ -1,59 +0,0 @@ -extern "C" __global__ void rgb2bgr(int* xs, int* ys, const int h, const int w) { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - int tid = (x + y * w) * 3; - if (x < w && y < h) { - ys[tid] = xs[tid+ 2]; - ys[tid + 1] = xs[tid + 1]; - ys[tid + 2] = xs[tid]; - } -} - -extern "C" __global__ void normalize(float* xs, float* ys, int h, int w, float* means, float* stds) { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - int tid = (x + y * w) * 3; - if (x < w && y < h) { - ys[tid] = (xs[tid] - means[0]) / stds[0]; - ys[tid + 1] = (xs[tid + 1] - means[1]) / stds[1]; - ys[tid + 2] = (xs[tid + 2] - means[2]) / stds[2]; - } -} - - -extern "C" __global__ void hwc2chw(int* xs, int* ys, int h, int w) { - int x = threadIdx.x + blockIdx.x * blockDim.x; - int y = threadIdx.y + blockIdx.y * blockDim.y; - int tid = x + y * w; - if (x < w && y < h) { - ys[tid] = xs[tid * 3]; - ys[tid + h * w] = xs[tid * 3 + 1]; - ys[tid + h * w * 2] = xs[tid * 3 + 2]; - } -} - - -extern "C" __global__ void resize_bilinear(const float* input, float* output, int in_width, int in_height, int out_width, int out_height, int num_channel) { - int x = blockIdx.x * blockDim.x + threadIdx.x; - int y = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= out_width || y >= out_height) return; - - // align_corners - float scale_x = static_cast(in_width - 1) / static_cast(out_width - 1); - float scale_y = static_cast(in_height - 1) / static_cast(out_height - 1); - float src_x = x * scale_x; - float src_y = y * scale_y; - int x0 = src_x; - int y0 = src_y; - int x1 = min(x0 + 1, in_width - 1); - int y1 = min(y0 + 1, in_height - 1); - float dx = src_x - x0; - float dy = src_y - y0; - for (int c = 0; c < num_channel; ++c) { - float value = (1 - dx) * (1 - dy) * input[(y0 * in_width + x0) * num_channel + c] + - dx * (1 - dy) * input[(y0 * in_width + x1) * num_channel + c] + - (1 - dx) * dy * input[(y1 * in_width + x0) * num_channel + c] + - dx * dy * input[(y1 * in_width + x1) * num_channel + c]; - output[(y * out_width + x) * num_channel + c] = static_cast(value); - } -} diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs index c5fd8d5..878d48c 100644 --- a/src/models/depth_anything.rs +++ b/src/models/depth_anything.rs @@ -53,7 +53,7 @@ impl DepthAnything { let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap(); let v = v .iter() - .map(|x| (((*x - min_) / (max_ - min_)) * 255.).min(255.).max(0.) as u8) + .map(|x| (((*x - min_) / (max_ - min_)) * 255.).clamp(0., 255.) as u8) .collect::>(); let luma: ImageBuffer, Vec> = ImageBuffer::from_raw(self.width() as u32, self.height() as u32, v)