From f25f5cf2b54ae5bc5a48ed63327d19a088622e28 Mon Sep 17 00:00:00 2001 From: Jamjamjon <51357717+jamjamjon@users.noreply.github.com> Date: Tue, 27 Aug 2024 20:09:15 +0800 Subject: [PATCH] Bump the version to v0.0.11 - ONNXRuntime -> 1.19.x - CUDA -> 12.x - TensorRT -> 10.x --- Cargo.toml | 10 ++- README.md | 161 +++++++++++++++++++---------------- src/core/ort_engine.rs | 1 - src/models/db.rs | 3 +- src/models/depth_anything.rs | 2 +- src/models/modnet.rs | 2 +- src/models/sam.rs | 8 +- src/models/yolo.rs | 6 +- src/models/yolop.rs | 10 ++- 9 files changed, 116 insertions(+), 87 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3c5f59e..ca76358 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "usls" -version = "0.0.10" +version = "0.0.11" edition = "2021" description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models." repository = "https://github.com/jamjamjon/usls" @@ -11,11 +11,13 @@ exclude = ["assets/*", "examples/*", "scripts/*", "runs/*"] [dependencies] clap = { version = "4.2.4", features = ["derive"] } -ndarray = { version = "0.15.6", features = ["rayon"] } -ort = { version = "2.0.0-rc.2", git = "https://github.com/pykeio/ort.git", default-features = false, features = [ +ndarray = { version = "0.16.1", features = ["rayon"] } +ort = { version = "2.0.0-rc.5", default-features = false, features = [ "load-dynamic", "copy-dylibs", "half", + "cann", + "rknpu", "ndarray", "cuda", "tensorrt", @@ -24,7 +26,7 @@ ort = { version = "2.0.0-rc.2", git = "https://github.com/pykeio/ort.git", defau "rocm", "openvino", "operator-libraries" -], rev = "467d127c5877b099e1d0f605d38b74d221b6121c"} +]} anyhow = { version = "1.0.75" } regex = { version = "1.5.4" } rand = { version = "0.8.5" } diff --git a/README.md b/README.md index 9b230ac..b5dbca3 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,109 @@ -# usls +

+

+<h2 align="center">usls</h2>
+
-[![Static Badge](https://img.shields.io/crates/v/usls.svg?style=for-the-badge&logo=rust)](https://crates.io/crates/usls) [![Static Badge](https://img.shields.io/badge/ONNXRuntime-v1.17.x-yellow?style=for-the-badge&logo=docs.rs)](https://github.com/microsoft/onnxruntime/releases) [![Static Badge](https://img.shields.io/badge/CUDA-11.x-green?style=for-the-badge&logo=docs.rs)](https://developer.nvidia.com/cuda-toolkit-archive) [![Static Badge](https://img.shields.io/badge/TRT-8.6.x.x-blue?style=for-the-badge&logo=docs.rs)](https://developer.nvidia.com/tensorrt)
-[![Static Badge](https://img.shields.io/badge/Documents-usls-blue?style=for-the-badge&logo=docs.rs)](https://docs.rs/usls) ![Static Badge](https://img.shields.io/crates/d/usls?style=for-the-badge)
+<p align="center">
+  | <a href="https://docs.rs/usls">Documentation</a> |
+</p>
+<p align="center">
+  <a href="https://github.com/microsoft/onnxruntime/releases">ONNXRuntime Release Page</a>
+  <a href="https://developer.nvidia.com/cuda-toolkit-archive">CUDA Toolkit Page</a>
+  <a href="https://developer.nvidia.com/tensorrt">TensorRT Page</a>
+</p>
+<p align="center">
+  <a href="https://crates.io/crates/usls">Crates Page</a>
+  <img alt="Crates.io Total Downloads" src="https://img.shields.io/crates/d/usls?style=for-the-badge">
+</p>
+
+**`usls`** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
-A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [RTDETR](https://arxiv.org/abs/2304.08069), [SAM](https://github.com/facebookresearch/segment-anything), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [DINOv2](https://github.com/facebookresearch/dinov2), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) and others.
-
-
-| Segment Anything |
-| :------------------------------------------------------: |
-| |
-
-| YOLO + SAM |
-| :------------------------------------------------------: |
-| |
-
-
-| Monocular Depth Estimation |
-| :--------------------------------------------------------------: |
-| |
-
-
-| Panoptic Driving Perception | Text-Detection-Recognition |
-| :----------------------------------------------------: | :------------------------------------------------: |
-| | |
-
-
+- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10)
+- **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
+- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet)
+- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World)
+
## Supported Models

-| Model | Task / Type | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
-| :---: | :---: | :---: | :---: | :---: | :---: | :---: |
-| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
-| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
-| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
-| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
-| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
-| [Depth-Anything<br />(v1, v2)](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
-| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
-| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-
-## Installation
-
-Refer to [ort docs](https://ort.pyke.io/setup/linking)
-
-For Linux or MacOS users
-
-- Download from [ONNXRuntime Releases](https://github.com/microsoft/onnxruntime/releases)
-- Then linking
-  ```Shell
-  export ORT_DYLIB_PATH=/Users/qweasd/Desktop/onnxruntime-osx-arm64-1.17.1/lib/libonnxruntime.1.17.1.dylib
-  ```
+| Model | Task / Type | Example | CUDA f32 | CUDA f16 | TensorRT f32 | TensorRT f16 |
+|-------|-------------|---------|----------|----------|--------------|--------------|
+| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
+| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
+| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
+| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
+| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
+| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
+| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
+| [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
+| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
+| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-## Quick Start + +## ⛳️ Linking + +- #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking). + +- #### For Linux or macOS users + 1. Download the ONNXRuntime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases). + 2. Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable: + ```shell + export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0 + ``` + + +## 🎈 Quick Start ```Shell cargo run -r --example yolo # blip, clip, yolop, svtr, db, ... ``` -## Integrate into your own project - - +## 🥂 Integrate Into Your Own Project +Add `usls` as a dependency to your project's `Cargo.toml` ```Shell -# Add `usls` as a dependency to your project's `Cargo.toml` cargo add usls - -# Or you can use specific commit -usls = { git = "https://github.com/jamjamjon/usls", rev = "???sha???"} - ``` + +Or use a specific commit: +```Toml +[dependencies] +usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" } +``` + +## 📌 License +This project is licensed under [LICENSE](LICENSE). diff --git a/src/core/ort_engine.rs b/src/core/ort_engine.rs index a09aeeb..cec6cfd 100644 --- a/src/core/ort_engine.rs +++ b/src/core/ort_engine.rs @@ -290,7 +290,6 @@ impl OrtEngine { } let xs = Xs::from(xs); for _ in 0..self.num_dry_run { - // self.run(xs.as_ref())?; self.run(xs.clone())?; } self.ts.clear(); diff --git a/src/models/db.rs b/src/models/db.rs index f97684c..12d3e89 100644 --- a/src/models/db.rs +++ b/src/models/db.rs @@ -81,7 +81,8 @@ impl DB { let (ratio, _, _) = Ops::scale_wh(image_width, image_height, w as f32, h as f32); let v = luma .into_owned() - .into_raw_vec() + .into_raw_vec_and_offset() + .0 .iter() .map(|x| { if x <= &self.binary_thresh { diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs index 2695a95..2a5c86b 100644 --- a/src/models/depth_anything.rs +++ b/src/models/depth_anything.rs @@ -49,7 +49,7 @@ impl DepthAnything { let mut ys: Vec = Vec::new(); for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() { let (w1, h1) = (xs0[idx].width(), xs0[idx].height()); - let v = luma.into_owned().into_raw_vec(); + let v = luma.into_owned().into_raw_vec_and_offset().0; let max_ = v.iter().max_by(|x, y| x.total_cmp(y)).unwrap(); let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap(); let v = v diff --git a/src/models/modnet.rs b/src/models/modnet.rs index d606f15..57e647c 100644 --- a/src/models/modnet.rs +++ b/src/models/modnet.rs @@ -52,7 +52,7 @@ impl MODNet { let (w1, h1) = (xs0[idx].width(), xs0[idx].height()); let luma = luma.mapv(|x| (x * 255.0) as u8); let luma = Ops::resize_luma8_vec( - &luma.into_raw_vec(), + &luma.into_raw_vec_and_offset().0, self.width() as _, self.height() as _, w1 as _, diff --git a/src/models/sam.rs b/src/models/sam.rs index 8d56e96..6a03283 100644 --- a/src/models/sam.rs +++ b/src/models/sam.rs @@ -247,7 +247,8 @@ impl SAM { for (mask, iou) in masks.axis_iter(Axis(0)).zip(confs.axis_iter(Axis(0))) { let (i, conf) = match iou .to_owned() - .into_raw_vec() + .into_raw_vec_and_offset() + .0 .into_iter() .enumerate() .max_by(|a, b| a.1.total_cmp(&b.1)) @@ -264,7 +265,7 @@ impl SAM { let (h, w) = mask.dim(); let luma = if self.use_low_res_mask { Ops::resize_lumaf32_vec( - &mask.to_owned().into_raw_vec(), + &mask.into_owned().into_raw_vec_and_offset().0, w as _, h as _, image_width as _, @@ -274,7 +275,8 @@ impl SAM { )? } else { mask.mapv(|x| if x > 0. 
{ 255u8 } else { 0u8 }) - .into_raw_vec() + .into_raw_vec_and_offset() + .0 }; let luma: image::ImageBuffer, Vec<_>> = diff --git a/src/models/yolo.rs b/src/models/yolo.rs index 5d75221..bf5a9d1 100644 --- a/src/models/yolo.rs +++ b/src/models/yolo.rs @@ -215,7 +215,7 @@ impl Vision for YOLO { } else { slice_clss.into_owned() }; - let mut probs = Prob::default().with_probs(&x.into_raw_vec()); + let mut probs = Prob::default().with_probs(&x.into_raw_vec_and_offset().0); if let Some(names) = &self.names { probs = probs.with_names(&names.iter().map(|x| x.as_str()).collect::>()); @@ -417,12 +417,12 @@ impl Vision for YOLO { // coefs * proto => mask let coefs = Array::from_shape_vec((1, nm), coefs).ok()?; // (n, nm) - let proto = proto.into_shape((nm, mh * mw)).ok()?; // (nm, mh * mw) + let proto = proto.to_shape((nm, mh * mw)).ok()?; // (nm, mh * mw) let mask = coefs.dot(&proto); // (mh, mw, n) // Mask rescale let mask = Ops::resize_lumaf32_vec( - &mask.into_raw_vec(), + &mask.into_raw_vec_and_offset().0, mw as _, mh as _, image_width as _, diff --git a/src/models/yolop.rs b/src/models/yolop.rs index fbb1794..2aefcd3 100644 --- a/src/models/yolop.rs +++ b/src/models/yolop.rs @@ -191,7 +191,15 @@ impl YOLOPv2 { h1: f32, ) -> Result>> { let mask = mask.mapv(|x| if x < thresh { 0u8 } else { 255u8 }); - let mask = Ops::resize_luma8_vec(&mask.into_raw_vec(), w0, h0, w1, h1, false, "Bilinear")?; + let mask = Ops::resize_luma8_vec( + &mask.into_raw_vec_and_offset().0, + w0, + h0, + w1, + h1, + false, + "Bilinear", + )?; let mask: image::ImageBuffer, Vec<_>> = image::ImageBuffer::from_raw(w1 as _, h1 as _, mask) .ok_or(anyhow::anyhow!("Failed to build image"))?;
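
Note on the source changes above: the repeated `into_raw_vec()` -> `into_raw_vec_and_offset().0` and `into_shape()` -> `to_shape()` edits all follow from bumping `ndarray` from 0.15.6 to 0.16.1 in Cargo.toml. The sketch below is illustrative only, not usls code: the array shapes are placeholders, and it simply shows the ndarray 0.16 calls that the patch adopts, in isolation.

```Rust
use ndarray::{Array2, Array3};

fn main() {
    // ndarray 0.16 deprecates `into_raw_vec()`; `into_raw_vec_and_offset()`
    // returns the backing Vec together with the offset of the first element.
    // The patch keeps only the Vec via `.0` (or `_offset` here).
    let luma = Array2::<f32>::zeros((4, 4));
    let (v, _offset) = luma.into_raw_vec_and_offset();
    assert_eq!(v.len(), 16);

    // `into_shape()` is likewise deprecated; `to_shape()` returns a
    // copy-on-write view with the requested shape, as used for the mask protos.
    let proto = Array3::<f32>::zeros((32, 40, 40));
    let proto2d = proto.to_shape((32, 1600)).expect("element count must match");
    assert_eq!(proto2d.shape(), &[32, 1600]);
}
```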