From f25f5cf2b54ae5bc5a48ed63327d19a088622e28 Mon Sep 17 00:00:00 2001
From: Jamjamjon <51357717+jamjamjon@users.noreply.github.com>
Date: Tue, 27 Aug 2024 20:09:15 +0800
Subject: [PATCH] Bump the version to v0.0.11
- ONNXRuntime -> 1.19.x
- CUDA -> 12.x
- TensorRT -> 10.x
---
Cargo.toml | 10 ++-
README.md | 161 +++++++++++++++++++----------------
src/core/ort_engine.rs | 1 -
src/models/db.rs | 3 +-
src/models/depth_anything.rs | 2 +-
src/models/modnet.rs | 2 +-
src/models/sam.rs | 8 +-
src/models/yolo.rs | 6 +-
src/models/yolop.rs | 10 ++-
9 files changed, 116 insertions(+), 87 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 3c5f59e..ca76358 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "usls"
-version = "0.0.10"
+version = "0.0.11"
edition = "2021"
description = "A Rust library integrated with ONNXRuntime, providing a collection of ML models."
repository = "https://github.com/jamjamjon/usls"
@@ -11,11 +11,13 @@ exclude = ["assets/*", "examples/*", "scripts/*", "runs/*"]
[dependencies]
clap = { version = "4.2.4", features = ["derive"] }
-ndarray = { version = "0.15.6", features = ["rayon"] }
-ort = { version = "2.0.0-rc.2", git = "https://github.com/pykeio/ort.git", default-features = false, features = [
+ndarray = { version = "0.16.1", features = ["rayon"] }
+ort = { version = "2.0.0-rc.5", default-features = false, features = [
"load-dynamic",
"copy-dylibs",
"half",
+ "cann",
+ "rknpu",
"ndarray",
"cuda",
"tensorrt",
@@ -24,7 +26,7 @@ ort = { version = "2.0.0-rc.2", git = "https://github.com/pykeio/ort.git", defau
"rocm",
"openvino",
"operator-libraries"
-], rev = "467d127c5877b099e1d0f605d38b74d221b6121c"}
+]}
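+# ort 2.0.0-rc.5 is published on crates.io, so no git source or revision pin is required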
anyhow = { version = "1.0.75" }
regex = { version = "1.5.4" }
rand = { version = "0.8.5" }
diff --git a/README.md b/README.md
index 9b230ac..b5dbca3 100644
--- a/README.md
+++ b/README.md
@@ -1,92 +1,109 @@
-# usls
+
+usls
+
-[](https://crates.io/crates/usls) [](https://github.com/microsoft/onnxruntime/releases) [](https://developer.nvidia.com/cuda-toolkit-archive) [](https://developer.nvidia.com/tensorrt)
-[](https://docs.rs/usls) 
+
+| [Documentation](https://docs.rs/usls) |
+
+**`usls`** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
-A Rust library integrated with **ONNXRuntime**, providing a collection of **Computer Vison** and **Vision-Language** models including [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10), [RTDETR](https://arxiv.org/abs/2304.08069), [SAM](https://github.com/facebookresearch/segment-anything), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM), [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [DINOv2](https://github.com/facebookresearch/dinov2), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR), [Depth-Anything](https://github.com/LiheYoung/Depth-Anything), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) and others.
-
-
-| Segment Anything |
-| :------------------------------------------------------: |
-| |
-
-| YOLO + SAM |
-| :------------------------------------------------------: |
-| |
-
-
-| Monocular Depth Estimation |
-| :--------------------------------------------------------------: |
-| |
-
-
-| Panoptic Driving Perception | Text-Detection-Recognition |
-| :----------------------------------------------------: | :------------------------------------------------: |
-| | |
-
-
+- **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10)
+- **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
+- **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet)
+- **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World)
+
+<details>
+<summary>Click to expand Supported Models</summary>
+
## Supported Models
-| Model | Task / Type | Example | CUDA<br />f32 | CUDA<br />f16 | TensorRT<br />f32 | TensorRT<br />f16 |
-| :---------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------: | :--------------------------: | :-----------: | :-----------: | :------------------------: | :-----------------------: |
-| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
-| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
-| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
-| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ visual<br />❌ textual | ✅ visual<br />❌ textual |
-| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
-| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
-| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
-| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
-| [Depth-Anything<br />(v1, v2)](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
-| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
-| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-
-## Installation
-
-Refer to [ort docs](https://ort.pyke.io/setup/linking)
-
-
-For Linux or MacOS users
-
-- Download from [ONNXRuntime Releases](https://github.com/microsoft/onnxruntime/releases)
-- Then linking
- ```Shell
- export ORT_DYLIB_PATH=/Users/qweasd/Desktop/onnxruntime-osx-arm64-1.17.1/lib/libonnxruntime.1.17.1.dylib
- ```
+| Model | Task / Type | Example | CUDA f32 | CUDA f16 | TensorRT f32 | TensorRT f16 |
+|---------------------------------------------------------------------|----------------------------------------------------------------------------------------------|----------------------------|----------|----------|--------------|--------------|
+| [YOLOv5](https://github.com/ultralytics/yolov5) | Classification<br />Object Detection<br />Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv6](https://github.com/meituan/YOLOv6) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv7](https://github.com/WongKinYiu/yolov7) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv8](https://github.com/ultralytics/ultralytics) | Object Detection<br />Instance Segmentation<br />Classification<br />Oriented Object Detection<br />Keypoint Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv9](https://github.com/WongKinYiu/yolov9) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [YOLOv10](https://github.com/THU-MIG/yolov10) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [RTDETR](https://arxiv.org/abs/2304.08069) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM) | Instance Segmentation | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [SAM](https://github.com/facebookresearch/segment-anything) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [SAM2](https://github.com/facebookresearch/segment-anything-2) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [MobileSAM](https://github.com/ChaoningZhang/MobileSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [EdgeSAM](https://github.com/chongzhou96/EdgeSAM) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [SAM-HQ](https://github.com/SysCV/sam-hq) | Segment Anything | [demo](examples/sam) | ✅ | ✅ | | |
+| [YOLO-World](https://github.com/AILab-CVC/YOLO-World) | Object Detection | [demo](examples/yolo) | ✅ | ✅ | ✅ | ✅ |
+| [DINOv2](https://github.com/facebookresearch/dinov2) | Vision-Self-Supervised | [demo](examples/dinov2) | ✅ | ✅ | ✅ | ✅ |
+| [CLIP](https://github.com/openai/CLIP) | Vision-Language | [demo](examples/clip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
+| [BLIP](https://github.com/salesforce/BLIP) | Vision-Language | [demo](examples/blip) | ✅ | ✅ | ✅ Visual<br />❌ Textual | ✅ Visual<br />❌ Textual |
+| [DB](https://arxiv.org/abs/1911.08947) | Text Detection | [demo](examples/db) | ✅ | ✅ | ✅ | ✅ |
+| [SVTR](https://arxiv.org/abs/2205.00159) | Text Recognition | [demo](examples/svtr) | ✅ | ✅ | ✅ | ✅ |
+| [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo) | Keypoint Detection | [demo](examples/rtmo) | ✅ | ✅ | ❌ | ❌ |
+| [YOLOPv2](https://arxiv.org/abs/2208.11434) | Panoptic Driving Perception | [demo](examples/yolop) | ✅ | ✅ | ✅ | ✅ |
+| [Depth-Anything](https://github.com/LiheYoung/Depth-Anything) | Monocular Depth Estimation | [demo](examples/depth-anything) | ✅ | ✅ | ❌ | ❌ |
+| [MODNet](https://github.com/ZHKKKe/MODNet) | Image Matting | [demo](examples/modnet) | ✅ | ✅ | ✅ | ✅ |
+| [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) | Open-Set Detection With Language | [demo](examples/grounding-dino) | ✅ | ✅ | | |
-## Quick Start
+
+</details>
+
+## ⛳️ Linking
+
+- #### For detailed setup instructions, refer to the [ORT documentation](https://ort.pyke.io/setup/linking).
+
+- #### For Linux or macOS users
+ 1. Download the ONNXRuntime package from the [Releases page](https://github.com/microsoft/onnxruntime/releases).
+ 2. Set up the library path by exporting the `ORT_DYLIB_PATH` environment variable:
+ ```shell
+ export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.so.1.19.0
+ ```
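+
+   On macOS, point `ORT_DYLIB_PATH` at the corresponding `.dylib` instead (the path and version below are illustrative):
+   ```shell
+   export ORT_DYLIB_PATH=/path/to/onnxruntime/lib/libonnxruntime.1.19.0.dylib
+   ```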
+
+
+## 🎈 Quick Start
```Shell
cargo run -r --example yolo # blip, clip, yolop, svtr, db, ...
```
-## Integrate into your own project
-
-
+## 🥂 Integrate Into Your Own Project
+Add `usls` as a dependency to your project's `Cargo.toml`:
```Shell
-# Add `usls` as a dependency to your project's `Cargo.toml`
cargo add usls
-
-# Or you can use specific commit
-usls = { git = "https://github.com/jamjamjon/usls", rev = "???sha???"}
-
```
+
+Or use a specific commit:
+```Toml
+[dependencies]
+usls = { git = "https://github.com/jamjamjon/usls", rev = "commit-sha" }
+```
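+
+A minimal end-to-end sketch (hypothetical: the builder methods, `DataLoader::try_read`, and `run` shown here are assumptions based on this repository's `examples/yolo`, which remains the authoritative reference):
+
+```Rust
+use usls::{models::YOLO, DataLoader, Options, Vision};
+
+fn main() -> anyhow::Result<()> {
+    // Build the model. The exact option builders are assumptions; see examples/yolo.
+    let options = Options::default().with_model("yolov8m-dyn.onnx")?;
+    let mut model = YOLO::new(options)?;
+
+    // Read an image, run inference, and print the results.
+    let xs = [DataLoader::try_read("./assets/bus.jpg")?];
+    let ys = model.run(&xs)?;
+    println!("{:?}", ys);
+    Ok(())
+}
+```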
+
+## 📌 License
+This project is licensed under the terms of the [LICENSE](LICENSE) file.
diff --git a/src/core/ort_engine.rs b/src/core/ort_engine.rs
index a09aeeb..cec6cfd 100644
--- a/src/core/ort_engine.rs
+++ b/src/core/ort_engine.rs
@@ -290,7 +290,6 @@ impl OrtEngine {
}
let xs = Xs::from(xs);
for _ in 0..self.num_dry_run {
- // self.run(xs.as_ref())?;
self.run(xs.clone())?;
}
self.ts.clear();
diff --git a/src/models/db.rs b/src/models/db.rs
index f97684c..12d3e89 100644
--- a/src/models/db.rs
+++ b/src/models/db.rs
@@ -81,7 +81,8 @@ impl DB {
let (ratio, _, _) = Ops::scale_wh(image_width, image_height, w as f32, h as f32);
let v = luma
.into_owned()
- .into_raw_vec()
+ .into_raw_vec_and_offset()
+ .0
.iter()
.map(|x| {
if x <= &self.binary_thresh {
diff --git a/src/models/depth_anything.rs b/src/models/depth_anything.rs
index 2695a95..2a5c86b 100644
--- a/src/models/depth_anything.rs
+++ b/src/models/depth_anything.rs
@@ -49,7 +49,7 @@ impl DepthAnything {
let mut ys: Vec<Y> = Vec::new();
for (idx, luma) in xs[0].axis_iter(Axis(0)).enumerate() {
let (w1, h1) = (xs0[idx].width(), xs0[idx].height());
- let v = luma.into_owned().into_raw_vec();
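+            // ndarray 0.16: into_raw_vec_and_offset() returns (Vec<A>, Option<usize>); only the data Vec is needed here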
+ let v = luma.into_owned().into_raw_vec_and_offset().0;
let max_ = v.iter().max_by(|x, y| x.total_cmp(y)).unwrap();
let min_ = v.iter().min_by(|x, y| x.total_cmp(y)).unwrap();
let v = v
diff --git a/src/models/modnet.rs b/src/models/modnet.rs
index d606f15..57e647c 100644
--- a/src/models/modnet.rs
+++ b/src/models/modnet.rs
@@ -52,7 +52,7 @@ impl MODNet {
let (w1, h1) = (xs0[idx].width(), xs0[idx].height());
let luma = luma.mapv(|x| (x * 255.0) as u8);
let luma = Ops::resize_luma8_vec(
- &luma.into_raw_vec(),
+ &luma.into_raw_vec_and_offset().0,
self.width() as _,
self.height() as _,
w1 as _,
diff --git a/src/models/sam.rs b/src/models/sam.rs
index 8d56e96..6a03283 100644
--- a/src/models/sam.rs
+++ b/src/models/sam.rs
@@ -247,7 +247,8 @@ impl SAM {
for (mask, iou) in masks.axis_iter(Axis(0)).zip(confs.axis_iter(Axis(0))) {
let (i, conf) = match iou
.to_owned()
- .into_raw_vec()
+ .into_raw_vec_and_offset()
+ .0
.into_iter()
.enumerate()
.max_by(|a, b| a.1.total_cmp(&b.1))
@@ -264,7 +265,7 @@ impl SAM {
let (h, w) = mask.dim();
let luma = if self.use_low_res_mask {
Ops::resize_lumaf32_vec(
- &mask.to_owned().into_raw_vec(),
+ &mask.into_owned().into_raw_vec_and_offset().0,
w as _,
h as _,
image_width as _,
@@ -274,7 +275,8 @@ impl SAM {
)?
} else {
mask.mapv(|x| if x > 0. { 255u8 } else { 0u8 })
- .into_raw_vec()
+ .into_raw_vec_and_offset()
+ .0
};
let luma: image::ImageBuffer<image::Luma<u8>, Vec<_>> =
diff --git a/src/models/yolo.rs b/src/models/yolo.rs
index 5d75221..bf5a9d1 100644
--- a/src/models/yolo.rs
+++ b/src/models/yolo.rs
@@ -215,7 +215,7 @@ impl Vision for YOLO {
} else {
slice_clss.into_owned()
};
- let mut probs = Prob::default().with_probs(&x.into_raw_vec());
+ let mut probs = Prob::default().with_probs(&x.into_raw_vec_and_offset().0);
if let Some(names) = &self.names {
probs =
probs.with_names(&names.iter().map(|x| x.as_str()).collect::<Vec<_>>());
@@ -417,12 +417,12 @@ impl Vision for YOLO {
// coefs * proto => mask
let coefs = Array::from_shape_vec((1, nm), coefs).ok()?; // (n, nm)
- let proto = proto.into_shape((nm, mh * mw)).ok()?; // (nm, mh * mw)
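+            // ndarray 0.16 deprecates into_shape(); to_shape() returns a CowArray view with the requested shape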
+ let proto = proto.to_shape((nm, mh * mw)).ok()?; // (nm, mh * mw)
let mask = coefs.dot(&proto); // (mh, mw, n)
// Mask rescale
let mask = Ops::resize_lumaf32_vec(
- &mask.into_raw_vec(),
+ &mask.into_raw_vec_and_offset().0,
mw as _,
mh as _,
image_width as _,
diff --git a/src/models/yolop.rs b/src/models/yolop.rs
index fbb1794..2aefcd3 100644
--- a/src/models/yolop.rs
+++ b/src/models/yolop.rs
@@ -191,7 +191,15 @@ impl YOLOPv2 {
h1: f32,
) -> Result>> {
let mask = mask.mapv(|x| if x < thresh { 0u8 } else { 255u8 });
- let mask = Ops::resize_luma8_vec(&mask.into_raw_vec(), w0, h0, w1, h1, false, "Bilinear")?;
+ let mask = Ops::resize_luma8_vec(
+ &mask.into_raw_vec_and_offset().0,
+ w0,
+ h0,
+ w1,
+ h1,
+ false,
+ "Bilinear",
+ )?;
let mask: image::ImageBuffer<image::Luma<u8>, Vec<_>> =
image::ImageBuffer::from_raw(w1 as _, h1 as _, mask)
.ok_or(anyhow::anyhow!("Failed to build image"))?;