//! **usls** is a Rust library integrated with **ONNXRuntime** that provides a collection of state-of-the-art models for **Computer Vision** and **Vision-Language** tasks, including:
//!
//! - **YOLO Models**: [YOLOv5](https://github.com/ultralytics/yolov5), [YOLOv6](https://github.com/meituan/YOLOv6), [YOLOv7](https://github.com/WongKinYiu/yolov7), [YOLOv8](https://github.com/ultralytics/ultralytics), [YOLOv9](https://github.com/WongKinYiu/yolov9), [YOLOv10](https://github.com/THU-MIG/yolov10)
//! - **SAM Models**: [SAM](https://github.com/facebookresearch/segment-anything), [SAM2](https://github.com/facebookresearch/segment-anything-2), [MobileSAM](https://github.com/ChaoningZhang/MobileSAM), [EdgeSAM](https://github.com/chongzhou96/EdgeSAM), [SAM-HQ](https://github.com/SysCV/sam-hq), [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
//! - **Vision Models**: [RTDETR](https://arxiv.org/abs/2304.08069), [RTMO](https://github.com/open-mmlab/mmpose/tree/main/projects/rtmo), [DB](https://arxiv.org/abs/1911.08947), [SVTR](https://arxiv.org/abs/2205.00159), [Depth-Anything-v1-v2](https://github.com/LiheYoung/Depth-Anything), [DINOv2](https://github.com/facebookresearch/dinov2), [MODNet](https://github.com/ZHKKKe/MODNet), [Sapiens](https://arxiv.org/abs/2408.12569)
//! - **Vision-Language Models**: [CLIP](https://github.com/openai/CLIP), [BLIP](https://arxiv.org/abs/2201.12086), [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO), [YOLO-World](https://github.com/AILab-CVC/YOLO-World), [Florence2](https://arxiv.org/abs/2311.06242)
//!
//! # Examples
//!
//! Refer to [All Runnable Demos](https://github.com/jamjamjon/usls/tree/main/examples).
//!
//! # Quick Start
//!
//! The following demo shows how to build a `YOLO` model with [`Options`], load images, videos, and streams with [`DataLoader`], and annotate the model's inference results with [`Annotator`].
//!
//! ```ignore
//! use usls::{models::YOLO, Annotator, DataLoader, Options, Vision, YOLOTask, YOLOVersion};
//!
//! fn main() -> anyhow::Result<()> {
//!     // Build model with Options
//!     let options = Options::new()
//!         .with_trt(0)
//!         .with_model("yolo/v8-m-dyn.onnx")?
//!         .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
//!         .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
//!         .with_i00((1, 1, 4).into())
//!         .with_i02((0, 640, 640).into())
//!         .with_i03((0, 640, 640).into())
//!         .with_confs(&[0.2]);
//!     let mut model = YOLO::new(options)?;
//!
//!     // Build DataLoader to load image(s), video, stream
//!     let dl = DataLoader::new(
//!         "./assets/bus.jpg", // local image
//!         // "images/bus.jpg", // remote image
//!         // "../set-negs", // local images (from folder)
//!         // "../hall.mp4", // local video
//!         // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
//!         // "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
//!     )?
//!     .with_batch(3) // iterate with batch_size = 3
//!     .build()?;
//!
//!     // Build annotator
//!     let annotator = Annotator::new().with_saveout("YOLO-Demo");
//!
//!     // Run and annotate images
//!     for (xs, _) in dl {
//!         let ys = model.forward(&xs, false)?;
//!         annotator.annotate(&xs, &ys);
//!     }
//!
//!     Ok(())
//! }
//! ```
//!
//! # What's More
//!
//! This guide covers the process of using the provided models for inference, including how to build a model, load data, annotate results, and retrieve the outputs. Click the sections below to expand for detailed instructions.
//!
//! <details>
//! <summary>Build the Model</summary>
//!
//! To build a model, you can use the provided [models] with [Options]:
//!
//! ```ignore
//! use usls::{models::YOLO, Options, Vision, YOLOTask, YOLOVersion};
//!
//! let options = Options::default()
//!     .with_yolo_version(YOLOVersion::V8) // YOLOVersion: V5, V6, V7, V8, V9, V10, RTDETR
//!     .with_yolo_task(YOLOTask::Detect) // YOLOTask: Classify, Detect, Pose, Segment, Obb
//!     .with_model("xxxx.onnx")?;
//! let mut model = YOLO::new(options)?;
//! ```
//!
//! **[Options] provides many more settings:**
//!
//! - **Choose Execution Provider:**
//!   Select `CUDA` (default), `TensorRT`, `CoreML`, or CPU:
//!
//! ```ignore
//! let options = Options::default()
//!     .with_cuda(0);
//!     // .with_trt(0);
//!     // .with_coreml(0);
//!     // .with_cpu();
//! ```
//!
//! - **Dynamic Input Shapes:**
//!   Specify dynamic shapes with [MinOptMax]:
//!
//! ```ignore
//! let options = Options::default()
//!     .with_i00((1, 2, 4).into())        // batch (min=1, opt=2, max=4)
//!     .with_i02((416, 640, 800).into())  // height (min=416, opt=640, max=800)
//!     .with_i03((416, 640, 800).into()); // width (min=416, opt=640, max=800)
//! ```
//!
//! - **Set Confidence Thresholds:**
//!   Adjust thresholds for each category:
//!
//! ```ignore
//! let options = Options::default()
//!     .with_confs(&[0.4, 0.15]); // class_0: 0.4, others: 0.15
//! ```
//!
//! - **Set Class Names:**
//!   Provide class names if needed:
//!
//! ```ignore
//! let options = Options::default()
//!     .with_names(&COCO_CLASS_NAMES_80);
//! ```
//!
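//! These settings chain on a single builder. A sketch combining the examples above (values are illustrative, not recommendations):
//!
//! ```ignore
//! let options = Options::default()
//!     .with_model("xxxx.onnx")?
//!     .with_cuda(0)                      // execution provider
//!     .with_i00((1, 1, 4).into())        // dynamic batch
//!     .with_i02((416, 640, 800).into())  // dynamic height
//!     .with_i03((416, 640, 800).into())  // dynamic width
//!     .with_confs(&[0.4, 0.15])          // per-class confidence thresholds
//!     .with_names(&COCO_CLASS_NAMES_80); // class names
//! ```
//!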
//! **More options are detailed in the [Options] documentation.**
//!
//! </details>
//!
//! <details>
//! <summary>Load Images, Video and Stream</summary>
//!
//! - **Load a Single Image**
//!   Use [DataLoader::try_read] to load an image from a local file or remote source:
//!
//! ```ignore
//! let x = DataLoader::try_read("./assets/bus.jpg")?; // from local
//! let x = DataLoader::try_read("images/bus.jpg")?; // from remote
//! ```
//!
//! Alternatively, use [image::ImageReader] directly:
//!
//! ```ignore
//! let x = image::ImageReader::open("myimage.png")?.decode()?;
//! ```
//!
//! - **Load Multiple Images, Videos, or Streams**
//!   Create a [DataLoader] instance for batch processing:
//!
//! ```ignore
//! let dl = DataLoader::new(
//!     "./assets/bus.jpg", // local image
//!     // "images/bus.jpg", // remote image
//!     // "../set-negs", // local images (from folder)
//!     // "../hall.mp4", // local video
//!     // "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4", // remote video
//!     // "rtsp://admin:kkasd1234@192.168.2.217:554/h264/ch1/", // stream
//! )?
//! .with_batch(3) // iterate with batch_size = 3
//! .build()?;
//!
//! // Iterate through the data
//! for (xs, _) in dl {}
//! ```
//!
//! - **Convert Images to Video**
//!   Use [DataLoader::is2v] to create a video from a sequence of images:
//!
//! ```ignore
//! let fps = 24;
//! let image_folder = "runs/YOLO-DataLoader";
//! let saveout = ["runs", "is2v"];
//! DataLoader::is2v(image_folder, &saveout, fps)?;
//! ```
//!
//! </details>
//!
//! <details>
//! <summary>Annotate Inference Results</summary>
//!
//! - **Create an Annotator Instance**
//!
//! ```ignore
//! let annotator = Annotator::default();
//! ```
//!
//! - **Set Saveout Name:**
//!
//! ```ignore
//! let annotator = Annotator::default()
//!     .with_saveout("YOLOs");
//! ```
//!
//! - **Set Bounding Box Line Width:**
//!
//! ```ignore
//! let annotator = Annotator::default()
//!     .with_bboxes_thickness(4);
//! ```
//!
//! - **Disable Mask Plotting:**
//!
//! ```ignore
//! let annotator = Annotator::default()
//!     .without_masks(true);
//! ```
//!
//! - **Perform Inference and Annotate the Results:**
//!
//! ```ignore
//! for (xs, _paths) in dl {
//!     let ys = model.run(&xs)?;
//!     annotator.annotate(&xs, &ys);
//! }
//! ```
//!
//! More options are detailed in the [Annotator] documentation.
//!
//! </details>
//!
//! <details>
//! <summary>Retrieve Model's Inference Results</summary>
//!
//! Retrieve the inference outputs, which are saved in a [`Vec<Y>`]:
//!
//! - **Get Detection Bounding Boxes**
//!
//! ```ignore
//! let ys = model.run(&xs)?;
//! for y in ys {
//!     // bboxes
//!     if let Some(bboxes) = y.bboxes() {
//!         for bbox in bboxes {
//!             println!(
//!                 "Bbox: {}, {}, {}, {}, {}, {}",
//!                 bbox.xmin(),
//!                 bbox.ymin(),
//!                 bbox.xmax(),
//!                 bbox.ymax(),
//!                 bbox.confidence(),
//!                 bbox.id(),
//!             );
//!         }
//!     }
//! }
//! ```
//!
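//! - **Get Other Results (e.g., Keypoints)**
//!   Other result types on each `Y` can be read the same way. A minimal sketch, assuming `y.keypoints()` mirrors the `y.bboxes()` accessor pattern; check the `Y` documentation for the exact accessor names:
//!
//! ```ignore
//! // Assumption: `keypoints()` returns an Option over keypoints exposing
//! // `x()`, `y()`, and `confidence()` accessors, analogous to `bboxes()`.
//! let ys = model.run(&xs)?;
//! for y in ys {
//!     if let Some(keypoints) = y.keypoints() {
//!         for kpt in keypoints {
//!             println!("Keypoint: {}, {}, {}", kpt.x(), kpt.y(), kpt.confidence());
//!         }
//!     }
//! }
//! ```
//!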
//! </details>
//!
//! <details>
//! <summary>Custom Model Implementation</summary>
//!
//! You can also implement your own model using [OrtEngine] and [Options]. [OrtEngine] handles ONNX model loading, metadata parsing, dry runs, and inference, and supports execution providers such as CUDA, TensorRT, and CoreML.
//!
//! For more details, refer to the [Demo: Depth-Anything](https://github.com/jamjamjon/usls/blob/main/src/models/depth_anything.rs).
//!
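//! The overall shape of such a model is sketched below. Method names on [OrtEngine] (e.g., `dry_run`) are assumptions based on the description above, not the definitive API; consult the linked demo for the exact signatures:
//!
//! ```ignore
//! use usls::{Options, OrtEngine};
//!
//! pub struct MyModel {
//!     engine: OrtEngine,
//! }
//!
//! impl MyModel {
//!     pub fn new(options: Options) -> anyhow::Result<Self> {
//!         // Load the ONNX model and parse its metadata.
//!         let mut engine = OrtEngine::new(&options)?;
//!         // Assumed warm-up call (the `dry_run` mentioned above).
//!         engine.dry_run()?;
//!         Ok(Self { engine })
//!     }
//!
//!     pub fn run(&mut self, xs: &[image::DynamicImage]) -> anyhow::Result<()> {
//!         // Preprocess inputs, feed the engine, then decode the raw outputs.
//!         // Pre/post-processing is model-specific; see the Depth-Anything demo.
//!         todo!()
//!     }
//! }
//! ```
//!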
//! </details>
mod core;
pub mod models;
mod utils;
mod ys;

pub use core::*;
pub use models::*;
pub use utils::*;
pub use ys::*;