palmprint-recognition/yolo_split.ipynb

377 lines
13 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"origin_data_dir = os.path.normpath(\"./dataset/origin/yolo_seg_1612\")\n",
"target_data_dir = os.path.normpath(os.path.join(\"../datasets/\",\"yolo_seg_1612\"))"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n",
"Requirement already satisfied: pandas in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (2.2.3)\n",
"Requirement already satisfied: scikit-learn in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (1.5.2)\n",
"Requirement already satisfied: numpy>=1.22.4 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: scipy>=1.6.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.14.1)\n",
"Requirement already satisfied: joblib>=1.2.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (3.5.0)\n",
"Requirement already satisfied: six>=1.5 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install pandas scikit-learn"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1607</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1608</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1609</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1610</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1611</th>\n",
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1612 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" 0\n",
"0 data/images/train/Birjand University Mobile Pa...\n",
"1 data/images/train/Birjand University Mobile Pa...\n",
"2 data/images/train/Birjand University Mobile Pa...\n",
"3 data/images/train/Birjand University Mobile Pa...\n",
"4 data/images/train/Birjand University Mobile Pa...\n",
"... ...\n",
"1607 data/images/train/Birjand University Mobile Pa...\n",
"1608 data/images/train/Birjand University Mobile Pa...\n",
"1609 data/images/train/Birjand University Mobile Pa...\n",
"1610 data/images/train/Birjand University Mobile Pa...\n",
"1611 data/images/train/Birjand University Mobile Pa...\n",
"\n",
"[1612 rows x 1 columns]"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the image list that ships with the source dataset (no header row, so the column is labelled 0).\n",
"train_list_path = os.path.join(origin_data_dir, \"train.txt\")\n",
"df = pd.read_csv(train_list_path, header=None)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 images/train/Birjand University Mobile Palmpri...\n",
"1 images/train/Birjand University Mobile Palmpri...\n",
"2 images/train/Birjand University Mobile Palmpri...\n",
"3 images/train/Birjand University Mobile Palmpri...\n",
"4 images/train/Birjand University Mobile Palmpri...\n",
" ... \n",
"1607 images/train/Birjand University Mobile Palmpri...\n",
"1608 images/train/Birjand University Mobile Palmpri...\n",
"1609 images/train/Birjand University Mobile Palmpri...\n",
"1610 images/train/Birjand University Mobile Palmpri...\n",
"1611 images/train/Birjand University Mobile Palmpri...\n",
"Name: 0, Length: 1612, dtype: object"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop the leading \"data/\" component so the paths can be joined onto origin_data_dir.\n",
"# str.lstrip(\"/data\") removes any run of the characters '/', 'd', 'a', 't' rather than a prefix,\n",
"# so it would also eat the start of a following folder whose name begins with one of them.\n",
"# removeprefix() removes exactly the prefix and leaves already-cleaned paths alone on a re-run.\n",
"df[0] = df[0].apply(lambda x: x.lstrip(\"/\").removeprefix(\"data/\"))\n",
"df[0]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 0\n",
"1138 images/train/Birjand University Mobile Palmpri...\n",
"316 images/train/Birjand University Mobile Palmpri...\n",
"303 images/train/Birjand University Mobile Palmpri...\n",
"545 images/train/Birjand University Mobile Palmpri...\n",
"744 images/train/Birjand University Mobile Palmpri...\n",
"... ...\n",
"406 images/train/Birjand University Mobile Palmpri...\n",
"1414 images/train/Birjand University Mobile Palmpri...\n",
"143 images/train/Birjand University Mobile Palmpri...\n",
"1265 images/train/Birjand University Mobile Palmpri...\n",
"623 images/train/Birjand University Mobile Palmpri...\n",
"\n",
"[1289 rows x 1 columns]\n",
" 0 \\\n",
"1574 images/train/Birjand University Mobile Palmpri... \n",
"1097 images/train/Birjand University Mobile Palmpri... \n",
"141 images/train/Birjand University Mobile Palmpri... \n",
"958 images/train/Birjand University Mobile Palmpri... \n",
"609 images/train/Birjand University Mobile Palmpri... \n",
"... ... \n",
"750 images/train/Birjand University Mobile Palmpri... \n",
"1547 images/train/Birjand University Mobile Palmpri... \n",
"817 images/train/Birjand University Mobile Palmpri... \n",
"1514 images/train/Birjand University Mobile Palmpri... \n",
"1164 images/train/Birjand University Mobile Palmpri... \n",
"\n",
" 1 \n",
"1574 images/val/Birjand University Mobile Palmprint... \n",
"1097 images/val/Birjand University Mobile Palmprint... \n",
"141 images/val/Birjand University Mobile Palmprint... \n",
"958 images/val/Birjand University Mobile Palmprint... \n",
"609 images/val/Birjand University Mobile Palmprint... \n",
"... ... \n",
"750 images/val/Birjand University Mobile Palmprint... \n",
"1547 images/val/Birjand University Mobile Palmprint... \n",
"817 images/val/Birjand University Mobile Palmprint... \n",
"1514 images/val/Birjand University Mobile Palmprint... \n",
"1164 images/val/Birjand University Mobile Palmprint... \n",
"\n",
"[323 rows x 2 columns]\n"
]
}
],
"source": [
"# Hold out 20% of the listed images for validation; the fixed random_state keeps the split repeatable.\n",
"train, test = train_test_split(df, test_size=0.2, random_state=17)\n",
"# Work on an explicit copy so adding a column below cannot write into a view of df.\n",
"test = test.copy()\n",
"# Column 0 keeps the original images/train/... path (still needed to locate the source file);\n",
"# column 1 is the same path re-rooted under images/val/ and is what gets written to val.txt.\n",
"# removeprefix() drops exactly \"images/train/\"; lstrip(\"images/train\") strips a character set\n",
"# and would mangle any folder or file name that starts with one of those characters.\n",
"test[1] = test[0].apply(lambda x: \"images/val/\" + x.removeprefix(\"images/train/\"))\n",
"print(train)\n",
"print(test)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# Create the output root if needed. exist_ok=True replaces the separate existence check,\n",
"# so there is no gap between checking and creating and the cell is safe to re-run.\n",
"os.makedirs(target_data_dir, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"train.to_csv(os.path.join(target_data_dir,\"train.txt\"),index=False,header=None)\n",
"test[1].to_csv(os.path.join(target_data_dir,\"val.txt\"),index=False,header=None)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"import shutil"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"def copy_images(df:pd.DataFrame,target:str,labels:bool):\n",
"    \"\"\"Copy the files listed in column 0 of ``df`` into one split folder.\n",
"\n",
"    Args:\n",
"        df: frame whose column 0 holds paths of the form ``images/train/<rest>``,\n",
"            relative to ``origin_data_dir``.\n",
"        target: name of the split folder to fill under ``target_data_dir/<kind>/``.\n",
"        labels: if True, copy the matching ``.txt`` files from the ``labels`` tree\n",
"            instead of the image files from the ``images`` tree.\n",
"\n",
"    The destination folder ``target_data_dir/<kind>/<target>`` is removed first,\n",
"    so every call rebuilds it from scratch.\n",
"    \"\"\"\n",
"    kind = \"labels\" if labels else \"images\"\n",
"    targetRoot = os.path.join(target_data_dir, kind, target)\n",
"    if os.path.exists(targetRoot):\n",
"        shutil.rmtree(targetRoot)\n",
"    for f in df[0]:\n",
"        if labels:\n",
"            # The label path is the image path with its extension replaced by .txt.\n",
"            f = re.sub(r'\\.[^.]+$', '.txt', f)\n",
"        # Swap the leading \"images\" component for the requested kind.\n",
"        # removeprefix() is used instead of lstrip(), which strips a set of characters\n",
"        # and would corrupt names that begin with one of them.\n",
"        f = kind + f.removeprefix(\"images\")\n",
"        source_f = os.path.normpath(os.path.join(origin_data_dir, f))\n",
"        # Keep only the part below <kind>/train/ as the path inside the split folder.\n",
"        # The trailing slash is removed too, so the remainder stays relative to targetRoot.\n",
"        relative_f = f.removeprefix(kind + \"/train/\")\n",
"        target_f = os.path.normpath(os.path.join(targetRoot, relative_f))\n",
"        os.makedirs(os.path.dirname(target_f), exist_ok=True)\n",
"        shutil.copyfile(source_f, target_f)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"copy_images(df=train,target=\"train\",labels=False)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"copy_images(df=test,target=\"val\",labels=False)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"copy_images(df=train,target=\"train\",labels=True)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"copy_images(df=test,target=\"val\",labels=True)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}