377 lines
13 KiB
Plaintext
377 lines
13 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
|
||
"import re"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# The exported dataset is read from origin_data_dir; the re-split copy is written to target_data_dir.\n",
"origin_data_dir = os.path.normpath(\"./dataset/origin/yolo_seg_1612\")\n",
"target_data_dir = os.path.normpath(os.path.join(\"../datasets/\", \"yolo_seg_1612\"))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n",
|
||
"Requirement already satisfied: pandas in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (2.2.3)\n",
|
||
"Requirement already satisfied: scikit-learn in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (1.5.2)\n",
|
||
"Requirement already satisfied: numpy>=1.22.4 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (1.26.4)\n",
|
||
"Requirement already satisfied: python-dateutil>=2.8.2 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
|
||
"Requirement already satisfied: pytz>=2020.1 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n",
|
||
"Requirement already satisfied: tzdata>=2022.7 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n",
|
||
"Requirement already satisfied: scipy>=1.6.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.14.1)\n",
|
||
"Requirement already satisfied: joblib>=1.2.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.4.2)\n",
|
||
"Requirement already satisfied: threadpoolctl>=3.1.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (3.5.0)\n",
|
||
"Requirement already satisfied: six>=1.5 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
||
"Note: you may need to restart the kernel to use updated packages.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%pip install pandas scikit-learn"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>0</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1607</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1608</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1609</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1610</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1611</th>\n",
|
||
" <td>data/images/train/Birjand University Mobile Pa...</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1612 rows × 1 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 0\n",
|
||
"0 data/images/train/Birjand University Mobile Pa...\n",
|
||
"1 data/images/train/Birjand University Mobile Pa...\n",
|
||
"2 data/images/train/Birjand University Mobile Pa...\n",
|
||
"3 data/images/train/Birjand University Mobile Pa...\n",
|
||
"4 data/images/train/Birjand University Mobile Pa...\n",
|
||
"... ...\n",
|
||
"1607 data/images/train/Birjand University Mobile Pa...\n",
|
||
"1608 data/images/train/Birjand University Mobile Pa...\n",
|
||
"1609 data/images/train/Birjand University Mobile Pa...\n",
|
||
"1610 data/images/train/Birjand University Mobile Pa...\n",
|
||
"1611 data/images/train/Birjand University Mobile Pa...\n",
|
||
"\n",
|
||
"[1612 rows x 1 columns]"
|
||
]
|
||
},
|
||
"execution_count": 58,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# train.txt is read without a header row, so its single column is labelled 0.\n",
"df = pd.read_csv(\n",
"    os.path.join(origin_data_dir, \"train.txt\"),\n",
"    header=None,\n",
")\n",
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1 images/train/Birjand University Mobile Palmpri...\n",
|
||
"2 images/train/Birjand University Mobile Palmpri...\n",
|
||
"3 images/train/Birjand University Mobile Palmpri...\n",
|
||
"4 images/train/Birjand University Mobile Palmpri...\n",
|
||
" ... \n",
|
||
"1607 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1608 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1609 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1610 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1611 images/train/Birjand University Mobile Palmpri...\n",
|
||
"Name: 0, Length: 1612, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 59,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# str.lstrip(\"/data\") strips any leading run of the characters '/', 'd', 'a', 't'\n",
"# rather than the literal prefix, so it can eat into the next path component.\n",
"# Remove the exact leading \"/\" (if present) and \"data/\" prefix instead.\n",
"df[0] = df[0].str.removeprefix(\"/\").str.removeprefix(\"data/\")\n",
"df[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.model_selection import train_test_split"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" 0\n",
|
||
"1138 images/train/Birjand University Mobile Palmpri...\n",
|
||
"316 images/train/Birjand University Mobile Palmpri...\n",
|
||
"303 images/train/Birjand University Mobile Palmpri...\n",
|
||
"545 images/train/Birjand University Mobile Palmpri...\n",
|
||
"744 images/train/Birjand University Mobile Palmpri...\n",
|
||
"... ...\n",
|
||
"406 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1414 images/train/Birjand University Mobile Palmpri...\n",
|
||
"143 images/train/Birjand University Mobile Palmpri...\n",
|
||
"1265 images/train/Birjand University Mobile Palmpri...\n",
|
||
"623 images/train/Birjand University Mobile Palmpri...\n",
|
||
"\n",
|
||
"[1289 rows x 1 columns]\n",
|
||
" 0 \\\n",
|
||
"1574 images/train/Birjand University Mobile Palmpri... \n",
|
||
"1097 images/train/Birjand University Mobile Palmpri... \n",
|
||
"141 images/train/Birjand University Mobile Palmpri... \n",
|
||
"958 images/train/Birjand University Mobile Palmpri... \n",
|
||
"609 images/train/Birjand University Mobile Palmpri... \n",
|
||
"... ... \n",
|
||
"750 images/train/Birjand University Mobile Palmpri... \n",
|
||
"1547 images/train/Birjand University Mobile Palmpri... \n",
|
||
"817 images/train/Birjand University Mobile Palmpri... \n",
|
||
"1514 images/train/Birjand University Mobile Palmpri... \n",
|
||
"1164 images/train/Birjand University Mobile Palmpri... \n",
|
||
"\n",
|
||
" 1 \n",
|
||
"1574 images/val/Birjand University Mobile Palmprint... \n",
|
||
"1097 images/val/Birjand University Mobile Palmprint... \n",
|
||
"141 images/val/Birjand University Mobile Palmprint... \n",
|
||
"958 images/val/Birjand University Mobile Palmprint... \n",
|
||
"609 images/val/Birjand University Mobile Palmprint... \n",
|
||
"... ... \n",
|
||
"750 images/val/Birjand University Mobile Palmprint... \n",
|
||
"1547 images/val/Birjand University Mobile Palmprint... \n",
|
||
"817 images/val/Birjand University Mobile Palmprint... \n",
|
||
"1514 images/val/Birjand University Mobile Palmprint... \n",
|
||
"1164 images/val/Birjand University Mobile Palmprint... \n",
|
||
"\n",
|
||
"[323 rows x 2 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"train, test = train_test_split(df, test_size=0.2, random_state=17)\n",
"# Column 1 holds the validation-side path for each held-out image.\n",
"# str.lstrip(\"images/train\") strips a character set, not the prefix, and would\n",
"# truncate any directory or file name starting with one of those characters,\n",
"# so drop the exact \"images/train/\" prefix instead.\n",
"test[1] = \"images/val/\" + test[0].str.removeprefix(\"images/train/\")\n",
"print(train)\n",
"print(test)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.\n",
"os.makedirs(target_data_dir, exist_ok=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Write the split lists without index or header row.\n",
"train.to_csv(\n",
"    os.path.join(target_data_dir, \"train.txt\"),\n",
"    index=False,\n",
"    header=None,\n",
")\n",
"# Only column 1 of `test` goes into val.txt.\n",
"test[1].to_csv(\n",
"    os.path.join(target_data_dir, \"val.txt\"),\n",
"    index=False,\n",
"    header=None,\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import shutil"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"def copy_images(df: pd.DataFrame, target: str, labels: bool) -> None:\n",
"    \"\"\"Copy the files listed in column 0 of ``df`` into the ``target`` split.\n",
"\n",
"    Files are read from ``origin_data_dir/<kind>/train/`` and written to\n",
"    ``target_data_dir/<kind>/<target>/``, where ``<kind>`` is ``labels`` or\n",
"    ``images``. An existing destination directory is deleted first.\n",
"\n",
"    Args:\n",
"        df: frame whose column 0 holds paths starting with ``images/train/``.\n",
"        target: name of the destination split directory.\n",
"        labels: if true, copy the matching ``.txt`` files from the labels tree\n",
"            instead of the listed image files.\n",
"\n",
"    Raises:\n",
"        ValueError: if a listed path does not start with ``images/train/``.\n",
"    \"\"\"\n",
"    source_prefix = \"images/train/\"\n",
"    kind = \"labels\" if labels else \"images\"\n",
"    target_root = os.path.join(target_data_dir, kind, target)\n",
"    # Start from an empty destination so files from an earlier run do not linger.\n",
"    if os.path.exists(target_root):\n",
"        shutil.rmtree(target_root)\n",
"    for listed_path in df[0]:\n",
"        # str.lstrip() strips a character set, not a prefix, and could eat the\n",
"        # start of the next path component; match the exact prefix instead.\n",
"        if not listed_path.startswith(source_prefix):\n",
"            raise ValueError(\n",
"                f\"expected a path under {source_prefix!r}, got {listed_path!r}\"\n",
"            )\n",
"        relative = listed_path[len(source_prefix):]\n",
"        if labels:\n",
"            # Swap only the file extension; a regex over the whole path would\n",
"            # also match a dot inside a directory name.\n",
"            relative = os.path.splitext(relative)[0] + \".txt\"\n",
"        source_f = os.path.normpath(os.path.join(origin_data_dir, kind, \"train\", relative))\n",
"        target_f = os.path.normpath(os.path.join(target_root, relative))\n",
"        os.makedirs(os.path.dirname(target_f), exist_ok=True)\n",
"        shutil.copyfile(source_f, target_f)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"copy_images(df=train, target=\"train\", labels=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"copy_images(df=test, target=\"val\", labels=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"copy_images(df=train, target=\"train\", labels=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"copy_images(df=test, target=\"val\", labels=True)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": ".venv",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.11"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|