{ "cells": [ { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "import os\n", "import re" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "origin_data_dir = os.path.normpath(\"./dataset/origin/yolo_seg_1612\")\n", "target_data_dir = os.path.normpath(os.path.join(\"../datasets/\",\"yolo_seg_1612\"))" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://mirrors.aliyun.com/pypi/simple\n", "Requirement already satisfied: pandas in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (2.2.3)\n", "Requirement already satisfied: scikit-learn in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (1.5.2)\n", "Requirement already satisfied: numpy>=1.22.4 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (1.26.4)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from pandas) (2024.2)\n", "Requirement already satisfied: scipy>=1.6.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.14.1)\n", "Requirement already satisfied: joblib>=1.2.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from scikit-learn) (3.5.0)\n", "Requirement already satisfied: six>=1.5 in d:\\projects\\test\\palmprint-recognition\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install pandas scikit-learn" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0data/images/train/Birjand University Mobile Pa...
1data/images/train/Birjand University Mobile Pa...
2data/images/train/Birjand University Mobile Pa...
3data/images/train/Birjand University Mobile Pa...
4data/images/train/Birjand University Mobile Pa...
......
1607data/images/train/Birjand University Mobile Pa...
1608data/images/train/Birjand University Mobile Pa...
1609data/images/train/Birjand University Mobile Pa...
1610data/images/train/Birjand University Mobile Pa...
1611data/images/train/Birjand University Mobile Pa...
\n", "

1612 rows × 1 columns

\n", "
" ], "text/plain": [ " 0\n", "0 data/images/train/Birjand University Mobile Pa...\n", "1 data/images/train/Birjand University Mobile Pa...\n", "2 data/images/train/Birjand University Mobile Pa...\n", "3 data/images/train/Birjand University Mobile Pa...\n", "4 data/images/train/Birjand University Mobile Pa...\n", "... ...\n", "1607 data/images/train/Birjand University Mobile Pa...\n", "1608 data/images/train/Birjand University Mobile Pa...\n", "1609 data/images/train/Birjand University Mobile Pa...\n", "1610 data/images/train/Birjand University Mobile Pa...\n", "1611 data/images/train/Birjand University Mobile Pa...\n", "\n", "[1612 rows x 1 columns]" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(os.path.join(origin_data_dir, \"train.txt\"),header=None)\n", "df" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 images/train/Birjand University Mobile Palmpri...\n", "1 images/train/Birjand University Mobile Palmpri...\n", "2 images/train/Birjand University Mobile Palmpri...\n", "3 images/train/Birjand University Mobile Palmpri...\n", "4 images/train/Birjand University Mobile Palmpri...\n", " ... \n", "1607 images/train/Birjand University Mobile Palmpri...\n", "1608 images/train/Birjand University Mobile Palmpri...\n", "1609 images/train/Birjand University Mobile Palmpri...\n", "1610 images/train/Birjand University Mobile Palmpri...\n", "1611 images/train/Birjand University Mobile Palmpri...\n", "Name: 0, Length: 1612, dtype: object" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[0] = df[0].apply(lambda x: x.lstrip(\"/data\"))\n", "df[0]" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0\n", "1138 images/train/Birjand University Mobile Palmpri...\n", "316 images/train/Birjand University Mobile Palmpri...\n", "303 images/train/Birjand University Mobile Palmpri...\n", "545 images/train/Birjand University Mobile Palmpri...\n", "744 images/train/Birjand University Mobile Palmpri...\n", "... ...\n", "406 images/train/Birjand University Mobile Palmpri...\n", "1414 images/train/Birjand University Mobile Palmpri...\n", "143 images/train/Birjand University Mobile Palmpri...\n", "1265 images/train/Birjand University Mobile Palmpri...\n", "623 images/train/Birjand University Mobile Palmpri...\n", "\n", "[1289 rows x 1 columns]\n", " 0 \\\n", "1574 images/train/Birjand University Mobile Palmpri... \n", "1097 images/train/Birjand University Mobile Palmpri... \n", "141 images/train/Birjand University Mobile Palmpri... \n", "958 images/train/Birjand University Mobile Palmpri... \n", "609 images/train/Birjand University Mobile Palmpri... \n", "... ... \n", "750 images/train/Birjand University Mobile Palmpri... \n", "1547 images/train/Birjand University Mobile Palmpri... \n", "817 images/train/Birjand University Mobile Palmpri... \n", "1514 images/train/Birjand University Mobile Palmpri... \n", "1164 images/train/Birjand University Mobile Palmpri... \n", "\n", " 1 \n", "1574 images/val/Birjand University Mobile Palmprint... \n", "1097 images/val/Birjand University Mobile Palmprint... \n", "141 images/val/Birjand University Mobile Palmprint... \n", "958 images/val/Birjand University Mobile Palmprint... \n", "609 images/val/Birjand University Mobile Palmprint... \n", "... ... \n", "750 images/val/Birjand University Mobile Palmprint... \n", "1547 images/val/Birjand University Mobile Palmprint... \n", "817 images/val/Birjand University Mobile Palmprint... \n", "1514 images/val/Birjand University Mobile Palmprint... \n", "1164 images/val/Birjand University Mobile Palmprint... \n", "\n", "[323 rows x 2 columns]\n" ] } ], "source": [ "train,test = train_test_split(df,test_size=0.2,random_state=17)\n", "test[1]=test[0].apply(lambda x: \"images/val/\" + x.lstrip(\"images/train\"))\n", "print(train)\n", "print(test)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "if not os.path.exists(target_data_dir):\n", " os.makedirs(target_data_dir)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "train.to_csv(os.path.join(target_data_dir,\"train.txt\"),index=False,header=None)\n", "test[1].to_csv(os.path.join(target_data_dir,\"val.txt\"),index=False,header=None)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "import shutil" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "def copy_images(df:pd.DataFrame,target:str,labels:bool):\n", " kind = \"labels\" if labels else \"images\"\n", " targetRoot = os.path.join(target_data_dir,kind,target)\n", " if os.path.exists(targetRoot):\n", " shutil.rmtree(targetRoot)\n", " for _,it in df.iterrows():\n", " f:str = it[0]\n", " f = re.sub(r'\\.[^.]+$', '.txt', f) if labels else f\n", " f = kind + f.lstrip(\"images\")\n", " # print(f)\n", " source_f = os.path.normpath(os.path.join(origin_data_dir,f))\n", " target_f = os.path.normpath(os.path.join(targetRoot,f.lstrip(kind + \"/train\")))\n", " os.makedirs(os.path.dirname(target_f), exist_ok=True)\n", " shutil.copyfile(source_f,target_f)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "copy_images(df=train,target=\"train\",labels=False)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "copy_images(df=test,target=\"val\",labels=False)" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "copy_images(df=train,target=\"train\",labels=True)" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "copy_images(df=test,target=\"val\",labels=True)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.11" } }, "nbformat": 4, "nbformat_minor": 2 }