import os import re import random import shutil from collections import defaultdict # 配置路径 SRC_DIR = os.path.join(os.path.dirname(__file__), 'UAV') TEST_DIR = os.path.join(os.path.dirname(__file__), 'TEST') # 创建目标目录 os.makedirs(TEST_DIR, exist_ok=True) def split_dataset(): # 1. 扫描源目录 if not os.path.exists(SRC_DIR): print(f"源目录不存在: {SRC_DIR}") return all_files = [f for f in os.listdir(SRC_DIR) if f.lower().endswith('.jpg')] if not all_files: print("未找到JPG文件") return # 2. 按前缀分组 pattern = re.compile(r'(\d{2}_\d{2})') groups = defaultdict(list) for filename in all_files: match = pattern.match(filename) if match: group_key = match.group(1) groups[group_key].append(filename) # 3. 统计并复制文件 total_copied = 0 for group_key, files in groups.items(): group_size = len(files) test_count = min(round(group_size * 0.1), 10) test_count = max(1, test_count) # 随机抽样 selected = random.sample(files, test_count) # 直接使用TEST目录 for f in selected: src = os.path.join(SRC_DIR, f) dst = os.path.join(TEST_DIR, f) if not os.path.exists(dst): shutil.copy(src, dst) total_copied += 1 # 输出统计信息 print(f"处理完成:\n" f"- 共发现 {len(groups)} 个分组\n" f"- 总计复制 {total_copied} 张测试图片\n" f"- 输出目录: {TEST_DIR}") if __name__ == '__main__': split_dataset()