import cv2
import os
import pickle
from glob import glob

class FalsePositiveFeatureExtractor:
    """Extract ORB descriptors from images and store them in a pickle file.

    Each image is read as grayscale, rejected if it is missing, unreadable,
    empty, too blurry, or yields too few descriptors, and otherwise passed
    through an ORB detector created in ``__init__``.
    """

    # Images whose Laplacian variance is below this value are treated as
    # blurry and skipped; tune for the data set at hand.
    BLUR_THRESHOLD = 50
    # Minimum number of descriptors an image must yield to be kept.
    MIN_DESCRIPTORS = 10

    def __init__(self, feature_type='ORB', max_features=1000):
        """Create the ORB detector.

        Args:
            feature_type: Stored on the instance. Only ORB is implemented;
                any other value triggers a warning and ORB is used anyway.
            max_features: Upper bound on keypoints, passed to ORB as
                ``nfeatures``.
        """
        self.feature_type = feature_type
        self.max_features = max_features
        if str(feature_type).upper() != 'ORB':
            # Warn instead of raising so existing callers keep working.
            print(f"警告：不支持的特征类型 {feature_type}，将使用 ORB")
        # Tuned ORB parameters.
        self.detector = cv2.ORB_create(
            nfeatures=max_features,
            scaleFactor=1.2,  # pyramid scale factor
            edgeThreshold=15,  # border margin where no features are detected
            patchSize=31,  # neighbourhood size used by the descriptor
        )

    def _extract_with_size(self, img_path):
        """Read ``img_path`` once and return ``(descriptors, (height, width))``.

        Returns ``(None, None)`` and prints a warning when the image is
        missing, unreadable, empty, blurry, or yields fewer than
        ``MIN_DESCRIPTORS`` descriptors.
        """
        # Check that the file exists.
        if not os.path.exists(img_path):
            print(f"警告：文件不存在 {img_path}")
            return None, None

        # Read the image and check that decoding succeeded.
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            print(f"警告：无法读取图像 {img_path}")
            return None, None

        # Guard against an empty pixel array.
        if img.size == 0:
            print(f"警告：空图像 {img_path}")
            return None, None

        # Sharpness estimate: variance of the Laplacian.
        blur_value = cv2.Laplacian(img, cv2.CV_64F).var()
        if blur_value < self.BLUR_THRESHOLD:
            print(f"警告：图像模糊 {img_path} (清晰度: {blur_value:.2f})")
            return None, None

        # Detect keypoints and compute their descriptors.
        kp, des = self.detector.detectAndCompute(img, None)

        # Reject images with no or too few descriptors.
        if des is None or len(des) < self.MIN_DESCRIPTORS:
            print(f"警告：特征不足 {img_path} (特征数: {len(kp) if kp else 0})")
            return None, None

        return des, img.shape[:2]

    def extract_features(self, img_path):
        """Return the ORB descriptors for ``img_path``, or ``None`` if rejected.

        A warning describing the rejection reason is printed when ``None``
        is returned.
        """
        descriptors, _ = self._extract_with_size(img_path)
        return descriptors

    def build_feature_db(self, input_dir, output_file='false_positive_features.pkl'):
        """Extract descriptors for the images in ``input_dir`` and pickle them.

        Only files matching ``*.jpg`` and ``*.png`` directly inside
        ``input_dir`` are considered. The pickled object is a dict keyed by
        file base name; each value holds ``'features'`` (the descriptors) and
        ``'image_size'`` (the first two entries of the image shape).

        Args:
            input_dir: Directory containing the sample images.
            output_file: Path of the pickle file to write.

        Returns:
            The number of images stored in the database.
        """
        # Local import: the module only imports ``glob.glob`` at file level.
        from glob import escape as glob_escape

        # Escape glob metacharacters so a directory such as "fp[1]" is
        # matched literally rather than as a character class.
        safe_dir = glob_escape(input_dir)
        # Sorted so the database contents do not depend on directory order.
        image_paths = sorted(
            glob(os.path.join(safe_dir, '*.jpg'))
            + glob(os.path.join(safe_dir, '*.png'))
        )

        all_features = {}
        for img_file in image_paths:
            # The size comes from the same read used for extraction, so the
            # image is not decoded a second time.
            descriptors, image_size = self._extract_with_size(img_file)
            if descriptors is None:
                continue
            all_features[os.path.basename(img_file)] = {
                'features': descriptors,
                'image_size': image_size,
            }

        with open(output_file, 'wb') as f:
            pickle.dump(all_features, f)
        return len(all_features)

if __name__ == '__main__':
    import argparse

    # Command-line entry point: read the input directory and output path,
    # build the feature database, and report how many samples were stored.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input',
        type=str,
        default='false_positive',
        help='误报图像目录路径',
    )
    cli.add_argument(
        '--output',
        type=str,
        default='false_positive_features.pkl',
        help='输出特征文件路径',
    )
    options = cli.parse_args()

    sample_count = FalsePositiveFeatureExtractor().build_feature_db(
        options.input, options.output
    )
    print(f'成功提取{sample_count}个误报样本的特征')