import pandas as pd
import requests
import os

# --- 配置信息 ---
FILE_PATH = r'D:\1-working\1-tagging\1-new-round\Export 1DEU-Round2.xlsx'
SAVE_DIR = r'D:\1-working\1-tagging\1-new-round\Export 1DEU-Round2-800-wasser\upload-compare'

def download_images():
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)

    print(f"正在读取文件: {FILE_PATH}...")
    
    try:
        # 加载 Excel
        all_sheets = pd.read_excel(FILE_PATH, sheet_name=None)
        
        for sheet_name, df in all_sheets.items():
            # 1. 彻底清洗列名
            df.columns = [str(c).strip() for c in df.columns]
            
            # 强力布尔转换函数
            def to_boolean(val):
                s = str(val).strip().upper()
                return s in ['TRUE', '1', '1.0', 'T']

            if 'Top Row' not in df.columns or 'Handle' not in df.columns:
                continue

            print(f"  - 正在处理分表: {sheet_name}")

            # 2. 建立 Handle 到 主SKU 的映射
            handle_to_sku = {}
            top_rows = df[df['Top Row'].apply(to_boolean)]
            
            for _, row in top_rows.iterrows():
                h = str(row.get('Handle', ''))
                s = str(row.get('Variant SKU', ''))
                if h and s and s.lower() != 'nan':
                    handle_to_sku[h] = s.strip()

            # 3. 筛选 Image Position 为 2 的行
            # 兼容数字 2 和 字符串 "2" 或 "2.0"
            df['Image Position Clean'] = df['Image Position'].astype(str).str.strip().str.replace('.0', '', regex=False)
            target_rows = df[df['Image Position Clean'] == '2']

            success_count = 0
            for _, row in target_rows.iterrows():
                handle = str(row.get('Handle', ''))
                img_url = str(row.get('Image Src', ''))
                
                # 获取该 Handle 对应的 Top Row SKU
                main_sku = handle_to_sku.get(handle)
                
                if not main_sku or not img_url or img_url.lower() == 'nan':
                    continue

                if img_url.startswith('//'):
                    img_url = 'https:' + img_url

                try:
                    res = requests.get(img_url, timeout=15)
                    if res.status_code == 200:
                        # 识别扩展名
                        ext = ".jpg"
                        ctype = res.headers.get('Content-Type', '').lower()
                        if 'png' in ctype: ext = ".png"
                        elif 'webp' in ctype: ext = ".webp"
                        
                        # 再次校准：如果URL明显包含扩展名则优先
                        if '.png' in img_url.lower(): ext = ".png"
                        
                        # 构造文件名：主SKU + "-yb" + 后缀
                        clean_base = "".join([c for c in main_sku if c.isalnum() or c in (' ', '-', '_')])
                        # --- 核心修改处 ---
                        file_name = f"{clean_base}-yb{ext}"
                        # -----------------
                        
                        full_path = os.path.join(SAVE_DIR, file_name)
                        
                        with open(full_path, 'wb') as f:
                            f.write(res.content)
                        
                        success_count += 1
                        print(f"    成功下载: {file_name}")
                    else:
                        print(f"    跳过 (HTTP {res.status_code}): {img_url}")
                except Exception as e:
                    print(f"    网络异常: {img_url} -> {e}")

            print(f"  - 分表 {sheet_name} 处理完毕，本表下载: {success_count} 张")

    except Exception as e:
        print(f"程序运行过程中遇到错误: {e}")

if __name__ == "__main__":
    download_images()