import base64
import mimetypes
import os

from openai import OpenAI

def encode_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as Base64 text.

    The file is opened in binary mode and read in full; the resulting
    Base64 bytes are decoded as UTF-8 so the caller receives a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def recognize_captcha_with_qwen(image_path, api_key, *, model="qwen-vl-max"):
    """Ask a Qwen-VL model to read the captcha characters in an image file.

    The image is sent inline as a Base64 ``data:`` URL through the OpenAI
    client, which is pointed at DashScope's OpenAI-compatible endpoint.

    Args:
        image_path: Path of the captcha image to read.
        api_key: DashScope API key handed to the client.
        model: Name of the model to request. Defaults to ``"qwen-vl-max"``.

    Returns:
        The whitespace-stripped text of the first choice, or ``None`` when
        the request fails or the reply carries no text content.

    Raises:
        OSError: If the image file cannot be opened or read. This happens
            before the request is made and is not swallowed.
    """
    # Use the OpenAI SDK against Alibaba Cloud's OpenAI-compatible API.
    client = OpenAI(
        api_key=api_key,
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
    )

    base64_image = encode_image_to_base64(image_path)

    # Label the data URL with the type implied by the file name instead of
    # always claiming PNG; keep PNG as the fallback when the extension is
    # unknown or does not map to an image type.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"

    prompt = "你是一个精确的OCR机器人。请识别图片中的验证码字符。只输出验证码本身的内容，不要任何多余的汉字或标点符号。"

    try:
        response = client.chat.completions.create(
            model=model,  # the cheaper qwen-vl-plus can be passed instead
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mime_type};base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            temperature=0.0
        )
        content = response.choices[0].message.content
        if content is None:
            # Handle a reply without text explicitly rather than relying on
            # the AttributeError that ``None.strip()`` would raise.
            print("Qwen 识别错误: empty response content")
            return None
        return content.strip()
    except Exception as e:
        # Deliberate best-effort boundary: any failure of the remote call or
        # of reading its reply is reported and turned into ``None``.
        print(f"Qwen 识别错误: {e}")
        return None

if __name__ == "__main__":
    # Read the Alibaba Cloud Bailian (DashScope) API key from the environment
    # so the secret is never stored in the source file.
    # NOTE(review): a key was previously hard-coded here; treat it as leaked
    # and rotate it.
    API_KEY = os.environ.get("DASHSCOPE_API_KEY")
    if not API_KEY:
        raise SystemExit(
            "Set the DASHSCOPE_API_KEY environment variable to your DashScope API key."
        )
    IMAGE_PATH = "captcha.png"

    print(f"Qwen-VL 识别结果: {recognize_captcha_with_qwen(IMAGE_PATH, API_KEY)}")