"""
Sistema multi-proveedor para extracción de datos de facturas con IA.
Proporciona fallbacks automáticos entre diferentes proveedores LLM.
"""
import os
import json
import logging
import requests
from typing import Optional, Dict, Any, List
from pydantic import BaseModel, ValidationError
from dotenv import load_dotenv
from datetime import datetime

# Load environment variables through python-dotenv. This runs at import time,
# before PROVIDERS_CONFIG below reads the API keys with os.getenv.
load_dotenv()

# Provider registry. Every provider exposes the same four settings
# ("api_key", "url", "model", "headers"); only the values differ.
def _bearer_json_headers(key):
    """Return the HTTP headers for a bearer-token authenticated JSON request."""
    return {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json"
    }


# (provider name, env var holding the API key, chat-completions URL, model)
_PROVIDER_SPECS = (
    ("openai", "OPENAI_API_KEY", "https://api.openai.com/v1/chat/completions", "gpt-4o-mini"),
    ("deepseek", "DEEPSEEK_API_KEY", "https://api.deepseek.com/chat/completions", "deepseek-chat"),
    ("groq", "GROQ_API_KEY", "https://api.groq.com/openai/v1/chat/completions", "llama-3.3-70b-versatile"),
)

# API keys are read from the environment once, when the module is imported.
PROVIDERS_CONFIG = {
    name: {
        "api_key": os.getenv(env_var),
        "url": endpoint,
        "model": model_id,
        "headers": _bearer_json_headers,
    }
    for name, env_var, endpoint, model_id in _PROVIDER_SPECS
}

# Response schema
class InvoiceSchema(BaseModel):
    """Schema for the JSON object the LLM is asked to return.

    Every field is optional and defaults to None. The units and formats
    mentioned in the comments below are the ones requested in the prompt
    built by get_extraction_prompt; this model does not enforce them.
    """
    # Supply point code; the prompt gives ES0031607025821013ZW as an example.
    cups: Optional[str] = None
    client_name: Optional[str] = None
    supply_address: Optional[str] = None
    # Dates are kept as strings; the prompt requests "DD/MM/YYYY".
    invoice_date: Optional[str] = None
    billing_start: Optional[str] = None
    billing_end: Optional[str] = None
    billing_days: Optional[int] = None
    # NOTE(review): presumably kW like power_periods — the prompt does not say.
    power_contracted: Optional[float] = None
    # The per-period dicts below are requested with keys "P1".."P6" and
    # float-or-null values; the Dict[str, Any] type accepts any shape.
    # Contracted power per period, in kW.
    power_periods: Optional[Dict[str, Any]] = None
    # Consumption per period, in kWh.
    energy_consumption: Optional[Dict[str, Any]] = None
    # Energy price per period, in €/kWh.
    energy_prices: Optional[Dict[str, Any]] = None
    # Power price per period (prompt mentions €/kW/year or €/kW/day).
    power_prices: Optional[Dict[str, Any]] = None
    # Reactive energy consumption per period, in kVArh.
    reactive_energy: Optional[Dict[str, Any]] = None
    # Reactive energy price per period, in €/kVArh.
    reactive_energy_prices: Optional[Dict[str, Any]] = None
    # Final monetary value of the reactive-energy line item, if present.
    reactive_energy_total_cost: Optional[float] = None
    # NOTE(review): presumably the electricity retailer's name — the prompt
    # only declares it as a string.
    provider_name: Optional[str] = None

def get_extraction_prompt(text: str) -> str:
    """Build the prompt that asks the LLM for invoice fields as strict JSON.

    Args:
        text: Text extracted from the electricity bill. It is inserted
            verbatim under the ``### DOCUMENT`` marker.

    Returns:
        The complete prompt: role statement, target JSON schema, extraction
        rules and the document text, in that order.
    """
    # All per-period fields share the same P1..P6 object shape.
    period_map = "{" + ",".join(f'"P{n}": float|null' for n in range(1, 7)) + "}"

    # Written as explicit literals because the first sentence ends with a
    # trailing space that must be preserved.
    intro = (
        "\n"
        "You are an accountant AI specialized in Spanish electricity bills. \n"
        "Return ONLY valid JSON that matches exactly this schema:\n"
        "\n"
    )

    schema = f"""{{
  "cups": string,
  "client_name": string,
  "supply_address": string,
  "invoice_date": "DD/MM/YYYY",
  "billing_start": "DD/MM/YYYY",
  "billing_end": "DD/MM/YYYY",
  "billing_days": int,
  "power_contracted": float,
  "power_periods": {period_map},
  "energy_consumption": {period_map},
  "energy_prices": {period_map},
  "power_prices": {period_map},
  "reactive_energy": {period_map},
  "reactive_energy_prices": {period_map},
  "reactive_energy_total_cost": float|null,
  "provider_name": string
}}
"""

    rules = """
The "cups" field should be in this format: ES0031607025821013ZW or similar.
For "power_periods", extract the contracted power for each period (P1 through P6) in kW.
For "energy_consumption", extract consumption values in kWh.
For "energy_prices", extract prices in €/kWh.
Look for a breakdown of prices per period (P1, P2, etc.).
**IF NO PER-PERIOD PRICES ARE FOUND**, look for a summary line like "Facturación del Consumo" or "Término de energía" that shows a total kWh consumption multiplied by a single unit price (€/kWh). Extract that single unit price.
**IMPORTANT RULE**: If you find a single energy price, you MUST apply that same price to all `energy_prices` periods (P1 to P6) that have a non-zero `energy_consumption`. If a period has zero consumption, its price can be null.
For "power_prices", extract power prices in €/kW/year or €/kW/day and convert to the same unit.
For "reactive_energy", extract reactive energy consumption in kVArh (kilovar-hour).
For "reactive_energy_prices", extract reactive energy prices in €/kVArh. **This is low priority.**
For "reactive_energy_total_cost", find the line item for reactive energy (e.g., 'Complemento por Energía Reactiva', 'Penalización reactiva') and extract its final monetary value. If not found, set it to null.
Look for "Energía reactiva" entries which usually appear after the active energy and power sections.
If any value is missing, set it to null.

### DOCUMENT
"""

    return f"{intro}{schema}{rules}{text}\n"

def call_llm_provider(provider_name: str, messages: List[Dict], temperature: float = 0) -> Dict[str, Any]:
    """Send one chat-completion request to a configured provider.

    Args:
        provider_name: Key into PROVIDERS_CONFIG (openai, deepseek, groq).
        messages: Chat messages forwarded as the request's "messages" field.
        temperature: Sampling temperature forwarded to the provider.

    Returns:
        The provider's decoded JSON body on success. On any failure
        (unknown provider, missing key, HTTP error, timeout, other
        exception) a dict with a single "error" key is returned instead;
        this function does not raise for those cases.
    """
    try:
        settings = PROVIDERS_CONFIG[provider_name]
    except KeyError:
        return {"error": f"Proveedor {provider_name} no configurado"}

    token = settings["api_key"]
    if not token:
        return {"error": f"API KEY para {provider_name} no configurada"}

    body = {
        "model": settings["model"],
        "messages": messages,
        "temperature": temperature
    }
    # response_format is only sent to the providers listed here.
    if provider_name in ("openai", "deepseek"):
        body["response_format"] = {"type": "json_object"}

    # Status codes that get a dedicated message instead of the generic
    # raise_for_status() error text.
    known_failures = {
        401: f"{provider_name}: API KEY inválida o expirada",
        429: f"{provider_name}: Límite de peticiones excedido",
        403: f"{provider_name}: Acceso denegado o sin créditos",
    }

    try:
        logging.info(f"Llamando a proveedor {provider_name}")

        reply = requests.post(
            settings["url"],
            headers=settings["headers"](token),
            json=body,
            timeout=30
        )

        logging.info(f"{provider_name} - Status Code: {reply.status_code}")

        failure = known_failures.get(reply.status_code)
        if failure is not None:
            return {"error": failure}

        reply.raise_for_status()
        return reply.json()

    except requests.exceptions.Timeout:
        # Handled before RequestException (its parent class) and not logged.
        return {"error": f"{provider_name}: Timeout en la solicitud"}
    except requests.exceptions.RequestException as exc:
        logging.error(f"Error en {provider_name}: {exc}")
        return {"error": f"{provider_name}: {str(exc)}"}
    except Exception as exc:
        logging.error(f"Error inesperado en {provider_name}: {exc}")
        return {"error": f"{provider_name}: {str(exc)}"}

def _strip_json_wrapping(content: str) -> str:
    """Return the outermost ``{...}`` span of content, or content unchanged if there is none."""
    # Providers called without JSON mode often wrap the object in Markdown
    # fences or add prose around it. A response that is already a bare JSON
    # object starts with "{" and ends with "}", so it passes through intact.
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end <= start:
        return content
    return content[start:end + 1]


def _response_content(response: Dict[str, Any]) -> Optional[str]:
    """Return the first choice's text, or None when the response has no usable text."""
    if "choices" not in response or len(response["choices"]) == 0:
        return None
    first_choice = response["choices"][0]
    if "message" in first_choice:
        content = first_choice["message"].get("content", "{}")
    else:
        content = str(first_choice)
    # A message may carry "content": null; treat that as an unusable response.
    return content if isinstance(content, str) else None


def _write_extraction_log(path: str, extraction_log: Dict[str, Any]) -> None:
    """Dump the extraction log to path; best-effort, failures are logged and never raised."""
    try:
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(extraction_log, f, ensure_ascii=False, indent=2)
    except Exception as e:
        logging.warning(f"No se pudo guardar el log {path}: {e}")


def _attempt_extraction(provider: str, messages: List[Dict], attempt: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Run one provider attempt; return validated data or None, recording the failure in attempt."""
    try:
        response = call_llm_provider(provider, messages, temperature=0)

        if "error" in response:
            attempt["error"] = response.get("error")
            logging.warning(f"{provider} falló: {response['error']}")
            return None

        content = _response_content(response)
        if content is None:
            attempt["error"] = "Formato de respuesta inesperado"
            logging.warning(f"{provider}: Formato de respuesta inesperado")
            return None

        try:
            return InvoiceSchema.model_validate_json(_strip_json_wrapping(content)).model_dump()
        except ValidationError as e:
            logging.warning(f"{provider}: Error de validación - {e}")
            attempt["validation_error"] = str(e)
            return None

    except Exception as e:
        logging.error(f"Error inesperado con {provider}: {e}")
        attempt["error"] = f"Excepción: {str(e)}"
        return None


def extract_with_multi_provider(text: str, preferred_providers: Optional[List[str]] = None) -> Optional[Dict[str, Any]]:
    """
    Extract invoice data by trying several LLM providers in order.

    Each provider is asked with the same prompt. The first response that
    validates against InvoiceSchema wins; providers that error out, return an
    unexpected shape, or fail validation are skipped.

    Side effects: writes 'extraction_success_log.json' on success or
    'extraction_failure_log.json' when every provider fails. Both are
    written to the current working directory and overwritten on each call.
    Failing to write a log is reported as a warning and never aborts the
    extraction.

    Args:
        text: Text extracted from the invoice.
        preferred_providers: Provider names to try, in order. Defaults to
            ["deepseek", "openai", "groq"]. Names missing from
            PROVIDERS_CONFIG are skipped with a warning.

    Returns:
        dict: The validated invoice fields (InvoiceSchema.model_dump()), or
        None if every provider failed.
    """
    if preferred_providers is None:
        # Default priority order: DeepSeek -> OpenAI -> Groq
        preferred_providers = ["deepseek", "openai", "groq"]

    prompt = get_extraction_prompt(text)
    messages = [{"role": "user", "content": prompt}]

    extraction_log = {
        "timestamp": datetime.now().isoformat(),
        "attempts": [],
        "final_result": None
    }

    for provider in preferred_providers:
        if provider not in PROVIDERS_CONFIG:
            logging.warning(f"Proveedor {provider} no configurado, saltando...")
            continue

        logging.info(f"Intentando extracción con {provider}")

        # Exactly one record per attempt. It starts as a failure and is only
        # flipped to success once the response has passed validation, so the
        # log never reports success for data that was rejected.
        attempt = {
            "provider": provider,
            "timestamp": datetime.now().isoformat(),
            "success": False,
            "error": None
        }
        extraction_log["attempts"].append(attempt)

        data = _attempt_extraction(provider, messages, attempt)
        if data is None:
            continue

        attempt["success"] = True
        logging.info(f"✅ Extracción exitosa con {provider}")

        extraction_log["final_result"] = {
            "provider_used": provider,
            "data": data
        }
        _write_extraction_log('extraction_success_log.json', extraction_log)
        return data

    # Reaching this point means every provider failed.
    logging.error("❌ Todos los proveedores LLM fallaron")

    # Keep the failure log for diagnosis.
    _write_extraction_log('extraction_failure_log.json', extraction_log)

    return None

def get_provider_status() -> Dict[str, Any]:
    """
    Check every configured provider by sending it a minimal request.

    Providers without an API key are reported as unavailable without making
    a request. This issues one real API call per provider that has a key.

    Returns:
        dict: Maps each provider name to {"available": True, "model": ...}
        or {"available": False, "error": ...}.
    """
    status = {}
    # The probe must mention "JSON": call_llm_provider sets
    # response_format={"type": "json_object"} for some providers, and OpenAI
    # rejects such requests with HTTP 400 unless the messages contain the
    # word "json". Without it a healthy provider is reported as unavailable.
    test_messages = [{"role": "user", "content": 'Responde solo con este JSON: {"test": "ok"}'}]

    for provider_name, config in PROVIDERS_CONFIG.items():
        if not config["api_key"]:
            status[provider_name] = {"available": False, "error": "API KEY no configurada"}
            continue

        response = call_llm_provider(provider_name, test_messages)

        if "error" in response:
            status[provider_name] = {"available": False, "error": response["error"]}
        else:
            status[provider_name] = {"available": True, "model": config["model"]}

    return status

# Kept so existing callers keep working
def extract_with_llm(text: str) -> Optional[Dict[str, Any]]:
    """
    Backward-compatible entry point.

    Delegates to extract_with_multi_provider with its default provider
    order and returns its result unchanged.
    """
    extracted = extract_with_multi_provider(text)
    return extracted