"""
Cliente LLM para DeepSeek para mejorar la extracción de datos de facturas.
Proporciona funciones para extraer información de facturas usando LLMs.
"""
import os
import json
import logging
import requests
from typing import Optional, Union, List, Dict, Any
from pydantic import BaseModel, ValidationError
from dotenv import load_dotenv
from typing import Dict, Any, Optional, Union

# Try to load environment variables from two candidate locations: the default
# python-dotenv lookup, and an 'instance/.env' file three directory levels
# above this module.
load_dotenv()
load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'instance', '.env'))

# The API key must come from the environment (or one of the .env files above).
# A real credential must never be used as a hard-coded fallback: anything
# committed to source control has to be treated as leaked.
# NOTE(review): the key that used to be embedded here should be revoked.
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
if not DEEPSEEK_API_KEY:
    # Fail loudly at import time instead of silently using a shared key; the
    # API calls themselves will then be rejected by the server.
    logging.warning("DEEPSEEK_API_KEY no está configurada; las llamadas a la API de DeepSeek fallarán")

# Settings for direct calls to the DeepSeek API
DEEPSEEK_API_URL = "https://api.deepseek.com/v1/chat/completions"
HEADERS = {
    "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
    "Content-Type": "application/json"
}

# Direct call to the DeepSeek chat-completions endpoint
def call_deepseek_api(messages: list, model: str = "deepseek-llm", temperature: float = 0,
                      timeout: float = 120.0) -> Dict[str, Any]:
    """
    Call the DeepSeek API directly through ``requests`` (no OpenAI client).

    The request always asks for a JSON-object response format.

    Args:
        messages: List of chat messages in the format accepted by the API
        model: Name of the model to use
        temperature: Generation temperature, passed through to the API
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests`` would block indefinitely on a stalled
            connection

    Returns:
        The decoded JSON body of the response on success. On any failure
        (network error, timeout, HTTP error status, undecodable body) a dict
        of the form ``{"error": "<message>"}``; exceptions are logged and
        reported through that dict instead of being propagated.
    """
    payload = {
        "model": model,
        "messages": messages,
        "temperature": temperature,
        "response_format": {"type": "json_object"}
    }

    try:
        logging.info(f"Enviando solicitud a DeepSeek API: {model}")
        response = requests.post(DEEPSEEK_API_URL, headers=HEADERS, json=payload, timeout=timeout)
        response.raise_for_status()  # Raises on 4xx/5xx status codes

        # Return the response body as a dictionary
        return response.json()
    except requests.exceptions.RequestException as e:
        logging.error(f"Error en solicitud a DeepSeek API: {e}")
        # Compare against None explicitly: a Response object is falsy for
        # 4xx/5xx statuses, so a plain truthiness test would skip the body
        # in exactly the cases where it is useful.
        error_response = getattr(e, 'response', None)
        if error_response is not None:
            logging.error(f"Detalles: {error_response.text}")
        return {"error": str(e)}
    except Exception as e:
        logging.error(f"Error inesperado llamando a DeepSeek API: {e}")
        return {"error": str(e)}

# ----------  response schema ----------
# Shorthand for the nullable field shapes shared by the schema below.
_NullableText = Optional[str]
_NullableNumber = Optional[float]
_NullablePeriodMap = Optional[Dict[str, Any]]


class InvoiceSchema(BaseModel):
    """
    Shape of the JSON object expected back from the DeepSeek LLM.

    Every field is optional and defaults to None, so a reply that omits a
    value still validates. Field order is significant for the dumped dict
    and must be kept as declared.
    """

    # --- supply point and customer ---
    cups: _NullableText = None
    client_name: _NullableText = None
    supply_address: _NullableText = None

    # --- dates (kept as strings; the extraction prompt asks for DD/MM/YYYY) ---
    invoice_date: _NullableText = None
    billing_start: _NullableText = None
    billing_end: _NullableText = None
    billing_days: Optional[int] = None

    # --- contracted power ---
    power_contracted: _NullableNumber = None

    # --- per-period mappings (the extraction prompt uses keys "P1".."P6") ---
    power_periods: _NullablePeriodMap = None
    energy_consumption: _NullablePeriodMap = None
    energy_prices: _NullablePeriodMap = None
    power_prices: _NullablePeriodMap = None
    reactive_energy: _NullablePeriodMap = None
    reactive_energy_prices: _NullablePeriodMap = None

    # --- totals and supplier ---
    reactive_energy_total_cost: _NullableNumber = None
    provider_name: _NullableText = None

# ----------  main function ----------
def extract_with_llm(text: str) -> Optional[Dict[str, Any]]:
    """
    Extract structured invoice data from invoice text using the DeepSeek LLM.

    Builds an extraction prompt around ``text``, sends it through
    ``call_deepseek_api`` and validates the reply against ``InvoiceSchema``.
    As a diagnostic side effect, the raw API response is written to
    ``llm_response_debug.json`` in the current working directory.

    Args:
        text: Text extracted from the invoice

    Returns:
        dict: The validated schema fields as a plain dict, or None when the
        text is empty, the API call fails, or the reply cannot be parsed or
        validated. This function does not raise on those failures.
    """
    # Nothing to extract from an empty document: skip the API round-trip.
    if text is None or not str(text).strip():
        logging.warning("Texto de factura vacío; se omite la extracción con LLM")
        return None

    prompt = f"""
You are an accountant AI specialized in Spanish electricity bills. 
Return ONLY valid JSON that matches exactly this schema:

{{
  "cups": string,
  "client_name": string,
  "supply_address": string,
  "invoice_date": "DD/MM/YYYY",
  "billing_start": "DD/MM/YYYY",
  "billing_end": "DD/MM/YYYY",
  "billing_days": int,
  "power_contracted": float,
  "power_periods": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "energy_consumption": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "energy_prices": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "power_prices": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "reactive_energy": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "reactive_energy_prices": {{"P1": float|null,"P2": float|null,"P3": float|null,"P4": float|null,"P5": float|null,"P6": float|null}},
  "reactive_energy_total_cost": float|null,
  "provider_name": string
}}

The "cups" field should be in this format: ES0031607025821013ZW or similar.
For "power_periods", extract the contracted power for each period (P1 through P6) in kW.
For "energy_consumption", extract consumption values in kWh.
For "energy_prices", extract prices in €/kWh.
Look for a breakdown of prices per period (P1, P2, etc.).
**IF NO PER-PERIOD PRICES ARE FOUND**, look for a summary line like "Facturación del Consumo" or "Término de energía" that shows a total kWh consumption multiplied by a single unit price (€/kWh). Extract that single unit price.
**IMPORTANT RULE**: If you find a single energy price, you MUST apply that same price to all `energy_prices` periods (P1 to P6) that have a non-zero `energy_consumption`. If a period has zero consumption, its price can be null.
For "power_prices", extract power prices in €/kW/year or €/kW/day and convert to the same unit.
For "reactive_energy", extract reactive energy consumption in kVArh (kilovar-hour).
For "reactive_energy_prices", extract reactive energy prices in €/kVArh. **This is low priority.**
For "reactive_energy_total_cost", find the line item for reactive energy (e.g., 'Complemento por Energía Reactiva', 'Penalización reactiva') and extract its final monetary value. If not found, set it to null.
Look for "Energía reactiva" entries which usually appear after the active energy and power sections.
If any value is missing, set it to null.

### DOCUMENT
{text}
"""

    try:
        logging.info("Iniciando extracción con DeepSeek LLM usando requests directo")

        # Use the module's own requests-based helper
        response = call_deepseek_api(
            messages=[{"role": "user", "content": prompt}],
            model="deepseek-chat",
            temperature=0
        )

        # The helper reports failures through an "error" key instead of raising
        if "error" in response:
            logging.error(f"Error en API DeepSeek: {response['error']}")
            return None

        # Pull the JSON text out of the response
        try:
            # First try the standard chat-completions layout
            if "choices" in response and len(response["choices"]) > 0:
                first_choice = response["choices"][0]
                if "message" in first_choice:
                    raw_json = first_choice["message"].get("content", "{}")
                else:
                    # NOTE(review): str() of a dict is a Python repr, not
                    # JSON, so this path is expected to fail validation below.
                    raw_json = str(first_choice)
            else:
                # Unexpected layout: fall back to the whole response
                raw_json = json.dumps(response)

            # A null/non-text "content" cannot be parsed; report it explicitly
            # instead of failing later with an unrelated TypeError.
            if not isinstance(raw_json, str):
                logging.error("Error procesando respuesta de API: el contenido no es texto")
                logging.debug(f"Respuesta recibida: {response}")
                return None

            logging.info(f"Respuesta LLM recibida: {len(raw_json)} caracteres")

            # Best-effort diagnostic dump; the file is overwritten on every
            # call and a failure to write it must not abort the extraction.
            try:
                with open('llm_response_debug.json', 'w', encoding='utf-8') as f:
                    json.dump({"api_response": response, "extracted_json": raw_json},
                              f, ensure_ascii=False, indent=2)
            except Exception as e:
                logging.warning(f"No se pudo guardar archivo de diagnóstico: {e}")

            # Validate and convert to our model
            data = InvoiceSchema.model_validate_json(raw_json).model_dump()
            logging.info(f"Datos extraídos por LLM: {json.dumps(data, indent=2, ensure_ascii=False)}")
            return data

        except ValidationError as e:
            # Must precede the ValueError handler: pydantic's ValidationError
            # is a ValueError subclass and would otherwise be reported as a
            # generic processing error.
            logging.warning(f"Error de validación en respuesta LLM: {e}")
            return None
        except (KeyError, IndexError, ValueError) as e:
            logging.error(f"Error procesando respuesta de API: {e}")
            logging.debug(f"Respuesta recibida: {response}")
            return None

    except Exception as e:
        # Last-resort boundary: callers expect None rather than an exception
        logging.warning(f"Error inesperado en extracción LLM: {e}")
        import traceback
        logging.error(traceback.format_exc())
        return None
