import asyncio
import glob
import json
import logging
import os
from datetime import datetime, timezone

import pymupdf
from dotenv import load_dotenv
from livekit.agents import (
    Agent,
    AgentServer,
    AgentSession,
    JobContext,
    cli,
    inference,
    llm,
    room_io,
)
from livekit.plugins import ai_coustics, openai
from openai.types.beta.realtime.session import TurnDetection
from tavily import TavilyClient

# Module-level logger used by every helper in this file.
logger = logging.getLogger("agent")

# Read .env.local before the os.getenv() lookups below are evaluated.
load_dotenv(".env.local")

# Model identifier passed to inference.LLM when summarising a session.
AGENT_MODEL = "openai/gpt-4.1-mini"
# Empty string when the variable is unset; no Tavily client is created then.
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY", "")
# Two directory levels above the folder that contains this file.
PROJECT_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")
# Directory holding memory.json; falls back to PROJECT_ROOT when R1_DATA_DIR is unset.
DATA_DIR = os.getenv("R1_DATA_DIR", PROJECT_ROOT)
# Folder scanned for *.pdf study material.
DOCS_DIR = os.path.join(PROJECT_ROOT, "docs")
# JSON file that stores one summary entry per past session.
MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")


# --- Memory ---


def load_memory() -> list[dict]:
    """Load conversation memory from disk.

    Returns:
        The list stored in ``MEMORY_FILE``. An empty list is returned when the
        file does not exist, cannot be read or parsed, or does not contain a
        JSON array. Read/parse failures are logged, not raised.
    """
    try:
        # Open directly instead of checking os.path.exists() first: that avoids
        # a check-then-use race. JSON text is UTF-8, so don't rely on the
        # platform's default encoding.
        with open(MEMORY_FILE, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing has been saved yet; this is the normal first-run case.
        return []
    except Exception:
        logger.exception("Failed to load memory")
        return []
    return data if isinstance(data, list) else []


def save_memory(entries: list[dict]) -> None:
    """Save conversation memory to disk.

    The JSON is first written to a sibling ``.tmp`` file and then moved over
    ``MEMORY_FILE`` with ``os.replace``. A write that fails part-way (for
    example a non-serialisable entry, or the process being killed while
    shutting down) therefore cannot leave a truncated memory file behind.

    Args:
        entries: The complete list of memory entries to persist.

    Failures are logged rather than raised.
    """
    tmp_path = f"{MEMORY_FILE}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(entries, f, indent=2)
        # Swap the finished file into place in a single step.
        os.replace(tmp_path, MEMORY_FILE)
        logger.info("Saved %d memory entries", len(entries))
    except Exception:
        logger.exception("Failed to save memory")
        # Best-effort cleanup of the partial temp file; it may not exist if
        # open() itself failed, so a failure here is not worth reporting.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


def format_memory_for_prompt(entries: list[dict]) -> str:
    """Format memory entries into a prompt-friendly string.

    Args:
        entries: Memory entries, oldest first. Each entry is expected to be a
            dict with optional ``"date"`` and ``"summary"`` keys; anything that
            is not a dict is ignored.

    Returns:
        One ``"[date] summary"`` line per entry for the 20 most recent valid
        entries, joined with newlines, or ``""`` when there is nothing to show.
    """
    if not entries:
        return ""
    # The memory file may have been hand-edited or damaged. Skip elements that
    # are not dicts so a single bad entry cannot raise AttributeError here.
    valid = [entry for entry in entries if isinstance(entry, dict)]
    # Keep the most recent 20 sessions to avoid bloating the prompt.
    return "\n".join(
        f"[{entry.get('date', 'unknown date')}] {entry.get('summary', '')}"
        for entry in valid[-20:]
    )


async def summarise_and_save(session: AgentSession) -> None:
    """Ask the LLM for a short summary of the session and append it to memory.

    Does nothing when the session has no history or fewer than two usable
    turns. Only the last 40 turns are sent to the model. Errors raised while
    generating or storing the summary are logged and not propagated.

    Args:
        session: The agent session whose chat history is summarised.
    """
    chat_history = session.history
    if not chat_history or not chat_history.items:
        return

    # Render each history item that exposes a role and non-empty text as
    # "role: text"; everything else is left out of the transcript.
    turns = [
        f"{entry.role}: {spoken}"
        for entry in chat_history.items
        if hasattr(entry, "role")
        and hasattr(entry, "text_content")
        and (spoken := entry.text_content)
    ]

    # A single turn is not worth summarising.
    if len(turns) < 2:
        return

    recent_transcript = "\n".join(turns[-40:])

    summariser = inference.LLM(model=AGENT_MODEL)
    system_prompt = (
        "You are a memory system. Given this voice conversation transcript, "
        "write a concise 2-3 sentence summary capturing: "
        "1) What the user wanted or talked about. "
        "2) Any personal details, preferences, or facts you learned about the user. "
        "3) If it was a study/quiz session, what topics were covered and how they performed. "
        "Write in third person (e.g. 'The user asked about...'). Be factual and brief."
    )
    prompt_ctx = llm.ChatContext()
    prompt_ctx.add_message(role="system", content=system_prompt)
    prompt_ctx.add_message(role="user", content=recent_transcript)

    try:
        # Collect the streamed text fragments, then join them once at the end.
        fragments = []
        async for chunk in summariser.chat(chat_ctx=prompt_ctx):
            if chunk.delta and chunk.delta.content:
                fragments.append(chunk.delta.content)

        summary = "".join(fragments).strip()
        if not summary:
            return

        stored = load_memory()
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        stored.append({"date": timestamp, "summary": summary})
        save_memory(stored)
        logger.info("Saved session summary to memory")
    except Exception:
        logger.exception("Failed to generate session summary")


# --- PDF Loading ---


def load_pdf_content() -> str:
    """Load and extract text from all PDFs in the docs/ folder.

    Files are processed in sorted path order. A PDF that cannot be opened or
    read is logged and skipped; the remaining files are still loaded.

    Returns:
        The text of every readable PDF, each prefixed with a
        ``--- Document: <filename> ---`` header and separated by blank lines.
        Returns ``""`` when the folder contains no PDFs or none yield text.
    """
    pdf_files = sorted(glob.glob(os.path.join(DOCS_DIR, "*.pdf")))
    if not pdf_files:
        logger.warning("No PDF files found in %s", DOCS_DIR)
        return ""

    all_text = []
    for pdf_path in pdf_files:
        filename = os.path.basename(pdf_path)
        logger.info("Loading PDF: %s", filename)
        try:
            # Using the document as a context manager closes it even when
            # get_text() raises on a damaged page, so the handle is not leaked.
            with pymupdf.open(pdf_path) as doc:
                # Keep only pages that contain something other than whitespace.
                pages = [text for page in doc if (text := page.get_text()).strip()]
        except Exception:
            logger.exception("Failed to read %s", filename)
            continue

        if pages:
            all_text.append(f"--- Document: {filename} ---\n" + "\n".join(pages))
            logger.info("Extracted %d pages from %s", len(pages), filename)

    return "\n\n".join(all_text)


# --- Web Search ---


# Shared Tavily client, or None when TAVILY_API_KEY is empty.
tavily_client = TavilyClient(api_key=TAVILY_API_KEY) if TAVILY_API_KEY else None


@llm.function_tool(
    description=(
        "Search the web for current, up-to-date information. Use this when the user "
        "asks about recent events, news, weather, prices, live scores, or anything "
        "that requires real-time data you might not know."
    )
)
async def web_search(
    query: str,
):
    # Tool entry point. Returns a plain-text digest ("title: content" blocks
    # separated by blank lines) of up to three Tavily results for `query`, or a
    # short message when search is unavailable, finds nothing, or fails.
    # Errors are reported in the returned text instead of being raised.
    if not tavily_client:
        return "Web search is not available — TAVILY_API_KEY is not set."

    logger.info("Web search: %s", query)
    try:
        # TavilyClient.search is a synchronous HTTP call. Run it in a worker
        # thread so the event loop is not blocked while waiting on the network.
        response = await asyncio.to_thread(
            tavily_client.search,
            query=query,
            search_depth="basic",
            max_results=3,
        )
        results = response.get("results", [])
        if not results:
            return "No results found for that search."

        return "\n\n".join(
            f"{r.get('title', '')}: {r.get('content', '')}" for r in results
        )
    except Exception as e:
        logger.exception("Web search failed")
        return f"Search failed: {e}"


# --- System Instructions ---

# Study material and past-session memory are read once, at import time, and
# baked into the system prompt below.
PDF_CONTENT = load_pdf_content()
MEMORY_ENTRIES = load_memory()
MEMORY_TEXT = format_memory_for_prompt(MEMORY_ENTRIES)

# Full system prompt for the agent. The previous-conversations section is only
# emitted when MEMORY_TEXT is non-empty; chr(10) is a newline, spelled that way
# because f-string expressions cannot contain a backslash before Python 3.12.
# A placeholder notice replaces the study material when no PDF text was loaded.
SYSTEM_INSTRUCTIONS = f"""You are a friendly, helpful voice assistant running on a Rabbit R1 device. You speak through a tiny speaker, so keep every response short and punchy — two to three sentences max.

You are a general-purpose assistant. Answer questions, have conversations, help with anything the user asks — just like ChatGPT or Claude would. Do NOT start replies with greetings like "Hey", "Hi", or "How are you" — just answer the user's question directly. The opening greeting only happens once at the start of the session.

WEB SEARCH: You have a web search tool. Use it whenever the user asks about current events, news, weather, prices, sports scores, or anything that requires up-to-date information beyond your training data. After getting search results, summarize the key points conversationally — do not read URLs or raw data aloud.

PRONUNCIATION: Whenever you refer to the company or product "BRCKS", you must write it as "Bricks" in your response text. Never write "BRCKS" — always write "Bricks" so the speech engine says it correctly.

MEMORY: You remember previous conversations. Use this context to be more helpful, personal, and aware of what the user has discussed before. Reference past conversations naturally when relevant — but do not recite them. If the user asks what you remember, give a natural summary.

{("--- PREVIOUS CONVERSATIONS ---" + chr(10) + MEMORY_TEXT + chr(10) + "--- END PREVIOUS CONVERSATIONS ---") if MEMORY_TEXT else ""}

TUTOR MODE: You also have study material loaded below. When the user asks you to quiz them, test them, or study — switch into tutor mode. In tutor mode:
- Ask one question at a time based on the study material.
- After the user answers, say whether they are right or wrong, give a one-sentence explanation, then ask the next question.
- Build confidence progressively. Start with foundational questions. Only increase difficulty when the user asks to level up or move to the next step.
- Use your memory of past sessions to pick up where you left off — skip topics already mastered, focus on weak areas.
- Mix question types: recall, conceptual understanding, and application.
- If the user asks to stop or change topic, smoothly exit tutor mode and go back to being a general assistant.

Rules:
- Never use markdown, emojis, bullet points, or numbered lists in your spoken responses.
- Do not read long passages aloud. Summarize or paraphrase instead.
- Keep it conversational and natural. You are not a robot.

--- STUDY MATERIAL ---
{PDF_CONTENT if PDF_CONTENT else "(No documents loaded yet. If the user asks to study, let them know to add PDF files to the docs folder and restart.)"}
--- END STUDY MATERIAL ---"""


class Assistant(Agent):
    """Agent configured with the module's system prompt and the web search tool."""

    def __init__(self) -> None:
        agent_kwargs = {
            "instructions": SYSTEM_INSTRUCTIONS,
            "tools": [web_search],
        }
        super().__init__(**agent_kwargs)


# Server object that registers the session handler below and is run by the CLI.
# NOTE(review): port=0 presumably asks the OS for any free port — confirm
# against the AgentServer documentation.
server = AgentServer(port=0)


@server.rtc_session(agent_name="r1-tutor")
async def my_agent(ctx: JobContext):
    """Run one voice session: connect, start the agent, greet, register cleanup.

    Args:
        ctx: Job context for the room this session serves.
    """
    ctx.log_context_fields = {"room": ctx.room.name}
    # Connect before starting the session so audio is already established when
    # the agent starts and greets (avoids a first-connect race).
    await ctx.connect()

    # Server-side VAD with threshold=0.9 is deliberately aggressive: only loud,
    # clear voice counts as speech, so room noise (footsteps, breathing,
    # clothing rustle) does not interrupt the agent mid-reply.
    vad_settings = TurnDetection(
        type="server_vad",
        threshold=0.9,  # max practical is ~0.95. 1.0 = never triggers (broken).
        prefix_padding_ms=300,
        silence_duration_ms=600,
        create_response=True,
        interrupt_response=True,
    )
    # OpenAI gpt-realtime-2, a single speech-to-speech model.
    realtime_model = openai.realtime.RealtimeModel(
        model="gpt-realtime-2",
        voice="cedar",  # natural male. Alts: marin, verse, sage, ash, ballad, coral, echo, shimmer, alloy
        speed=0.9,  # 0.25-1.5, default 1.0. Lower = slower delivery.
        turn_detection=vad_settings,
    )
    session = AgentSession(llm=realtime_model)

    assistant = Assistant()
    enhancer = ai_coustics.audio_enhancement(
        model=ai_coustics.EnhancerModel.QUAIL_VF_L
    )
    audio_in = room_io.AudioInputOptions(noise_cancellation=enhancer)
    options = room_io.RoomOptions(audio_input=audio_in)
    await session.start(agent=assistant, room=ctx.room, room_options=options)

    # Greet once on connect so the model does not repeat the greeting each turn.
    session.generate_reply(
        instructions="Greet the user briefly and casually, like 'Hey, how can I help you?'. Keep it to one short sentence."
    )

    async def on_shutdown():
        # Cap the summary at 10 seconds: a slow LLM call must not hang shutdown
        # and get the process force-killed ("did not ack shutdown in time").
        summary_job = summarise_and_save(session)
        try:
            await asyncio.wait_for(summary_job, timeout=10)
        except Exception:
            logger.exception("shutdown summary skipped (timed out or errored)")

    # Store a summary of the conversation when the session ends.
    ctx.add_shutdown_callback(on_shutdown)


# Script entry point: hand the server to the CLI runner.
if __name__ == "__main__":
    cli.run_app(server)