"""
Google Drive Reader Module.
Downloads PDF reports from a Google Drive folder.

This module handles:
- Google Drive API authentication
- Listing files in a specific folder
- Downloading PDF files to local storage
- Skipping files that already exist locally to avoid re-downloads
"""

import os
import io
from pathlib import Path
from typing import List, Dict, Optional
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
from google.oauth2.credentials import Credentials
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
try:
    from .logger import get_logger
except ImportError:
    from logger import get_logger

# Module-level logger; DriveReader.__init__ stores it on each instance as self.logger.
logger = get_logger('DriveReader')


class DriveReader:
    """Reads PDF files from a Google Drive folder.

    Construction authenticates immediately (see ``_authenticate``) and builds a
    Drive v3 service object that every other method uses via ``self.service``.
    """

    # NOTE(review): the Gmail and Sheets scopes are not used by any method of
    # this class — presumably the token file is shared with other modules.
    # Confirm before narrowing, since changing scopes invalidates saved tokens.
    SCOPES = [
        'https://www.googleapis.com/auth/drive.readonly',
        'https://www.googleapis.com/auth/drive.metadata.readonly',
        'https://www.googleapis.com/auth/gmail.readonly',
        'https://www.googleapis.com/auth/gmail.compose',
        'https://www.googleapis.com/auth/spreadsheets',
        'https://www.googleapis.com/auth/drive.file'
    ]

    # Largest pageSize accepted by Drive v3 files.list.
    _MAX_PAGE_SIZE = 1000

    def __init__(self, credentials_path: str = 'credentials.json', token_path: str = 'token.json') -> None:
        """
        Initialize Drive reader and authenticate.

        Args:
            credentials_path: Path to OAuth credentials JSON
            token_path: Path to store/load token

        Raises:
            FileNotFoundError: If an interactive login is required but
                ``credentials_path`` does not exist.
        """
        self.credentials_path = credentials_path
        self.token_path = token_path
        self.service = None
        self.logger = logger

        self._authenticate()

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape a value for use inside single quotes in a Drive search query."""
        # Backslashes first, otherwise the backslash added for quotes is doubled.
        return str(value).replace('\\', '\\\\').replace("'", "\\'")

    def _authenticate(self) -> None:
        """
        Authenticate with Google Drive API using OAuth 2.0.

        Loads a saved token when available, refreshes it when expired, and
        falls back to the interactive local-server OAuth flow whenever no
        valid credentials can be obtained otherwise. Newly obtained or
        refreshed credentials are written to ``self.token_path``.

        Raises:
            FileNotFoundError: If the OAuth flow is needed but
                ``self.credentials_path`` does not exist.
        """
        creds = None

        # Check if token exists
        if os.path.exists(self.token_path):
            try:
                creds = Credentials.from_authorized_user_file(self.token_path, self.SCOPES)
                self.logger.info("Loaded existing Drive credentials")
            except Exception as e:
                self.logger.warning(f"Failed to load existing credentials: {e}")

        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                try:
                    creds.refresh(Request())
                    self.logger.info("Refreshed Drive credentials")
                except Exception as e:
                    self.logger.error(f"Failed to refresh credentials: {e}")
                    creds = None

            # Re-check validity rather than mere presence: a token that loaded
            # but is invalid and not refreshable must go through the full flow
            # instead of being saved and used as-is.
            if not creds or not creds.valid:
                if not os.path.exists(self.credentials_path):
                    self.logger.error(f"Credentials file not found: {self.credentials_path}")
                    raise FileNotFoundError(
                        "Drive credentials not found. Please place credentials.json in the project directory."
                    )

                flow = InstalledAppFlow.from_client_secrets_file(
                    self.credentials_path,
                    self.SCOPES
                )
                creds = flow.run_local_server(port=0)
                self.logger.info("Completed OAuth authentication")

            # Save credentials for next run
            with open(self.token_path, 'w') as token:
                token.write(creds.to_json())
            self.logger.info(f"Saved credentials to {self.token_path}")

        # Build Drive service
        self.service = build('drive', 'v3', credentials=creds)
        self.logger.info("Google Drive service initialized")

    def find_subfolder_by_name(self, parent_folder_id: str, subfolder_name: str) -> Optional[str]:
        """
        Find a subfolder by name within a parent folder.

        Args:
            parent_folder_id: Google Drive parent folder ID
            subfolder_name: Name of subfolder to find (e.g., "January")

        Returns:
            Folder ID if found, None otherwise (including when the API call
            fails with an HttpError, which is logged). When several folders
            share the name, the first one returned by the API is used.
        """
        try:
            # Escape both values so names containing quotes or backslashes
            # (e.g. "Bob's reports") still produce a valid query.
            query = (
                f"'{self._escape_query_value(parent_folder_id)}' in parents "
                f"and name='{self._escape_query_value(subfolder_name)}' "
                f"and mimeType='application/vnd.google-apps.folder' "
                f"and trashed=false"
            )

            results = self.service.files().list(
                q=query,
                pageSize=10,
                fields="files(id, name)",
                supportsAllDrives=True,
                includeItemsFromAllDrives=True
            ).execute()

            matches = results.get('files', [])

            if not matches:
                self.logger.warning(f"Subfolder '{subfolder_name}' not found in parent {parent_folder_id}")
                return None

            if len(matches) > 1:
                self.logger.warning(
                    f"Multiple subfolders named '{subfolder_name}' found; using the first"
                )

            folder_id = matches[0]['id']
            self.logger.info(f"Found subfolder '{subfolder_name}': {folder_id}")
            return folder_id

        except HttpError as e:
            self.logger.error(f"Failed to find subfolder: {e}")
            return None

    def list_pdfs_in_folder(self, folder_id: str, max_results: int = 100) -> List[Dict]:
        """
        List PDF files in a Google Drive folder, most recently modified first.

        Result pages are followed until ``max_results`` files are collected or
        the folder is exhausted, so the limit may exceed a single API page.

        Args:
            folder_id: Google Drive folder ID (from folder URL)
            max_results: Maximum number of files to return; values <= 0
                return an empty list without calling the API

        Returns:
            List of file metadata dictionaries (id, name, createdTime,
            modifiedTime, size as provided by the API)

        Raises:
            HttpError: If the Drive API request fails (logged, then re-raised).
        """
        try:
            query = (
                f"'{self._escape_query_value(folder_id)}' in parents "
                f"and mimeType='application/pdf' and trashed=false"
            )

            files: List[Dict] = []
            page_token = None
            while len(files) < max_results:
                response = self.service.files().list(
                    q=query,
                    # Ask only for what is still missing, within the API cap.
                    pageSize=min(max_results - len(files), self._MAX_PAGE_SIZE),
                    fields="nextPageToken, files(id, name, createdTime, modifiedTime, size)",
                    orderBy="modifiedTime desc",  # Most recent first
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True,
                    pageToken=page_token
                ).execute()

                files.extend(response.get('files', []))
                page_token = response.get('nextPageToken')
                if not page_token:
                    break

            files = files[:max_results]
            self.logger.info(f"Found {len(files)} PDF files in folder {folder_id}")

            return files

        except HttpError as e:
            self.logger.error(f"Failed to list files: {e}")
            raise

    def download_pdf(self, file_id: str, file_name: str, output_dir: str) -> str:
        """
        Download a PDF file from Google Drive.

        The name is reduced to alphanumerics, spaces, '-', '_' and '.'. If a
        file with the resulting name already exists in ``output_dir`` the
        download is skipped and the existing path is returned.

        Args:
            file_id: Google Drive file ID
            file_name: Name to save file as
            output_dir: Directory to save file to (created if missing)

        Returns:
            Path to downloaded (or already existing) file

        Raises:
            HttpError: If the Drive API request fails (logged, then re-raised).
        """
        try:
            # Create output directory if it doesn't exist
            output_path = Path(output_dir)
            output_path.mkdir(parents=True, exist_ok=True)

            # Sanitize filename
            safe_filename = "".join(
                c for c in file_name if c.isalnum() or c in (' ', '-', '_', '.')
            ).rstrip()
            # A name that is empty or only dots/spaces ("", ".", "..") would
            # resolve to a directory; fall back to a name built from the file ID.
            if not safe_filename.strip('. '):
                safe_id = "".join(c for c in file_id if c.isalnum() or c in ('-', '_'))
                safe_filename = f"{safe_id}.pdf"
            file_path = output_path / safe_filename

            # Check if file already exists
            if file_path.exists():
                self.logger.info(f"File already exists, skipping: {safe_filename}")
                return str(file_path)

            # supportsAllDrives matches the listing calls so that files found
            # on shared drives can also be fetched.
            request = self.service.files().get_media(fileId=file_id, supportsAllDrives=True)

            # Stream into a ".part" file and rename on success, so an
            # interrupted download never leaves a truncated file that a later
            # run would treat as already downloaded.
            partial_path = output_path / f"{safe_filename}.part"
            completed = False
            try:
                with open(partial_path, 'wb') as fh:
                    downloader = MediaIoBaseDownload(fh, request)
                    done = False
                    while not done:
                        status, done = downloader.next_chunk()
                        if status:
                            self.logger.debug(f"Download progress: {int(status.progress() * 100)}%")
                    size = fh.tell()
                os.replace(partial_path, file_path)
                completed = True
            finally:
                if not completed:
                    try:
                        partial_path.unlink()
                    except OSError:
                        # Best-effort cleanup; the original error propagates.
                        pass

            self.logger.info(f"Downloaded: {safe_filename} ({size} bytes)")
            return str(file_path)

        except HttpError as e:
            self.logger.error(f"Failed to download {file_name}: {e}")
            raise

    def download_all_pdfs(self, folder_id: str, output_dir: str, max_files: int = 50) -> List[str]:
        """
        Download all PDF files from a Google Drive folder.

        Individual download failures are logged and skipped so one bad file
        does not abort the batch; a failure to list the folder propagates.

        Args:
            folder_id: Google Drive folder ID
            output_dir: Directory to save files to
            max_files: Maximum number of files to download

        Returns:
            List of paths to downloaded files (includes files that already
            existed locally and were skipped)
        """
        self.logger.info(f"Downloading PDFs from folder {folder_id}")

        # List all PDFs in folder
        files = self.list_pdfs_in_folder(folder_id, max_results=max_files)

        if not files:
            self.logger.warning("No PDF files found in folder")
            return []

        # Download each file
        downloaded_files = []
        total = len(files)
        for idx, file_meta in enumerate(files, 1):
            self.logger.info(f"Downloading file {idx}/{total}: {file_meta['name']}")

            try:
                file_path = self.download_pdf(
                    file_id=file_meta['id'],
                    file_name=file_meta['name'],
                    output_dir=output_dir
                )
            except Exception as e:
                # Deliberate best-effort: keep going with the remaining files.
                self.logger.error(f"Failed to download {file_meta['name']}: {e}")
                continue

            downloaded_files.append(file_path)

        self.logger.info(f"Downloaded {len(downloaded_files)} PDF files")
        return downloaded_files

    def get_folder_id_from_url(self, folder_url: str) -> str:
        """
        Extract folder ID from Google Drive folder URL.

        Args:
            folder_url: Google Drive folder URL or a bare folder ID.
                       Handled forms:
                       - https://drive.google.com/drive/folders/FOLDER_ID
                         (optionally followed by '/', '?query' or '#fragment')
                       - https://drive.google.com/open?id=FOLDER_ID
                       - FOLDER_ID

        Returns:
            Folder ID string; input that matches no known URL form is
            returned unchanged apart from surrounding whitespace.
        """
        # Function-scope import keeps this block self-contained.
        from urllib.parse import parse_qs, urlparse

        candidate = folder_url.strip()

        if '/folders/' in candidate:
            folder_id = candidate.split('/folders/')[-1]
            # Drop anything after the ID: query string, fragment, extra path.
            for delimiter in ('?', '#', '/'):
                folder_id = folder_id.split(delimiter)[0]
        elif '://' in candidate:
            # Older share links carry the ID in the "id" query parameter.
            id_values = parse_qs(urlparse(candidate).query).get('id')
            folder_id = id_values[0] if id_values else candidate
        else:
            # Assume it's already just the ID
            folder_id = candidate

        self.logger.info(f"Extracted folder ID: {folder_id}")
        return folder_id
