"""
Unit tests for PDF Extractor Module.
Tests PDF data extraction with sample PDFs.
"""

import unittest
from unittest.mock import Mock, MagicMock, patch
import sys
from pathlib import Path

# Make ``src`` importable as a package: put the directory two levels above
# this file (the parent of this file's own directory) at the front of the
# module search path.
_project_root = Path(__file__).parent.parent
sys.path.insert(0, str(_project_root))

from src.pdf_extractor import PDFExtractor


class TestPDFExtractor(unittest.TestCase):
    """Unit tests for ``PDFExtractor``.

    The tests fall into three groups:

    * private helper methods (``_detect_report_type``,
      ``_extract_business_name``, ``_extract_date``, ``_format_kpi_value``,
      ``_extract_kpis``) called directly with literal text/table input;
    * ``extract_report_data`` driven end-to-end with ``pdfplumber.open``
      patched to return an in-memory mock PDF;
    * ``extract_report_data`` error paths, where ``pdfplumber.open`` raises.
    """

    def setUp(self):
        """Create a fresh extractor for every test."""
        self.extractor = PDFExtractor()

    @staticmethod
    def _make_mock_pdf(page_text, tables):
        """Build a single-page stand-in for the object ``pdfplumber.open`` returns.

        Args:
            page_text: Value the page's ``extract_text()`` will return.
            tables: Value the page's ``extract_tables()`` will return.

        Returns:
            A ``MagicMock`` whose ``pages`` is a one-element list holding the
            configured page mock.
        """
        page = MagicMock()
        page.extract_text.return_value = page_text
        page.extract_tables.return_value = tables

        pdf = MagicMock()
        pdf.pages = [page]
        # Make the mock usable in a ``with`` statement as well: entering
        # yields the mock itself, and ``__exit__`` returns False so an
        # exception raised inside the block is not suppressed.
        pdf.__enter__ = Mock(return_value=pdf)
        pdf.__exit__ = Mock(return_value=False)
        return pdf

    # ------------------------------------------------------------------
    # _detect_report_type
    # ------------------------------------------------------------------

    def test_detect_report_type_seo(self):
        """Traffic-acquisition style text is classified as 'SEO'."""
        seo_text = """
        The George Centre
        Traffic acquisition - 2024 October
        Sessions 500
        Active users 450
        """
        report_type = self.extractor._detect_report_type(seo_text)
        self.assertEqual(report_type, 'SEO')

    def test_detect_report_type_google_ads(self):
        """Text mentioning a Google Ads report is classified as 'Google Ads'."""
        ads_text = """
        Business Name
        Google Ads Report
        Clicks 1000
        Impressions 5000
        CTR 5.0%
        """
        report_type = self.extractor._detect_report_type(ads_text)
        self.assertEqual(report_type, 'Google Ads')

    # ------------------------------------------------------------------
    # _extract_business_name
    # ------------------------------------------------------------------

    def test_extract_business_name_simple(self):
        """A plain company name on the first text line is picked up."""
        text = """
        ABC Corporation
        Monthly Report
        Date: 2024 October
        """
        business_name = self.extractor._extract_business_name(text)
        self.assertIsNotNone(business_name)
        self.assertIn('ABC Corporation', business_name)

    def test_extract_business_name_with_special_chars(self):
        """A name containing '&' and '.' still yields a non-None result."""
        text = """
        Smith & Sons Ltd.
        Report Details
        """
        business_name = self.extractor._extract_business_name(text)
        self.assertIsNotNone(business_name)

    # ------------------------------------------------------------------
    # _extract_date
    # ------------------------------------------------------------------

    def test_extract_date_month_year_format(self):
        """A 'YYYY Month' phrase (e.g. '2024 October') yields the month name."""
        text = "Report for 2024 October"
        date, month = self.extractor._extract_date(text)
        self.assertIsNotNone(date)
        self.assertEqual(month, 'October')

    def test_extract_date_date_range_format(self):
        """A 'Mon D, YYYY - Mon D, YYYY' range yields both a date and a month."""
        text = "Period: Oct 1, 2024 - Oct 31, 2024"
        date, month = self.extractor._extract_date(text)
        self.assertIsNotNone(date)
        self.assertIsNotNone(month)

    # ------------------------------------------------------------------
    # _format_kpi_value
    # ------------------------------------------------------------------

    def test_format_kpi_value_integer(self):
        """Plain integers gain a thousands separator."""
        value = self.extractor._format_kpi_value('1234', 'Sessions')
        self.assertEqual(value, '1,234')

    def test_format_kpi_value_percentage(self):
        """Percentages are returned unchanged."""
        value = self.extractor._format_kpi_value('45.67%', 'Engagement rate')
        self.assertEqual(value, '45.67%')

    def test_format_kpi_value_currency(self):
        """Currency amounts are returned unchanged."""
        value = self.extractor._format_kpi_value('$12.50', 'Avg. CPC')
        self.assertEqual(value, '$12.50')

    def test_format_kpi_value_time(self):
        """An 'HH:MM:SS' duration is rendered as minutes and seconds."""
        value = self.extractor._format_kpi_value('00:02:34', 'Average session duration')
        self.assertEqual(value, '2m 34s')

    def test_format_kpi_value_na(self):
        """The 'N/A' placeholder is returned unchanged."""
        value = self.extractor._format_kpi_value('N/A', 'Sessions')
        self.assertEqual(value, 'N/A')

    # ------------------------------------------------------------------
    # extract_report_data (pdfplumber.open patched)
    # ------------------------------------------------------------------

    @patch('pdfplumber.open')
    def test_extract_report_data_seo_success(self, mock_pdf_open):
        """An SEO-looking PDF produces a result with all expected keys."""
        page_text = """
        The George Centre
        2024 October
        Sessions 500
        Active users 450
        """
        tables = [
            [['Metric', 'Value', 'Change'],
             ['Sessions', '500', '+10%'],
             ['Active users', '450', '+5%']]
        ]
        mock_pdf_open.return_value = self._make_mock_pdf(page_text, tables)

        result = self.extractor.extract_report_data('test.pdf')

        self.assertIsNotNone(result)
        self.assertEqual(result['report_type'], 'SEO')
        self.assertIn('business_name', result)
        self.assertIn('kpis', result)
        self.assertIn('extraction_errors', result)

    @patch('pdfplumber.open')
    def test_extract_report_data_google_ads_success(self, mock_pdf_open):
        """A Google Ads-looking PDF produces a 'Google Ads' result."""
        page_text = """
        ABC Company
        Google Ads Report
        2024 October
        Clicks 1000
        """
        tables = [
            [['Metric', 'Value', 'Change'],
             ['Clicks', '1000', '+15%'],
             ['Impressions', '5000', '+20%']]
        ]
        mock_pdf_open.return_value = self._make_mock_pdf(page_text, tables)

        result = self.extractor.extract_report_data('test_ads.pdf')

        self.assertIsNotNone(result)
        self.assertEqual(result['report_type'], 'Google Ads')
        self.assertIn('business_name', result)
        self.assertIn('kpis', result)

    @patch('pdfplumber.open')
    def test_extract_report_data_missing_business_name(self, mock_pdf_open):
        """Text without a business-name line is reported in extraction_errors."""
        page_text = """
        2024 October
        Sessions 500
        """
        mock_pdf_open.return_value = self._make_mock_pdf(page_text, [])

        result = self.extractor.extract_report_data('test.pdf')

        self.assertIn('extraction_errors', result)
        self.assertTrue(
            any('business name' in error.lower() for error in result['extraction_errors']),
            "expected an extraction error mentioning 'business name'",
        )

    @patch('pdfplumber.open')
    def test_extract_report_data_file_not_found(self, mock_pdf_open):
        """A FileNotFoundError from pdfplumber is recorded, not raised."""
        mock_pdf_open.side_effect = FileNotFoundError("PDF not found")

        result = self.extractor.extract_report_data('nonexistent.pdf')

        self.assertIn('extraction_errors', result)
        # assertGreater reports the actual length on failure, unlike
        # assertTrue(len(...) > 0).
        self.assertGreater(len(result['extraction_errors']), 0)

    @patch('pdfplumber.open')
    def test_extract_report_data_corrupted_pdf(self, mock_pdf_open):
        """A generic exception from pdfplumber is recorded, not raised."""
        mock_pdf_open.side_effect = Exception("Corrupted PDF")

        result = self.extractor.extract_report_data('corrupted.pdf')

        self.assertIn('extraction_errors', result)
        self.assertGreater(len(result['extraction_errors']), 0)

    # ------------------------------------------------------------------
    # _extract_kpis
    # ------------------------------------------------------------------

    def test_extract_kpis_seo_complete_table(self):
        """All seven rows of a complete SEO table are extracted."""
        text = "Some report text"
        table = [
            ['Metric', 'Value', 'Change'],
            ['Sessions', '1,234', '+10%'],
            ['Active users', '987', '+5%'],
            ['New users', '567', '+8%'],
            ['Key events', '45', '+12%'],
            ['Engagement rate', '56.78%', '+3%'],
            ['Bounce rate', '34.56%', '-2%'],
            ['Average session duration', '00:03:29', '+5%']
        ]

        kpis = self.extractor._extract_kpis(text, [table], 'SEO')

        self.assertEqual(len(kpis), 7)
        self.assertIn('Sessions', kpis)
        self.assertIn('Active users', kpis)
        self.assertIn('Key events', kpis)

    def test_extract_kpis_google_ads_complete_table(self):
        """All seven rows of a complete Google Ads table are extracted."""
        text = "Some ads report text"
        table = [
            ['Metric', 'Value', 'Change'],
            ['Clicks', '5,678', '+15%'],
            ['Impressions', '123,456', '+20%'],
            ['CTR', '4.60%', '+2%'],
            ['Conversions', '89', '+25%'],
            ['Conv. rate', '1.57%', '+3%'],
            ['Avg. CPC', '$2.96', '-5%'],
            ['Cost', '$16,798.88', '+10%']
        ]

        kpis = self.extractor._extract_kpis(text, [table], 'Google Ads')

        self.assertEqual(len(kpis), 7)
        self.assertIn('Clicks', kpis)
        self.assertIn('Impressions', kpis)
        self.assertIn('Conversions', kpis)

    def test_extract_kpis_missing_values(self):
        """Rows whose value is 'N/A' do not break extraction of other rows."""
        text = "Report text"
        table = [
            ['Metric', 'Value', 'Change'],
            ['Sessions', '1,234', '+10%'],
            ['Active users', 'N/A', 'N/A']
        ]

        kpis = self.extractor._extract_kpis(text, [table], 'SEO')

        self.assertIn('Sessions', kpis)
        # Deliberately tolerant: the extractor may either omit an 'N/A' row
        # or keep it; only when it is kept must the value stay 'N/A'.
        if 'Active users' in kpis:
            self.assertEqual(kpis['Active users']['value'], 'N/A')


# Run the suite with unittest's own runner when this file is executed as a
# script; importing the module (e.g. from a test collector) does nothing here.
if __name__ == "__main__":
    unittest.main()
