<?php
// generate_excel_from_html.php

// Ensure Composer's autoloader is included using a robust path.
require_once __DIR__ . '/vendor/autoload.php';

use PhpOffice\PhpSpreadsheet\Spreadsheet;
use PhpOffice\PhpSpreadsheet\Writer\Xlsx;
use PhpOffice\PhpSpreadsheet\Cell\Coordinate;
use PhpOffice\PhpSpreadsheet\Cell\DataType;
use PhpOffice\PhpSpreadsheet\Style\Alignment;
use PhpOffice\PhpSpreadsheet\Style\Border;
use PhpOffice\PhpSpreadsheet\Style\Fill;
use PhpOffice\PhpSpreadsheet\Style\Font;
use PhpOffice\PhpSpreadsheet\Style\NumberFormat; // Added missing use statement

// --- Step 1: Extract Table Data from HTML ---
// The markup to convert is expected to sit next to this script as sample.html.
$html_file_path = __DIR__ . '/sample.html';

// Stop early, with a log entry for the operator and a plain message for the caller,
// when the input file is missing.
if (file_exists($html_file_path) === false) {
    error_log("Error: sample.html not found at path: {$html_file_path}");
    exit("Error: The required HTML file (sample.html) was not found. Please ensure it exists in the script's directory.");
}

// file_get_contents() reports failure with boolean false, so compare strictly:
// an empty file yields '' and must not be mistaken for a read error.
$html_content = file_get_contents($html_file_path);
if (false === $html_content) {
    error_log("Error: Could not read sample.html from path: {$html_file_path}");
    exit("Error: Could not read sample.html. Please check file permissions and path.");
}

$dom = new DOMDocument();

// DOMDocument::loadHTML() rejects an empty string (ValueError on PHP 8), so an empty or
// whitespace-only file is simply treated as a document that contains no tables.
if (trim($html_content) !== '') {
    $markup = $html_content;

    // When the markup declares no charset, libxml decodes it as ISO-8859-1 and garbles UTF-8
    // text. If the bytes are valid UTF-8, rewrite every non-ASCII character as a numeric
    // entity so it survives parsing regardless of the charset libxml assumes.
    if (function_exists('mb_encode_numericentity') && mb_check_encoding($markup, 'UTF-8')) {
        $markup = mb_encode_numericentity($markup, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
    }

    // Collect parser diagnostics internally instead of silencing the call with "@";
    // real-world HTML is frequently malformed and the diagnostics are not needed here.
    // LIBXML_HTML_NOIMPLIED / NODEFDTD keep libxml from adding <html><body> or a doctype.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $dom->loadHTML($markup, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_NOWARNING | LIBXML_NOERROR);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);
}

$tables = $dom->getElementsByTagName('table');
$allTableData = [];

if ($tables->length === 0) {
    // Not fatal: Step 2 produces a workbook containing a notice sheet instead.
    error_log("No tables found in sample.html. An empty Excel file will be generated or one with a notice.");
}

// True when the nearest enclosing <table> of $node is $table, i.e. the node is not part
// of a table nested inside one of $table's cells.
$belongsToTable = static function (DOMNode $node, DOMElement $table): bool {
    for ($ancestor = $node->parentNode; $ancestor !== null; $ancestor = $ancestor->parentNode) {
        if ($ancestor instanceof DOMElement && strtolower($ancestor->nodeName) === 'table') {
            return $ancestor->isSameNode($table);
        }
    }

    return false;
};

// Descendants of $table with the given tag name, in document order, excluding anything
// that lives in a nested table.
$ownElements = static function (DOMElement $table, string $tagName) use ($belongsToTable): array {
    $found = [];
    foreach ($table->getElementsByTagName($tagName) as $element) {
        if ($belongsToTable($element, $table)) {
            $found[] = $element;
        }
    }

    return $found;
};

// Trimmed text of the <th>/<td> cells that are direct children of $row. Both cell types are
// read so that row-header cells (<th scope="row">) keep their column position.
$cellTexts = static function (DOMElement $row): array {
    $texts = [];
    foreach ($row->childNodes as $child) {
        if ($child instanceof DOMElement && in_array(strtolower($child->nodeName), ['th', 'td'], true)) {
            $texts[] = trim((string) $child->nodeValue);
        }
    }

    return $texts;
};

foreach ($tables as $tableIndex => $tableNode) {
    $currentTableData = ['headings' => [], 'rows' => []];

    // Sheet title: the table's own <caption>, otherwise the closest preceding sibling
    // heading (h2-h4), otherwise a generated "Table N".
    $sheetTitle = '';
    $captions = $ownElements($tableNode, 'caption');
    if ($captions !== []) {
        $sheetTitle = trim((string) $captions[0]->nodeValue);
    } else {
        for ($sibling = $tableNode->previousSibling; $sibling !== null; $sibling = $sibling->previousSibling) {
            if ($sibling instanceof DOMElement && in_array(strtolower($sibling->nodeName), ['h2', 'h3', 'h4'], true)) {
                $sheetTitle = trim((string) $sibling->nodeValue);
                break;
            }
        }
    }
    if ($sheetTitle === '') {
        $sheetTitle = 'Table ' . ($tableIndex + 1);
    }

    // Header row: the first row inside a <thead>; without one, the table's first row.
    // Only this single row is used for headings.
    $ownRows = $ownElements($tableNode, 'tr');
    $headerRow = null;
    foreach ($ownRows as $row) {
        if (strtolower($row->parentNode->nodeName) === 'thead') {
            $headerRow = $row;
            break;
        }
    }
    if ($headerRow === null && $ownRows !== []) {
        $headerRow = $ownRows[0];
    }
    if ($headerRow !== null) {
        $currentTableData['headings'] = $cellTexts($headerRow);
    }

    // Data rows: when the table has <tbody> sections only their rows count (all sections,
    // not just the first); otherwise every row of the table. The header row is skipped by
    // node identity, so it is never repeated as data even when it sits inside <tbody>,
    // and rows belonging to <thead> are never treated as data.
    $hasBodySection = $ownElements($tableNode, 'tbody') !== [];
    foreach ($ownRows as $row) {
        $section = strtolower($row->parentNode->nodeName);
        if ($section === 'thead' || ($hasBodySection && $section !== 'tbody')) {
            continue;
        }
        if ($headerRow !== null && $row->isSameNode($headerRow)) {
            continue;
        }

        $rowData = $cellTexts($row);
        if ($rowData !== []) { // Rows without any cell contribute nothing.
            $currentTableData['rows'][] = $rowData;
        }
    }

    // Tables that yielded neither headings nor rows do not get a sheet.
    if ($currentTableData['headings'] !== [] || $currentTableData['rows'] !== []) {
        $allTableData[] = ['title' => $sheetTitle, 'data' => $currentTableData];
    }
}

// --- Step 2: Create Spreadsheet and Populate Sheets ---
$spreadsheet = new Spreadsheet();
// Drop the worksheet that new Spreadsheet() starts with so the workbook only contains
// the sheets created below.
if ($spreadsheet->getSheetCount() > 0) {
    $spreadsheet->removeSheetByIndex(0);
}

/**
 * Recognises dollar amounts ("$1,234.50", "$ 12") and comma-grouped numbers ("1,234.5").
 *
 * Plain digit strings without a dollar sign are deliberately NOT matched: they only reach
 * this check after the numeric test rejected them (leading zeros, very long identifiers)
 * and must stay text. Values with more than 15 digits are rejected as well because a
 * float cannot hold them exactly.
 *
 * @return array{value: float, format: string}|null Parsed value and number format, or null.
 */
$parseFormattedNumber = static function (string $text): ?array {
    if (!preg_match('/^(\$\s*)?([1-9]\d{0,2}(?:,\d{3})+|\d+)(\.\d{1,2})?$/', $text, $parts)) {
        return null;
    }

    $hasDollarSign = $parts[1] !== '';
    $isGrouped = strpos($parts[2], ',') !== false;
    if (!$hasDollarSign && !$isGrouped) {
        return null;
    }

    $integerDigits = str_replace(',', '', $parts[2]);
    $fraction = $parts[3] ?? ''; // Includes the leading "." when present.
    if (strlen($integerDigits) + max(strlen($fraction) - 1, 0) > 15) {
        return null;
    }

    if ($hasDollarSign) {
        $format = NumberFormat::FORMAT_CURRENCY_USD;
    } elseif ($fraction !== '') {
        $format = NumberFormat::FORMAT_NUMBER_COMMA_SEPARATED1;
    } else {
        $format = '#,##0';
    }

    return ['value' => (float) ($integerDigits . $fraction), 'format' => $format];
};

if (empty($allTableData)) {
    // Nothing was extracted: still deliver a valid workbook that explains why it is empty.
    $noticeSheet = $spreadsheet->createSheet();
    $noticeSheet->setTitle('Notice');
    $noticeSheet->setCellValue('A1', 'No table data found in the HTML file or extracted.');
    $noticeSheet->getColumnDimension('A')->setAutoSize(true);
} else {
    // The header look is identical for every sheet, so build it once.
    $headerStyle = [
        'font' => ['bold' => true, 'color' => ['rgb' => 'FFFFFF'], 'size' => 11],
        'fill' => ['fillType' => Fill::FILL_SOLID, 'startColor' => ['rgb' => '3498DB']], // Blue
        'alignment' => ['horizontal' => Alignment::HORIZONTAL_CENTER, 'vertical' => Alignment::VERTICAL_CENTER],
        'borders' => ['allBorders' => ['borderStyle' => Border::BORDER_THIN, 'color' => ['rgb' => '7F8C8D']]], // Gray
    ];

    foreach ($allTableData as $sheetIndex => $tableInfo) {
        $sheet = $spreadsheet->createSheet($sheetIndex);

        // Sheet titles may not contain \ / ? * : [ ] and are limited to 31 characters.
        // Truncate by characters, not bytes, so a multi-byte UTF-8 character is never split.
        $safeSheetTitle = (string) preg_replace('~[\\\\/?*:\[\]]~', '', $tableInfo['title']);
        $safeSheetTitle = trim(mb_substr($safeSheetTitle, 0, 31, 'UTF-8'));
        if ($safeSheetTitle === '') {
            $safeSheetTitle = 'Sheet' . ($sheetIndex + 1);
        }
        $sheet->setTitle($safeSheetTitle);

        // Row 1: headings.
        $headings = $tableInfo['data']['headings'];
        $columnCount = count($headings);
        foreach (array_values($headings) as $offset => $heading) {
            $sheet->setCellValue(Coordinate::stringFromColumnIndex($offset + 1) . '1', $heading);
        }
        if ($columnCount > 0) {
            $lastHeaderColumnLetter = Coordinate::stringFromColumnIndex($columnCount);
            $sheet->getStyle('A1:' . $lastHeaderColumnLetter . '1')->applyFromArray($headerStyle);
            $sheet->getRowDimension(1)->setRowHeight(20);
        }

        // Data always starts in row 2, whether or not headings were found.
        $rowIdx = 2;
        foreach ($tableInfo['data']['rows'] as $rowData) {
            // Track the widest row so columns beyond the header are auto-sized too.
            $columnCount = max($columnCount, count($rowData));

            $col = 1;
            foreach ($rowData as $cellValue) {
                $text = trim((string) $cellValue);
                $cellCoordinate = Coordinate::stringFromColumnIndex($col) . $rowIdx;

                if (is_numeric($text) && strlen($text) < 15 && !preg_match('/^0\d/', $text)) {
                    // Plain number. Leading-zero values such as "01" are kept as text, and
                    // very long digit strings are excluded to avoid losing precision.
                    $sheet->setCellValueExplicit($cellCoordinate, $text, DataType::TYPE_NUMERIC);
                } elseif (($parsed = $parseFormattedNumber($text)) !== null) {
                    $sheet->setCellValueExplicit($cellCoordinate, $parsed['value'], DataType::TYPE_NUMERIC);
                    $sheet->getStyle($cellCoordinate)->getNumberFormat()->setFormatCode($parsed['format']);
                } else {
                    // Explicit string type keeps the text from being reinterpreted.
                    $sheet->setCellValueExplicit($cellCoordinate, $text, DataType::TYPE_STRING);
                }
                $col++;
            }
            $rowIdx++;
        }

        // Auto-size every column that received content.
        for ($colNumber = 1; $colNumber <= $columnCount; $colNumber++) {
            $sheet->getColumnDimension(Coordinate::stringFromColumnIndex($colNumber))->setAutoSize(true);
        }
    }
}

// Both branches above create at least one sheet, so index 0 always exists here.
$spreadsheet->setActiveSheetIndex(0);

// --- Step 3: Output the Spreadsheet ---
$outputFilename = 'html_tables_extracted_' . date('Ymd_His') . '.xlsx';

// Once the response body has started, the download headers can no longer be sent and the
// binary workbook would be appended to whatever was already emitted; stop instead.
if (headers_sent($sentFromFile, $sentFromLine)) {
    error_log("Error: cannot stream {$outputFilename}; output already started at {$sentFromFile}:{$sentFromLine}.");
    exit(1);
}

// Discard stray output (notices, whitespace) held in the active output buffer: any byte
// in front of the workbook data corrupts the .xlsx file.
if (ob_get_level() > 0 && ob_get_length() > 0) {
    ob_clean();
}

header('Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
header('Content-Disposition: attachment;filename="' . $outputFilename . '"');
// header() replaces an earlier header of the same name, so only one Cache-Control value can
// ever reach the client; this is the value the script effectively sent before.
header('Cache-Control: cache, must-revalidate');
header('Expires: Mon, 26 Jul 1997 05:00:00 GMT'); // Date in the past
header('Last-Modified: ' . gmdate('D, d M Y H:i:s') . ' GMT'); // Always modified
header('Pragma: public');

// Write the workbook straight to the response body.
$writer = new Xlsx($spreadsheet);
$writer->save('php://output');
exit;
