import { format, parse } from 'date-fns';
import { getDocument } from 'pdfjs-dist';
import 'pdfjs-dist/build/pdf.worker.mjs'; // Ensure worker is bundled
import { TextItem } from 'pdfjs-dist/types/src/display/api';
import { Transaction } from '../Transaction';
import { clean, cleanAmount } from './helpUtils';

/**
 * Re-renders a statement date so that the day is always two digits.
 *
 * The text is parsed with the date-fns pattern `MMM d, yyyy` (using the
 * current time as the reference date) and written back out with the pattern
 * `MMM dd, yyyy`.
 *
 * @param inputDate - Date text expected to follow the `MMM d, yyyy` pattern.
 * @returns The same date rendered with the `MMM dd, yyyy` pattern.
 */
const formatDate = (inputDate: string): string =>
  format(parse(inputDate, 'MMM d, yyyy', new Date()), 'MMM dd, yyyy');

/**
 * Tells whether a string is, in its entirety, shaped like the date
 * `Apr 1, 2024`.
 *
 * The accepted shape is: three ASCII letters, a space, a one- or two-digit
 * day, a comma followed by a space, and a four-digit year. Only the shape is
 * checked; the letters are not validated against real month names, and any
 * leading or trailing whitespace makes the check fail.
 *
 * @param startString - Text to test.
 * @returns `true` when the whole string has the date shape, otherwise `false`.
 */
const isTransactionStart = (startString: string): boolean =>
  /^[A-Za-z]{3} \d{1,2}, \d{4}$/.test(startString);

/**
 * Turns the text tokens of one PDF page into transactions.
 *
 * Steps:
 *  1. Split the token stream into rows at selected end-of-line tokens.
 *  2. Locate the header row (the first row whose first token reads `Name`) and
 *     derive column boundaries from the x positions of its non-blank tokens.
 *  3. For every row that carries a date token, bucket its tokens into columns
 *     and build a transaction from column 0 (description), column 1 (date) and
 *     column 3 (amount).
 *
 * @param tokens - Text items in the order they should be read; `transform[4]`
 *   is treated as each token's x position.
 * @returns The parsed transactions, or an empty array when the page has no
 *   header row.
 */
const parseTransactionsFromRows = (tokens: TextItem[]): Transaction[] => {
  // The currency is not read from the document; every transaction is tagged USD.
  const currency = 'USD';

  // --- 1. Group tokens into rows -------------------------------------------
  const rowTokens: TextItem[][] = [];
  let currentRow: TextItem[] = [];

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    currentRow.push(token);

    const startString = currentRow[0]?.str ?? '';
    const nextToken: TextItem | undefined = tokens[i + 1];
    const nextStartString = nextToken?.str ?? '';
    const prevToken: TextItem | undefined = i > 0 ? tokens[i - 1] : undefined;
    const prevStartString = prevToken?.str ?? '';

    // The last token always closes the current row. It is checked first so the
    // remaining conditions never have to look past the end of the array
    // (previously `tokens[i + 1].str` threw when the final token had hasEOL).
    // Every other break requires an end-of-line token plus one of:
    //   - the previous token reads "Balance";
    //   - the next token reads "Name" (a header row follows);
    //   - the previous token sits right of x = 480
    //     (NOTE(review): presumably the right-most column - confirm);
    //   - the row starts with a date and is followed by "Revolut Bank..." or
    //     by an "IBAN" token that itself ends a line.
    const endsRow =
      nextToken === undefined ||
      (token.hasEOL &&
        (prevStartString.trim() === 'Balance' ||
          nextStartString.trim() === 'Name' ||
          (prevToken !== undefined && prevToken.transform[4] > 480) ||
          (isTransactionStart(startString) &&
            (nextStartString.startsWith('Revolut Bank') ||
              (nextStartString === 'IBAN' && nextToken.hasEOL)))));

    if (endsRow) {
      rowTokens.push(currentRow);
      currentRow = [];
    }
  }

  // --- 2. Header row and column boundaries ---------------------------------
  const headerRow = rowTokens.find(
    (row) => row.length > 0 && row[0].str.replace(/\s+/g, '') === 'Name'
  );
  if (!headerRow) return [];

  // x positions of the non-blank header labels, left to right.
  const headerX = headerRow
    .filter((token) => token.str.trim() !== '')
    .map((token) => token.transform[4])
    .sort((a, b) => a - b);

  // Column k ends 20 units to the left of header label k + 1; the last column
  // is unbounded, so every token lands in some column.
  const boundaries: number[] = [...headerX.slice(1).map((x) => x - 20), Infinity];

  // --- 3. Build transactions -----------------------------------------------
  const transactions: Transaction[] = [];
  for (const row of rowTokens) {
    const sorted = row.slice().sort((a, b) => a.transform[4] - b.transform[4]);

    // Keep only rows that have a date such as "Apr 1, 2024" as the 4th, 6th or
    // 8th token when ordered by x position; everything else is skipped.
    const hasDate = [5, 7, 3].some((index) => isTransactionStart(sorted[index]?.str ?? ''));
    if (!hasDate) continue;

    const columns: string[] = new Array(headerX.length).fill('');
    for (const token of sorted) {
      const data = token.str;
      if (!data || data.trim() === '') continue;
      // Tokens starting with "Fee" or "Revolut" are left out of every column.
      if (data.startsWith('Fee') || data.startsWith('Revolut')) continue;

      const x = token.transform[4];
      let colIndex = 0;
      while (colIndex < boundaries.length && x > boundaries[colIndex]) {
        colIndex++;
      }
      columns[colIndex] += data + ' ';
    }

    const amountVal = cleanAmount(columns[3]);
    transactions.push({
      date: clean(columns[1]),
      description: clean(columns[0]),
      // The amount is stored unsigned; its sign is carried by `direction`
      // (strictly positive -> CREDIT, zero or negative -> DEBIT).
      amount: Math.abs(amountVal),
      direction: amountVal > 0 ? 'CREDIT' : 'DEBIT',
      currency: currency,
    } as Transaction);
  }
  return transactions;
};

/**
 * Passes the `date` of every transaction through `formatDate`.
 *
 * @param transactions - Transactions whose `date` is text accepted by
 *   `formatDate`.
 * @param year - Not used by this function.
 * @returns A new array of shallow copies, each with its `date` replaced by the
 *   reformatted value; the input array and its items are left untouched.
 */
const transform = (transactions: Transaction[], year: number) =>
  transactions.map((entry) => ({ ...entry, date: formatDate(entry.date) }));

/**
 * Extracts transactions from one or more PDF statement files.
 *
 * Files are processed one after another. Each file is read into memory, loaded
 * with pdf.js, and every page's text items are handed to
 * `parseTransactionsFromRows`. The combined result is finally passed through
 * `transform`.
 *
 * @param files - PDF files to parse.
 * @returns The transactions of all files, in file order and page order.
 * @throws Error when a file yields no transactions. The returned promise also
 *   rejects when a file cannot be read or when loading or parsing the PDF
 *   fails.
 */
const procesRelayfiFile = async (files: File[]): Promise<Transaction[]> => {
  // Reads one file and resolves with the transactions found on all its pages.
  const extractTransactions = (file: File): Promise<Transaction[]> =>
    new Promise<Transaction[]>((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = async () => {
        // This handler is async, so an error thrown inside it would only
        // reject the handler's own (ignored) promise and leave the outer
        // promise pending forever. Forward every failure to `reject` instead.
        try {
          const typedArray = new Uint8Array(reader.result as ArrayBuffer);
          const pdf = await getDocument({ data: typedArray }).promise;

          const allPageTransactions: Transaction[] = [];
          // pdf.js page numbers are 1-based.
          for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
            const page = await pdf.getPage(pageNumber);
            const textContent = await page.getTextContent();
            const items = textContent.items as TextItem[];
            // Pages are parsed independently of each other.
            allPageTransactions.push(...parseTransactionsFromRows(items));
          }
          resolve(allPageTransactions);
        } catch (error) {
          reject(error);
        }
      };
      reader.onerror = reject;
      reader.readAsArrayBuffer(file);
    });

  const allTransactions: Transaction[] = [];
  for (const file of files) {
    const transactions = await extractTransactions(file);
    if (!transactions || transactions.length === 0) {
      throw new Error('No transactions found in the file ' + file.name);
    }
    allTransactions.push(...transactions);
  }

  return transform(allTransactions, 0);
};

export default procesRelayfiFile;
