import { format, isValid, parse } from 'date-fns';
import { getDocument } from 'pdfjs-dist';
import 'pdfjs-dist/build/pdf.worker.mjs'; // Ensure worker is bundled
import { TextItem } from 'pdfjs-dist/types/src/display/api';
import { Transaction } from '../Transaction';
import { clean, cleanAmount } from './helpUtils';

/**
 * Normalizes a date string taken from a statement into the `MMM dd, yyyy` form.
 *
 * The input is tried against each known pattern in the order listed below and
 * the first pattern that produces a valid date wins. Because the order matters,
 * `MM/dd/yyyy` is attempted before `dd/MM/yyyy` for numeric dates.
 *
 * @param inputDate Raw date text, e.g. `24 December 2024` or `December 18, 2024`.
 * @returns The same date rendered as `MMM dd, yyyy`.
 * @throws Error when no known pattern yields a valid date.
 */
const formatDate = (inputDate: string): string => {
  const knownPatterns = [
    'dd MMMM yyyy',
    'MMMM dd, yyyy',
    'dd MMMM, yyyy',
    'MMMM dd yyyy',
    'MM/dd/yyyy',
    'MM-dd-yyyy',
    'dd/MM/yyyy',
    'dd-MM-yyyy',
    'dd MMM, yyyy',
    'MMM dd, yyyy',
    // The two year-less patterns take the year from the reference date (now).
    'dd MMM',
    'MMM dd',
  ];

  // Returns the parsed date for one pattern, or undefined when it does not fit.
  const tryPattern = (pattern: string): Date | undefined => {
    const candidate = parse(inputDate, pattern, new Date());
    return isValid(candidate) ? candidate : undefined;
  };

  for (const pattern of knownPatterns) {
    const hit = tryPattern(pattern);
    if (hit !== undefined) {
      return format(hit, 'MMM dd, yyyy');
    }
  }

  throw new Error(`Unrecognized date format: ${inputDate}`);
};

// Delimiter placed after each token's text when tokens are concatenated into a
// per-column string; the column string is later split on it to recover the
// individual text pieces.
const SEPARATOR = '#~#';

// Lower-cased labels that open a transaction entry. The "... reversal" entries
// are already covered by their shorter "card payment" / "card refund" /
// "card chargeback" prefixes; they are listed explicitly all the same.
const TRANSACTION_START_PREFIXES: readonly string[] = [
  'sent money',
  'received money',
  'withdrawal',
  'deposit',
  'card payment',
  'card refund',
  'card chargeback',
  'card chargeback reversal',
  'card payment reversal',
  'card refund reversal',
  'wise charges',
  'converted ',
];

/**
 * Tells whether a piece of text begins with one of the known transaction labels.
 *
 * The comparison is case-insensitive and anchored at the very first character;
 * leading whitespace is not skipped.
 *
 * @param startString Text to inspect.
 * @returns `true` when the text starts with a known transaction label.
 */
const isTransactionStart = (startString: string) => {
  const normalized = startString.toLowerCase();
  return TRANSACTION_START_PREFIXES.some((prefix) => normalized.startsWith(prefix));
};

// Matches dates written day-first, e.g. "24 December 2024".
const DAY_FIRST_DATE_REGEX = /^(\d{1,2})\s+[A-Za-z]+\s+\d{4}$/;
// Matches dates written month-first, e.g. "December 18, 2024".
const MONTH_FIRST_DATE_REGEX =
  /^(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}$/;

// Groups the flat token stream into logical rows (table headers and transaction entries).
const groupTokensIntoRows = (tokens: TextItem[]): TextItem[][] => {
  const rows: TextItem[][] = [];
  let currentRow: TextItem[] = [];

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];
    currentRow.push(token);

    // A row can only be closed on a token flagged as end-of-line.
    if (!token.hasEOL) continue;

    const startString = currentRow[0]?.str ?? '';
    // Guarded look-ahead: empty string when `token` is the very last token, so
    // the checks below never dereference a missing element.
    const nextStartString = tokens[i + 1]?.str ?? '';

    // The current row is a header row, or the next token begins one.
    const atHeaderBoundary =
      startString.trim() === 'Description' || nextStartString.trim() === 'Description';

    // The current row is a transaction and this line is its last one: the next
    // token starts another transaction, the token sits at x > 500 (presumably the
    // right-most column of the table — TODO confirm), or the page footer follows.
    const atTransactionBoundary =
      isTransactionStart(startString) &&
      (isTransactionStart(nextStartString) ||
        token.transform[4] > 500 ||
        nextStartString.startsWith('For any questions'));

    if (atHeaderBoundary || atTransactionBoundary) {
      rows.push(currentRow);
      currentRow = [];
    }
  }

  // NOTE: tokens still held in `currentRow` here are not emitted as a row.
  return rows;
};

// Recognizes the table header row: "Description", "Incoming", "Outgoing" at token positions 0, 2 and 4.
const isHeaderRow = (row: TextItem[]): boolean =>
  row.length > 4 &&
  row[0].str.replace(/\s+/g, '') === 'Description' &&
  row[2].str.replace(/\s+/g, '') === 'Incoming' &&
  row[4].str.replace(/\s+/g, '') === 'Outgoing';

// Splits a SEPARATOR-joined column string back into its non-blank text pieces.
const splitColumnText = (column: string): string[] =>
  column.split(SEPARATOR).filter((val) => val.trim() !== '');

/**
 * Parses transactions out of the text tokens of a statement.
 *
 * Steps:
 *  1. Group tokens into rows (see `groupTokensIntoRows`).
 *  2. Find the first header row and derive column boundaries from the x
 *     positions (`transform[4]`) of its non-blank tokens.
 *  3. For every row whose first token is a transaction label, bucket the
 *     tokens into columns and build a `Transaction` from columns 0..2
 *     (description/date, incoming, outgoing).
 *
 * The currency is picked up from large rows (more than 30 tokens) containing
 * "statement" or "balance on" and applies to all transactions that follow.
 *
 * @param tokens Text items in reading order.
 * @returns The parsed transactions; an empty array when no header row is found.
 *   `date` holds the raw date text as found in the row (or `''` if none matched).
 */
const parseTransactionsFromRows = (tokens: TextItem[]): Transaction[] => {
  const rowTokens = groupTokensIntoRows(tokens);

  const headerRow = rowTokens.find(isHeaderRow);
  if (!headerRow) return [];

  // x positions of the header labels, left to right.
  const headerX: number[] = headerRow
    .filter((token) => !!token.str.trim())
    .map((token) => token.transform[4])
    .sort((a, b) => a - b);

  // Column k ends 20 units to the left of header k+1; the last column is open-ended.
  // (The 20-unit shift presumably lets values that start slightly left of their
  // header still land in the right column — TODO confirm.)
  const boundaries: number[] = [...headerX.slice(1).map((x) => x - 20), Infinity];
  const numCols = headerX.length;

  const transactions: Transaction[] = [];
  let currency = '';

  for (const row of rowTokens) {
    const sorted = row.slice().sort((a, b) => a.transform[4] - b.transform[4]);

    // Drop a leading token that starts with "ref" so the row is judged by what follows it.
    if (sorted[0]?.str.startsWith('ref')) {
      sorted.shift();
    }

    // Large rows may carry text such as "USD statement" or
    // "EUR balance on 31 December 2024 [GMT]"; the first three consecutive
    // letters of such a token are taken as the currency code.
    if (sorted.length > 30) {
      for (const token of sorted) {
        const lower = token.str.toLowerCase();
        if (lower.includes('statement') || lower.includes('balance on')) {
          const currencyMatch = lower.match(/[a-z]{3}/);
          if (currencyMatch) {
            currency = currencyMatch[0].toUpperCase();
          }
        }
      }
    }

    // Only rows that open with a known transaction label are turned into transactions.
    const startString = clean(sorted[0]?.str);
    if (!isTransactionStart(startString)) {
      continue;
    }

    // Bucket each token's text into the column its x position falls in.
    const columns: string[] = new Array(numCols).fill('');
    for (const token of sorted) {
      const x = token.transform[4];
      let colIndex = 0;
      // Terminates because the last boundary is Infinity.
      while (x > boundaries[colIndex]) {
        colIndex++;
      }
      columns[colIndex] += token.str + SEPARATOR;
    }

    // Column 0 holds the transaction label, the date and free-form description text.
    const ref1 = splitColumnText(columns[0]);

    let desc = '';
    let dateVal = '';
    ref1.forEach((part, index) => {
      const trimmed = part.trim();
      if (DAY_FIRST_DATE_REGEX.test(trimmed) || MONTH_FIRST_DATE_REGEX.test(trimmed)) {
        // If several pieces look like a date, the last one wins.
        dateVal = trimmed;
      } else if (index !== 0) {
        // Piece 0 is reported as `name`, so it is left out of the description.
        desc += part + ' ';
      }
    });

    const incoming = splitColumnText(columns[1]).join('');
    const outgoing = splitColumnText(columns[2]).join('');
    // Prefer the incoming column; fall back to outgoing when incoming is empty.
    const amountVal = cleanAmount(incoming || outgoing);

    transactions.push({
      date: dateVal, // raw date text; normalized later by the caller
      description: desc,
      name: ref1[0], // first non-blank piece of the description column
      amount: Math.abs(amountVal), // stored unsigned; the sign is conveyed by `direction`
      direction: amountVal > 0 ? 'CREDIT' : 'DEBIT',
      currency: currency,
    } as Transaction);
  }
  return transactions;
};
/**
 * Returns copies of the given transactions with `date` rewritten by `formatDate`
 * (i.e. as a `MMM dd, yyyy` string); all other fields are carried over unchanged.
 *
 * @param transactions Transactions whose `date` holds raw statement date text.
 * @param year Not used by the current implementation.
 * @returns A new array of transaction copies with normalized dates.
 * @throws Error propagated from `formatDate` when a date cannot be recognized.
 */
const transform = (transactions: Transaction[], year: number) =>
  transactions.map((entry) => ({ ...entry, date: formatDate(entry.date) }));
/**
 * Extracts transactions from one or more Wise PDF statement files.
 *
 * Files are processed one after another, and the resulting transactions are
 * returned in file order with their dates normalized by `transform`.
 *
 * @param files PDF files to read.
 * @returns All transactions found across the given files.
 * @throws Error when a file yields no transactions. Failures while reading the
 *   file, loading the PDF, or parsing its text also reject the returned promise
 *   (previously such failures inside the load handler left it pending forever).
 */
const processWiseFile = async (files: File[]): Promise<Transaction[]> => {
  // Only the callback-based FileReader is wrapped in a hand-made promise, so
  // nothing asynchronous runs inside an event handler where a thrown error
  // would be lost.
  const readFileBytes = (file: File): Promise<Uint8Array> =>
    new Promise<Uint8Array>((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = () => resolve(new Uint8Array(reader.result as ArrayBuffer));
      reader.onerror = reject;
      reader.readAsArrayBuffer(file);
    });

  // Loads the PDF, collects the text items of every page, and parses them.
  const extractTransactions = async (file: File): Promise<Transaction[]> => {
    const typedArray = await readFileBytes(file);
    const pdf = await getDocument({ data: typedArray }).promise;

    const itemsArray: TextItem[] = [];
    for (let i = 1; i <= pdf.numPages; i++) {
      const page = await pdf.getPage(i);
      const textContent = await page.getTextContent();
      itemsArray.push(...(textContent.items as TextItem[]));
    }
    // All pages are parsed in one go so that rows are grouped across the whole document.
    return parseTransactionsFromRows(itemsArray);
  };

  const allTransactions: Transaction[] = [];
  for (const file of files) {
    const transactions = await extractTransactions(file);
    if (!transactions || transactions.length === 0) {
      throw new Error('No transactions found in the file ' + file.name);
    }
    allTransactions.push(...transactions);
  }

  return transform(allTransactions, 0);
};

export default processWiseFile;
