import * as FileSystem from 'expo-file-system';
import { Platform } from 'react-native';
// @ts-expect-error
import computeMd5 from 'blueimp-md5';

/** A single timed caption (cue) extracted from a WebVTT file by `parseVTT`. */
type VTTCaption = {
  /** Cue start time in milliseconds, converted from the timestamp before ` --> `. */
  startMillis: number;
  /** Cue end time in milliseconds, converted from the timestamp after ` --> `. */
  endMillis: number;
  /** Cue text; multi-line cues keep their lines joined with `\n`. */
  caption: string;
};

/** Result of parsing a WebVTT document. */
export type VTTObject = {
  /** Captions in the order they were encountered in the source text. */
  captions: VTTCaption[];
};

/**
 * Maps a media URI to the local path used for its preloaded copy.
 *
 * Copied from MediaPlayer to prevent a circular reference; keep both copies in
 * sync so they resolve the same URI to the same file.
 *
 * @param uri URI string, or a number.
 * @returns `null` on web or when `uri` is a number; otherwise a path of the form
 *   `<documentDirectory>mediaplayer-preload/<hash>.<extension>`, where `<hash>` is
 *   `computeMd5(uri)` and `<extension>` is the text after the last `.` in `uri`.
 */
function getCachedUri<T extends string | number>(uri: T): T extends number ? null : string {
  type Return = T extends number ? null : string;

  const isCacheable = Platform.OS !== 'web' && typeof uri !== 'number';
  if (!isCacheable) {
    return null as Return;
  }

  // Everything after the final dot is treated as the file extension.
  const segments = (uri as string).split('.');
  const extension = segments[segments.length - 1];
  const hash = computeMd5(uri);
  const cacheDir = `${FileSystem.documentDirectory}mediaplayer-preload/`;
  return `${cacheDir}${hash}.${extension}` as Return;
}

/**
 * Downloads `uri` to the path given by `getCachedUri(uri)`.
 *
 * Does nothing on web. On other platforms the `mediaplayer-preload` directory
 * under the document directory is created (with intermediates) before the
 * download starts.
 *
 * @param uri Remote URI to download.
 * @returns `undefined` on web; otherwise the result of `FileSystem.downloadAsync`.
 */
async function preloadMedia(uri: string) {
  if (Platform.OS !== 'web') {
    const preloadDir = `${FileSystem.documentDirectory}mediaplayer-preload`;
    await FileSystem.makeDirectoryAsync(preloadDir, { intermediates: true });
    return FileSystem.downloadAsync(uri, getCachedUri(uri));
  }
  return undefined;
}

/**
 * Converts a WebVTT timestamp to whole milliseconds.
 *
 * Accepts both the full `hh:mm:ss.ttt` form and the short `mm:ss.ttt` form
 * (WebVTT allows the hours field to be omitted). Anything following the first
 * run of whitespace, such as cue settings after the end timestamp, is ignored.
 *
 * @example
 * timestampToMillis('00:00:01.040'); // 1040
 * timestampToMillis('01:30.500');    // 90500
 *
 * @param timestamp Timestamp text, optionally followed by whitespace and extra content.
 * @returns The timestamp in milliseconds, rounded to the nearest integer, or
 *   `NaN` when a field cannot be parsed as a number.
 */
function timestampToMillis(timestamp: string): number {
  // Keep only the timestamp token so trailing cue settings (e.g. `align:start`)
  // cannot be mistaken for extra `:`-separated fields.
  const [clock = ''] = timestamp.trim().split(/\s+/);
  const fields = clock.split(':').map((field) => Number.parseFloat(field));

  // Hours are optional: left-pad so the fields are always [hours, minutes, seconds].
  while (fields.length < 3) fields.unshift(0);
  const [hours = 0, minutes = 0, seconds = 0] = fields;

  const totalSeconds = hours * 60 * 60 + minutes * 60 + seconds;
  // Round away floating-point noise so callers always get integral milliseconds.
  return Math.round(totalSeconds * 1000);
}

/**
 * Parses the text of a WebVTT file into a list of timed captions.
 *
 * The text is split into blocks on blank lines. Blocks starting with `WEBVTT`
 * or `NOTE` are skipped, as is any block whose timing line does not contain
 * ` --> `. For each remaining block the start and end timestamps are converted
 * to milliseconds and the following lines become the caption text, with
 * `&nbsp;` replaced by a space and all voice tags (`<v ...>` / `</v>`) removed.
 *
 * @param text Raw contents of a `.vtt` file; CRLF, CR and LF line endings are accepted.
 * @returns The captions in the order they appear in `text`.
 */
export function parseVTT(text: string): VTTObject {
  const captions: VTTCaption[] = [];

  // Normalise every line-ending style to LF, then split on runs of blank lines
  // so extra spacing between blocks does not leave a leading newline behind.
  const blocks = text.replace(/\r\n?/g, '\n').split(/\n{2,}/);

  for (const block of blocks) {
    if (block.startsWith('WEBVTT') || block.startsWith('NOTE')) continue;

    // A cue may begin with an identifier line; skip ahead to the first thing
    // that looks like a timestamp.
    const timestampStartIndex = block.match(/\d\d:/)?.index ?? 0;
    const [timings = '', ...textLines] = block.slice(timestampStartIndex).split('\n');
    if (!timings.includes(' --> ')) continue;

    const [start = '', end = ''] = timings.split(' --> ');
    captions.push({
      startMillis: timestampToMillis(start),
      endMillis: timestampToMillis(end),
      caption: textLines
        .join('\n')
        .replace(/&nbsp;/g, ' ')
        // <v> spans identify the speaker. The annotation may be a full name with
        // spaces, a "-" placeholder, or carry classes (<v.loud Name>), and a cue can
        // contain several of them, so strip every opening and closing tag.
        .replace(/<v\b[^>]*>/g, '')
        .replace(/<\/v>/g, ''),
    });
  }

  return { captions };
}

/**
 * Loads a WebVTT file from `uri` and parses it with `parseVTT`.
 *
 * On web the file is fetched directly. On other platforms the file is read from
 * the path given by `getCachedUri(uri)`, and is downloaded there first via
 * `preloadMedia` when no file exists at that path yet.
 *
 * @param uri Remote URI of the `.vtt` file.
 * @returns The parsed captions.
 */
export async function parseRemoteVTT(uri: string) {
  if (Platform.OS !== 'web') {
    const cachedPath = getCachedUri(uri);
    const info = await FileSystem.getInfoAsync(cachedPath);
    if (!info.exists) {
      // Nothing cached yet: download before reading.
      await preloadMedia(uri);
    }
    const contents = await FileSystem.readAsStringAsync(cachedPath);
    return parseVTT(contents);
  }

  const response = await fetch(uri);
  const body = await response.text();
  return parseVTT(body);
}

/**
 * Builds a plain-text transcript from the WebVTT file at `uri`.
 *
 * @param uri Remote URI of the `.vtt` file.
 * @returns One string per caption, in caption order, with newlines replaced by
 *   spaces and surrounding whitespace trimmed.
 */
export async function getVTTTranscript(uri: string): Promise<string[]> {
  const { captions } = await parseRemoteVTT(uri);
  const transcript: string[] = [];
  for (const { caption } of captions) {
    const singleLine = caption.replace(/\n/g, ' ');
    transcript.push(singleLine.trim());
  }
  return transcript;
}
