import { v4 as uuidv4 } from 'uuid';

// NOTE(review): BLOCK_END_BUFFER is not referenced anywhere in the visible part of this file.
// Presumably it is meant as a padding applied to block end times — TODO confirm the intended
// value and units, import it from its real home, or remove it.
const BLOCK_END_BUFFER = 0; // Define or import this constant as needed

/**
 * Splits a word-level transcript into ordered groups ("blocks").
 *
 * Words are appended to an open group until one of the enabled break rules
 * fires (sentence-ending punctuation, character budget, speaker change, long
 * silence). Gaps between words that are at least `longSilenceThreshold` long
 * are rendered as `[...]` markers (one dot per started second) and are either
 * embedded in the surrounding group or emitted as standalone silence groups,
 * depending on `breakOnLongSilences`.
 *
 * @param {Object} transcriptData - Transcript object; must expose a `words` array whose
 *   entries provide `word`, `start`, `end` and (optionally) `speaker`.
 * @param {boolean} breakAtPunctuation - Break after words ending in `.`, `!` or `?`. When
 *   `useMaxCharacters` is also set, punctuation is only used to pick the split point once
 *   the character budget would be exceeded.
 * @param {boolean} useMaxCharacters - Whether to enforce `maxCharPerSection`.
 * @param {number} maxCharPerSection - Maximum characters per group content.
 * @param {boolean} breakOnSpeakerChange - Break when a word's speaker differs from the open group's.
 * @param {boolean} rejectLongSilences - Value of `isRejected` on standalone silence groups.
 * @param {boolean} breakOnLongSilences - Emit long silences as their own groups.
 * @param {number} longSilenceThreshold - Minimum gap (same unit as word timestamps) treated as a long silence.
 * @param {number} transcriptDuration - Total duration, used to detect a trailing silence.
 * @param {boolean} rejectBlocksContaining - Whether word-based rejection is enabled.
 * @param {string} rejectBlocksCondition - `'contain'` rejects groups containing the word; any
 *   other value rejects groups that do NOT contain it.
 * @param {string} rejectBlocksWord - Word to look for (case-insensitive substring match). A blank
 *   or non-string value disables word-based rejection.
 * @returns {Array} Groups with `id`, `start`, `end`, `speaker`, `content`, `words`, `isRejected`
 *   (and `rejectReason` for non-silence groups).
 * @throws {Error} If `transcriptData` is missing, not an object, or has no `words` array.
 */
const processTranscriptToGroups = (
  transcriptData,
  breakAtPunctuation,
  useMaxCharacters,
  maxCharPerSection,
  breakOnSpeakerChange,
  rejectLongSilences,
  breakOnLongSilences,
  longSilenceThreshold,
  transcriptDuration,
  rejectBlocksContaining,
  rejectBlocksCondition,
  rejectBlocksWord
) => {
  console.log('Received transcript data:', JSON.stringify(transcriptData, null, 2));

  // Validate the input shape step by step so the error names the exact problem.
  if (!transcriptData) {
    console.error('transcriptData is null or undefined:', transcriptData);
    throw new Error('Invalid transcript format: transcriptData is missing');
  }

  if (typeof transcriptData !== 'object') {
    console.error('transcriptData is not an object:', typeof transcriptData);
    throw new Error('Invalid transcript format: transcriptData must be an object');
  }

  if (!Array.isArray(transcriptData.words)) {
    console.error('transcriptData.words is not an array:', transcriptData.words);
    throw new Error('Invalid transcript format: words array is missing or invalid');
  }

  if (transcriptData.words.length === 0) {
    console.warn('Warning: transcript contains no words');
  }

  // Log the first few words to verify structure
  console.log('First few words:', transcriptData.words.slice(0, 3));

  console.log('Processing transcript with duration:', transcriptDuration);
  console.log('Long silence threshold:', longSilenceThreshold);

  // No `g` flag, so the regex carries no state between `.test()` calls.
  const SENTENCE_END = /[.!?]$/;

  const words = transcriptData.words;
  const groups = [];
  let currentGroup = { content: '', words: [], speaker: 'default' };

  // Word rejection needs both the switch and a usable, non-blank search word;
  // a missing word must not crash group finalization.
  const wordRejectionActive =
    Boolean(rejectBlocksContaining) &&
    typeof rejectBlocksWord === 'string' &&
    rejectBlocksWord.trim() !== '';

  // Silence entries are synthesized by this function and carry no `.word`.
  const isSilence = (entry) => entry.type === 'silence';

  // Text an entry contributes to a group's content.
  const textOf = (entry) => (isSilence(entry) ? entry.renderedContent : entry.word);

  // Builds the synthetic "word" that represents a silence between `start` and `end`.
  const makeSilenceWord = (start, end) => ({
    type: 'silence',
    renderedContent: '[' + '.'.repeat(Math.ceil(end - start)) + ']',
    start,
    end,
  });

  // Builds a standalone group that contains only the given silence.
  const makeSilenceGroup = (silenceWord) => ({
    id: uuidv4(),
    start: silenceWord.start,
    end: silenceWord.end,
    speaker: '',
    content: silenceWord.renderedContent,
    words: [silenceWord],
    isRejected: rejectLongSilences,
  });

  // Appends an entry (word or silence) to the open group. Silences never change the
  // group's speaker, otherwise they would trigger spurious speaker-change breaks.
  const appendEntry = (entry) => {
    currentGroup.content += (currentGroup.content ? ' ' : '') + textOf(entry);
    currentGroup.words.push(entry);
    if (!isSilence(entry)) {
      currentGroup.speaker = entry.speaker;
    }
  };

  // Pushes the open group (if it has any entries) to `groups` and starts a fresh one.
  const finalizeGroup = () => {
    if (currentGroup.words.length === 0) {
      return;
    }

    let isRejected = false;
    let rejectReason = '';

    if (wordRejectionActive) {
      const containsWord = currentGroup.content
        .toLowerCase()
        .includes(rejectBlocksWord.toLowerCase());

      if (rejectBlocksCondition === 'contain') {
        isRejected = containsWord;
        rejectReason = containsWord ? `Contains "${rejectBlocksWord}"` : '';
      } else {
        isRejected = !containsWord;
        rejectReason = !containsWord ? `Does not contain "${rejectBlocksWord}"` : '';
      }
    }

    groups.push({
      ...currentGroup,
      id: uuidv4(),
      start: currentGroup.words[0].start,
      end: currentGroup.words[currentGroup.words.length - 1].end,
      isRejected,
      rejectReason,
    });
    currentGroup = { content: '', words: [] };
  };

  // Handles the gap between two consecutive words when it qualifies as a long silence.
  const processSilence = (start, end) => {
    const silenceDuration = end - start;
    // Written as a negated `>=` so a non-numeric threshold keeps disabling silences.
    if (!(silenceDuration >= longSilenceThreshold)) {
      return;
    }

    const silenceWord = makeSilenceWord(start, end);
    const { renderedContent } = silenceWord;
    console.log('Processing silence:', { start, end, duration: silenceDuration, renderedContent });

    // Close the open group first if the marker would push it over the character budget.
    const totalChars = currentGroup.content.length + renderedContent.length;
    if (useMaxCharacters && totalChars > maxCharPerSection) {
      finalizeGroup();
    }

    if (breakOnLongSilences) {
      // Always emit a dedicated silence group here, even when the open group was just
      // closed above, so `rejectLongSilences` is honoured and word-rejection rules are
      // never applied to a pure silence.
      finalizeGroup();
      groups.push(makeSilenceGroup(silenceWord));
    } else {
      appendEntry(silenceWord);
    }
  };

  // When adding `nextWord` would exceed the character budget, splits the open group after
  // its last sentence-ending word: everything up to it is finalized, the remainder seeds
  // the next group. Returns true when such a split happened.
  const splitAtLastPunctuation = (nextWord) => {
    if (!breakAtPunctuation || !useMaxCharacters) {
      return false;
    }
    if (!(currentGroup.content.length + nextWord.word.length > maxCharPerSection)) {
      return false;
    }

    for (let i = currentGroup.words.length - 1; i >= 0; i--) {
      const candidate = currentGroup.words[i];
      if (isSilence(candidate) || !SENTENCE_END.test(candidate.word)) {
        continue;
      }

      const entriesToKeep = currentGroup.words.slice(0, i + 1);
      const entriesToMove = currentGroup.words.slice(i + 1);

      // Rebuild the content from the kept entries; silences contribute their marker.
      currentGroup.words = entriesToKeep;
      currentGroup.content = entriesToKeep.map((entry) => textOf(entry)).join(' ');
      finalizeGroup();

      entriesToMove.forEach((entry) => appendEntry(entry));
      return true;
    }
    return false;
  };

  // Silence before the first word.
  if (words.length > 0 && words[0].start >= longSilenceThreshold) {
    const leadingSilence = makeSilenceWord(0, words[0].start);
    if ((useMaxCharacters && maxCharPerSection <= 1) || breakOnLongSilences) {
      groups.push(makeSilenceGroup(leadingSilence));
    } else {
      appendEntry(leadingSilence);
    }
  }

  words.forEach((word, index) => {
    if (index > 0) {
      const prevWordEnd = words[index - 1].end;
      if (word.start > prevWordEnd) {
        processSilence(prevWordEnd, word.start);
      }
    }

    // NOTE: splitAtLastPunctuation has side effects (it may finalize a group); the
    // short-circuit order of this expression is significant.
    const shouldBreak =
      // Plain punctuation breaks only apply when no character budget is in use.
      (!useMaxCharacters &&
        breakAtPunctuation &&
        index > 0 &&
        SENTENCE_END.test(words[index - 1].word)) ||
      // With a budget: prefer a punctuation split, otherwise break hard at the budget.
      (useMaxCharacters &&
        !splitAtLastPunctuation(word) &&
        currentGroup.content.length + word.word.length > maxCharPerSection) ||
      (breakOnSpeakerChange && currentGroup.speaker !== word.speaker) ||
      (breakOnLongSilences &&
        index > 0 &&
        word.start - words[index - 1].end >= longSilenceThreshold);

    if (shouldBreak) {
      finalizeGroup();
    }

    // Final budget check, this time accounting for the separating space.
    if (useMaxCharacters) {
      const spaceNeeded = currentGroup.content ? 1 : 0;
      const totalChars = currentGroup.content.length + word.word.length + spaceNeeded;
      if (totalChars > maxCharPerSection) {
        finalizeGroup();
      }
    }

    // Add word to current group
    currentGroup.content += (currentGroup.content ? ' ' : '') + word.word;
    currentGroup.words.push(word);
    currentGroup.speaker = word.speaker;
  });

  // Flush whatever is still open after the last word.
  finalizeGroup();

  // Silence after the last word.
  if (words.length > 0) {
    const lastWordEnd = words[words.length - 1].end;
    const silenceDuration = transcriptDuration - lastWordEnd;

    if (silenceDuration >= longSilenceThreshold) {
      const trailingSilence = makeSilenceWord(lastWordEnd, transcriptDuration);
      if (breakOnLongSilences) {
        groups.push(makeSilenceGroup(trailingSilence));
      } else {
        // Attach the marker to the last emitted group and stretch it to the transcript end.
        const lastGroup = groups[groups.length - 1];
        if (lastGroup) {
          lastGroup.content += ' ' + trailingSilence.renderedContent;
          lastGroup.words.push(trailingSilence);
          lastGroup.end = transcriptDuration;
        }
      }
    }
  }

  console.log('Final processed groups:', groups);
  return groups;
};

// Service facade: exposes the transcript-grouping entry point as a property of the
// module's default export.
const TranscriptProcessorService = {
  processTranscriptToGroups: processTranscriptToGroups,
};

export default TranscriptProcessorService;

//Path: src/services/TranscriptProcessorService/index.js
