#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Flask Web Application untuk Extractive Summarization Liputan6
"""

import json
import logging
import os
import shutil
import tempfile
import traceback
from datetime import datetime
from urllib.parse import urlparse

from flask import Flask, render_template, request, jsonify, flash, redirect, url_for
from werkzeug.utils import secure_filename

# Download NLTK data if not already present
def ensure_nltk_data():
    """Ensure the NLTK data packages required for tokenization are available.

    Downloads any missing package and smoke-tests the sentence tokenizer.

    Returns:
        bool: True when setup succeeded, False otherwise (e.g. nltk not
        installed or tokenizer still broken after download attempts).
    """
    try:
        import nltk

        # Map each package to its actual location in the NLTK data tree.
        # Note: 'stopwords' lives under corpora/, not tokenizers/ — looking it
        # up under tokenizers/ would always fail and force a re-download.
        required_data = {
            'punkt': 'tokenizers/punkt',
            'punkt_tab': 'tokenizers/punkt_tab',
            'stopwords': 'corpora/stopwords',
        }

        for data_name, data_path in required_data.items():
            try:
                nltk.data.find(data_path)
            except LookupError:
                try:
                    print(f"Downloading NLTK {data_name}...")
                    nltk.download(data_name, quiet=True)
                except Exception:
                    # 'punkt_tab' only exists in newer NLTK releases;
                    # fall back to the classic 'punkt' tokenizer data.
                    if data_name == 'punkt_tab':
                        try:
                            nltk.download('punkt', quiet=True)
                        except Exception:
                            pass

        # Verify the tokenizer actually works.
        from nltk.tokenize import sent_tokenize
        sent_tokenize("Test sentence.")

        return True

    except Exception as e:
        print(f"Warning: NLTK setup error: {e}")
        return False

# Setup NLTK data
ensure_nltk_data()

# Import our project modules
from web_scraper import Liputan6Scraper
from preprocessing import ArticlePreprocessor, NewsArticleExtractor
from models import BaseExtractiveModel, create_model

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# Read the secret key from the environment when available; the hard-coded
# value is only a development fallback and must not be used in production.
app.secret_key = os.environ.get('FLASK_SECRET_KEY', 'your-secret-key-here')

# Configuration
app.config['MAX_CONTENT_LENGTH'] = 2 * 1024 * 1024 * 1024  # 2GB max file size for BERT models
app.config['UPLOAD_FOLDER'] = 'uploaded_models'
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0  # Disable caching for uploads

# Ensure upload folder exists
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Global in-memory analytics store.
# NOTE(review): reset on every restart and not guarded by a lock — fine for
# the single-process dev server, confirm before running multi-worker.
analytics_data = {
    'total_summaries': 0,
    'processing_times': [],
    'recent_activity': []
}

class SummarizationService:
    """
    Service class that ties together scraping, preprocessing, and the
    extractive summarization model.
    """

    def __init__(self, model_path: str = None):
        """Create the service; optionally load a model right away.

        Args:
            model_path: Optional path to a saved model (.pkl). Loaded only
                when the file exists.
        """
        self.scraper = Liputan6Scraper()
        self.preprocessor = ArticlePreprocessor()
        self.news_extractor = NewsArticleExtractor()
        self.model = None
        self.model_type = None

        # Load a model immediately if a valid path was supplied.
        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def load_model(self, model_path: str):
        """Load a model from file and infer its type from the filename.

        Raises:
            Exception: re-raised from BaseExtractiveModel.load_model on failure.
        """
        try:
            self.model = BaseExtractiveModel.load_model(model_path)
            self.model_type = self._detect_model_type(model_path)
            logger.info(f"Model loaded: {self.model_type}")

        except Exception as e:
            logger.error(f"Error loading model: {e}")
            raise

    @staticmethod
    def _detect_model_type(model_path: str) -> str:
        """Guess the model family from the file name (case-insensitive)."""
        name = model_path.lower()
        if 'textrank' in name:
            return 'TextRank'
        if 'tfidf' in name:
            return 'TF-IDF + Logistic Regression'
        if 'bert' in name:
            return 'BERT-based'
        return 'Unknown'

    def _summarize_sentences(self, sentences, num_sentences):
        """Run the model and build the summary from selected sentences.

        Shared by summarize_from_url and summarize_from_text.

        Returns:
            tuple: (summary_text, selected_sentences) where selected_sentences
            is a list of {'index': int, 'text': str} dicts in original order.
        """
        # Never request more sentences than the article contains.
        num_sentences = min(num_sentences, len(sentences))
        prediction = self.model.predict(sentences, num_sentences=num_sentences)

        # Keep sentences labeled 1 by the model, preserving document order.
        selected_sentences = [
            {'index': i, 'text': sentences[i]}
            for i, label in enumerate(prediction)
            if label == 1
        ]
        summary_text = ' '.join(item['text'] for item in selected_sentences)
        return summary_text, selected_sentences

    def summarize_from_url(self, url: str, num_sentences: int = 3) -> dict:
        """Scrape an article from *url* and produce an extractive summary.

        Returns:
            dict: result payload; 'success' is False and 'error' is set when
            scraping, preprocessing, or prediction fails.
        """
        result = {
            'success': False,
            'url': url,
            'title': '',
            'original_content': '',
            'summary': '',
            'selected_sentences': [],
            'total_sentences': 0,
            'model_type': self.model_type,
            'processing_time': 0,
            'error': None
        }

        start_time = datetime.now()

        try:
            # Step 1: scrape the article.
            logger.info(f"Scraping artikel dari: {url}")
            scraped_data = self.scraper.scrape_article(url)

            if not scraped_data['success']:
                result['error'] = f"Gagal scrape artikel: {scraped_data.get('error', 'Unknown error')}"
                return result

            result['title'] = scraped_data['title']
            result['original_content'] = scraped_data['content']

            # Step 2: preprocessing (sentence splitting / cleaning).
            logger.info("Preprocessing artikel...")
            processed = self.preprocessor.preprocess_article(scraped_data['content'])
            sentences = processed['sentences']

            if not sentences:
                result['error'] = "Tidak ada kalimat yang valid ditemukan dalam artikel"
                return result

            result['total_sentences'] = len(sentences)

            # Step 3: summarization.
            if not self.model:
                result['error'] = "Model belum dimuat"
                return result

            logger.info(f"Generating summary dengan {self.model_type}...")
            summary_text, selected_sentences = self._summarize_sentences(
                sentences, num_sentences)

            result['summary'] = summary_text
            result['selected_sentences'] = selected_sentences
            result['success'] = True
            result['processing_time'] = (datetime.now() - start_time).total_seconds()

            logger.info(f"Summarization berhasil dalam {result['processing_time']:.2f} detik")

            # Record the request in the in-memory analytics store.
            update_analytics('url', result)

        except Exception as e:
            logger.error(f"Error dalam summarization: {e}")
            result['error'] = str(e)

        return result

    def summarize_from_text(self, text: str, title: str = "", num_sentences: int = 3) -> dict:
        """Summarize raw text supplied directly by the caller.

        Returns:
            dict: same payload shape as summarize_from_url (minus 'url').
        """
        result = {
            'success': False,
            'title': title,
            'original_content': text,
            'summary': '',
            'selected_sentences': [],
            'total_sentences': 0,
            'model_type': self.model_type,
            'processing_time': 0,
            'error': None
        }

        start_time = datetime.now()

        try:
            # Preprocessing (sentence splitting / cleaning).
            processed = self.preprocessor.preprocess_article(text)
            sentences = processed['sentences']

            if not sentences:
                result['error'] = "Tidak ada kalimat yang valid ditemukan dalam teks"
                return result

            result['total_sentences'] = len(sentences)

            # Summarization.
            if not self.model:
                result['error'] = "Model belum dimuat"
                return result

            summary_text, selected_sentences = self._summarize_sentences(
                sentences, num_sentences)

            result['summary'] = summary_text
            result['selected_sentences'] = selected_sentences
            result['success'] = True
            result['processing_time'] = (datetime.now() - start_time).total_seconds()

            # Record the request in the in-memory analytics store.
            update_analytics('text', result)

        except Exception as e:
            logger.error(f"Error dalam summarization: {e}")
            result['error'] = str(e)

        return result

def update_analytics(request_type: str, result: dict):
    """Record one summarization request in the in-memory analytics store.

    Every request bumps the total counter; only successful ones contribute
    a processing time and a recent-activity entry.
    """
    global analytics_data

    analytics_data['total_summaries'] += 1

    if not result.get('success'):
        return

    times = analytics_data['processing_times']
    times.append(result['processing_time'])

    # Keep only the 100 most recent processing times.
    if len(times) > 100:
        analytics_data['processing_times'] = times[-100:]

    entry = {
        'time': datetime.now().strftime('%H:%M:%S'),
        'type': request_type.upper(),
        'model': result.get('model_type', 'Unknown'),
        'input_length': len(result.get('original_content', '')),
        'summary_length': len(result.get('summary', '')),
        'processing_time': int(result['processing_time'] * 1000),  # Convert to ms
        'status': 'success'
    }

    # Newest first; cap the feed at 20 entries.
    recent = analytics_data['recent_activity']
    recent.insert(0, entry)
    if len(recent) > 20:
        analytics_data['recent_activity'] = recent[:20]

def allowed_file(filename):
    """Return True when *filename* has a .pkl extension (case-insensitive)."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() == 'pkl'

# Initialize service
# Created without a model; one is loaded later via /load_model, /upload_model,
# or the default-model scan in the __main__ block.
summarization_service = SummarizationService()

@app.route('/')
def index():
    """Render the public landing page."""
    return render_template('landing.html')

def list_saved_models():
    """Return the sorted paths of .pkl model files in the saved_models folder.

    Returns an empty list when the folder is missing or unreadable.
    """
    models_dir = 'saved_models'
    try:
        if not os.path.exists(models_dir):
            return []
        # Collect .pkl entries (case-insensitive) as relative paths.
        pkl_paths = [
            os.path.join(models_dir, entry)
            for entry in os.listdir(models_dir)
            if entry.lower().endswith('.pkl')
        ]
        return sorted(pkl_paths)
    except Exception as e:
        logger.warning(f"Gagal membaca daftar model: {e}")
        return []

@app.route('/app')
def app_page():
    """Render the summarizer page with the available .pkl models for the dropdown."""
    model_files = list_saved_models()
    return render_template('index.html', model_files=model_files)

@app.route('/load_model', methods=['POST'])
def load_model():
    """Load a model chosen from the dropdown (or a raw path as fallback).

    All outcomes flash a message and redirect back to the app page.
    """
    try:
        # The dropdown selection wins; fall back to a manually supplied path.
        model_path = request.form.get('selected_model') or request.form.get('model_path')

        if not model_path:
            flash('Pilih model terlebih dahulu', 'error')
        elif not os.path.exists(model_path):
            flash(f'File model tidak ditemukan: {model_path}', 'error')
        else:
            summarization_service.load_model(model_path)
            flash(f'Model berhasil dimuat: {summarization_service.model_type}', 'success')

    except Exception as e:
        flash(f'Error loading model: {str(e)}', 'error')

    return redirect(url_for('app_page'))

@app.route('/summarize_url', methods=['POST'])
def summarize_url():
    """Summarize an article fetched from a Liputan6 URL.

    Accepts JSON or form data with 'url' and optional 'num_sentences'.
    Returns a JSON payload from SummarizationService.summarize_from_url,
    or {'success': False, 'error': ...} on validation/processing failure.
    """
    try:
        data = request.get_json() if request.is_json else request.form

        url = data.get('url', '').strip()
        num_sentences = int(data.get('num_sentences', 3))

        if not url:
            return jsonify({'success': False, 'error': 'URL tidak boleh kosong'})

        # Validate the parsed hostname rather than a substring match, so a URL
        # like "http://evil.com/?x=liputan6.com" cannot bypass the check.
        host = urlparse(url).netloc.lower().split(':')[0]
        if host != 'liputan6.com' and not host.endswith('.liputan6.com'):
            return jsonify({'success': False, 'error': 'URL harus dari Liputan6.com'})

        # Summarize
        result = summarization_service.summarize_from_url(url, num_sentences)

        return jsonify(result)

    except Exception as e:
        logger.error(f"Error in summarize_url: {e}")
        return jsonify({'success': False, 'error': str(e)})

@app.route('/summarize_text', methods=['POST'])
def summarize_text():
    """Summarize raw text submitted by the user (JSON or form data)."""
    try:
        payload = request.get_json() if request.is_json else request.form

        text = payload.get('text', '').strip()
        title = payload.get('title', '').strip()
        num_sentences = int(payload.get('num_sentences', 3))

        if not text:
            return jsonify({'success': False, 'error': 'Teks tidak boleh kosong'})

        # Delegate the actual work to the service layer.
        summary_result = summarization_service.summarize_from_text(text, title, num_sentences)
        return jsonify(summary_result)

    except Exception as e:
        logger.error(f"Error in summarize_text: {e}")
        return jsonify({'success': False, 'error': str(e)})

@app.route('/upload_model', methods=['POST'])
def upload_model():
    """Upload and load a model file, with support for large (multi-GB) files.

    Validates presence, extension, and size; streams the file to disk in
    chunks; then tries to load it. On load failure the uploaded file is
    deleted again. Returns a JSON status payload in every case.
    """
    try:
        if 'model_file' not in request.files:
            return jsonify({'success': False, 'error': 'No file provided'})

        file = request.files['model_file']

        if file.filename == '':
            return jsonify({'success': False, 'error': 'No file selected'})

        if not allowed_file(file.filename):
            return jsonify({'success': False, 'error': 'File must be a .pkl file'})

        # Determine size without reading the whole stream into memory.
        file.seek(0, 2)  # Seek to end
        file_size = file.tell()
        file.seek(0)  # Reset to beginning

        max_size = 2 * 1024 * 1024 * 1024  # 2GB, matches MAX_CONTENT_LENGTH
        if file_size > max_size:
            return jsonify({
                'success': False, 
                'error': f'File too large: {file_size / (1024*1024*1024):.2f}GB. Maximum allowed: 2GB'
            })

        # Prefix the sanitized name with a timestamp so repeated uploads of
        # the same file never collide. (Previously the sanitized name was
        # discarded and a literal placeholder was used instead.)
        safe_name = secure_filename(file.filename)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"{timestamp}_{safe_name}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

        # Stream to disk in chunks so large files don't exhaust memory.
        chunk_size = 8192  # 8KB chunks
        with open(filepath, 'wb') as f:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)

        logger.info(f"File uploaded successfully: {filename} ({file_size / (1024*1024):.2f}MB)")

        # Try to load the model.
        try:
            summarization_service.load_model(filepath)
            logger.info(f"Model loaded successfully: {filename}")
        except Exception as load_error:
            # Clean up the uploaded file if loading fails.
            if os.path.exists(filepath):
                os.remove(filepath)
            logger.error(f"Failed to load model {filename}: {load_error}")
            return jsonify({
                'success': False, 
                'error': f'Model file uploaded but failed to load: {str(load_error)}'
            })

        return jsonify({
            'success': True, 
            'message': f'Model {filename} uploaded and loaded successfully ({file_size / (1024*1024):.2f}MB)',
            'model_type': summarization_service.model_type,
            'file_size_mb': round(file_size / (1024*1024), 2)
        })

    except Exception as e:
        logger.error(f"Error uploading model: {e}")
        return jsonify({'success': False, 'error': str(e)})

@app.route('/analytics')
def analytics():
    """Render the analytics dashboard page (data comes from /api/analytics)."""
    return render_template('analytics.html')

@app.route('/api/analytics')
def api_analytics():
    """Return aggregated analytics as JSON for the dashboard."""
    times = analytics_data['processing_times']
    # Average processing time in milliseconds; 0 until something is recorded.
    avg_processing_time = (sum(times) / len(times) * 1000) if times else 0

    return jsonify({
        'total_summaries': analytics_data['total_summaries'],
        'avg_processing_time': round(avg_processing_time, 2),
        'model_accuracy': 0.75,  # Placeholder - could be calculated from actual model performance
        'avg_rouge_score': 0.35,  # Placeholder - could be calculated from actual evaluations
        'recent_activity': analytics_data['recent_activity']
    })

@app.route('/api/status')
def api_status():
    """Internal status endpoint: server health, model state, request counter."""
    status_payload = {
        'status': 'running',
        'model_loaded': summarization_service.model is not None,
        'model_type': summarization_service.model_type,
        'timestamp': datetime.now().isoformat(),
        'total_summaries': analytics_data['total_summaries']
    }
    return jsonify(status_payload)

@app.errorhandler(404)
def not_found(error):
    """Render the shared error template for unknown routes."""
    return render_template('error.html', 
                         error_code=404, 
                         error_message="Halaman tidak ditemukan"), 404

@app.errorhandler(413)
def request_entity_too_large(error):
    """Handle uploads exceeding MAX_CONTENT_LENGTH (2GB) with a friendly page."""
    return render_template('error.html', 
                         error_code=413, 
                         error_message="File terlalu besar. Maksimal ukuran file adalah 2GB. Silakan gunakan model yang lebih kecil atau kompres file terlebih dahulu."), 413

@app.errorhandler(500)
def internal_error(error):
    """Render the shared error template for unhandled server errors."""
    return render_template('error.html', 
                         error_code=500, 
                         error_message="Terjadi kesalahan server"), 500

if __name__ == '__main__':
    # Try to load a default model if one exists (first existing file wins).
    default_models = [
        'saved_models/textrank_model.pkl',
        'saved_models/tfidf_lr_model.pkl',
        'saved_models/bert_model.pkl'
    ]
    
    for model_path in default_models:
        if os.path.exists(model_path):
            try:
                summarization_service.load_model(model_path)
                logger.info(f"Default model loaded: {model_path}")
                break
            except Exception as e:
                logger.warning(f"Failed to load {model_path}: {e}")
    
    # Run the Flask development server.
    # NOTE(review): debug=True combined with host='0.0.0.0' exposes the
    # Werkzeug debugger to the whole network — disable debug before deploying.
    app.run(debug=True, host='0.0.0.0', port=5000)
