// yt - A fully featured command line YouTube client
//
// Copyright (C) 2025 Benedikt Peetz <benedikt.peetz@b-peetz.de>
// Copyright (C) 2025 uutils developers
// SPDX-License-Identifier: MIT
//
// This file is part of Yt.
//
// You should have received a copy of the License along with this program.
// If not, see <https://www.gnu.org/licenses/gpl-3.0.txt>.

// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use std::fmt::Write;
use std::{cmp, mem};

use crate::FmtOptions;
use crate::parasplit::{ParaWords, Paragraph, WordInfo};

struct BreakArgs<'a> {
    opts: &'a FmtOptions,
    init_len: usize,
    indent_str: &'a str,
    indent_len: usize,
    uniform: bool,
    output: String,
}

impl BreakArgs<'_> {
    fn compute_width(&self, winfo: &WordInfo<'_>, position_n: usize, fresh: bool) -> usize {
        if fresh {
            0
        } else {
            let post = winfo.after_tab;
            match winfo.before_tab {
                None => post,
                Some(pre) => {
                    post + ((pre + position_n) / self.opts.tabwidth + 1) * self.opts.tabwidth
                        - position_n
                }
            }
        }
    }
}

pub(super) fn break_lines(para: &Paragraph, opts: &FmtOptions) -> String {
    let mut output = String::new();

    // indent
    let p_indent = &para.indent_str;
    let p_indent_len = para.indent_len;

    // words
    let p_words = ParaWords::new(opts, para);
    let mut p_words_words = p_words.words();

    // the first word will *always* appear on the first line
    // make sure of this here
    let Some(winfo) = p_words_words.next() else {
        return "\n".to_owned();
    };

    // print the init, if it exists, and get its length
    let p_init_len = winfo.word_nchars
        + if opts.crown_margin || opts.tagged_paragraph {
            // handle "init" portion
            output.push_str(&para.init_str);
            para.init_len
        } else if !para.mail_header {
            // for non-(crown, tagged) that's the same as a normal indent
            output.push_str(p_indent);
            p_indent_len
        } else {
            // except that mail headers get no indent at all
            0
        };

    // write first word after writing init
    write!(output, "{}", winfo.word).expect("Works");

    // does this paragraph require uniform spacing?
    let uniform = para.mail_header || opts.uniform;

    let mut break_args = BreakArgs {
        opts,
        init_len: p_init_len,
        indent_str: p_indent,
        indent_len: p_indent_len,
        uniform,
        output,
    };

    if opts.quick || para.mail_header {
        break_simple(p_words_words, &mut break_args);
    } else {
        break_knuth_plass(p_words_words, &mut break_args);
    };

    break_args.output
}

// break_simple implements a "greedy" breaking algorithm: print words until
// maxlength would be exceeded, then print a linebreak and indent and continue.
fn break_simple<'a, T: Iterator<Item = &'a WordInfo<'a>>>(iter: T, args: &mut BreakArgs<'a>) {
    iter.fold((args.init_len, false), |(l, prev_punct), winfo| {
        accum_words_simple(args, l, prev_punct, winfo)
    });
    args.output.push('\n');
}

fn accum_words_simple<'a>(
    args: &mut BreakArgs<'a>,
    l: usize,
    prev_punct: bool,
    winfo: &'a WordInfo<'a>,
) -> (usize, bool) {
    // compute the length of this word, considering how tabs will expand at this position on the line
    let wlen = winfo.word_nchars + args.compute_width(winfo, l, false);

    let slen = compute_slen(
        args.uniform,
        winfo.new_line,
        winfo.sentence_start,
        prev_punct,
    );

    if l + wlen + slen > args.opts.width {
        write_newline(args.indent_str, &mut args.output);
        write_with_spaces(&winfo.word[winfo.word_start..], 0, &mut args.output);
        (args.indent_len + winfo.word_nchars, winfo.ends_punct)
    } else {
        write_with_spaces(winfo.word, slen, &mut args.output);
        (l + wlen + slen, winfo.ends_punct)
    }
}

// break_knuth_plass implements an "optimal" breaking algorithm in the style of
//    Knuth, D.E., and Plass, M.F. "Breaking Paragraphs into Lines." in Software,
//    Practice and Experience. Vol. 11, No. 11, November 1981.
//    http://onlinelibrary.wiley.com/doi/10.1002/spe.4380111102/pdf
#[allow(trivial_casts)]
fn break_knuth_plass<'a, T: Clone + Iterator<Item = &'a WordInfo<'a>>>(
    mut iter: T,
    args: &mut BreakArgs<'a>,
) {
    // run the algorithm to get the breakpoints
    let breakpoints = find_kp_breakpoints(iter.clone(), args);

    // iterate through the breakpoints (note that breakpoints is in reverse break order, so we .rev() it
    let result: (bool, bool) = breakpoints.iter().rev().fold(
        (false, false),
        |(mut prev_punct, mut fresh), &(next_break, break_before)| {
            if fresh {
                write_newline(args.indent_str, &mut args.output);
            }
            // at each breakpoint, keep emitting words until we find the word matching this breakpoint
            for winfo in &mut iter {
                let (slen, word) = slice_if_fresh(
                    fresh,
                    winfo.word,
                    winfo.word_start,
                    args.uniform,
                    winfo.new_line,
                    winfo.sentence_start,
                    prev_punct,
                );
                fresh = false;
                prev_punct = winfo.ends_punct;

                // We find identical breakpoints here by comparing addresses of the references.
                // This is OK because the backing vector is not mutating once we are linebreaking.
                let winfo_ptr = winfo as *const _;
                let next_break_ptr = next_break as *const _;
                if winfo_ptr == next_break_ptr {
                    // OK, we found the matching word
                    if break_before {
                        write_newline(args.indent_str, &mut args.output);
                        write_with_spaces(&winfo.word[winfo.word_start..], 0, &mut args.output);
                    } else {
                        // breaking after this word, so that means "fresh" is true for the next iteration
                        write_with_spaces(word, slen, &mut args.output);
                        fresh = true;
                    }
                    break;
                }
                write_with_spaces(word, slen, &mut args.output);
            }
            (prev_punct, fresh)
        },
    );
    let (mut prev_punct, mut fresh) = result;

    // after the last linebreak, write out the rest of the final line.
    for winfo in iter {
        if fresh {
            write_newline(args.indent_str, &mut args.output);
        }
        let (slen, word) = slice_if_fresh(
            fresh,
            winfo.word,
            winfo.word_start,
            args.uniform,
            winfo.new_line,
            winfo.sentence_start,
            prev_punct,
        );
        prev_punct = winfo.ends_punct;
        fresh = false;
        write_with_spaces(word, slen, &mut args.output);
    }

    args.output.push('\n');
}

struct LineBreak<'a> {
    prev: usize,
    linebreak: Option<&'a WordInfo<'a>>,
    break_before: bool,
    demerits: i64,
    prev_rat: f32,
    length: usize,
    fresh: bool,
}

#[allow(clippy::cognitive_complexity)]
#[allow(clippy::cast_possible_wrap)]
fn find_kp_breakpoints<'a, T: Iterator<Item = &'a WordInfo<'a>>>(
    iter: T,
    args: &BreakArgs<'a>,
) -> Vec<(&'a WordInfo<'a>, bool)> {
    let mut iter = iter.peekable();
    // set up the initial null linebreak
    let mut linebreaks = vec![LineBreak {
        prev: 0,
        linebreak: None,
        break_before: false,
        demerits: 0,
        prev_rat: 0.0,
        length: args.init_len,
        fresh: false,
    }];
    // this vec holds the current active linebreaks; next_ holds the breaks that will be active for
    // the next word
    let mut active_breaks = vec![0];
    let mut next_active_breaks = vec![];

    let stretch = args.opts.width - args.opts.goal;
    let minlength = args.opts.goal.max(stretch + 1) - stretch;
    let mut new_linebreaks = vec![];
    let mut is_sentence_start = false;
    let mut least_demerits = 0;
    loop {
        let Some(w) = iter.next() else { break };

        // if this is the last word, we don't add additional demerits for this break
        let (is_last_word, is_sentence_end) = match iter.peek() {
            None => (true, true),
            Some(&&WordInfo {
                sentence_start: st,
                new_line: nl,
                ..
            }) => (false, st || (nl && w.ends_punct)),
        };

        // should we be adding extra space at the beginning of the next sentence?
        let slen = compute_slen(args.uniform, w.new_line, is_sentence_start, false);

        let mut ld_new = i64::MAX;
        let mut ld_next = i64::MAX;
        let mut ld_idx = 0;
        new_linebreaks.clear();
        next_active_breaks.clear();
        // go through each active break, extending it and possibly adding a new active
        // break if we are above the minimum required length
        #[allow(clippy::explicit_iter_loop)]
        for &i in active_breaks.iter() {
            let active = &mut linebreaks[i];
            // normalize demerits to avoid overflow, and record if this is the least
            active.demerits -= least_demerits;
            if active.demerits < ld_next {
                ld_next = active.demerits;
                ld_idx = i;
            }

            // get the new length
            let tlen = w.word_nchars
                + args.compute_width(w, active.length, active.fresh)
                + slen
                + active.length;

            // if tlen is longer than args.opts.width, we drop this break from the active list
            // otherwise, we extend the break, and possibly add a new break at this point
            if tlen <= args.opts.width {
                // this break will still be active next time
                next_active_breaks.push(i);
                // we can put this word on this line
                active.fresh = false;
                active.length = tlen;

                // if we're above the minlength, we can also consider breaking here
                if tlen >= minlength {
                    let (new_demerits, new_ratio) = if is_last_word {
                        // there is no penalty for the final line's length
                        (0, 0.0)
                    } else {
                        compute_demerits(
                            args.opts.goal as isize - tlen as isize,
                            stretch,
                            w.word_nchars,
                            active.prev_rat,
                        )
                    };

                    // do not even consider adding a line that has too many demerits
                    // also, try to detect overflow by checking signum
                    let total_demerits = new_demerits + active.demerits;
                    if new_demerits < BAD_INFTY_SQ
                        && total_demerits < ld_new
                        && active.demerits.signum() <= new_demerits.signum()
                    {
                        ld_new = total_demerits;
                        new_linebreaks.push(LineBreak {
                            prev: i,
                            linebreak: Some(w),
                            break_before: false,
                            demerits: total_demerits,
                            prev_rat: new_ratio,
                            length: args.indent_len,
                            fresh: true,
                        });
                    }
                }
            }
        }

        // if we generated any new linebreaks, add the last one to the list
        // the last one is always the best because we don't add to new_linebreaks unless
        // it's better than the best one so far
        match new_linebreaks.pop() {
            None => (),
            Some(lb) => {
                next_active_breaks.push(linebreaks.len());
                linebreaks.push(lb);
            }
        }

        if next_active_breaks.is_empty() {
            // every potential linebreak is too long! choose the linebreak with the least demerits, ld_idx
            let new_break =
                restart_active_breaks(args, &linebreaks[ld_idx], ld_idx, w, slen, minlength);
            next_active_breaks.push(linebreaks.len());
            linebreaks.push(new_break);
            least_demerits = 0;
        } else {
            // next time around, normalize out the demerits fields
            // on active linebreaks to make overflow less likely
            least_demerits = cmp::max(ld_next, 0);
        }
        // swap in new list of active breaks
        mem::swap(&mut active_breaks, &mut next_active_breaks);
        // If this was the last word in a sentence, the next one must be the first in the next.
        is_sentence_start = is_sentence_end;
    }

    // return the best path
    build_best_path(&linebreaks, &active_breaks)
}

fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> {
    // of the active paths, we select the one with the fewest demerits
    active
        .iter()
        .min_by_key(|&&a| paths[a].demerits)
        .map(|&(mut best_idx)| {
            let mut breakwords = vec![];
            // now, chase the pointers back through the break list, recording
            // the words at which we should break
            loop {
                let next_best = &paths[best_idx];
                match next_best.linebreak {
                    None => return breakwords,
                    Some(prev) => {
                        breakwords.push((prev, next_best.break_before));
                        best_idx = next_best.prev;
                    }
                }
            }
        })
        .unwrap_or_default()
}

// "infinite" badness is more like (1+BAD_INFTY)^2 because of how demerits are computed
const BAD_INFTY: i64 = 10_000_000;
const BAD_INFTY_SQ: i64 = BAD_INFTY * BAD_INFTY;
// badness = BAD_MULT * abs(r) ^ 3
const BAD_MULT: f32 = 100.0;
// DR_MULT is multiplier for delta-R between lines
const DR_MULT: f32 = 600.0;
// DL_MULT is penalty multiplier for short words at end of line
const DL_MULT: f32 = 300.0;

#[allow(clippy::cast_precision_loss)]
#[allow(clippy::cast_possible_truncation)]
fn compute_demerits(delta_len: isize, stretch: usize, wlen: usize, prev_rat: f32) -> (i64, f32) {
    // how much stretch are we using?
    let ratio = if delta_len == 0 {
        0.0f32
    } else {
        delta_len as f32 / stretch as f32
    };

    // compute badness given the stretch ratio
    let bad_linelen = if ratio.abs() > 1.0f32 {
        BAD_INFTY
    } else {
        (BAD_MULT * ratio.powi(3).abs()) as i64
    };

    // we penalize lines ending in really short words
    let bad_wordlen = if wlen >= stretch {
        0
    } else {
        (DL_MULT
            * ((stretch - wlen) as f32 / (stretch - 1) as f32)
                .powi(3)
                .abs()) as i64
    };

    // we penalize lines that have very different ratios from previous lines
    let bad_delta_r = (DR_MULT * ((ratio - prev_rat) / 2.0).powi(3).abs()) as i64;

    let demerits = i64::pow(1 + bad_linelen + bad_wordlen + bad_delta_r, 2);

    (demerits, ratio)
}

#[allow(clippy::cast_possible_wrap)]
fn restart_active_breaks<'a>(
    args: &BreakArgs<'a>,
    active: &LineBreak<'a>,
    act_idx: usize,
    w: &'a WordInfo<'a>,
    slen: usize,
    min: usize,
) -> LineBreak<'a> {
    let (break_before, line_length) = if active.fresh {
        // never break before a word if that word would be the first on a line
        (false, args.indent_len)
    } else {
        // choose the lesser evil: breaking too early, or breaking too late
        let wlen = w.word_nchars + args.compute_width(w, active.length, active.fresh);
        let underlen = min as isize - active.length as isize;
        let overlen = (wlen + slen + active.length) as isize - args.opts.width as isize;
        if overlen > underlen {
            // break early, put this word on the next line
            (true, args.indent_len + w.word_nchars)
        } else {
            (false, args.indent_len)
        }
    };

    // restart the linebreak. This will be our only active path.
    LineBreak {
        prev: act_idx,
        linebreak: Some(w),
        break_before,
        demerits: 0, // this is the only active break, so we can reset the demerit count
        prev_rat: if break_before { 1.0 } else { -1.0 },
        length: line_length,
        fresh: !break_before,
    }
}

// Number of spaces to add before a word, based on mode, newline, sentence start.
#[allow(clippy::fn_params_excessive_bools)]
fn compute_slen(uniform: bool, newline: bool, start: bool, punct: bool) -> usize {
    if uniform || newline {
        if start || (newline && punct) { 2 } else { 1 }
    } else {
        0
    }
}

// If we're on a fresh line, slen=0 and we slice off leading whitespace.
// Otherwise, compute slen and leave whitespace alone.
#[allow(clippy::fn_params_excessive_bools)]
fn slice_if_fresh(
    fresh: bool,
    word: &str,
    start: usize,
    uniform: bool,
    newline: bool,
    second_start: bool,
    punct: bool,
) -> (usize, &str) {
    if fresh {
        (0, &word[start..])
    } else {
        (compute_slen(uniform, newline, second_start, punct), word)
    }
}

// Write a newline and add the indent.
fn write_newline(indent: &str, output: &mut String) {
    output.push('\n');
    output.push_str(indent);
}

// Write the word, along with slen spaces.
fn write_with_spaces(word: &str, slen: usize, output: &mut String) {
    if slen == 2 {
        output.push_str("  ");
    } else if slen == 1 {
        output.push(' ');
    }
    output.push_str(word);
}