mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-07 16:44:50 +08:00
feat: b5-context-compress — batch 5 wave 2
This commit is contained in:
@@ -249,13 +249,14 @@ impl TerminalRenderer {
|
||||
|
||||
#[must_use]
|
||||
pub fn render_markdown(&self, markdown: &str) -> String {
|
||||
let normalized = normalize_nested_fences(markdown);
|
||||
let mut output = String::new();
|
||||
let mut state = RenderState::default();
|
||||
let mut code_language = String::new();
|
||||
let mut code_buffer = String::new();
|
||||
let mut in_code_block = false;
|
||||
|
||||
for event in Parser::new_ext(markdown, Options::all()) {
|
||||
for event in Parser::new_ext(&normalized, Options::all()) {
|
||||
self.render_event(
|
||||
event,
|
||||
&mut state,
|
||||
@@ -634,6 +635,178 @@ fn apply_code_block_background(line: &str) -> String {
|
||||
format!("\u{1b}[48;5;236m{with_background}\u{1b}[0m{trailing_newline}")
|
||||
}
|
||||
|
||||
/// Pre-process raw markdown so that fenced code blocks whose body contains
|
||||
/// fence markers of equal or greater length are wrapped with a longer fence.
|
||||
///
|
||||
/// LLMs frequently emit triple-backtick code blocks that contain triple-backtick
|
||||
/// examples. CommonMark (and pulldown-cmark) treats the inner marker as the
|
||||
/// closing fence, breaking the render. This function detects the situation and
|
||||
/// upgrades the outer fence to use enough backticks (or tildes) that the inner
|
||||
/// markers become ordinary content.
|
||||
fn normalize_nested_fences(markdown: &str) -> String {
|
||||
// A fence line is either "labeled" (has an info string ⇒ always an opener)
|
||||
// or "bare" (no info string ⇒ could be opener or closer).
|
||||
#[derive(Debug, Clone)]
|
||||
struct FenceLine {
|
||||
char: char,
|
||||
len: usize,
|
||||
has_info: bool,
|
||||
indent: usize,
|
||||
}
|
||||
|
||||
fn parse_fence_line(line: &str) -> Option<FenceLine> {
|
||||
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
|
||||
let indent = trimmed.chars().take_while(|c| *c == ' ').count();
|
||||
if indent > 3 {
|
||||
return None;
|
||||
}
|
||||
let rest = &trimmed[indent..];
|
||||
let ch = rest.chars().next()?;
|
||||
if ch != '`' && ch != '~' {
|
||||
return None;
|
||||
}
|
||||
let len = rest.chars().take_while(|c| *c == ch).count();
|
||||
if len < 3 {
|
||||
return None;
|
||||
}
|
||||
let after = &rest[len..];
|
||||
if ch == '`' && after.contains('`') {
|
||||
return None;
|
||||
}
|
||||
let has_info = !after.trim().is_empty();
|
||||
Some(FenceLine {
|
||||
char: ch,
|
||||
len,
|
||||
has_info,
|
||||
indent,
|
||||
})
|
||||
}
|
||||
|
||||
let lines: Vec<&str> = markdown.split_inclusive('\n').collect();
|
||||
// Handle final line that may lack trailing newline.
|
||||
// split_inclusive already keeps the original chunks, including a
|
||||
// final chunk without '\n' if the input doesn't end with one.
|
||||
|
||||
// First pass: classify every line.
|
||||
let fence_info: Vec<Option<FenceLine>> = lines.iter().map(|l| parse_fence_line(l)).collect();
|
||||
|
||||
// Second pass: pair openers with closers using a stack, recording
|
||||
// (opener_idx, closer_idx) pairs plus the max fence length found between
|
||||
// them.
|
||||
struct StackEntry {
|
||||
line_idx: usize,
|
||||
fence: FenceLine,
|
||||
}
|
||||
|
||||
let mut stack: Vec<StackEntry> = Vec::new();
|
||||
// Paired blocks: (opener_line, closer_line, max_inner_fence_len)
|
||||
let mut pairs: Vec<(usize, usize, usize)> = Vec::new();
|
||||
|
||||
for (i, fi) in fence_info.iter().enumerate() {
|
||||
let Some(fl) = fi else { continue };
|
||||
|
||||
if fl.has_info {
|
||||
// Labeled fence ⇒ always an opener.
|
||||
stack.push(StackEntry {
|
||||
line_idx: i,
|
||||
fence: fl.clone(),
|
||||
});
|
||||
} else {
|
||||
// Bare fence ⇒ try to close the top of the stack if compatible.
|
||||
let closes_top = stack
|
||||
.last()
|
||||
.is_some_and(|top| top.fence.char == fl.char && fl.len >= top.fence.len);
|
||||
if closes_top {
|
||||
let opener = stack.pop().unwrap();
|
||||
// Find max fence length of any fence line strictly between
|
||||
// opener and closer (these are the nested fences).
|
||||
let inner_max = fence_info[opener.line_idx + 1..i]
|
||||
.iter()
|
||||
.filter_map(|fi| fi.as_ref().map(|f| f.len))
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
pairs.push((opener.line_idx, i, inner_max));
|
||||
} else {
|
||||
// Treat as opener.
|
||||
stack.push(StackEntry {
|
||||
line_idx: i,
|
||||
fence: fl.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Determine which lines need rewriting. A pair needs rewriting when
|
||||
// its opener length <= max inner fence length.
|
||||
struct Rewrite {
|
||||
char: char,
|
||||
new_len: usize,
|
||||
indent: usize,
|
||||
}
|
||||
let mut rewrites: std::collections::HashMap<usize, Rewrite> = std::collections::HashMap::new();
|
||||
|
||||
for (opener_idx, closer_idx, inner_max) in &pairs {
|
||||
let opener_fl = fence_info[*opener_idx].as_ref().unwrap();
|
||||
if opener_fl.len <= *inner_max {
|
||||
let new_len = inner_max + 1;
|
||||
let info_part = {
|
||||
let trimmed = lines[*opener_idx]
|
||||
.trim_end_matches('\n')
|
||||
.trim_end_matches('\r');
|
||||
let rest = &trimmed[opener_fl.indent..];
|
||||
rest[opener_fl.len..].to_string()
|
||||
};
|
||||
rewrites.insert(
|
||||
*opener_idx,
|
||||
Rewrite {
|
||||
char: opener_fl.char,
|
||||
new_len,
|
||||
indent: opener_fl.indent,
|
||||
},
|
||||
);
|
||||
let closer_fl = fence_info[*closer_idx].as_ref().unwrap();
|
||||
rewrites.insert(
|
||||
*closer_idx,
|
||||
Rewrite {
|
||||
char: closer_fl.char,
|
||||
new_len,
|
||||
indent: closer_fl.indent,
|
||||
},
|
||||
);
|
||||
// Store info string only in the opener; closer keeps the trailing
|
||||
// portion which is already handled through the original line.
|
||||
// Actually, we rebuild both lines from scratch below, including
|
||||
// the info string for the opener.
|
||||
let _ = info_part; // consumed in rebuild
|
||||
}
|
||||
}
|
||||
|
||||
if rewrites.is_empty() {
|
||||
return markdown.to_string();
|
||||
}
|
||||
|
||||
// Rebuild.
|
||||
let mut out = String::with_capacity(markdown.len() + rewrites.len() * 4);
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
if let Some(rw) = rewrites.get(&i) {
|
||||
let fence_str: String = std::iter::repeat(rw.char).take(rw.new_len).collect();
|
||||
let indent_str: String = std::iter::repeat(' ').take(rw.indent).collect();
|
||||
// Recover the original info string (if any) and trailing newline.
|
||||
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
|
||||
let fi = fence_info[i].as_ref().unwrap();
|
||||
let info = &trimmed[fi.indent + fi.len..];
|
||||
let trailing = &line[trimmed.len()..];
|
||||
out.push_str(&indent_str);
|
||||
out.push_str(&fence_str);
|
||||
out.push_str(info);
|
||||
out.push_str(trailing);
|
||||
} else {
|
||||
out.push_str(line);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn find_stream_safe_boundary(markdown: &str) -> Option<usize> {
|
||||
let mut open_fence: Option<FenceMarker> = None;
|
||||
let mut last_boundary = None;
|
||||
|
||||
Reference in New Issue
Block a user