Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Folding Range functionality to air #146

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/lsp/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ futures.workspace = true
itertools.workspace = true
line_ending.workspace = true
memchr.workspace = true
regex = "1.11.1"
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
Expand Down
379 changes: 379 additions & 0 deletions crates/lsp/src/folding_range.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,379 @@
//
// folding_range.rs
//
// Copyright (C) 2025 Posit Software, PBC. All rights reserved.
//
//

use regex::Regex;
use std::sync::LazyLock;

use tower_lsp::lsp_types::FoldingRange;
use tower_lsp::lsp_types::FoldingRangeKind;

use crate::documents::Document;

pub fn folding_range(document: &Document) -> anyhow::Result<Vec<FoldingRange>> {
let mut folding_ranges: Vec<FoldingRange> = Vec::new();

// Activate the parser
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_r::LANGUAGE.into())
.unwrap();

let ast = parser.parse(&document.contents, None).unwrap();

if ast.root_node().has_error() {
tracing::error!("Folding range service: Parse error");
return Err(anyhow::anyhow!("Parse error"));
}

// Traverse the AST
let mut cursor = ast.root_node().walk();
parse_ts_node(
&mut cursor,
0,
&mut folding_ranges,
document,
&mut vec![Vec::new()],
&mut None,
&mut None,
);

Ok(folding_ranges)
}

fn parse_ts_node(
cursor: &mut tree_sitter::TreeCursor,
_depth: usize,
folding_ranges: &mut Vec<FoldingRange>,
document: &Document,
comment_stack: &mut Vec<Vec<(usize, usize)>>,
region_marker: &mut Option<usize>,
cell_marker: &mut Option<usize>,
) {
let node = cursor.node();
let _field_name = match cursor.field_name() {
Some(name) => format!("{name}: "),
None => String::new(),
};

let start = node.start_position();
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't find an equivalent of this using biome_rowan

let end = node.end_position();
let node_type = node.kind();

match node_type {
"parameters" | "arguments" | "braced_expression" => {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me know if I miss some of other node types with brackets that I should take care of

// ignore same line folding
if start.row == end.row {
return;
}
let folding_range = bracket_range(
start.row,
start.column,
end.row,
end.column - 1,
count_leading_whitespaces(document, end.row),
);
folding_ranges.push(folding_range);
}
"comment" => {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

biome_rowan treats comments as children of the next command. This is super annoying. Things are much neater with treesitter

// Only process standalone comment
if count_leading_whitespaces(document, start.row) != start.column {
return;
}

// Nested comment section handling
let comment_line = get_line_text(document, start.row, None, None);

nested_processor(comment_stack, folding_ranges, start.row, &comment_line);
region_processor(folding_ranges, region_marker, start.row, &comment_line);
cell_processor(folding_ranges, cell_marker, start.row, &comment_line);
}
_ => (),
}

if cursor.goto_first_child() {
// create node child stacks
// This is a stack of stacks for each bracket level, within each stack is a vector of (level, start_line) tuples
let mut child_comment_stack: Vec<Vec<(usize, usize)>> = vec![Vec::new()];
let mut child_region_marker: Option<usize> = None;
let mut child_cell_marker: Option<usize> = None;

// recursive loop
loop {
parse_ts_node(
cursor,
_depth + 1,
folding_ranges,
document,
&mut child_comment_stack,
&mut child_region_marker,
&mut child_cell_marker,
);
if !cursor.goto_next_sibling() {
break;
}
}
// End of node handling
end_node_handler(
folding_ranges,
end.row,
&mut child_comment_stack,
&mut child_region_marker,
&mut child_cell_marker,
);

cursor.goto_parent();
}
}

// Function to create a folding range that specifically deals with bracket ending
fn bracket_range(
start_line: usize,
start_char: usize,
end_line: usize,
end_char: usize,
white_space_count: usize,
) -> FoldingRange {
let mut end_line: u32 = end_line.try_into().unwrap();
let mut end_char: Option<u32> = Some(end_char.try_into().unwrap());

let adjusted_end_char = end_char.and_then(|val| val.checked_sub(white_space_count as u32));

match adjusted_end_char {
Some(0) => {
end_line -= 1;
end_char = None;
}
Some(_) => {
if let Some(ref mut value) = end_char {
*value -= 1;
}
}
None => {
tracing::error!(
"Folding Range (bracket_range): adjusted_end_char should not be None here"
);
}
}

FoldingRange {
start_line: start_line.try_into().unwrap(),
start_character: Some(start_char as u32),
end_line,
end_character: end_char,
kind: Some(FoldingRangeKind::Region),
collapsed_text: None,
}
}

fn comment_range(start_line: usize, end_line: usize) -> FoldingRange {
FoldingRange {
start_line: start_line.try_into().unwrap(),
start_character: None,
end_line: end_line.try_into().unwrap(),
end_character: None,
kind: Some(FoldingRangeKind::Region),
collapsed_text: None,
}
}

fn get_line_text(
document: &Document,
line_num: usize,
start_char: Option<usize>,
end_char: Option<usize>,
) -> String {
let text = &document.contents;
// Split the text into lines
let lines: Vec<&str> = text.lines().collect();

// Ensure the start_line is within bounds
if line_num >= lines.len() {
return String::new(); // Return an empty string if out of bounds
}

// Get the line corresponding to start_line
let line = lines[line_num];

// Determine the start and end character indices
let start_idx = start_char.unwrap_or(0); // Default to 0 if None
let end_idx = end_char.unwrap_or(line.len()); // Default to the line's length if None

// Ensure indices are within bounds for the line
let start_idx = start_idx.min(line.len());
let end_idx = end_idx.min(line.len());

// Extract the substring and return it
line[start_idx..end_idx].to_string() // TODO
}

fn count_leading_whitespaces(document: &Document, line_num: usize) -> usize {
let line_text = get_line_text(document, line_num, None, None);
line_text.chars().take_while(|c| c.is_whitespace()).count()
}

pub static RE_COMMENT_SECTION: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"^\s*(#+)\s*(.*?)\s*[#=-]{4,}\s*$").unwrap());

fn parse_comment_as_section(comment: &str) -> Option<(usize, String)> {
// Match lines starting with one or more '#' followed by some non-empty content and must end with 4 or more '-', '#', or `=`
// Ensure that there's actual content between the start and the trailing symbols.
if let Some(caps) = RE_COMMENT_SECTION.captures(comment) {
let hashes = caps.get(1)?.as_str().len(); // Count the number of '#'
let title = caps.get(2)?.as_str().trim().to_string(); // Extract the title text without trailing punctuations
if title.is_empty() {
return None; // Return None for lines with only hashtags
}
return Some((hashes, title)); // Return the level based on the number of '#' and the title
}

None
}

fn nested_processor(
comment_stack: &mut Vec<Vec<(usize, usize)>>,
folding_ranges: &mut Vec<FoldingRange>,
line_num: usize,
comment_line: &str,
) {
let Some((level, _title)) = parse_comment_as_section(comment_line) else {
return; // return if the line is not a comment section
};
if comment_stack.is_empty() {
tracing::error!(
"Folding Range: comment_stack should always contain at least one element here"
);
return;
}
loop {
if comment_stack.last().unwrap().is_empty() {
comment_stack.last_mut().unwrap().push((level, line_num));
return; // return if the stack is empty
}

let Some((last_level, _)) = comment_stack.last().unwrap().last() else {
tracing::error!("Folding Range: comment_stacks should not be empty here");
return;
};
if *last_level < level {
comment_stack.last_mut().unwrap().push((level, line_num));
break;
} else if *last_level == level {
folding_ranges.push(comment_range(
comment_stack.last().unwrap().last().unwrap().1,
line_num - 1,
));
comment_stack.last_mut().unwrap().pop();
comment_stack.last_mut().unwrap().push((level, line_num));
break;
} else {
folding_ranges.push(comment_range(
comment_stack.last().unwrap().last().unwrap().1,
line_num - 1,
));
comment_stack.last_mut().unwrap().pop(); // TODO: Handle case where comment_stack is empty
}
}
}

fn region_processor(
folding_ranges: &mut Vec<FoldingRange>,
region_marker: &mut Option<usize>,
line_idx: usize,
line_text: &str,
) {
let Some(region_type) = parse_region_type(line_text) else {
return; // return if the line is not a region section
};
match region_type.as_str() {
"start" => {
region_marker.replace(line_idx);
}
"end" => {
if let Some(region_start) = region_marker {
let folding_range = comment_range(*region_start, line_idx);
folding_ranges.push(folding_range);
*region_marker = None;
}
}
_ => {}
}
}

fn parse_region_type(line_text: &str) -> Option<String> {
// return the region type
// "start": "^\\s*#\\s*region\\b"
// "end": "^\\s*#\\s*endregion\\b"
// None: otherwise
let region_start = Regex::new(r"^\s*#\s*region\b").unwrap();
let region_end = Regex::new(r"^\s*#\s*endregion\b").unwrap();

if region_start.is_match(line_text) {
Some("start".to_string())
} else if region_end.is_match(line_text) {
Some("end".to_string())
} else {
None
}
}

fn cell_processor(
// Almost identical to region_processor
folding_ranges: &mut Vec<FoldingRange>,
cell_marker: &mut Option<usize>,
line_idx: usize,
line_text: &str,
) {
let cell_pattern: Regex = Regex::new(r"^#\s*(%%|\+)(.*)").unwrap();

if !cell_pattern.is_match(line_text) {
} else {
let Some(start_line) = cell_marker else {
cell_marker.replace(line_idx);
return;
};

let folding_range = comment_range(*start_line, line_idx - 1);
folding_ranges.push(folding_range);
cell_marker.replace(line_idx);
}
}

fn end_node_handler(
folding_ranges: &mut Vec<FoldingRange>,
line_idx: usize,
comment_stack: &mut Vec<Vec<(usize, usize)>>,
region_marker: &mut Option<usize>,
cell_marker: &mut Option<usize>,
) {
// Nested comment handling
// Iterate over the last element of the comment stack and add it to the folding ranges by using the comment_range function
if let Some(last_section) = comment_stack.last() {
// Iterate over each (start level, start line) in the last section
for &(_level, start_line) in last_section.iter() {
// Add a new folding range for each range in the last section
let folding_range = comment_range(start_line, line_idx - 1);

folding_ranges.push(folding_range);
}
}
// Remove the last element from the comment stack after processing
comment_stack.pop();

// Unclosed region handling
if let Some(region_start) = region_marker {
let folding_range = comment_range(*region_start, line_idx - 1);
folding_ranges.push(folding_range);
*region_marker = None;
}

// End cell Handling
if let Some(cell_start) = cell_marker {
let folding_range = comment_range(*cell_start, line_idx - 1);
folding_ranges.push(folding_range);
*cell_marker = None;
}
}
Loading