posit-dev · kv9898 · Jan 12, 2025 · Jan 12, 2025 · Jan 12, 2025 · Jan 12, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/lsp/Cargo.toml b/crates/lsp/Cargo.toml
@@ -28,6 +28,7 @@ futures.workspace = true
 itertools.workspace = true
 line_ending.workspace = true
 memchr.workspace = true
+regex = "1.11.1"
 serde.workspace = true
 serde_json.workspace = true
 settings.workspace = true

diff --git a/crates/lsp/src/folding_range.rs b/crates/lsp/src/folding_range.rs
@@ -0,0 +1,379 @@
+//
+// folding_range.rs
+//
+// Copyright (C) 2025 Posit Software, PBC. All rights reserved.
+//
+//
+
+use regex::Regex;
+use std::sync::LazyLock;
+
+use tower_lsp::lsp_types::FoldingRange;
+use tower_lsp::lsp_types::FoldingRangeKind;
+
+use crate::documents::Document;
+
+pub fn folding_range(document: &Document) -> anyhow::Result<Vec<FoldingRange>> {
+    let mut folding_ranges: Vec<FoldingRange> = Vec::new();
+
+    // Activate the parser
+    let mut parser = tree_sitter::Parser::new();
+    parser
+        .set_language(&tree_sitter_r::LANGUAGE.into())
+        .unwrap();
+
+    let ast = parser.parse(&document.contents, None).unwrap();
+
+    if ast.root_node().has_error() {
+        tracing::error!("Folding range service: Parse error");
+        return Err(anyhow::anyhow!("Parse error"));
+    }
+
+    // Traverse the AST
+    let mut cursor = ast.root_node().walk();
+    parse_ts_node(
+        &mut cursor,
+        0,
+        &mut folding_ranges,
+        document,
+        &mut vec![Vec::new()],
+        &mut None,
+        &mut None,
+    );
+
+    Ok(folding_ranges)
+}
+
+fn parse_ts_node(
+    cursor: &mut tree_sitter::TreeCursor,
+    _depth: usize,
+    folding_ranges: &mut Vec<FoldingRange>,
+    document: &Document,
+    comment_stack: &mut Vec<Vec<(usize, usize)>>,
+    region_marker: &mut Option<usize>,
+    cell_marker: &mut Option<usize>,
+) {
+    let node = cursor.node();
+    let _field_name = match cursor.field_name() {
+        Some(name) => format!("{name}: "),
+        None => String::new(),
+    };
+
+    let start = node.start_position();
+    let end = node.end_position();
+    let node_type = node.kind();
+
+    match node_type {
+        "parameters" | "arguments" | "braced_expression" => {
+            // ignore same line folding
+            if start.row == end.row {
+                return;
+            }
+            let folding_range = bracket_range(
+                start.row,
+                start.column,
+                end.row,
+                end.column - 1,
+                count_leading_whitespaces(document, end.row),
+            );
+            folding_ranges.push(folding_range);
+        }
+        "comment" => {
+            // Only process standalone comment
+            if count_leading_whitespaces(document, start.row) != start.column {
+                return;
+            }
+
+            // Nested comment section handling
+            let comment_line = get_line_text(document, start.row, None, None);
+
+            nested_processor(comment_stack, folding_ranges, start.row, &comment_line);
+            region_processor(folding_ranges, region_marker, start.row, &comment_line);
+            cell_processor(folding_ranges, cell_marker, start.row, &comment_line);
+        }
+        _ => (),
+    }
+
+    if cursor.goto_first_child() {
+        // create node child stacks
+        // This is a stack of stacks for each bracket level, within each stack is a vector of (level, start_line) tuples
+        let mut child_comment_stack: Vec<Vec<(usize, usize)>> = vec![Vec::new()];
+        let mut child_region_marker: Option<usize> = None;
+        let mut child_cell_marker: Option<usize> = None;
+
+        // recursive loop
+        loop {
+            parse_ts_node(
+                cursor,
+                _depth + 1,
+                folding_ranges,
+                document,
+                &mut child_comment_stack,
+                &mut child_region_marker,
+                &mut child_cell_marker,
+            );
+            if !cursor.goto_next_sibling() {
+                break;
+            }
+        }
+        // End of node handling
+        end_node_handler(
+            folding_ranges,
+            end.row,
+            &mut child_comment_stack,
+            &mut child_region_marker,
+            &mut child_cell_marker,
+        );
+
+        cursor.goto_parent();
+    }
+}
+
+// Function to create a folding range that specifically deals with bracket ending
+fn bracket_range(
+    start_line: usize,
+    start_char: usize,
+    end_line: usize,
+    end_char: usize,
+    white_space_count: usize,
+) -> FoldingRange {
+    let mut end_line: u32 = end_line.try_into().unwrap();
+    let mut end_char: Option<u32> = Some(end_char.try_into().unwrap());
+
+    let adjusted_end_char = end_char.and_then(|val| val.checked_sub(white_space_count as u32));
+
+    match adjusted_end_char {
+        Some(0) => {
+            end_line -= 1;
+            end_char = None;
+        }
+        Some(_) => {
+            if let Some(ref mut value) = end_char {
+                *value -= 1;
+            }
+        }
+        None => {
+            tracing::error!(
+                "Folding Range (bracket_range): adjusted_end_char should not be None here"
+            );
+        }
+    }
+
+    FoldingRange {
+        start_line: start_line.try_into().unwrap(),
+        start_character: Some(start_char as u32),
+        end_line,
+        end_character: end_char,
+        kind: Some(FoldingRangeKind::Region),
+        collapsed_text: None,
+    }
+}
+
+fn comment_range(start_line: usize, end_line: usize) -> FoldingRange {
+    FoldingRange {
+        start_line: start_line.try_into().unwrap(),
+        start_character: None,
+        end_line: end_line.try_into().unwrap(),
+        end_character: None,
+        kind: Some(FoldingRangeKind::Region),
+        collapsed_text: None,
+    }
+}
+
+fn get_line_text(
+    document: &Document,
+    line_num: usize,
+    start_char: Option<usize>,
+    end_char: Option<usize>,
+) -> String {
+    let text = &document.contents;
+    // Split the text into lines
+    let lines: Vec<&str> = text.lines().collect();
+
+    // Ensure the start_line is within bounds
+    if line_num >= lines.len() {
+        return String::new(); // Return an empty string if out of bounds
+    }
+
+    // Get the line corresponding to start_line
+    let line = lines[line_num];
+
+    // Determine the start and end character indices
+    let start_idx = start_char.unwrap_or(0); // Default to 0 if None
+    let end_idx = end_char.unwrap_or(line.len()); // Default to the line's length if None
+
+    // Ensure indices are within bounds for the line
+    let start_idx = start_idx.min(line.len());
+    let end_idx = end_idx.min(line.len());
+
+    // Extract the substring and return it
+    line[start_idx..end_idx].to_string() // TODO
+}
+
+fn count_leading_whitespaces(document: &Document, line_num: usize) -> usize {
+    let line_text = get_line_text(document, line_num, None, None);
+    line_text.chars().take_while(|c| c.is_whitespace()).count()
+}
+
+pub static RE_COMMENT_SECTION: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^\s*(#+)\s*(.*?)\s*[#=-]{4,}\s*$").unwrap());
+
+fn parse_comment_as_section(comment: &str) -> Option<(usize, String)> {
+    // Match lines starting with one or more '#' followed by some non-empty content and must end with 4 or more '-', '#', or `=`
+    // Ensure that there's actual content between the start and the trailing symbols.
+    if let Some(caps) = RE_COMMENT_SECTION.captures(comment) {
+        let hashes = caps.get(1)?.as_str().len(); // Count the number of '#'
+        let title = caps.get(2)?.as_str().trim().to_string(); // Extract the title text without trailing punctuations
+        if title.is_empty() {
+            return None; // Return None for lines with only hashtags
+        }
+        return Some((hashes, title)); // Return the level based on the number of '#' and the title
+    }
+
+    None
+}
+
+fn nested_processor(
+    comment_stack: &mut Vec<Vec<(usize, usize)>>,
+    folding_ranges: &mut Vec<FoldingRange>,
+    line_num: usize,
+    comment_line: &str,
+) {
+    let Some((level, _title)) = parse_comment_as_section(comment_line) else {
+        return; // return if the line is not a comment section
+    };
+    if comment_stack.is_empty() {
+        tracing::error!(
+            "Folding Range: comment_stack should always contain at least one element here"
+        );
+        return;
+    }
+    loop {
+        if comment_stack.last().unwrap().is_empty() {
+            comment_stack.last_mut().unwrap().push((level, line_num));
+            return; // return if the stack is empty
+        }
+
+        let Some((last_level, _)) = comment_stack.last().unwrap().last() else {
+            tracing::error!("Folding Range: comment_stacks should not be empty here");
+            return;
+        };
+        if *last_level < level {
+            comment_stack.last_mut().unwrap().push((level, line_num));
+            break;
+        } else if *last_level == level {
+            folding_ranges.push(comment_range(
+                comment_stack.last().unwrap().last().unwrap().1,
+                line_num - 1,
+            ));
+            comment_stack.last_mut().unwrap().pop();
+            comment_stack.last_mut().unwrap().push((level, line_num));
+            break;
+        } else {
+            folding_ranges.push(comment_range(
+                comment_stack.last().unwrap().last().unwrap().1,
+                line_num - 1,
+            ));
+            comment_stack.last_mut().unwrap().pop(); // TODO: Handle case where comment_stack is empty
+        }
+    }
+}
+
+fn region_processor(
+    folding_ranges: &mut Vec<FoldingRange>,
+    region_marker: &mut Option<usize>,
+    line_idx: usize,
+    line_text: &str,
+) {
+    let Some(region_type) = parse_region_type(line_text) else {
+        return; // return if the line is not a region section
+    };
+    match region_type.as_str() {
+        "start" => {
+            region_marker.replace(line_idx);
+        }
+        "end" => {
+            if let Some(region_start) = region_marker {
+                let folding_range = comment_range(*region_start, line_idx);
+                folding_ranges.push(folding_range);
+                *region_marker = None;
+            }
+        }
+        _ => {}
+    }
+}
+
+fn parse_region_type(line_text: &str) -> Option<String> {
+    // return the region type
+    // "start": "^\\s*#\\s*region\\b"
+    // "end": "^\\s*#\\s*endregion\\b"
+    // None: otherwise
+    let region_start = Regex::new(r"^\s*#\s*region\b").unwrap();
+    let region_end = Regex::new(r"^\s*#\s*endregion\b").unwrap();
+
+    if region_start.is_match(line_text) {
+        Some("start".to_string())
+    } else if region_end.is_match(line_text) {
+        Some("end".to_string())
+    } else {
+        None
+    }
+}
+
+fn cell_processor(
+    // Almost identical to region_processor
+    folding_ranges: &mut Vec<FoldingRange>,
+    cell_marker: &mut Option<usize>,
+    line_idx: usize,
+    line_text: &str,
+) {
+    let cell_pattern: Regex = Regex::new(r"^#\s*(%%|\+)(.*)").unwrap();
+
+    if !cell_pattern.is_match(line_text) {
+    } else {
+        let Some(start_line) = cell_marker else {
+            cell_marker.replace(line_idx);
+            return;
+        };
+
+        let folding_range = comment_range(*start_line, line_idx - 1);
+        folding_ranges.push(folding_range);
+        cell_marker.replace(line_idx);
+    }
+}
+
+fn end_node_handler(
+    folding_ranges: &mut Vec<FoldingRange>,
+    line_idx: usize,
+    comment_stack: &mut Vec<Vec<(usize, usize)>>,
+    region_marker: &mut Option<usize>,
+    cell_marker: &mut Option<usize>,
+) {
+    // Nested comment handling
+    // Iterate over the last element of the comment stack and add it to the folding ranges by using the comment_range function
+    if let Some(last_section) = comment_stack.last() {
+        // Iterate over each (start level, start line) in the last section
+        for &(_level, start_line) in last_section.iter() {
+            // Add a new folding range for each range in the last section
+            let folding_range = comment_range(start_line, line_idx - 1);
+
+            folding_ranges.push(folding_range);
+        }
+    }
+    // Remove the last element from the comment stack after processing
+    comment_stack.pop();
+
+    // Unclosed region handling
+    if let Some(region_start) = region_marker {
+        let folding_range = comment_range(*region_start, line_idx - 1);
+        folding_ranges.push(folding_range);
+        *region_marker = None;
+    }
+
+    // End cell Handling
+    if let Some(cell_start) = cell_marker {
+        let folding_range = comment_range(*cell_start, line_idx - 1);
+        folding_ranges.push(folding_range);
+        *cell_marker = None;
+    }
+}