perf(parser)!: store Position line/column as u32

nlopes · nlopes · commit cddb806d23f9 · 2026-06-28T13:45:16.000+02:00
This keeps `Location` a bit more compact, which gains us a few percentage points of
parse speed.

BREAKING CHANGE: `Position::line` and `Position::column` are now `u32` instead of `usize`.
diff --git a/acdc-cli/src/error.rs b/acdc-cli/src/error.rs
@@ -79,12 +79,12 @@ fn source_span_from_source_location(loc: &SourceLocation, source: &str) -> Sourc
     SourceSpan::new(start_offset.into(), length)
 }
 
-fn source_location_line_column(loc: &SourceLocation) -> (usize, usize) {
+fn source_location_line_column(loc: &SourceLocation) -> (u32, u32) {
     (loc.location.start.line, loc.location.start.column)
 }
 
 /// Calculate byte offset from line and column numbers (both 1-indexed).
-fn calculate_offset_from_position(source: &str, line: usize, column: usize) -> usize {
+fn calculate_offset_from_position(source: &str, line: u32, column: u32) -> usize {
     let mut current_line = 1;
 
     for (idx, ch) in source.char_indices() {
diff --git a/acdc-editor-wasm/src/lib.rs b/acdc-editor-wasm/src/lib.rs
@@ -26,8 +26,8 @@ pub struct ParseResult {
 pub struct EditorWarning {
     pub message: String,
     pub advice: Option<String>,
-    pub line: Option<usize>,
-    pub column: Option<usize>,
+    pub line: Option<u32>,
+    pub column: Option<u32>,
 }
 
 /// Initialize panic hook and set up the editor DOM orchestration.
@@ -118,7 +118,7 @@ pub fn parse_and_render(input: &str) -> Result<ParseResult, String> {
     })
 }
 
-fn location_line_col(loc: Option<&acdc_parser::SourceLocation>) -> (Option<usize>, Option<usize>) {
+fn location_line_col(loc: Option<&acdc_parser::SourceLocation>) -> (Option<u32>, Option<u32>) {
     let Some(loc) = loc else {
         return (None, None);
     };
diff --git a/acdc-lsp/src/capabilities/folding.rs b/acdc-lsp/src/capabilities/folding.rs
@@ -3,8 +3,6 @@
 use acdc_parser::{Block, DelimitedBlockType, Document, Location};
 use tower_lsp_server::ls_types::{FoldingRange, FoldingRangeKind};
 
-use crate::convert::to_lsp_u32;
-
 /// Compute all folding ranges in a document
 ///
 /// Returns ranges for:
@@ -133,9 +131,9 @@ fn make_folding_range(loc: &Location, kind: FoldingRangeKind) -> Option<FoldingR
     // Only create folding range if it spans at least 2 lines
     if loc.end.line > loc.start.line {
         Some(FoldingRange {
-            start_line: to_lsp_u32(loc.start.line.saturating_sub(1)),
+            start_line: loc.start.line.saturating_sub(1),
             start_character: None,
-            end_line: to_lsp_u32(loc.end.line.saturating_sub(1)),
+            end_line: loc.end.line.saturating_sub(1),
             end_character: None,
             kind: Some(kind),
             collapsed_text: None,
diff --git a/acdc-lsp/src/capabilities/formatting.rs b/acdc-lsp/src/capabilities/formatting.rs
@@ -109,8 +109,8 @@ fn collect_protected_ranges_from_blocks(blocks: &[Block], ranges: &mut Vec<Prote
                 if is_verbatim_block_type(&db.inner) {
                     // Location is 1-indexed, convert to 0-indexed
                     ranges.push(ProtectedRange {
-                        start_line: db.location.start.line.saturating_sub(1),
-                        end_line: db.location.end.line.saturating_sub(1),
+                        start_line: db.location.start.line.saturating_sub(1) as usize,
+                        end_line: db.location.end.line.saturating_sub(1) as usize,
                     });
                 } else {
                     // Non-verbatim delimited blocks can contain nested verbatim blocks
@@ -365,8 +365,8 @@ fn ensure_block_separation(
         };
 
         // Convert 1-indexed AST locations to 0-indexed
-        let prev_end_line = block_location(prev_block).end.line.saturating_sub(1);
-        let curr_start_line = block_location(curr_block).start.line.saturating_sub(1);
+        let prev_end_line = block_location(prev_block).end.line.saturating_sub(1) as usize;
+        let curr_start_line = block_location(curr_block).start.line.saturating_sub(1) as usize;
 
         // Only process blocks within our range
         if prev_end_line < range.start || curr_start_line >= range.end {
diff --git a/acdc-lsp/src/capabilities/semantic_tokens.rs b/acdc-lsp/src/capabilities/semantic_tokens.rs
@@ -165,7 +165,7 @@ fn collect_tokens_from_block(block: &Block, tokens: &mut Vec<RawToken>) {
 
             if title_len > 0 {
                 tokens.push(RawToken {
-                    line: to_lsp_u32(section.location.start.line.saturating_sub(1)),
+                    line: section.location.start.line.saturating_sub(1),
                     // Skip the = markers and space
                     start_char: u32::from(section.level) + 2, // Skip = markers and space
                     length: to_lsp_u32(title_len),
@@ -212,7 +212,7 @@ fn collect_tokens_from_block(block: &Block, tokens: &mut Vec<RawToken>) {
         Block::DocumentAttribute(attr) => {
             // Attribute name as property
             tokens.push(RawToken {
-                line: to_lsp_u32(attr.location.start.line.saturating_sub(1)),
+                line: attr.location.start.line.saturating_sub(1),
                 start_char: 1, // Skip leading :
                 length: to_lsp_u32(attr.name.len()),
                 token_type: 2, // PROPERTY
@@ -341,21 +341,20 @@ fn add_token_for_location(
         // anyway, so emit a minimal 1-char token rather than a bogus length.
         None => 1,
         // Same line (and same file): use the column span.
-        Some(_) if loc.start.line == loc.end.line => to_lsp_u32(
-            loc.end
-                .column
-                .saturating_sub(loc.start.column)
-                .saturating_add(1),
-        ),
+        Some(_) if loc.start.line == loc.end.line => loc
+            .end
+            .column
+            .saturating_sub(loc.start.column)
+            .saturating_add(1),
         // Multi-line within one file: use the byte length (simplified — first-line only
         // would need the line width, which we don't have here).
         Some(bytes) => to_lsp_u32(bytes),
     };
 
     if length > 0 {
         tokens.push(RawToken {
-            line: to_lsp_u32(loc.start.line.saturating_sub(1)),
-            start_char: to_lsp_u32(loc.start.column.saturating_sub(1)),
+            line: loc.start.line.saturating_sub(1),
+            start_char: loc.start.column.saturating_sub(1),
             length,
             token_type,
             token_modifiers,
diff --git a/acdc-lsp/src/convert.rs b/acdc-lsp/src/convert.rs
@@ -54,12 +54,12 @@ pub(crate) fn offset_in_location(offset: usize, location: &Location) -> bool {
 pub(crate) fn location_to_range(loc: &Location) -> Range {
     Range {
         start: Position {
-            line: to_lsp_u32(loc.start.line.saturating_sub(1)),
-            character: to_lsp_u32(loc.start.column.saturating_sub(1)),
+            line: loc.start.line.saturating_sub(1),
+            character: loc.start.column.saturating_sub(1),
         },
         end: Position {
-            line: to_lsp_u32(loc.end.line.saturating_sub(1)),
-            character: to_lsp_u32(loc.end.column),
+            line: loc.end.line.saturating_sub(1),
+            character: loc.end.column,
         },
     }
 }
diff --git a/acdc-lsp/src/state/document.rs b/acdc-lsp/src/state/document.rs
@@ -4,7 +4,7 @@ use std::collections::HashMap;
 use std::path::PathBuf;
 use std::sync::{Mutex, MutexGuard};
 
-use acdc_parser::{Document, DocumentAttributes, Location};
+use acdc_parser::{Document, DocumentAttributes, Location, Position};
 use tower_lsp_server::ls_types::Diagnostic;
 
 /// Owned counterpart to `acdc_parser::Source<'_>`, detached from the parser arena
@@ -238,10 +238,8 @@ pub(crate) fn extract_attribute_defs(text: &str) -> Vec<(String, Location)> {
             let line_end = line.len();
 
             let mut location = Location::default();
-            location.start.line = line_idx + 1;
-            location.start.column = col_offset + 1;
-            location.end.line = line_idx + 1;
-            location.end.column = line_end;
+            location.start = Position::from_line_col(line_idx + 1, col_offset + 1);
+            location.end = Position::from_line_col(line_idx + 1, line_end);
             location.absolute_start = this_line_start + col_offset;
             location.absolute_end = this_line_start + line_end;
 
@@ -321,10 +319,8 @@ fn extract_refs_from_line(
             let col_end = segment_offset_in_line + close + 1;
 
             let mut location = Location::default();
-            location.start.line = line_idx + 1;
-            location.start.column = col_in_line + 1;
-            location.end.line = line_idx + 1;
-            location.end.column = col_end;
+            location.start = Position::from_line_col(line_idx + 1, col_in_line + 1);
+            location.end = Position::from_line_col(line_idx + 1, col_end);
             location.absolute_start = line_start + col_in_line;
             location.absolute_end = line_start + col_end;
 
@@ -492,10 +488,8 @@ pub(crate) fn extract_includes(text: &str) -> Vec<(String, Location)> {
                 let target_end = target_start + target.len();
 
                 let mut location = Location::default();
-                location.start.line = line_idx + 1;
-                location.start.column = target_start + 1;
-                location.end.line = line_idx + 1;
-                location.end.column = target_end;
+                location.start = Position::from_line_col(line_idx + 1, target_start + 1);
+                location.end = Position::from_line_col(line_idx + 1, target_end);
                 location.absolute_start = this_line_start + target_start;
                 location.absolute_end = this_line_start + target_end;
 
diff --git a/acdc-parser/CHANGELOG.md b/acdc-parser/CHANGELOG.md
@@ -40,6 +40,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `Location::byte_len()` returns the location's inclusive byte length, or `None` when its
   start and end fall in different files (where the byte offsets are in different coordinate
   spaces and can't be subtracted). Prefer it over `absolute_end - absolute_start`.
+- `Position::from_line_col(line, column)` builds a `Position` from `usize` line/column,
+  saturating at `u32::MAX`. Use it when constructing from `usize` indices; prefer
+  `Position::new` when the values are already `u32`.
 - `SectionKind` enum and a `kind` field on `Section` (and `TocEntry`) classifying
   a section as an `AsciiDoc` *special section* (`Preface`, `Glossary`, `Appendix`,
   …) or `Normal`, derived from its style. This is a structural classification only
@@ -87,6 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   `SectionKind::as_style`).
 - Updated the parser grammar implementation to reduce location-tracking overhead while
   preserving the same parse output and diagnostics.
+- **Breaking:** `Position::line` and `Position::column` are now `u32` instead of `usize`
+  (saturating at `u32::MAX` for inputs beyond ~4 billion lines/columns), keeping the
+  per-node `Location` compact now that each boundary also carries its originating `file`.
 - **Breaking:** the `Positioning` enum is removed and `SourceLocation` now holds a
   single `location: Location` (a point diagnostic is a zero-width span with
   `start == end`). Read `source_location.location.start` for the line/column instead
diff --git a/acdc-parser/src/grammar/line_map.rs b/acdc-parser/src/grammar/line_map.rs
@@ -150,7 +150,10 @@ impl LineMap {
                 .map_or(0, |s| s.chars().count())
         };
 
-        Position::new(line, chars_in_line + 1)
+        Position::new(
+            u32::try_from(line).unwrap_or(u32::MAX),
+            u32::try_from(chars_in_line + 1).unwrap_or(u32::MAX),
+        )
     }
 
     /// Source line (1-indexed) for a preprocessed `offset` in a span that begins at
@@ -163,11 +166,11 @@ impl LineMap {
     /// once per range) save a lookup by calling this instead of `source_line`.
     pub(crate) fn source_line_from(
         &self,
-        start_line: usize,
-        preproc_start_line: usize,
+        start_line: u32,
+        preproc_start_line: u32,
         input: &str,
         offset: usize,
-    ) -> usize {
+    ) -> u32 {
         let offset_line = self.offset_to_position(offset, input).line;
         start_line + offset_line.saturating_sub(preproc_start_line)
     }
@@ -176,9 +179,14 @@ impl LineMap {
     /// range's preprocessed start line on the fly. Convenience over
     /// [`source_line_from`](Self::source_line_from) for callers without a cached start
     /// line (the rare diagnostic paths).
-    pub(crate) fn source_line(&self, range: &SourceRange, input: &str, offset: usize) -> usize {
+    pub(crate) fn source_line(&self, range: &SourceRange, input: &str, offset: usize) -> u32 {
         let preproc_start_line = self.offset_to_position(range.start_offset, input).line;
-        self.source_line_from(range.start_line, preproc_start_line, input, offset)
+        self.source_line_from(
+            u32::try_from(range.start_line).unwrap_or(u32::MAX),
+            preproc_start_line,
+            input,
+            offset,
+        )
     }
 }
 
diff --git a/acdc-parser/src/grammar/location_mapping.rs b/acdc-parser/src/grammar/location_mapping.rs
@@ -285,8 +285,9 @@ macro_rules! remap_simple_location {
     ($node:expr, $base_offset:expr) => {{
         $node.location.absolute_start += $base_offset;
         $node.location.absolute_end += $base_offset;
-        $node.location.start.column += $base_offset;
-        $node.location.end.column += $base_offset;
+        let col_shift = u32::try_from($base_offset).unwrap_or(u32::MAX);
+        $node.location.start.column += col_shift;
+        $node.location.end.column += col_shift;
     }};
 }
 
diff --git a/acdc-parser/src/grammar/passthrough_processing.rs b/acdc-parser/src/grammar/passthrough_processing.rs
@@ -45,15 +45,17 @@ pub(crate) fn process_passthrough_with_quotes<'a>(
             let prefix_len = total_span - content.len() - suffix_len;
 
             let content_abs_start = passthrough.location.absolute_start + prefix_len;
-            let content_col_start = passthrough.location.start.column + prefix_len;
+            let content_col_start =
+                passthrough.location.start.column + u32::try_from(prefix_len).unwrap_or(u32::MAX);
+            let content_line = passthrough.location.start.line;
 
             Location {
                 absolute_start: content_abs_start,
                 absolute_end: content_abs_start + content.len(),
-                start: crate::Position::new(passthrough.location.start.line, content_col_start),
+                start: crate::Position::new(content_line, content_col_start),
                 end: crate::Position::new(
-                    passthrough.location.start.line,
-                    content_col_start + content.len(),
+                    content_line,
+                    content_col_start + u32::try_from(content.len()).unwrap_or(u32::MAX),
                 ),
             }
         } else {
@@ -158,14 +160,18 @@ pub(crate) fn parse_text_for_quotes_in<'a>(
 /// the same line.
 fn plain_text_at<'a>(text: &'a str, base_location: &Location, offset: usize) -> InlineNode<'a> {
     let abs_start = base_location.absolute_start + offset;
-    let col_start = base_location.start.column + offset;
+    let col_start = base_location.start.column + u32::try_from(offset).unwrap_or(u32::MAX);
+    let line = base_location.start.line;
     InlineNode::PlainText(Plain {
         content: text,
         location: Location {
             absolute_start: abs_start,
             absolute_end: abs_start + text.len(),
-            start: crate::Position::new(base_location.start.line, col_start),
-            end: crate::Position::new(base_location.start.line, col_start + text.len()),
+            start: crate::Position::new(line, col_start),
+            end: crate::Position::new(
+                line,
+                col_start + u32::try_from(text.len()).unwrap_or(u32::MAX),
+            ),
         },
         escaped: false,
     })
diff --git a/acdc-parser/src/grammar/source_remap.rs b/acdc-parser/src/grammar/source_remap.rs
@@ -106,7 +106,7 @@ struct Remapper<'a> {
     /// Preprocessed (1-indexed) start line of each range, precomputed so `map_offset`
     /// resolves a source line with a single `LineMap` lookup (for the queried offset)
     /// instead of also looking up the range start every time. Parallel to `ranges`.
-    preproc_start_lines: Vec<usize>,
+    preproc_start_lines: Vec<u32>,
     /// One shared `Arc<Vec<String>>` per distinct `include::` chain, keyed by the
     /// range's `file_chain`, so stamping a node's file is a refcount bump rather than
     /// cloning the chain per node. Primary-input ranges (empty chain) are not interned
@@ -201,13 +201,16 @@ impl<'a> Remapper<'a> {
     /// innermost containing range, plus that range — via a binary search over the
     /// precomputed segment cover (O(log R)). The source line goes through the shared
     /// [`LineMap::source_line`] so it can't drift from the diagnostic paths.
-    fn map_offset(&self, abs: usize) -> Option<(usize, usize, &'a SourceRange)> {
+    fn map_offset(&self, abs: usize) -> Option<(usize, u32, &'a SourceRange)> {
         let range_index = covering_range_index(&self.boundaries, &self.segment_cover, abs)?;
         let range = self.ranges.get(range_index)?;
         let preproc_start_line = *self.preproc_start_lines.get(range_index)?;
-        let source_line =
-            self.line_map
-                .source_line_from(range.start_line, preproc_start_line, self.input, abs);
+        let source_line = self.line_map.source_line_from(
+            u32::try_from(range.start_line).unwrap_or(u32::MAX),
+            preproc_start_line,
+            self.input,
+            abs,
+        );
         Some((range.source_offset(abs), source_line, range))
     }
 
@@ -293,12 +296,12 @@ fn covering_range_index(
 
 /// Subtract a re-indent's `column_shift` from a preprocessed `column` to recover the
 /// origin column, clamped to a minimum of 1. A `0` shift returns `column` unchanged.
-fn shift_column(column: usize, column_shift: isize) -> usize {
+fn shift_column(column: u32, column_shift: isize) -> u32 {
     if column_shift == 0 {
         return column;
     }
     let shifted = isize::try_from(column).unwrap_or(isize::MAX) - column_shift;
-    usize::try_from(shifted.max(1)).unwrap_or(1)
+    u32::try_from(shifted.max(1)).unwrap_or(1)
 }
 
 #[cfg(test)]
diff --git a/acdc-parser/src/lib.rs b/acdc-parser/src/lib.rs
@@ -359,13 +359,13 @@ fn peg_error_to_source_location(
             file: range.file.clone(),
             location: crate::Location::point(Position::new(
                 state.line_map.source_line(range, state.input, offset),
-                error.location.column,
+                u32::try_from(error.location.column).unwrap_or(u32::MAX),
             )),
         }
     } else {
         SourceLocation {
             file: state.current_file.as_deref().cloned(),
-            location: crate::Location::point(Position::new(
+            location: crate::Location::point(Position::from_line_col(
                 error.location.line,
                 error.location.column,
             )),
diff --git a/acdc-parser/src/model/location.rs b/acdc-parser/src/model/location.rs
diff --git a/acdc-parser/src/preprocessor/conditional.rs b/acdc-parser/src/preprocessor/conditional.rs
diff --git a/acdc-parser/src/preprocessor/include.rs b/acdc-parser/src/preprocessor/include.rs
diff --git a/acdc-parser/src/preprocessor/mod.rs b/acdc-parser/src/preprocessor/mod.rs
diff --git a/acdc-parser/tests/unterminated_tables.rs b/acdc-parser/tests/unterminated_tables.rs

Original file line number	Diff line number	Diff line change
`@@ -79,12 +79,12 @@ fn source_span_from_source_location(loc: &SourceLocation, source: &str) -> Sourc`
`79`	`79`	`SourceSpan::new(start_offset.into(), length)`
`80`	`80`	`}`
`81`	`81`
`82`		`-fn source_location_line_column(loc: &SourceLocation) -> (usize, usize) {`
	`82`	`+fn source_location_line_column(loc: &SourceLocation) -> (u32, u32) {`
`83`	`83`	`(loc.location.start.line, loc.location.start.column)`
`84`	`84`	`}`
`85`	`85`
`86`	`86`	`/// Calculate byte offset from line and column numbers (both 1-indexed).`
`87`		`-fn calculate_offset_from_position(source: &str, line: usize, column: usize) -> usize {`
	`87`	`+fn calculate_offset_from_position(source: &str, line: u32, column: u32) -> usize {`
`88`	`88`	`let mut current_line = 1;`
`89`	`89`
`90`	`90`	`for (idx, ch) in source.char_indices() {`