Skip to content

Commit cddb806

Browse files
committed
perf(parser)!: store Position line/column as u32
This keeps `Location` a bit more compact, which improves parse speed by a few percent. BREAKING CHANGE: `Position::line` and `Position::column` are now `u32` instead of `usize`.
1 parent a67a415 commit cddb806

18 files changed

Lines changed: 115 additions & 80 deletions

File tree

acdc-cli/src/error.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,12 @@ fn source_span_from_source_location(loc: &SourceLocation, source: &str) -> Sourc
7979
SourceSpan::new(start_offset.into(), length)
8080
}
8181

82-
fn source_location_line_column(loc: &SourceLocation) -> (usize, usize) {
82+
fn source_location_line_column(loc: &SourceLocation) -> (u32, u32) {
8383
(loc.location.start.line, loc.location.start.column)
8484
}
8585

8686
/// Calculate byte offset from line and column numbers (both 1-indexed).
87-
fn calculate_offset_from_position(source: &str, line: usize, column: usize) -> usize {
87+
fn calculate_offset_from_position(source: &str, line: u32, column: u32) -> usize {
8888
let mut current_line = 1;
8989

9090
for (idx, ch) in source.char_indices() {

acdc-editor-wasm/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ pub struct ParseResult {
2626
pub struct EditorWarning {
2727
pub message: String,
2828
pub advice: Option<String>,
29-
pub line: Option<usize>,
30-
pub column: Option<usize>,
29+
pub line: Option<u32>,
30+
pub column: Option<u32>,
3131
}
3232

3333
/// Initialize panic hook and set up the editor DOM orchestration.
@@ -118,7 +118,7 @@ pub fn parse_and_render(input: &str) -> Result<ParseResult, String> {
118118
})
119119
}
120120

121-
fn location_line_col(loc: Option<&acdc_parser::SourceLocation>) -> (Option<usize>, Option<usize>) {
121+
fn location_line_col(loc: Option<&acdc_parser::SourceLocation>) -> (Option<u32>, Option<u32>) {
122122
let Some(loc) = loc else {
123123
return (None, None);
124124
};

acdc-lsp/src/capabilities/folding.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
use acdc_parser::{Block, DelimitedBlockType, Document, Location};
44
use tower_lsp_server::ls_types::{FoldingRange, FoldingRangeKind};
55

6-
use crate::convert::to_lsp_u32;
7-
86
/// Compute all folding ranges in a document
97
///
108
/// Returns ranges for:
@@ -133,9 +131,9 @@ fn make_folding_range(loc: &Location, kind: FoldingRangeKind) -> Option<FoldingR
133131
// Only create folding range if it spans at least 2 lines
134132
if loc.end.line > loc.start.line {
135133
Some(FoldingRange {
136-
start_line: to_lsp_u32(loc.start.line.saturating_sub(1)),
134+
start_line: loc.start.line.saturating_sub(1),
137135
start_character: None,
138-
end_line: to_lsp_u32(loc.end.line.saturating_sub(1)),
136+
end_line: loc.end.line.saturating_sub(1),
139137
end_character: None,
140138
kind: Some(kind),
141139
collapsed_text: None,

acdc-lsp/src/capabilities/formatting.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ fn collect_protected_ranges_from_blocks(blocks: &[Block], ranges: &mut Vec<Prote
109109
if is_verbatim_block_type(&db.inner) {
110110
// Location is 1-indexed, convert to 0-indexed
111111
ranges.push(ProtectedRange {
112-
start_line: db.location.start.line.saturating_sub(1),
113-
end_line: db.location.end.line.saturating_sub(1),
112+
start_line: db.location.start.line.saturating_sub(1) as usize,
113+
end_line: db.location.end.line.saturating_sub(1) as usize,
114114
});
115115
} else {
116116
// Non-verbatim delimited blocks can contain nested verbatim blocks
@@ -365,8 +365,8 @@ fn ensure_block_separation(
365365
};
366366

367367
// Convert 1-indexed AST locations to 0-indexed
368-
let prev_end_line = block_location(prev_block).end.line.saturating_sub(1);
369-
let curr_start_line = block_location(curr_block).start.line.saturating_sub(1);
368+
let prev_end_line = block_location(prev_block).end.line.saturating_sub(1) as usize;
369+
let curr_start_line = block_location(curr_block).start.line.saturating_sub(1) as usize;
370370

371371
// Only process blocks within our range
372372
if prev_end_line < range.start || curr_start_line >= range.end {

acdc-lsp/src/capabilities/semantic_tokens.rs

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ fn collect_tokens_from_block(block: &Block, tokens: &mut Vec<RawToken>) {
165165

166166
if title_len > 0 {
167167
tokens.push(RawToken {
168-
line: to_lsp_u32(section.location.start.line.saturating_sub(1)),
168+
line: section.location.start.line.saturating_sub(1),
169169
// Skip the = markers and space
170170
start_char: u32::from(section.level) + 2, // Skip = markers and space
171171
length: to_lsp_u32(title_len),
@@ -212,7 +212,7 @@ fn collect_tokens_from_block(block: &Block, tokens: &mut Vec<RawToken>) {
212212
Block::DocumentAttribute(attr) => {
213213
// Attribute name as property
214214
tokens.push(RawToken {
215-
line: to_lsp_u32(attr.location.start.line.saturating_sub(1)),
215+
line: attr.location.start.line.saturating_sub(1),
216216
start_char: 1, // Skip leading :
217217
length: to_lsp_u32(attr.name.len()),
218218
token_type: 2, // PROPERTY
@@ -341,21 +341,20 @@ fn add_token_for_location(
341341
// anyway, so emit a minimal 1-char token rather than a bogus length.
342342
None => 1,
343343
// Same line (and same file): use the column span.
344-
Some(_) if loc.start.line == loc.end.line => to_lsp_u32(
345-
loc.end
346-
.column
347-
.saturating_sub(loc.start.column)
348-
.saturating_add(1),
349-
),
344+
Some(_) if loc.start.line == loc.end.line => loc
345+
.end
346+
.column
347+
.saturating_sub(loc.start.column)
348+
.saturating_add(1),
350349
// Multi-line within one file: use the byte length (simplified — first-line only
351350
// would need the line width, which we don't have here).
352351
Some(bytes) => to_lsp_u32(bytes),
353352
};
354353

355354
if length > 0 {
356355
tokens.push(RawToken {
357-
line: to_lsp_u32(loc.start.line.saturating_sub(1)),
358-
start_char: to_lsp_u32(loc.start.column.saturating_sub(1)),
356+
line: loc.start.line.saturating_sub(1),
357+
start_char: loc.start.column.saturating_sub(1),
359358
length,
360359
token_type,
361360
token_modifiers,

acdc-lsp/src/convert.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ pub(crate) fn offset_in_location(offset: usize, location: &Location) -> bool {
5454
pub(crate) fn location_to_range(loc: &Location) -> Range {
5555
Range {
5656
start: Position {
57-
line: to_lsp_u32(loc.start.line.saturating_sub(1)),
58-
character: to_lsp_u32(loc.start.column.saturating_sub(1)),
57+
line: loc.start.line.saturating_sub(1),
58+
character: loc.start.column.saturating_sub(1),
5959
},
6060
end: Position {
61-
line: to_lsp_u32(loc.end.line.saturating_sub(1)),
62-
character: to_lsp_u32(loc.end.column),
61+
line: loc.end.line.saturating_sub(1),
62+
character: loc.end.column,
6363
},
6464
}
6565
}

acdc-lsp/src/state/document.rs

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::collections::HashMap;
44
use std::path::PathBuf;
55
use std::sync::{Mutex, MutexGuard};
66

7-
use acdc_parser::{Document, DocumentAttributes, Location};
7+
use acdc_parser::{Document, DocumentAttributes, Location, Position};
88
use tower_lsp_server::ls_types::Diagnostic;
99

1010
/// Owned counterpart to `acdc_parser::Source<'_>`, detached from the parser arena
@@ -238,10 +238,8 @@ pub(crate) fn extract_attribute_defs(text: &str) -> Vec<(String, Location)> {
238238
let line_end = line.len();
239239

240240
let mut location = Location::default();
241-
location.start.line = line_idx + 1;
242-
location.start.column = col_offset + 1;
243-
location.end.line = line_idx + 1;
244-
location.end.column = line_end;
241+
location.start = Position::from_line_col(line_idx + 1, col_offset + 1);
242+
location.end = Position::from_line_col(line_idx + 1, line_end);
245243
location.absolute_start = this_line_start + col_offset;
246244
location.absolute_end = this_line_start + line_end;
247245

@@ -321,10 +319,8 @@ fn extract_refs_from_line(
321319
let col_end = segment_offset_in_line + close + 1;
322320

323321
let mut location = Location::default();
324-
location.start.line = line_idx + 1;
325-
location.start.column = col_in_line + 1;
326-
location.end.line = line_idx + 1;
327-
location.end.column = col_end;
322+
location.start = Position::from_line_col(line_idx + 1, col_in_line + 1);
323+
location.end = Position::from_line_col(line_idx + 1, col_end);
328324
location.absolute_start = line_start + col_in_line;
329325
location.absolute_end = line_start + col_end;
330326

@@ -492,10 +488,8 @@ pub(crate) fn extract_includes(text: &str) -> Vec<(String, Location)> {
492488
let target_end = target_start + target.len();
493489

494490
let mut location = Location::default();
495-
location.start.line = line_idx + 1;
496-
location.start.column = target_start + 1;
497-
location.end.line = line_idx + 1;
498-
location.end.column = target_end;
491+
location.start = Position::from_line_col(line_idx + 1, target_start + 1);
492+
location.end = Position::from_line_col(line_idx + 1, target_end);
499493
location.absolute_start = this_line_start + target_start;
500494
location.absolute_end = this_line_start + target_end;
501495

acdc-parser/CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4040
- `Location::byte_len()` returns the location's inclusive byte length, or `None` when its
4141
start and end fall in different files (where the byte offsets are in different coordinate
4242
spaces and can't be subtracted). Prefer it over `absolute_end - absolute_start`.
43+
- `Position::from_line_col(line, column)` builds a `Position` from `usize` line/column,
44+
saturating at `u32::MAX`. Use it when constructing from `usize` indices; prefer
45+
`Position::new` when the values are already `u32`.
4346
- `SectionKind` enum and a `kind` field on `Section` (and `TocEntry`) classifying
4447
a section as an `AsciiDoc` *special section* (`Preface`, `Glossary`, `Appendix`,
4548
…) or `Normal`, derived from its style. This is a structural classification only
@@ -87,6 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
8790
`SectionKind::as_style`).
8891
- Updated the parser grammar implementation to reduce location-tracking overhead while
8992
preserving the same parse output and diagnostics.
93+
- **Breaking:** `Position::line` and `Position::column` are now `u32` instead of `usize`
94+
(saturating at `u32::MAX` for inputs beyond ~4 billion lines/columns), keeping the
95+
per-node `Location` compact now that each boundary also carries its originating `file`.
9096
- **Breaking:** the `Positioning` enum is removed and `SourceLocation` now holds a
9197
single `location: Location` (a point diagnostic is a zero-width span with
9298
`start == end`). Read `source_location.location.start` for the line/column instead

acdc-parser/src/grammar/line_map.rs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,10 @@ impl LineMap {
150150
.map_or(0, |s| s.chars().count())
151151
};
152152

153-
Position::new(line, chars_in_line + 1)
153+
Position::new(
154+
u32::try_from(line).unwrap_or(u32::MAX),
155+
u32::try_from(chars_in_line + 1).unwrap_or(u32::MAX),
156+
)
154157
}
155158

156159
/// Source line (1-indexed) for a preprocessed `offset` in a span that begins at
@@ -163,11 +166,11 @@ impl LineMap {
163166
/// once per range) save a lookup by calling this instead of `source_line`.
164167
pub(crate) fn source_line_from(
165168
&self,
166-
start_line: usize,
167-
preproc_start_line: usize,
169+
start_line: u32,
170+
preproc_start_line: u32,
168171
input: &str,
169172
offset: usize,
170-
) -> usize {
173+
) -> u32 {
171174
let offset_line = self.offset_to_position(offset, input).line;
172175
start_line + offset_line.saturating_sub(preproc_start_line)
173176
}
@@ -176,9 +179,14 @@ impl LineMap {
176179
/// range's preprocessed start line on the fly. Convenience over
177180
/// [`source_line_from`](Self::source_line_from) for callers without a cached start
178181
/// line (the rare diagnostic paths).
179-
pub(crate) fn source_line(&self, range: &SourceRange, input: &str, offset: usize) -> usize {
182+
pub(crate) fn source_line(&self, range: &SourceRange, input: &str, offset: usize) -> u32 {
180183
let preproc_start_line = self.offset_to_position(range.start_offset, input).line;
181-
self.source_line_from(range.start_line, preproc_start_line, input, offset)
184+
self.source_line_from(
185+
u32::try_from(range.start_line).unwrap_or(u32::MAX),
186+
preproc_start_line,
187+
input,
188+
offset,
189+
)
182190
}
183191
}
184192

acdc-parser/src/grammar/location_mapping.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,9 @@ macro_rules! remap_simple_location {
285285
($node:expr, $base_offset:expr) => {{
286286
$node.location.absolute_start += $base_offset;
287287
$node.location.absolute_end += $base_offset;
288-
$node.location.start.column += $base_offset;
289-
$node.location.end.column += $base_offset;
288+
let col_shift = u32::try_from($base_offset).unwrap_or(u32::MAX);
289+
$node.location.start.column += col_shift;
290+
$node.location.end.column += col_shift;
290291
}};
291292
}
292293

0 commit comments

Comments
 (0)