Skip to content

Display spans correctly when there are zero-width or wide characters #45711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 5, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
@@ -364,6 +364,7 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
end_pos: _,
ref lines,
ref multibyte_chars,
ref non_narrow_chars,
} = *self;

name.hash_stable(hcx, hasher);
@@ -389,6 +390,12 @@ impl<'gcx> HashStable<StableHashingContext<'gcx>> for FileMap {
for &char_pos in multibyte_chars.iter() {
stable_multibyte_char(char_pos, start_pos).hash_stable(hcx, hasher);
}

let non_narrow_chars = non_narrow_chars.borrow();
non_narrow_chars.len().hash_stable(hcx, hasher);
for &char_pos in non_narrow_chars.iter() {
stable_non_narrow_char(char_pos, start_pos).hash_stable(hcx, hasher);
}
}
}

@@ -408,3 +415,12 @@ fn stable_multibyte_char(mbc: ::syntax_pos::MultiByteChar,

(pos.0 - filemap_start.0, bytes as u32)
}

fn stable_non_narrow_char(swc: ::syntax_pos::NonNarrowChar,
filemap_start: ::syntax_pos::BytePos)
-> (u32, u32) {
let pos = swc.pos();
let width = swc.width();

(pos.0 - filemap_start.0, width as u32)
}
14 changes: 7 additions & 7 deletions src/librustc_errors/emitter.rs
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@

use self::Destination::*;

use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan, CharPos};
use syntax_pos::{DUMMY_SP, FileMap, Span, MultiSpan};

use {Level, CodeSuggestion, DiagnosticBuilder, SubDiagnostic, CodeMapper};
use RenderSpan::*;
@@ -201,17 +201,17 @@ impl EmitterWriter {
// 6..7. This is degenerate input, but it's best to degrade
// gracefully -- and the parser likes to supply a span like
// that for EOF, in particular.
if lo.col == hi.col && lo.line == hi.line {
hi.col = CharPos(lo.col.0 + 1);
if lo.col_display == hi.col_display && lo.line == hi.line {
hi.col_display += 1;
}

let ann_type = if lo.line != hi.line {
let ml = MultilineAnnotation {
depth: 1,
line_start: lo.line,
line_end: hi.line,
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
};
@@ -221,8 +221,8 @@ impl EmitterWriter {
AnnotationType::Singleline
};
let ann = Annotation {
start_col: lo.col.0,
end_col: hi.col.0,
start_col: lo.col_display,
end_col: hi.col_display,
is_primary: span_label.is_primary,
label: span_label.label.clone(),
annotation_type: ann_type,
8 changes: 7 additions & 1 deletion src/librustc_metadata/decoder.rs
Original file line number Diff line number Diff line change
@@ -1189,6 +1189,7 @@ impl<'a, 'tcx> CrateMetadata {
end_pos,
lines,
multibyte_chars,
non_narrow_chars,
.. } = filemap_to_import;

let source_length = (end_pos - start_pos).to_usize();
@@ -1206,14 +1207,19 @@ impl<'a, 'tcx> CrateMetadata {
for mbc in &mut multibyte_chars {
mbc.pos = mbc.pos - start_pos;
}
let mut non_narrow_chars = non_narrow_chars.into_inner();
for swc in &mut non_narrow_chars {
*swc = *swc - start_pos;
}

let local_version = local_codemap.new_imported_filemap(name,
name_was_remapped,
self.cnum.as_u32(),
src_hash,
source_length,
lines,
multibyte_chars);
multibyte_chars,
non_narrow_chars);
debug!("CrateMetaData::imported_filemaps alloc \
filemap {:?} original (start_pos {:?} end_pos {:?}) \
translated (start_pos {:?} end_pos {:?})",
42 changes: 40 additions & 2 deletions src/libsyntax/codemap.rs
Original file line number Diff line number Diff line change
@@ -242,7 +242,8 @@ impl CodeMap {
src_hash: u128,
source_len: usize,
mut file_local_lines: Vec<BytePos>,
mut file_local_multibyte_chars: Vec<MultiByteChar>)
mut file_local_multibyte_chars: Vec<MultiByteChar>,
mut file_local_non_narrow_chars: Vec<NonNarrowChar>)
-> Rc<FileMap> {
let start_pos = self.next_start_pos();
let mut files = self.files.borrow_mut();
@@ -258,6 +259,10 @@ impl CodeMap {
mbc.pos = mbc.pos + start_pos;
}

for swc in &mut file_local_non_narrow_chars {
*swc = *swc + start_pos;
}

let filemap = Rc::new(FileMap {
name: filename,
name_was_remapped,
@@ -270,6 +275,7 @@ impl CodeMap {
end_pos,
lines: RefCell::new(file_local_lines),
multibyte_chars: RefCell::new(file_local_multibyte_chars),
non_narrow_chars: RefCell::new(file_local_non_narrow_chars),
});

files.push(filemap.clone());
@@ -297,6 +303,24 @@ impl CodeMap {
let line = a + 1; // Line numbers start at 1
let linebpos = (*f.lines.borrow())[a];
let linechpos = self.bytepos_to_file_charpos(linebpos);
let col = chpos - linechpos;

let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let start_width_idx = non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize =
non_narrow_chars[start_width_idx..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
col.0 - special_chars + non_narrow
};
debug!("byte pos {:?} is on the line at byte pos {:?}",
pos, linebpos);
debug!("char pos {:?} is on the line at char pos {:?}",
@@ -306,14 +330,28 @@ impl CodeMap {
Loc {
file: f,
line,
col: chpos - linechpos,
col,
col_display,
}
}
Err(f) => {
let col_display = {
let non_narrow_chars = f.non_narrow_chars.borrow();
let end_width_idx = non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize =
non_narrow_chars[0..end_width_idx]
.into_iter()
.map(|x| x.width())
.sum();
chpos.0 - end_width_idx + non_narrow
};
Loc {
file: f,
line: 0,
col: chpos,
col_display,
}
}
}
1 change: 1 addition & 0 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -433,6 +433,7 @@ impl<'a> StringReader<'a> {
self.filemap.record_multibyte_char(self.pos, new_ch_len);
}
}
self.filemap.record_width(self.pos, new_ch);
} else {
self.ch = None;
self.pos = new_pos;
1 change: 1 addition & 0 deletions src/libsyntax_pos/Cargo.toml
Original file line number Diff line number Diff line change
@@ -11,3 +11,4 @@ crate-type = ["dylib"]
[dependencies]
serialize = { path = "../libserialize" }
rustc_data_structures = { path = "../librustc_data_structures" }
unicode-width = "0.1.4"
95 changes: 91 additions & 4 deletions src/libsyntax_pos/lib.rs
Original file line number Diff line number Diff line change
@@ -44,6 +44,8 @@ use serialize::{Encodable, Decodable, Encoder, Decoder};
extern crate serialize;
extern crate serialize as rustc_serialize; // used by deriving

extern crate unicode_width;

pub mod hygiene;
pub use hygiene::{SyntaxContext, ExpnInfo, ExpnFormat, NameAndSpan, CompilerDesugaringKind};

@@ -494,6 +496,63 @@ pub struct MultiByteChar {
pub bytes: usize,
}

/// Identifies an offset of a non-narrow character in a FileMap
#[derive(Copy, Clone, RustcEncodable, RustcDecodable, Eq, PartialEq)]
pub enum NonNarrowChar {
/// Represents a zero-width character
ZeroWidth(BytePos),
/// Represents a wide (fullwidth) character
Wide(BytePos),
}

impl NonNarrowChar {
fn new(pos: BytePos, width: usize) -> Self {
match width {
0 => NonNarrowChar::ZeroWidth(pos),
2 => NonNarrowChar::Wide(pos),
_ => panic!("width {} given for non-narrow character", width),
}
}

/// Returns the absolute offset of the character in the CodeMap
pub fn pos(&self) -> BytePos {
match *self {
NonNarrowChar::ZeroWidth(p) |
NonNarrowChar::Wide(p) => p,
}
}

/// Returns the width of the character, 0 (zero-width) or 2 (wide)
pub fn width(&self) -> usize {
match *self {
NonNarrowChar::ZeroWidth(_) => 0,
NonNarrowChar::Wide(_) => 2,
}
}
}

impl Add<BytePos> for NonNarrowChar {
type Output = Self;

fn add(self, rhs: BytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos + rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos + rhs),
}
}
}

impl Sub<BytePos> for NonNarrowChar {
type Output = Self;

fn sub(self, rhs: BytePos) -> Self {
match self {
NonNarrowChar::ZeroWidth(pos) => NonNarrowChar::ZeroWidth(pos - rhs),
NonNarrowChar::Wide(pos) => NonNarrowChar::Wide(pos - rhs),
}
}
}

/// The state of the lazy external source loading mechanism of a FileMap.
#[derive(PartialEq, Eq, Clone)]
pub enum ExternalSource {
@@ -552,11 +611,13 @@ pub struct FileMap {
pub lines: RefCell<Vec<BytePos>>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: RefCell<Vec<MultiByteChar>>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: RefCell<Vec<NonNarrowChar>>,
}

impl Encodable for FileMap {
fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> {
s.emit_struct("FileMap", 7, |s| {
s.emit_struct("FileMap", 8, |s| {
s.emit_struct_field("name", 0, |s| self.name.encode(s))?;
s.emit_struct_field("name_was_remapped", 1, |s| self.name_was_remapped.encode(s))?;
s.emit_struct_field("src_hash", 6, |s| self.src_hash.encode(s))?;
@@ -610,6 +671,9 @@ impl Encodable for FileMap {
})?;
s.emit_struct_field("multibyte_chars", 5, |s| {
(*self.multibyte_chars.borrow()).encode(s)
})?;
s.emit_struct_field("non_narrow_chars", 7, |s| {
(*self.non_narrow_chars.borrow()).encode(s)
})
})
}
@@ -618,7 +682,7 @@ impl Encodable for FileMap {
impl Decodable for FileMap {
fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> {

d.read_struct("FileMap", 6, |d| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the 6 a bug?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea, but for me 6 there didn't make sense. There were total of 7 (now 8) fields.

d.read_struct("FileMap", 8, |d| {
let name: String = d.read_struct_field("name", 0, |d| Decodable::decode(d))?;
let name_was_remapped: bool =
d.read_struct_field("name_was_remapped", 1, |d| Decodable::decode(d))?;
@@ -657,6 +721,8 @@ impl Decodable for FileMap {
})?;
let multibyte_chars: Vec<MultiByteChar> =
d.read_struct_field("multibyte_chars", 5, |d| Decodable::decode(d))?;
let non_narrow_chars: Vec<NonNarrowChar> =
d.read_struct_field("non_narrow_chars", 7, |d| Decodable::decode(d))?;
Ok(FileMap {
name,
name_was_remapped,
@@ -671,7 +737,8 @@ impl Decodable for FileMap {
src_hash,
external_src: RefCell::new(ExternalSource::AbsentOk),
lines: RefCell::new(lines),
multibyte_chars: RefCell::new(multibyte_chars)
multibyte_chars: RefCell::new(multibyte_chars),
non_narrow_chars: RefCell::new(non_narrow_chars)
})
})
}
@@ -709,6 +776,7 @@ impl FileMap {
end_pos: Pos::from_usize(end_pos),
lines: RefCell::new(Vec::new()),
multibyte_chars: RefCell::new(Vec::new()),
non_narrow_chars: RefCell::new(Vec::new()),
}
}

@@ -798,6 +866,23 @@ impl FileMap {
self.multibyte_chars.borrow_mut().push(mbc);
}

pub fn record_width(&self, pos: BytePos, ch: char) {
let width = match ch {
'\t' | '\n' =>
// Tabs will consume one column.
// Make newlines take one column so that displayed spans can point them.
1,
Copy link
Contributor

@estebank estebank Nov 2, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you're adding support for characters with varying widths, adding support for either 4 or 8 width tabs should be easy to add. Let's do so in a subsequent PR.

Done in #45953.

ch =>
// Assume control characters are zero width.
// FIXME: How can we decide between `width` and `width_cjk`?
unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0),
};
// Only record non-narrow characters.
if width != 1 {
self.non_narrow_chars.borrow_mut().push(NonNarrowChar::new(pos, width));
}
}

pub fn is_real_file(&self) -> bool {
!(self.name.starts_with("<") &&
self.name.ends_with(">"))
@@ -944,7 +1029,9 @@ pub struct Loc {
/// The (1-based) line number
pub line: usize,
/// The (0-based) column offset
pub col: CharPos
pub col: CharPos,
/// The (0-based) column offset when displayed
pub col_display: usize,
}

/// A source code location used as the result of lookup_char_pos_adj
2 changes: 1 addition & 1 deletion src/test/ui/codemap_tests/unicode.stderr
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@ error: invalid ABI: expected one of [cdecl, stdcall, fastcall, vectorcall, thisc
--> $DIR/unicode.rs:11:8
|
11 | extern "路濫狼á́́" fn foo() {}
| ^^^^^^^^
| ^^^^^^^^^

error: aborting due to previous error

Loading