Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor advancing token to avoid duplication, avoid borrow checker issues #1618

Merged
merged 3 commits into from
Dec 27, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 70 additions & 30 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,9 @@ impl<'a> Parser<'a> {

let dialect = self.dialect;

let (next_token, next_token_index) = self.next_token_ref_with_index();
self.advance_token();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has two benefits:

  1. I think it is more explicit
  2. It doesn't not result in a mut borrow of self(this is the key difference)

let next_token_index = self.get_current_index();
let next_token = self.get_current_token();
let span = next_token.span;
let expr = match &next_token.token {
Token::Word(w) => {
Expand Down Expand Up @@ -2914,7 +2916,9 @@ impl<'a> Parser<'a> {

let dialect = self.dialect;

let (tok, tok_index) = self.next_token_ref_with_index();
self.advance_token();
let tok = self.get_current_token();
let tok_index = self.get_current_index();
let span = tok.span;
let regular_binary_operator = match &tok.token {
Token::Spaceship => Some(BinaryOperator::Spaceship),
Expand Down Expand Up @@ -2994,7 +2998,8 @@ impl<'a> Parser<'a> {
// See https://www.postgresql.org/docs/current/sql-createoperator.html
let mut idents = vec![];
loop {
idents.push(self.next_token_ref().to_string());
self.advance_token();
idents.push(self.get_current_token().to_string());
if !self.consume_token(&Token::Period) {
break;
}
Expand Down Expand Up @@ -3441,6 +3446,8 @@ impl<'a> Parser<'a> {

/// Return the first non-whitespace token that has not yet been processed
/// or Token::EOF
///
/// See [`Self::peek_token_ref`] to avoid the copy.
pub fn peek_token(&self) -> TokenWithSpan {
self.peek_nth_token(0)
}
Expand Down Expand Up @@ -3557,45 +3564,67 @@ impl<'a> Parser<'a> {
///
/// See [`Self::next_token_ref`] to avoid the copy.
pub fn next_token(&mut self) -> TokenWithSpan {
self.next_token_ref().clone()
self.advance_token();
self.get_current_token().clone()
}

pub fn next_token_ref(&mut self) -> &TokenWithSpan {
self.next_token_ref_with_index().0
/// Returns the index of the current token
///
/// This can be used with APIs that expect an index, such as
/// [`Self::token_at`]
pub fn get_current_index(&self) -> usize {
self.index.saturating_sub(1)
}

/// Return the first non-whitespace token that has not yet been processed
/// and that tokens index and advances the tokens
/// Return the next unprocessed token, possibly whitespace.
pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
self.index += 1;
self.tokens.get(self.index - 1)
}

/// Advances the current token to the next non-whitespace token
///
/// # Notes:
/// OK to call repeatedly after reaching EOF.
pub fn next_token_ref_with_index(&mut self) -> (&TokenWithSpan, usize) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note thse APIs were added in #1587 (so not yet released) so this isn't a breaking API change

/// See [`Self::get_current_token`] to get the current token after advancing
pub fn advance_token(&mut self) {
loop {
self.index += 1;
match self.tokens.get(self.index - 1) {
Some(TokenWithSpan {
token: Token::Whitespace(_),
span: _,
}) => continue,
token => return (token.unwrap_or(&EOF_TOKEN), self.index - 1),
_ => break,
}
}
}

/// Returns a reference to the current token
pub fn current_token(&self) -> &TokenWithSpan {
self.tokens.get(self.index - 1).unwrap_or(&EOF_TOKEN)
///
/// Does not advance the current token.
pub fn get_current_token(&self) -> &TokenWithSpan {
self.token_at(self.index.saturating_sub(1))
}

/// Return the first unprocessed token, possibly whitespace.
pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> {
self.index += 1;
self.tokens.get(self.index - 1)
/// Returns a reference to the previous token
///
/// Does not advance the current token.
pub fn get_previous_token(&self) -> &TokenWithSpan {
self.token_at(self.index.saturating_sub(2))
}

/// Push back the last one non-whitespace token. Must be called after
/// `next_token()`, otherwise might panic. OK to call after
/// `next_token()` indicates an EOF.
/// Returns a reference to the next token
///
/// Does not advance the current token.
pub fn get_next_token(&self) -> &TokenWithSpan {
self.token_at(self.index)
}

/// Seek back the last one non-whitespace token.
///
/// Must be called after `next_token()`, otherwise might panic. OK to call
/// after `next_token()` indicates an EOF.
///
// TODO rename to backup_token and deprecate prev_token?
pub fn prev_token(&mut self) {
loop {
assert!(self.index > 0);
Expand Down Expand Up @@ -3641,22 +3670,31 @@ impl<'a> Parser<'a> {
#[must_use]
pub fn parse_keyword(&mut self, expected: Keyword) -> bool {
if self.peek_keyword(expected) {
self.next_token_ref();
self.advance_token();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is nicer too as it makes clear the token is unused (it is just being advanced)

true
} else {
false
}
}

/// If the current token is the `expected` keyword, consume it and returns
///
/// See [`Self::parse_keyword_token_ref`] to avoid the copy.
#[must_use]
pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option<TokenWithSpan> {
self.parse_keyword_token_ref(expected).cloned()
}

/// If the current token is the `expected` keyword, consume it and returns a reference to the next token.
///
// TODO remove this in favor of advance??
iffyio marked this conversation as resolved.
Show resolved Hide resolved
#[must_use]
pub fn parse_keyword_token_ref(&mut self, expected: Keyword) -> Option<&TokenWithSpan> {
match &self.peek_token_ref().token {
Token::Word(w) if expected == w.keyword => Some(self.next_token_ref()),
Token::Word(w) if expected == w.keyword => {
self.advance_token();
Some(self.get_current_token())
}
_ => None,
}
}
Expand All @@ -3683,7 +3721,7 @@ impl<'a> Parser<'a> {
}
// consume all tokens
for _ in 0..(tokens.len() + 1) {
self.next_token_ref();
self.advance_token();
}
true
}
Expand Down Expand Up @@ -3719,7 +3757,7 @@ impl<'a> Parser<'a> {
.iter()
.find(|keyword| **keyword == w.keyword)
.map(|keyword| {
self.next_token_ref();
self.advance_token();
*keyword
})
}
Expand Down Expand Up @@ -3774,10 +3812,12 @@ impl<'a> Parser<'a> {
}

/// Consume the next token if it matches the expected token, otherwise return false
///
/// See [Self::advance_token] to consume the token unconditionally
#[must_use]
pub fn consume_token(&mut self, expected: &Token) -> bool {
if self.peek_token_ref() == expected {
self.next_token_ref();
self.advance_token();
true
} else {
false
Expand Down Expand Up @@ -8278,9 +8318,9 @@ impl<'a> Parser<'a> {
&mut self,
) -> Result<(DataType, MatchedTrailingBracket), ParserError> {
let dialect = self.dialect;
let (next_token, next_token_index) = self.next_token_ref_with_index();
let _ = next_token; // release ref
let next_token = self.current_token();
self.advance_token();
let next_token = self.get_current_token();
let next_token_index = self.get_current_index();

let mut trailing_bracket: MatchedTrailingBracket = false.into();
let mut data = match &next_token.token {
Expand Down Expand Up @@ -8806,7 +8846,7 @@ impl<'a> Parser<'a> {
Token::EOF | Token::Eq => break,
_ => {}
}
self.next_token_ref();
self.advance_token();
}
Ok(idents)
}
Expand Down
Loading