blob: 81f34c382886b058aec7af6eed9cb514f0d32c41 [file] [log] [blame]
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15package com.google.devtools.build.lib.syntax;
16
17import com.google.common.annotations.VisibleForTesting;
tomlua155b532017-11-08 20:12:47 +010018import com.google.common.base.Preconditions;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +000019import com.google.common.base.Supplier;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010020import com.google.common.collect.ImmutableList;
21import com.google.common.collect.ImmutableMap;
nharmata6dbfafe2019-02-05 08:55:07 -080022import com.google.common.collect.Interner;
Laurent Le Brune51a4d22016-10-11 18:04:16 +000023import com.google.common.collect.Iterables;
nharmata6dbfafe2019-02-05 08:55:07 -080024import com.google.devtools.build.lib.concurrent.BlazeInterners;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010025import com.google.devtools.build.lib.events.Event;
26import com.google.devtools.build.lib.events.EventHandler;
27import com.google.devtools.build.lib.events.Location;
Googler768cbc42015-08-28 12:52:14 +000028import com.google.devtools.build.lib.profiler.Profiler;
29import com.google.devtools.build.lib.profiler.ProfilerTask;
twerthee91e232018-07-09 02:33:27 -070030import com.google.devtools.build.lib.profiler.SilentCloseable;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010031import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral;
32import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010033import java.util.ArrayList;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010034import java.util.EnumSet;
laurentlb14c0f402018-11-09 13:59:34 -080035import java.util.HashSet;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010036import java.util.List;
37import java.util.Map;
laurentlb14c0f402018-11-09 13:59:34 -080038import java.util.Set;
laurentlb9b96c0b2018-02-12 02:53:19 -080039import javax.annotation.Nullable;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010040
/**
 * Recursive descent parser for the LL(2) BUILD language.
 *
 * <p>Loosely based on the Python 2 grammar; see
 * https://docs.python.org/2/reference/grammar.html
 */
Han-Wen Nienhuysceae8c52015-09-22 16:24:45 +000046@VisibleForTesting
47public class Parser {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010048
49 /**
50 * Combines the parser result into a single value object.
51 */
52 public static final class ParseResult {
53 /** The statements (rules, basically) from the parsed file. */
54 public final List<Statement> statements;
55
56 /** The comments from the parsed file. */
57 public final List<Comment> comments;
58
Lukacs Berkid9e733d2015-09-18 08:18:11 +000059 /** Represents every statement in the file. */
60 public final Location location;
61
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010062 /** Whether the file contained any errors. */
63 public final boolean containsErrors;
64
Lukacs Berkid9e733d2015-09-18 08:18:11 +000065 public ParseResult(List<Statement> statements, List<Comment> comments, Location location,
66 boolean containsErrors) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010067 // No need to copy here; when the object is created, the parser instance is just about to go
68 // out of scope and be garbage collected.
69 this.statements = Preconditions.checkNotNull(statements);
70 this.comments = Preconditions.checkNotNull(comments);
Lukacs Berkid9e733d2015-09-18 08:18:11 +000071 this.location = location;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010072 this.containsErrors = containsErrors;
73 }
74 }
75
/**
 * Selects which constructs are allowed, depending on whether parsing happens at the top level
 * of a file or inside a nested (local) scope.
 */
public enum ParsingLevel {
  /** Parsing at the top level of a file. */
  TOP_LEVEL,

  /** Parsing inside a local scope. */
  LOCAL_LEVEL
}
81
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010082 private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
Googlercc0d9952015-08-10 12:01:34 +000083 EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010084
85 private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000086 EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010087
88 private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000089 EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010090
Laurent Le Brun56093892015-03-20 13:01:58 +000091 private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000092 EnumSet.of(
93 TokenKind.EOF,
94 TokenKind.NEWLINE,
Laurent Le Brun29ad8622015-09-18 10:45:07 +000095 TokenKind.EQUALS,
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000096 TokenKind.RBRACE,
97 TokenKind.RBRACKET,
98 TokenKind.RPAREN,
99 TokenKind.SEMI);
Laurent Le Brun56093892015-03-20 13:01:58 +0000100
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000101 private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET =
102 EnumSet.of(
103 TokenKind.COLON,
104 TokenKind.COMMA,
105 TokenKind.EOF,
106 TokenKind.FOR,
107 TokenKind.MINUS,
108 TokenKind.PERCENT,
109 TokenKind.PLUS,
110 TokenKind.RBRACKET,
111 TokenKind.RPAREN,
112 TokenKind.SLASH);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100113
laurentlb566ef5a2018-05-22 10:35:06 -0700114 /** Current lookahead token. May be mutated by the parser. */
115 private Token token;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100116
117 private static final boolean DEBUGGING = false;
118
119 private final Lexer lexer;
120 private final EventHandler eventHandler;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100121
122 private static final Map<TokenKind, Operator> binaryOperators =
123 new ImmutableMap.Builder<TokenKind, Operator>()
124 .put(TokenKind.AND, Operator.AND)
125 .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
126 .put(TokenKind.GREATER, Operator.GREATER)
127 .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
128 .put(TokenKind.IN, Operator.IN)
129 .put(TokenKind.LESS, Operator.LESS)
130 .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
131 .put(TokenKind.MINUS, Operator.MINUS)
132 .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000133 .put(TokenKind.NOT_IN, Operator.NOT_IN)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100134 .put(TokenKind.OR, Operator.OR)
135 .put(TokenKind.PERCENT, Operator.PERCENT)
Laurent Le Brun8a528262015-04-15 14:23:35 +0000136 .put(TokenKind.SLASH, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200137 .put(TokenKind.SLASH_SLASH, Operator.FLOOR_DIVIDE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100138 .put(TokenKind.PLUS, Operator.PLUS)
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000139 .put(TokenKind.PIPE, Operator.PIPE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100140 .put(TokenKind.STAR, Operator.MULT)
141 .build();
142
143 private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
144 new ImmutableMap.Builder<TokenKind, Operator>()
Googler13151752016-06-02 18:37:13 +0000145 .put(TokenKind.PLUS_EQUALS, Operator.PLUS)
146 .put(TokenKind.MINUS_EQUALS, Operator.MINUS)
147 .put(TokenKind.STAR_EQUALS, Operator.MULT)
148 .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200149 .put(TokenKind.SLASH_SLASH_EQUALS, Operator.FLOOR_DIVIDE)
Googler13151752016-06-02 18:37:13 +0000150 .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT)
151 .build();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100152
153 /** Highest precedence goes last.
154 * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
155 **/
156 private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
157 EnumSet.of(Operator.OR),
158 EnumSet.of(Operator.AND),
159 EnumSet.of(Operator.NOT),
160 EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000161 Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN),
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000162 EnumSet.of(Operator.PIPE),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100163 EnumSet.of(Operator.MINUS, Operator.PLUS),
laurentlb094bb262017-05-19 21:18:25 +0200164 EnumSet.of(Operator.DIVIDE, Operator.FLOOR_DIVIDE, Operator.MULT, Operator.PERCENT));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100165
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100166 private int errorsCount;
167 private boolean recoveryMode; // stop reporting errors until next statement
168
nharmata6dbfafe2019-02-05 08:55:07 -0800169 private final Interner<String> stringInterner = BlazeInterners.newStrongInterner();
170
/**
 * Creates a parser reading from {@code lexer} and reporting to {@code eventHandler}, and reads
 * the first token so that the lookahead is ready.
 */
private Parser(Lexer lexer, EventHandler eventHandler) {
  this.lexer = lexer;
  this.eventHandler = eventHandler;
  // Prime the lookahead token.
  nextToken();
}
176
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000177 private static Location locationFromStatements(Lexer lexer, List<Statement> statements) {
178 if (!statements.isEmpty()) {
179 return lexer.createLocation(
180 statements.get(0).getLocation().getStartOffset(),
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000181 Iterables.getLast(statements).getLocation().getEndOffset());
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000182 } else {
183 return Location.fromPathFragment(lexer.getFilename());
184 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100185 }
186
187 /**
brandjon733a97d2017-06-27 17:11:27 +0200188 * Main entry point for parsing a file.
brandjon540aac62017-06-12 23:08:09 +0200189 *
190 * @param input the input to parse
191 * @param eventHandler a reporter for parsing errors
brandjon733a97d2017-06-27 17:11:27 +0200192 * @see BuildFileAST#parseBuildString
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100193 */
laurentlb17d975e2017-09-01 17:49:23 +0200194 public static ParseResult parseFile(ParserInputSource input, EventHandler eventHandler) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000195 Lexer lexer = new Lexer(input, eventHandler);
laurentlbab58a922017-08-22 16:45:28 +0200196 Parser parser = new Parser(lexer, eventHandler);
twerthee91e232018-07-09 02:33:27 -0700197 List<Statement> statements;
198 try (SilentCloseable c =
laurentlb3cdfd1a2018-11-09 04:55:08 -0800199 Profiler.instance()
200 .profile(ProfilerTask.STARLARK_PARSER, input.getPath().getPathString())) {
twerthee91e232018-07-09 02:33:27 -0700201 statements = parser.parseFileInput();
202 }
laurentlbab58a922017-08-22 16:45:28 +0200203 boolean errors = parser.errorsCount > 0 || lexer.containsErrors();
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000204 return new ParseResult(
laurentlb17f8d4e2018-05-24 07:32:52 -0700205 statements, lexer.getComments(), locationFromStatements(lexer, statements), errors);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100206 }
207
208 /**
brandjon733a97d2017-06-27 17:11:27 +0200209 * Parses a sequence of statements, possibly followed by newline tokens.
brandjon540aac62017-06-12 23:08:09 +0200210 *
brandjon733a97d2017-06-27 17:11:27 +0200211 * <p>{@code load()} statements are not permitted. Use {@code parsingLevel} to control whether
212 * function definitions, for statements, etc., are allowed.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100213 */
brandjon733a97d2017-06-27 17:11:27 +0200214 public static List<Statement> parseStatements(
laurentlbab58a922017-08-22 16:45:28 +0200215 ParserInputSource input, EventHandler eventHandler, ParsingLevel parsingLevel) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000216 Lexer lexer = new Lexer(input, eventHandler);
laurentlbab58a922017-08-22 16:45:28 +0200217 Parser parser = new Parser(lexer, eventHandler);
brandjon733a97d2017-06-27 17:11:27 +0200218 List<Statement> result = new ArrayList<>();
219 parser.parseStatement(result, parsingLevel);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100220 while (parser.token.kind == TokenKind.NEWLINE) {
221 parser.nextToken();
222 }
223 parser.expect(TokenKind.EOF);
224 return result;
225 }
226
brandjon733a97d2017-06-27 17:11:27 +0200227 /**
228 * Convenience wrapper for {@link #parseStatements} where exactly one statement is expected.
229 *
230 * @throws IllegalArgumentException if the number of parsed statements was not exactly one
231 */
laurentlba9b9aea2017-09-04 17:39:09 +0200232 @VisibleForTesting
brandjon733a97d2017-06-27 17:11:27 +0200233 public static Statement parseStatement(
laurentlbab58a922017-08-22 16:45:28 +0200234 ParserInputSource input, EventHandler eventHandler, ParsingLevel parsingLevel) {
235 List<Statement> stmts = parseStatements(input, eventHandler, parsingLevel);
brandjon733a97d2017-06-27 17:11:27 +0200236 return Iterables.getOnlyElement(stmts);
brandjon540aac62017-06-12 23:08:09 +0200237 }
238
laurentlb9b96c0b2018-02-12 02:53:19 -0800239 // stmt ::= simple_stmt
240 // | def_stmt
241 // | for_stmt
242 // | if_stmt
243 private void parseStatement(List<Statement> list, ParsingLevel parsingLevel) {
244 if (token.kind == TokenKind.DEF) {
245 if (parsingLevel == ParsingLevel.LOCAL_LEVEL) {
246 reportError(
247 lexer.createLocation(token.left, token.right),
248 "nested functions are not allowed. Move the function to top-level");
249 }
250 parseFunctionDefStatement(list);
251 } else if (token.kind == TokenKind.IF) {
252 list.add(parseIfStatement());
253 } else if (token.kind == TokenKind.FOR) {
254 if (parsingLevel == ParsingLevel.TOP_LEVEL) {
255 reportError(
256 lexer.createLocation(token.left, token.right),
257 "for loops are not allowed on top-level. Put it into a function");
258 }
259 parseForStatement(list);
260 } else {
261 parseSimpleStatement(list);
262 }
263 }
264
brandjon733a97d2017-06-27 17:11:27 +0200265 /** Parses an expression, possibly followed by newline tokens. */
laurentlba9b9aea2017-09-04 17:39:09 +0200266 @VisibleForTesting
laurentlbab58a922017-08-22 16:45:28 +0200267 public static Expression parseExpression(ParserInputSource input, EventHandler eventHandler) {
brandjon733a97d2017-06-27 17:11:27 +0200268 Lexer lexer = new Lexer(input, eventHandler);
laurentlbab58a922017-08-22 16:45:28 +0200269 Parser parser = new Parser(lexer, eventHandler);
brandjon733a97d2017-06-27 17:11:27 +0200270 Expression result = parser.parseExpression();
271 while (parser.token.kind == TokenKind.NEWLINE) {
272 parser.nextToken();
273 }
274 parser.expect(TokenKind.EOF);
275 return result;
brandjon540aac62017-06-12 23:08:09 +0200276 }
277
laurentlb9b96c0b2018-02-12 02:53:19 -0800278 private Expression parseExpression() {
279 return parseExpression(false);
280 }
281
282 // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of
283 // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity:
284 // e.g. fct(x, y) vs fct((x, y))
285 //
286 // Tuples can have a trailing comma only when insideParens is true. This prevents bugs
287 // where a one-element tuple is surprisingly created:
288 // e.g. foo = f(x),
289 private Expression parseExpression(boolean insideParens) {
290 int start = token.left;
291 Expression expression = parseNonTupleExpression();
292 if (token.kind != TokenKind.COMMA) {
293 return expression;
294 }
295
296 // It's a tuple
297 List<Expression> tuple = parseExprList(insideParens);
298 tuple.add(0, expression); // add the first expression to the front of the tuple
299 return setLocation(ListLiteral.makeTuple(tuple), start, Iterables.getLast(tuple));
300 }
301
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100302 private void reportError(Location location, String message) {
303 errorsCount++;
304 // Limit the number of reported errors to avoid spamming output.
305 if (errorsCount <= 5) {
306 eventHandler.handle(Event.error(location, message));
307 }
308 }
309
laurentlb566ef5a2018-05-22 10:35:06 -0700310 private void syntaxError(String message) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100311 if (!recoveryMode) {
312 String msg = token.kind == TokenKind.INDENT
313 ? "indentation error"
Laurent Le Brun72329862015-03-23 14:20:03 +0000314 : "syntax error at '" + token + "': " + message;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100315 reportError(lexer.createLocation(token.left, token.right), msg);
316 recoveryMode = true;
317 }
318 }
319
Laurent Le Brun9060e162015-04-02 10:07:28 +0000320 /**
321 * Consumes the current token. If it is not of the specified (expected)
322 * kind, reports a syntax error.
323 */
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100324 private boolean expect(TokenKind kind) {
325 boolean expected = token.kind == kind;
326 if (!expected) {
laurentlb566ef5a2018-05-22 10:35:06 -0700327 syntaxError("expected " + kind.getPrettyName());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100328 }
329 nextToken();
330 return expected;
331 }
332
333 /**
Laurent Le Brun9060e162015-04-02 10:07:28 +0000334 * Same as expect, but stop the recovery mode if the token was expected.
335 */
336 private void expectAndRecover(TokenKind kind) {
337 if (expect(kind)) {
338 recoveryMode = false;
339 }
340 }
341
342 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100343 * Consume tokens past the first token that has a kind that is in the set of
brandjonfe29c7242018-02-22 16:24:24 -0800344 * terminatingTokens.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100345 * @param terminatingTokens
346 * @return the end offset of the terminating token.
347 */
348 private int syncPast(EnumSet<TokenKind> terminatingTokens) {
349 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
350 while (!terminatingTokens.contains(token.kind)) {
351 nextToken();
352 }
353 int end = token.right;
354 // read past the synchronization token
355 nextToken();
356 return end;
357 }
358
359 /**
360 * Consume tokens until we reach the first token that has a kind that is in
brandjonfe29c7242018-02-22 16:24:24 -0800361 * the set of terminatingTokens.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100362 * @param terminatingTokens
363 * @return the end offset of the terminating token.
364 */
365 private int syncTo(EnumSet<TokenKind> terminatingTokens) {
366 // EOF must be in the set to prevent an infinite loop
367 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
368 // read past the problematic token
369 int previous = token.right;
370 nextToken();
371 int current = previous;
372 while (!terminatingTokens.contains(token.kind)) {
373 nextToken();
374 previous = current;
375 current = token.right;
376 }
377 return previous;
378 }
379
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000380 // Keywords that exist in Python and that we don't parse.
381 private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS =
laurentlbab58a922017-08-22 16:45:28 +0200382 EnumSet.of(
383 TokenKind.AS,
384 TokenKind.ASSERT,
385 TokenKind.CLASS,
386 TokenKind.DEL,
387 TokenKind.EXCEPT,
388 TokenKind.FINALLY,
389 TokenKind.FROM,
390 TokenKind.GLOBAL,
391 TokenKind.IMPORT,
392 TokenKind.IS,
393 TokenKind.LAMBDA,
394 TokenKind.NONLOCAL,
395 TokenKind.RAISE,
396 TokenKind.TRY,
397 TokenKind.WITH,
398 TokenKind.WHILE,
399 TokenKind.YIELD);
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000400
laurentlb566ef5a2018-05-22 10:35:06 -0700401 private void checkForbiddenKeywords() {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000402 if (!FORBIDDEN_KEYWORDS.contains(token.kind)) {
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000403 return;
404 }
405 String error;
406 switch (token.kind) {
407 case ASSERT: error = "'assert' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000408 case DEL:
409 error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list";
410 break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000411 case IMPORT: error = "'import' not supported, use 'load' instead"; break;
412 case IS: error = "'is' not supported, use '==' instead"; break;
413 case LAMBDA: error = "'lambda' not supported, declare a function instead"; break;
414 case RAISE: error = "'raise' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000415 case TRY: error = "'try' not supported, all exceptions are fatal"; break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000416 case WHILE: error = "'while' not supported, use 'for' instead"; break;
417 default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break;
418 }
419 reportError(lexer.createLocation(token.left, token.right), error);
420 }
421
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100422 private void nextToken() {
laurentlb566ef5a2018-05-22 10:35:06 -0700423 if (token == null || token.kind != TokenKind.EOF) {
424 token = lexer.nextToken();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100425 }
laurentlb566ef5a2018-05-22 10:35:06 -0700426 checkForbiddenKeywords();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100427 if (DEBUGGING) {
428 System.err.print(token);
429 }
430 }
431
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100432 // create an error expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000433 private Identifier makeErrorExpression(int start, int end) {
Taras Tsugrii36941362018-06-08 16:31:53 -0700434 return setLocation(Identifier.of("$error$"), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100435 }
436
laurentlba9b9aea2017-09-04 17:39:09 +0200437 // Convenience wrapper method around ASTNode.setLocation
laurentlb9b96c0b2018-02-12 02:53:19 -0800438 private <NodeT extends ASTNode> NodeT setLocation(NodeT node, int startOffset, int endOffset) {
laurentlba9b9aea2017-09-04 17:39:09 +0200439 return ASTNode.setLocation(lexer.createLocation(startOffset, endOffset), node);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100440 }
441
442 // Convenience method that uses end offset from the last node.
laurentlb9b96c0b2018-02-12 02:53:19 -0800443 private <NodeT extends ASTNode> NodeT setLocation(NodeT node, int startOffset, ASTNode lastNode) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000444 Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node");
445 Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100446 return setLocation(node, startOffset, lastNode.getLocation().getEndOffset());
447 }
448
Laurent Le Brun56093892015-03-20 13:01:58 +0000449 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100450 // | expr
laurentlba9b9aea2017-09-04 17:39:09 +0200451 // | *args
452 // | **kwargs
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000453 private Argument.Passed parseFuncallArgument() {
454 final int start = token.left;
laurentlb566ef5a2018-05-22 10:35:06 -0700455 Expression expr;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000456 // parse **expr
457 if (token.kind == TokenKind.STAR_STAR) {
458 nextToken();
laurentlb566ef5a2018-05-22 10:35:06 -0700459 expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000460 return setLocation(new Argument.StarStar(expr), start, expr);
461 }
462 // parse *expr
463 if (token.kind == TokenKind.STAR) {
464 nextToken();
laurentlb566ef5a2018-05-22 10:35:06 -0700465 expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000466 return setLocation(new Argument.Star(expr), start, expr);
467 }
laurentlb566ef5a2018-05-22 10:35:06 -0700468
469 expr = parseNonTupleExpression();
470 if (expr instanceof Identifier) {
471 // parse a named argument
472 if (token.kind == TokenKind.EQUALS) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100473 nextToken();
laurentlb566ef5a2018-05-22 10:35:06 -0700474 Expression val = parseNonTupleExpression();
Taras Tsugrii36941362018-06-08 16:31:53 -0700475 return setLocation(new Argument.Keyword(((Identifier) expr), val), start, val);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100476 }
477 }
laurentlb566ef5a2018-05-22 10:35:06 -0700478
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100479 // parse a positional argument
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000480 return setLocation(new Argument.Positional(expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100481 }
482
Laurent Le Brun56093892015-03-20 13:01:58 +0000483 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100484 // | IDENTIFIER
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000485 private Parameter<Expression, Expression> parseFunctionParameter() {
486 // TODO(bazel-team): optionally support type annotations
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100487 int start = token.left;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000488 if (token.kind == TokenKind.STAR_STAR) { // kwarg
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100489 nextToken();
Florian Weikert6f864c32015-07-23 11:26:39 +0000490 Identifier ident = parseIdent();
Taras Tsugrii36941362018-06-08 16:31:53 -0700491 return setLocation(new Parameter.StarStar<>(ident), start, ident);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000492 } else if (token.kind == TokenKind.STAR) { // stararg
493 int end = token.right;
494 nextToken();
495 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000496 Identifier ident = parseIdent();
Taras Tsugrii36941362018-06-08 16:31:53 -0700497 return setLocation(new Parameter.Star<>(ident), start, ident);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000498 } else {
Taras Tsugrii36941362018-06-08 16:31:53 -0700499 return setLocation(new Parameter.Star<>(null), start, end);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000500 }
501 } else {
Florian Weikert6f864c32015-07-23 11:26:39 +0000502 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000503 if (token.kind == TokenKind.EQUALS) { // there's a default value
504 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000505 Expression expr = parseNonTupleExpression();
Taras Tsugrii36941362018-06-08 16:31:53 -0700506 return setLocation(new Parameter.Optional<>(ident, expr), start, expr);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000507 } else {
Taras Tsugrii36941362018-06-08 16:31:53 -0700508 return setLocation(new Parameter.Mandatory<>(ident), start, ident);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000509 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100510 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100511 }
512
513 // funcall_suffix ::= '(' arg_list? ')'
fzaisere0f13332017-08-14 12:00:51 +0200514 private Expression parseFuncallSuffix(int start, Expression function) {
michajlo5f394752017-10-06 23:51:10 +0200515 ImmutableList<Argument.Passed> args = ImmutableList.of();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100516 expect(TokenKind.LPAREN);
517 int end;
518 if (token.kind == TokenKind.RPAREN) {
519 end = token.right;
520 nextToken(); // RPAREN
521 } else {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000522 args = parseFuncallArguments(); // (includes optional trailing comma)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100523 end = token.right;
524 expect(TokenKind.RPAREN);
525 }
fzaisere0f13332017-08-14 12:00:51 +0200526 return setLocation(new FuncallExpression(function, args), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100527 }
528
529 // selector_suffix ::= '.' IDENTIFIER
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100530 private Expression parseSelectorSuffix(int start, Expression receiver) {
531 expect(TokenKind.DOT);
532 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000533 Identifier ident = parseIdent();
fzaiseraa8540d2017-09-26 06:01:30 -0400534 return setLocation(new DotExpression(receiver, ident), start, ident);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100535 } else {
laurentlb566ef5a2018-05-22 10:35:06 -0700536 syntaxError("expected identifier after dot");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100537 int end = syncTo(EXPR_TERMINATOR_SET);
538 return makeErrorExpression(start, end);
539 }
540 }
541
542 // arg_list ::= ( (arg ',')* arg ','? )?
michajlo5f394752017-10-06 23:51:10 +0200543 private ImmutableList<Argument.Passed> parseFuncallArguments() {
544 ImmutableList<Argument.Passed> arguments = parseFunctionArguments(this::parseFuncallArgument);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000545 try {
laurentlb254a4be2019-03-26 16:35:29 -0700546 Argument.validateFuncallArguments(arguments);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000547 } catch (Argument.ArgumentException e) {
laurentlb2852b362018-11-06 11:36:45 -0800548 reportError(e.getLocation(), e.getMessage());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100549 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000550 return arguments;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100551 }
552
Laurent Le Brun56093892015-03-20 13:01:58 +0000553 // expr_list parses a comma-separated list of expression. It assumes that the
554 // first expression was already parsed, so it starts with a comma.
555 // It is used to parse tuples and list elements.
556 // expr_list ::= ( ',' expr )* ','?
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000557 private List<Expression> parseExprList(boolean trailingColonAllowed) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100558 List<Expression> list = new ArrayList<>();
559 // terminating tokens for an expression list
Laurent Le Brun56093892015-03-20 13:01:58 +0000560 while (token.kind == TokenKind.COMMA) {
561 expect(TokenKind.COMMA);
562 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000563 if (!trailingColonAllowed) {
564 reportError(
565 lexer.createLocation(token.left, token.right),
566 "Trailing comma is allowed only in parenthesized tuples.");
567 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100568 break;
569 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000570 list.add(parseNonTupleExpression());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100571 }
572 return list;
573 }
574
575 // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )?
576 private List<DictionaryEntryLiteral> parseDictEntryList() {
577 List<DictionaryEntryLiteral> list = new ArrayList<>();
578 // the terminating token for a dict entry list
579 while (token.kind != TokenKind.RBRACE) {
580 list.add(parseDictEntry());
581 if (token.kind == TokenKind.COMMA) {
582 nextToken();
583 } else {
584 break;
585 }
586 }
587 return list;
588 }
589
Laurent Le Brun56093892015-03-20 13:01:58 +0000590 // dict_entry ::= nontupleexpr ':' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100591 private DictionaryEntryLiteral parseDictEntry() {
592 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000593 Expression key = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100594 expect(TokenKind.COLON);
Laurent Le Brun56093892015-03-20 13:01:58 +0000595 Expression value = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100596 return setLocation(new DictionaryEntryLiteral(key, value), start, value);
597 }
598
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000599 /**
600 * Parse a String literal value, e.g. "str".
601 */
602 private StringLiteral parseStringLiteral() {
603 Preconditions.checkState(token.kind == TokenKind.STRING);
604 int end = token.right;
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000605 StringLiteral literal =
nharmata6dbfafe2019-02-05 08:55:07 -0800606 setLocation(
607 new StringLiteral(stringInterner.intern((String) token.value)), token.left, end);
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000608
609 nextToken();
610 if (token.kind == TokenKind.STRING) {
611 reportError(lexer.createLocation(end, token.left),
612 "Implicit string concatenation is forbidden, use the + operator");
613 }
614 return literal;
615 }
616
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100617 // primary ::= INTEGER
618 // | STRING
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100619 // | IDENTIFIER
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100620 // | list_expression
621 // | '(' ')' // a tuple with zero elements
622 // | '(' expr ')' // a parenthesized expression
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100623 // | dict_expression
624 // | '-' primary_with_suffix
625 private Expression parsePrimary() {
626 int start = token.left;
627 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000628 case INT:
629 {
630 IntegerLiteral literal = new IntegerLiteral((Integer) token.value);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100631 setLocation(literal, start, token.right);
632 nextToken();
633 return literal;
634 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000635 case STRING:
636 return parseStringLiteral();
637 case IDENTIFIER:
fzaisere0f13332017-08-14 12:00:51 +0200638 return parseIdent();
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000639 case LBRACKET: // it's a list
640 return parseListMaker();
641 case LBRACE: // it's a dictionary
642 return parseDictExpression();
643 case LPAREN:
644 {
645 nextToken();
646 // check for the empty tuple literal
647 if (token.kind == TokenKind.RPAREN) {
laurentlbe5894f02018-10-25 13:02:00 -0700648 ListLiteral literal = ListLiteral.makeTuple(ImmutableList.of());
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000649 setLocation(literal, start, token.right);
650 nextToken();
651 return literal;
652 }
653 // parse the first expression
654 Expression expression = parseExpression(true);
655 setLocation(expression, start, token.right);
656 if (token.kind == TokenKind.RPAREN) {
657 nextToken();
658 return expression;
659 }
660 expect(TokenKind.RPAREN);
661 int end = syncTo(EXPR_TERMINATOR_SET);
662 return makeErrorExpression(start, end);
663 }
664 case MINUS:
665 {
666 nextToken();
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000667 Expression expr = parsePrimaryWithSuffix();
brandjonf2ed8582017-06-27 15:05:35 +0200668 UnaryOperatorExpression minus = new UnaryOperatorExpression(UnaryOperator.MINUS, expr);
669 return setLocation(minus, start, expr);
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000670 }
671 default:
672 {
laurentlb566ef5a2018-05-22 10:35:06 -0700673 syntaxError("expected expression");
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000674 int end = syncTo(EXPR_TERMINATOR_SET);
675 return makeErrorExpression(start, end);
676 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100677 }
678 }
679
fzaisere0f13332017-08-14 12:00:51 +0200680 // primary_with_suffix ::= primary (selector_suffix | substring_suffix | funcall_suffix)*
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100681 private Expression parsePrimaryWithSuffix() {
682 int start = token.left;
683 Expression receiver = parsePrimary();
684 while (true) {
685 if (token.kind == TokenKind.DOT) {
686 receiver = parseSelectorSuffix(start, receiver);
687 } else if (token.kind == TokenKind.LBRACKET) {
688 receiver = parseSubstringSuffix(start, receiver);
fzaisere0f13332017-08-14 12:00:51 +0200689 } else if (token.kind == TokenKind.LPAREN) {
690 receiver = parseFuncallSuffix(start, receiver);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100691 } else {
692 break;
693 }
694 }
695 return receiver;
696 }
697
Florian Weikerte3421962015-12-17 12:46:08 +0000698 // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']'
fzaisere0f13332017-08-14 12:00:51 +0200699 // | '[' expression? ':' expression? ']'
700 // | '[' expression ']'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100701 private Expression parseSubstringSuffix(int start, Expression receiver) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100702 Expression startExpr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100703
704 expect(TokenKind.LBRACKET);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100705 if (token.kind == TokenKind.COLON) {
laurentlb9b96c0b2018-02-12 02:53:19 -0800706 startExpr = null;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100707 } else {
Laurent Le Brun6824d862015-09-11 13:51:41 +0000708 startExpr = parseExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100709 }
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000710 // This is an index/key access
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100711 if (token.kind == TokenKind.RBRACKET) {
fzaiseraa8540d2017-09-26 06:01:30 -0400712 Expression expr = setLocation(new IndexExpression(receiver, startExpr), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100713 expect(TokenKind.RBRACKET);
fzaiseraa8540d2017-09-26 06:01:30 -0400714 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100715 }
Laurent Le Bruneeef30f2015-03-16 15:12:35 +0000716 // This is a slice (or substring)
laurentlb9b96c0b2018-02-12 02:53:19 -0800717 Expression endExpr = parseSliceArgument();
718 Expression stepExpr = parseSliceArgument();
fzaiseraa8540d2017-09-26 06:01:30 -0400719 Expression expr =
720 setLocation(
721 new SliceExpression(receiver, startExpr, endExpr, stepExpr), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100722 expect(TokenKind.RBRACKET);
fzaiseraa8540d2017-09-26 06:01:30 -0400723 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100724 }
725
Florian Weikerte3421962015-12-17 12:46:08 +0000726 /**
727 * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice
laurentlb9b96c0b2018-02-12 02:53:19 -0800728 * operation. If no such expression is found, this method returns null.
Florian Weikerte3421962015-12-17 12:46:08 +0000729 */
laurentlb9b96c0b2018-02-12 02:53:19 -0800730 private @Nullable Expression parseSliceArgument() {
Florian Weikerte3421962015-12-17 12:46:08 +0000731 // There has to be a colon before any end or slice argument.
732 // However, if the next token thereafter is another colon or a right bracket, no argument value
733 // was specified.
734 if (token.kind == TokenKind.COLON) {
735 expect(TokenKind.COLON);
736 if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) {
737 return parseNonTupleExpression();
738 }
739 }
740 return null;
741 }
742
Laurent Le Brun185392d2015-03-20 14:41:25 +0000743 // Equivalent to 'exprlist' rule in Python grammar.
744 // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','?
745 private Expression parseForLoopVariables() {
746 // We cannot reuse parseExpression because it would parse the 'in' operator.
747 // e.g. "for i in e: pass" -> we want to parse only "i" here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100748 int start = token.left;
Laurent Le Brun185392d2015-03-20 14:41:25 +0000749 Expression e1 = parsePrimaryWithSuffix();
750 if (token.kind != TokenKind.COMMA) {
751 return e1;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100752 }
753
Laurent Le Brun185392d2015-03-20 14:41:25 +0000754 // It's a tuple
755 List<Expression> tuple = new ArrayList<>();
756 tuple.add(e1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100757 while (token.kind == TokenKind.COMMA) {
Laurent Le Brun185392d2015-03-20 14:41:25 +0000758 expect(TokenKind.COMMA);
759 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
760 break;
761 }
762 tuple.add(parsePrimaryWithSuffix());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100763 }
fzaiseraa8540d2017-09-26 06:01:30 -0400764 return setLocation(ListLiteral.makeTuple(tuple), start, Iterables.getLast(tuple));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100765 }
766
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000767 // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix
768 // | 'IF' expr comprehension_suffix
769 // | ']'
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000770 private Expression parseComprehensionSuffix(
fzaiseraa8540d2017-09-26 06:01:30 -0400771 AbstractComprehension.AbstractBuilder comprehensionBuilder,
772 TokenKind closingBracket,
773 int comprehensionStartOffset) {
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000774 while (true) {
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000775 if (token.kind == TokenKind.FOR) {
776 nextToken();
brandjon990622b2017-07-11 19:56:45 +0200777 Expression lhs = parseForLoopVariables();
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000778 expect(TokenKind.IN);
779 // The expression cannot be a ternary expression ('x if y else z') due to
780 // conflicts in Python grammar ('if' is used by the comprehension).
781 Expression listExpression = parseNonTupleExpression(0);
brandjon990622b2017-07-11 19:56:45 +0200782 comprehensionBuilder.addFor(new LValue(lhs), listExpression);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000783 } else if (token.kind == TokenKind.IF) {
784 nextToken();
laurentlbc3a1af62017-06-16 14:37:43 +0200785 // [x for x in li if 1, 2] # parse error
786 // [x for x in li if (1, 2)] # ok
787 comprehensionBuilder.addIf(parseNonTupleExpression(0));
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000788 } else if (token.kind == closingBracket) {
fzaiseraa8540d2017-09-26 06:01:30 -0400789 Expression expr = comprehensionBuilder.build();
790 setLocation(expr, comprehensionStartOffset, token.right);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000791 nextToken();
fzaiseraa8540d2017-09-26 06:01:30 -0400792 return expr;
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000793 } else {
laurentlb566ef5a2018-05-22 10:35:06 -0700794 syntaxError("expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'");
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000795 syncPast(LIST_TERMINATOR_SET);
fzaiseraa8540d2017-09-26 06:01:30 -0400796 return makeErrorExpression(comprehensionStartOffset, token.right);
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000797 }
798 }
799 }
800
Laurent Le Brun56093892015-03-20 13:01:58 +0000801 // list_maker ::= '[' ']'
802 // |'[' expr ']'
803 // |'[' expr expr_list ']'
laurentlba9b9aea2017-09-04 17:39:09 +0200804 // |'[' expr comprehension_suffix ']'
Laurent Le Brun56093892015-03-20 13:01:58 +0000805 private Expression parseListMaker() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100806 int start = token.left;
807 expect(TokenKind.LBRACKET);
808 if (token.kind == TokenKind.RBRACKET) { // empty List
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000809 ListLiteral literal = ListLiteral.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100810 setLocation(literal, start, token.right);
811 nextToken();
812 return literal;
813 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000814 Expression expression = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100815 Preconditions.checkNotNull(expression,
816 "null element in list in AST at %s:%s", token.left, token.right);
817 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000818 case RBRACKET: // singleton List
819 {
laurentlbe5894f02018-10-25 13:02:00 -0700820 ListLiteral literal = ListLiteral.makeList(ImmutableList.of(expression));
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000821 setLocation(literal, start, token.right);
822 nextToken();
823 return literal;
824 }
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000825 case FOR:
826 { // list comprehension
fzaiseraa8540d2017-09-26 06:01:30 -0400827 return parseComprehensionSuffix(
828 new ListComprehension.Builder().setOutputExpression(expression),
829 TokenKind.RBRACKET,
830 start);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000831 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000832 case COMMA:
833 {
834 List<Expression> list = parseExprList(true);
835 Preconditions.checkState(
836 !list.contains(null),
837 "null element in list in AST at %s:%s",
838 token.left,
839 token.right);
840 list.add(0, expression);
841 if (token.kind == TokenKind.RBRACKET) {
842 ListLiteral literal = ListLiteral.makeList(list);
843 setLocation(literal, start, token.right);
844 nextToken();
845 return literal;
846 }
847 expect(TokenKind.RBRACKET);
848 int end = syncPast(LIST_TERMINATOR_SET);
849 return makeErrorExpression(start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100850 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000851 default:
852 {
laurentlb566ef5a2018-05-22 10:35:06 -0700853 syntaxError("expected ',', 'for' or ']'");
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000854 int end = syncPast(LIST_TERMINATOR_SET);
855 return makeErrorExpression(start, end);
856 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100857 }
858 }
859
860 // dict_expression ::= '{' '}'
861 // |'{' dict_entry_list '}'
laurentlba9b9aea2017-09-04 17:39:09 +0200862 // |'{' dict_entry comprehension_suffix '}'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100863 private Expression parseDictExpression() {
864 int start = token.left;
865 expect(TokenKind.LBRACE);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000866 if (token.kind == TokenKind.RBRACE) { // empty Dict
867 DictionaryLiteral literal = DictionaryLiteral.emptyDict();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100868 setLocation(literal, start, token.right);
869 nextToken();
870 return literal;
871 }
872 DictionaryEntryLiteral entry = parseDictEntry();
873 if (token.kind == TokenKind.FOR) {
874 // Dict comprehension
fzaiseraa8540d2017-09-26 06:01:30 -0400875 return parseComprehensionSuffix(
brandjon296cd492017-05-15 16:17:16 +0200876 new DictComprehension.Builder()
877 .setKeyExpression(entry.getKey())
878 .setValueExpression(entry.getValue()),
fzaiseraa8540d2017-09-26 06:01:30 -0400879 TokenKind.RBRACE,
880 start);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100881 }
882 List<DictionaryEntryLiteral> entries = new ArrayList<>();
883 entries.add(entry);
884 if (token.kind == TokenKind.COMMA) {
885 expect(TokenKind.COMMA);
886 entries.addAll(parseDictEntryList());
887 }
888 if (token.kind == TokenKind.RBRACE) {
889 DictionaryLiteral literal = new DictionaryLiteral(entries);
890 setLocation(literal, start, token.right);
891 nextToken();
892 return literal;
893 }
Laurent Le Brun72329862015-03-23 14:20:03 +0000894 expect(TokenKind.RBRACE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100895 int end = syncPast(DICT_TERMINATOR_SET);
896 return makeErrorExpression(start, end);
897 }
898
Florian Weikert6f864c32015-07-23 11:26:39 +0000899 private Identifier parseIdent() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100900 if (token.kind != TokenKind.IDENTIFIER) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000901 expect(TokenKind.IDENTIFIER);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100902 return makeErrorExpression(token.left, token.right);
903 }
Taras Tsugrii36941362018-06-08 16:31:53 -0700904 Identifier ident = Identifier.of(((String) token.value));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100905 setLocation(ident, token.left, token.right);
906 nextToken();
907 return ident;
908 }
909
910 // binop_expression ::= binop_expression OP binop_expression
911 // | parsePrimaryWithSuffix
912 // This function takes care of precedence between operators (see operatorPrecedence for
913 // the order), and it assumes left-to-right associativity.
914 private Expression parseBinOpExpression(int prec) {
915 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000916 Expression expr = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100917 // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is
918 // limited to number of different precedence levels (operatorPrecedence.size()).
laurentlb1fcea382017-06-19 16:02:42 +0200919 Operator lastOp = null;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100920 for (;;) {
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000921
922 if (token.kind == TokenKind.NOT) {
923 // If NOT appears when we expect a binary operator, it must be followed by IN.
924 // Since the code expects every operator to be a single token, we push a NOT_IN token.
925 expect(TokenKind.NOT);
laurentlb566ef5a2018-05-22 10:35:06 -0700926 if (token.kind != TokenKind.IN) {
927 syntaxError("expected 'in'");
928 }
929 token.kind = TokenKind.NOT_IN;
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000930 }
931
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100932 if (!binaryOperators.containsKey(token.kind)) {
933 return expr;
934 }
935 Operator operator = binaryOperators.get(token.kind);
936 if (!operatorPrecedence.get(prec).contains(operator)) {
937 return expr;
938 }
laurentlb1fcea382017-06-19 16:02:42 +0200939
940 // Operator '==' and other operators of the same precedence (e.g. '<', 'in')
941 // are not associative.
942 if (lastOp != null && operatorPrecedence.get(prec).contains(Operator.EQUALS_EQUALS)) {
943 reportError(
944 lexer.createLocation(token.left, token.right),
945 String.format("Operator '%s' is not associative with operator '%s'. Use parens.",
946 lastOp, operator));
947 }
948
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100949 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000950 Expression secondary = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100951 expr = optimizeBinOpExpression(operator, expr, secondary);
952 setLocation(expr, start, secondary);
laurentlb1fcea382017-06-19 16:02:42 +0200953 lastOp = operator;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100954 }
955 }
956
957 // Optimize binary expressions.
958 // string literal + string literal can be concatenated into one string literal
959 // so we don't have to do the expensive string concatenation at runtime.
960 private Expression optimizeBinOpExpression(
961 Operator operator, Expression expr, Expression secondary) {
962 if (operator == Operator.PLUS) {
963 if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
964 StringLiteral left = (StringLiteral) expr;
965 StringLiteral right = (StringLiteral) secondary;
nharmata6dbfafe2019-02-05 08:55:07 -0800966 return new StringLiteral(stringInterner.intern(left.getValue() + right.getValue()));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100967 }
968 }
969 return new BinaryOperatorExpression(operator, expr, secondary);
970 }
971
Laurent Le Brun56093892015-03-20 13:01:58 +0000972 // Equivalent to 'test' rule in Python grammar.
973 private Expression parseNonTupleExpression() {
974 int start = token.left;
975 Expression expr = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000976 if (token.kind == TokenKind.IF) {
977 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000978 Expression condition = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000979 if (token.kind == TokenKind.ELSE) {
980 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000981 Expression elseClause = parseNonTupleExpression();
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000982 return setLocation(new ConditionalExpression(expr, condition, elseClause),
983 start, elseClause);
984 } else {
985 reportError(lexer.createLocation(start, token.left),
986 "missing else clause in conditional expression or semicolon before if");
987 return expr; // Try to recover from error: drop the if and the expression after it. Ouch.
988 }
989 }
990 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100991 }
992
Laurent Le Brun56093892015-03-20 13:01:58 +0000993 private Expression parseNonTupleExpression(int prec) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100994 if (prec >= operatorPrecedence.size()) {
995 return parsePrimaryWithSuffix();
996 }
997 if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) {
998 return parseNotExpression(prec);
999 }
1000 return parseBinOpExpression(prec);
1001 }
1002
1003 // not_expr :== 'not' expr
1004 private Expression parseNotExpression(int prec) {
1005 int start = token.left;
1006 expect(TokenKind.NOT);
laurentlb7aa2c8e2018-10-18 10:09:30 -07001007 Expression expression = parseNonTupleExpression(prec);
brandjonf2ed8582017-06-27 15:05:35 +02001008 UnaryOperatorExpression notExpression =
1009 new UnaryOperatorExpression(UnaryOperator.NOT, expression);
fzaiseraa8540d2017-09-26 06:01:30 -04001010 return setLocation(notExpression, start, expression);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001011 }
1012
1013 // file_input ::= ('\n' | stmt)* EOF
1014 private List<Statement> parseFileInput() {
1015 List<Statement> list = new ArrayList<>();
1016 while (token.kind != TokenKind.EOF) {
1017 if (token.kind == TokenKind.NEWLINE) {
Laurent Le Brun9060e162015-04-02 10:07:28 +00001018 expectAndRecover(TokenKind.NEWLINE);
1019 } else if (recoveryMode) {
1020 // If there was a parse error, we want to recover here
1021 // before starting a new top-level statement.
1022 syncTo(STATEMENT_TERMINATOR_SET);
1023 recoveryMode = false;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001024 } else {
1025 parseTopLevelStatement(list);
1026 }
1027 }
1028 return list;
1029 }
1030
brandjonfe29c7242018-02-22 16:24:24 -08001031 // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)+ COMMA? ')'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001032 private void parseLoad(List<Statement> list) {
1033 int start = token.left;
laurentlb2843ead2017-07-05 07:20:45 -04001034 expect(TokenKind.LOAD);
1035 expect(TokenKind.LPAREN);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001036 if (token.kind != TokenKind.STRING) {
1037 expect(TokenKind.STRING);
1038 return;
1039 }
Googler768cbc42015-08-28 12:52:14 +00001040
John Field9201fda2015-12-30 19:30:34 +00001041 StringLiteral importString = parseStringLiteral();
brandjonfe29c7242018-02-22 16:24:24 -08001042 if (token.kind == TokenKind.RPAREN) {
laurentlb566ef5a2018-05-22 10:35:06 -07001043 syntaxError("expected at least one symbol to load");
brandjonfe29c7242018-02-22 16:24:24 -08001044 return;
1045 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001046 expect(TokenKind.COMMA);
1047
laurentlb14c0f402018-11-09 13:59:34 -08001048 ImmutableList.Builder<LoadStatement.Binding> bindings = ImmutableList.builder();
1049 // previousSymbols is used to detect duplicate symbols in the same statement.
1050 Set<String> previousSymbols = new HashSet<>();
1051
1052 parseLoadSymbol(bindings, previousSymbols); // At least one symbol is required
Florian Weikert9d659ad2015-07-23 14:44:36 +00001053
Laurent Le Brun73a98492015-03-17 15:46:19 +00001054 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001055 expect(TokenKind.COMMA);
Laurent Le Brun59f587a2015-03-16 14:51:36 +00001056 if (token.kind == TokenKind.RPAREN) {
1057 break;
1058 }
Florian Weikert9d659ad2015-07-23 14:44:36 +00001059
laurentlb14c0f402018-11-09 13:59:34 -08001060 parseLoadSymbol(bindings, previousSymbols);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001061 }
Googler768cbc42015-08-28 12:52:14 +00001062
laurentlb14c0f402018-11-09 13:59:34 -08001063 LoadStatement stmt = new LoadStatement(importString, bindings.build());
fzaiserb5768af2017-10-09 15:16:50 +02001064 list.add(setLocation(stmt, start, token.right));
1065 expect(TokenKind.RPAREN);
brandjon09771fd2017-07-06 08:54:29 -04001066 expectAndRecover(TokenKind.NEWLINE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001067 }
1068
Florian Weikert9d659ad2015-07-23 14:44:36 +00001069 /**
1070 * Parses the next symbol argument of a load statement and puts it into the output map.
1071 *
laurentlb14c0f402018-11-09 13:59:34 -08001072 * <p>The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING). If no
1073 * alias is used, "name" and "declared" will be identical. "Declared" refers to the original name
1074 * in the Bazel file that should be loaded, while "name" will be the key of the entry in the map.
Florian Weikert9d659ad2015-07-23 14:44:36 +00001075 */
laurentlb14c0f402018-11-09 13:59:34 -08001076 private void parseLoadSymbol(
1077 ImmutableList.Builder<LoadStatement.Binding> symbols, Set<String> previousSymbols) {
laurentlb566ef5a2018-05-22 10:35:06 -07001078 if (token.kind != TokenKind.STRING && token.kind != TokenKind.IDENTIFIER) {
1079 syntaxError("expected either a literal string or an identifier");
1080 return;
1081 }
Florian Weikert9d659ad2015-07-23 14:44:36 +00001082
laurentlb566ef5a2018-05-22 10:35:06 -07001083 String name = (String) token.value;
laurentlb14c0f402018-11-09 13:59:34 -08001084 Identifier local = setLocation(Identifier.of(name), token.left, token.right);
laurentlb566ef5a2018-05-22 10:35:06 -07001085
laurentlb14c0f402018-11-09 13:59:34 -08001086 if (previousSymbols.contains(local.getName())) {
1087 syntaxError(String.format("Identifier '%s' is used more than once", local.getName()));
1088 }
1089 previousSymbols.add(local.getName());
1090
1091 Identifier original;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001092 if (token.kind == TokenKind.STRING) {
laurentlb14c0f402018-11-09 13:59:34 -08001093 // load(..., "name")
1094 original = local;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001095 } else {
laurentlb14c0f402018-11-09 13:59:34 -08001096 // load(..., local = "orig")
Florian Weikert9d659ad2015-07-23 14:44:36 +00001097 expect(TokenKind.IDENTIFIER);
1098 expect(TokenKind.EQUALS);
laurentlb566ef5a2018-05-22 10:35:06 -07001099 if (token.kind != TokenKind.STRING) {
1100 syntaxError("expected string");
1101 return;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001102 }
laurentlb14c0f402018-11-09 13:59:34 -08001103 original = setLocation(Identifier.of((String) token.value), token.left, token.right);
Florian Weikert9d659ad2015-07-23 14:44:36 +00001104 }
laurentlb566ef5a2018-05-22 10:35:06 -07001105 nextToken();
laurentlb14c0f402018-11-09 13:59:34 -08001106 symbols.add(new LoadStatement.Binding(local, original));
Florian Weikert9d659ad2015-07-23 14:44:36 +00001107 }
1108
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001109 private void parseTopLevelStatement(List<Statement> list) {
laurentlb2843ead2017-07-05 07:20:45 -04001110 // Unlike Python imports, load statements can appear only at top-level.
1111 if (token.kind == TokenKind.LOAD) {
1112 parseLoad(list);
1113 } else {
1114 parseStatement(list, ParsingLevel.TOP_LEVEL);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001115 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001116 }
1117
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001118 // small_stmt | 'pass'
1119 private void parseSmallStatementOrPass(List<Statement> list) {
1120 if (token.kind == TokenKind.PASS) {
fzaiser95429132017-10-23 18:21:36 +02001121 list.add(setLocation(new PassStatement(), token.left, token.right));
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001122 expect(TokenKind.PASS);
1123 } else {
1124 list.add(parseSmallStatement());
1125 }
1126 }
1127
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001128 // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE
1129 private void parseSimpleStatement(List<Statement> list) {
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001130 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001131
1132 while (token.kind == TokenKind.SEMI) {
1133 nextToken();
1134 if (token.kind == TokenKind.NEWLINE) {
1135 break;
1136 }
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001137 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001138 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001139 expectAndRecover(TokenKind.NEWLINE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001140 }
1141
1142 // small_stmt ::= assign_stmt
1143 // | expr
laurentlba9b9aea2017-09-04 17:39:09 +02001144 // | return_stmt
Florian Weikert917ceaa2015-06-10 13:54:26 +00001145 // | flow_stmt
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001146 // assign_stmt ::= expr ('=' | augassign) expr
laurentlba9b9aea2017-09-04 17:39:09 +02001147 // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=' | '//=' )
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001148 // Note that these are in Python, but not implemented here (at least for now):
laurentlba9b9aea2017-09-04 17:39:09 +02001149 // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**='
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001150 private Statement parseSmallStatement() {
1151 int start = token.left;
1152 if (token.kind == TokenKind.RETURN) {
1153 return parseReturnStatement();
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001154 } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) {
Florian Weikert917ceaa2015-06-10 13:54:26 +00001155 return parseFlowStatement(token.kind);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001156 }
1157 Expression expression = parseExpression();
1158 if (token.kind == TokenKind.EQUALS) {
1159 nextToken();
1160 Expression rvalue = parseExpression();
laurentlb094bb262017-05-19 21:18:25 +02001161 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001162 new AssignmentStatement(new LValue(expression), rvalue),
1163 start, rvalue);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001164 } else if (augmentedAssignmentMethods.containsKey(token.kind)) {
1165 Operator operator = augmentedAssignmentMethods.get(token.kind);
1166 nextToken();
1167 Expression operand = parseExpression();
Vladimir Moskva71536642016-12-19 13:51:57 +00001168 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001169 new AugmentedAssignmentStatement(operator, new LValue(expression), operand),
fzaiser1a92d562017-10-24 15:37:50 +02001170 start,
1171 operand);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001172 } else {
1173 return setLocation(new ExpressionStatement(expression), start, expression);
1174 }
1175 }
1176
1177 // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]?
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001178 private IfStatement parseIfStatement() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001179 int start = token.left;
1180 List<ConditionalStatements> thenBlocks = new ArrayList<>();
1181 thenBlocks.add(parseConditionalStatements(TokenKind.IF));
1182 while (token.kind == TokenKind.ELIF) {
1183 thenBlocks.add(parseConditionalStatements(TokenKind.ELIF));
1184 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001185 List<Statement> elseBlock;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001186 if (token.kind == TokenKind.ELSE) {
1187 expect(TokenKind.ELSE);
1188 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001189 elseBlock = parseSuite();
1190 } else {
1191 elseBlock = ImmutableList.of();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001192 }
fzaiser1a92d562017-10-24 15:37:50 +02001193 List<Statement> lastBlock =
1194 elseBlock.isEmpty() ? Iterables.getLast(thenBlocks).getStatements() : elseBlock;
1195 int end =
1196 lastBlock.isEmpty()
1197 ? token.left
1198 : Iterables.getLast(lastBlock).getLocation().getEndOffset();
1199 return setLocation(new IfStatement(thenBlocks, elseBlock), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001200 }
1201
1202 // cond_stmts ::= [EL]IF expr ':' suite
1203 private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) {
1204 int start = token.left;
1205 expect(tokenKind);
Laurent Le Brun56093892015-03-20 13:01:58 +00001206 Expression expr = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001207 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001208 List<Statement> thenBlock = parseSuite();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001209 ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock);
fzaiser1a92d562017-10-24 15:37:50 +02001210 int end =
1211 thenBlock.isEmpty()
1212 ? token.left
1213 : Iterables.getLast(thenBlock).getLocation().getEndOffset();
1214 return setLocation(stmt, start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001215 }
1216
1217 // for_stmt ::= FOR IDENTIFIER IN expr ':' suite
1218 private void parseForStatement(List<Statement> list) {
1219 int start = token.left;
1220 expect(TokenKind.FOR);
Laurent Le Brun185392d2015-03-20 14:41:25 +00001221 Expression loopVar = parseForLoopVariables();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001222 expect(TokenKind.IN);
1223 Expression collection = parseExpression();
1224 expect(TokenKind.COLON);
laurentlba9b9aea2017-09-04 17:39:09 +02001225 List<Statement> block = parseSuite();
1226 Statement stmt = new ForStatement(new LValue(loopVar), collection, block);
fzaiser1a92d562017-10-24 15:37:50 +02001227 int end = block.isEmpty() ? token.left : Iterables.getLast(block).getLocation().getEndOffset();
1228 list.add(setLocation(stmt, start, end));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001229 }
1230
laurentlba9b9aea2017-09-04 17:39:09 +02001231 // def_stmt ::= DEF IDENTIFIER '(' arguments ')' ':' suite
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001232 private void parseFunctionDefStatement(List<Statement> list) {
1233 int start = token.left;
1234 expect(TokenKind.DEF);
Florian Weikert6f864c32015-07-23 11:26:39 +00001235 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001236 expect(TokenKind.LPAREN);
laurentlbd6983672017-06-29 14:53:12 +02001237 List<Parameter<Expression, Expression>> params =
laurentlb3d2a68c2017-06-30 00:32:04 +02001238 parseFunctionArguments(this::parseFunctionParameter);
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001239 FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001240 expect(TokenKind.RPAREN);
1241 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001242 List<Statement> block = parseSuite();
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001243 FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block);
fzaiser1a92d562017-10-24 15:37:50 +02001244 int end = block.isEmpty() ? token.left : Iterables.getLast(block).getLocation().getEndOffset();
1245 list.add(setLocation(stmt, start, end));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001246 }
1247
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001248 private FunctionSignature.WithValues<Expression, Expression> functionSignature(
1249 List<Parameter<Expression, Expression>> parameters) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001250 try {
brandjon990622b2017-07-11 19:56:45 +02001251 return FunctionSignature.WithValues.of(parameters);
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001252 } catch (FunctionSignature.SignatureException e) {
1253 reportError(e.getParameter().getLocation(), e.getMessage());
1254 // return bogus empty signature
brandjon990622b2017-07-11 19:56:45 +02001255 return FunctionSignature.WithValues.create(FunctionSignature.of());
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001256 }
1257 }
1258
1259 /**
1260 * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter,
1261 * as returned by the Supplier parseArgument (that, taking no argument, must be closed over
1262 * the mutable input data structures).
1263 *
1264 * <p>This parser does minimal validation: it ensures the proper python use of the comma (that
1265 * can terminate before a star but not after) and the fact that a **kwarg must appear last.
1266 * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as
1267 * all positional preceding keyword arguments in a call, nor does it check the more subtle
1268 * constraints for Parameter-s. This validation must happen afterwards in an appropriate method.
1269 */
1270 private <V extends Argument> ImmutableList<V>
1271 parseFunctionArguments(Supplier<V> parseArgument) {
1272 boolean hasArg = false;
1273 boolean hasStar = false;
1274 boolean hasStarStar = false;
michajlo77e8b032017-08-04 21:29:17 +02001275 ImmutableList.Builder<V> argumentsBuilder = ImmutableList.builder();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001276
1277 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
1278 if (hasStarStar) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001279 reportError(lexer.createLocation(token.left, token.right),
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001280 "unexpected tokens after kwarg");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001281 break;
1282 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001283 if (hasArg) {
1284 expect(TokenKind.COMMA);
1285 }
1286 if (token.kind == TokenKind.RPAREN && !hasStar) {
1287 // list can end with a COMMA if there is neither * nor **
1288 break;
1289 }
1290 V arg = parseArgument.get();
1291 hasArg = true;
1292 if (arg.isStar()) {
1293 hasStar = true;
1294 } else if (arg.isStarStar()) {
1295 hasStarStar = true;
1296 }
michajlo77e8b032017-08-04 21:29:17 +02001297 argumentsBuilder.add(arg);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001298 }
michajlo77e8b032017-08-04 21:29:17 +02001299 return argumentsBuilder.build();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001300 }
1301
Laurent Le Brun5f674452015-03-17 19:29:13 +00001302 // suite is typically what follows a colon (e.g. after def or for).
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001303 // suite ::= simple_stmt
1304 // | NEWLINE INDENT stmt+ OUTDENT
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001305 private List<Statement> parseSuite() {
1306 List<Statement> list = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001307 if (token.kind == TokenKind.NEWLINE) {
1308 expect(TokenKind.NEWLINE);
1309 if (token.kind != TokenKind.INDENT) {
1310 reportError(lexer.createLocation(token.left, token.right),
1311 "expected an indented block");
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001312 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001313 }
1314 expect(TokenKind.INDENT);
1315 while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) {
brandjon733a97d2017-06-27 17:11:27 +02001316 parseStatement(list, ParsingLevel.LOCAL_LEVEL);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001317 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001318 expectAndRecover(TokenKind.OUTDENT);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001319 } else {
Laurent Le Brun5f674452015-03-17 19:29:13 +00001320 parseSimpleStatement(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001321 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001322 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001323 }
1324
laurentlba9b9aea2017-09-04 17:39:09 +02001325 // flow_stmt ::= BREAK | CONTINUE
Florian Weikert917ceaa2015-06-10 13:54:26 +00001326 private FlowStatement parseFlowStatement(TokenKind kind) {
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001327 int start = token.left;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001328 int end = token.right;
Florian Weikert917ceaa2015-06-10 13:54:26 +00001329 expect(kind);
Laurent Le Brun7d6a3812015-10-26 12:07:12 +00001330 FlowStatement.Kind flowKind =
1331 kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001332 return setLocation(new FlowStatement(flowKind), start, end);
Florian Weikert917ceaa2015-06-10 13:54:26 +00001333 }
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001334
Googlercc0d9952015-08-10 12:01:34 +00001335 // return_stmt ::= RETURN [expr]
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001336 private ReturnStatement parseReturnStatement() {
1337 int start = token.left;
Googlercc0d9952015-08-10 12:01:34 +00001338 int end = token.right;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001339 expect(TokenKind.RETURN);
Googler768cbc42015-08-28 12:52:14 +00001340
fzaiser317a2692017-08-23 16:40:30 +02001341 Expression expression = null;
1342 if (!STATEMENT_TERMINATOR_SET.contains(token.kind)) {
1343 expression = parseExpression();
1344 end = expression.getLocation().getEndOffset();
Googlercc0d9952015-08-10 12:01:34 +00001345 }
fzaiser317a2692017-08-23 16:40:30 +02001346 return setLocation(new ReturnStatement(expression), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001347 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001348}