blob: 46de9a993338e08537e78141738136920540d926 [file] [log] [blame]
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00001// Copyright 2014 The Bazel Authors. All rights reserved.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package com.google.devtools.build.lib.syntax;
16
Laurent Le Brun9be852e2015-05-28 08:44:51 +000017import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.BUILD;
Laurent Le Brun9be852e2015-05-28 08:44:51 +000018import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.SKYLARK;
19
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010020import com.google.common.annotations.VisibleForTesting;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +000021import com.google.common.base.Supplier;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010022import com.google.common.collect.ImmutableList;
23import com.google.common.collect.ImmutableMap;
Laurent Le Brune51a4d22016-10-11 18:04:16 +000024import com.google.common.collect.Iterables;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010025import com.google.devtools.build.lib.events.Event;
26import com.google.devtools.build.lib.events.EventHandler;
27import com.google.devtools.build.lib.events.Location;
Googler768cbc42015-08-28 12:52:14 +000028import com.google.devtools.build.lib.profiler.Profiler;
29import com.google.devtools.build.lib.profiler.ProfilerTask;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010030import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral;
31import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements;
Mark Schaller6df81792015-12-10 18:47:47 +000032import com.google.devtools.build.lib.util.Preconditions;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010033import java.util.ArrayList;
34import java.util.Collections;
35import java.util.EnumSet;
Florian Weikert9d659ad2015-07-23 14:44:36 +000036import java.util.HashMap;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010037import java.util.Iterator;
38import java.util.List;
39import java.util.Map;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010040
41/**
42 * Recursive descent parser for LL(2) BUILD language.
43 * Loosely based on Python 2 grammar.
44 * See https://docs.python.org/2/reference/grammar.html
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010045 */
Han-Wen Nienhuysceae8c52015-09-22 16:24:45 +000046@VisibleForTesting
47public class Parser {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010048
49 /**
50 * Combines the parser result into a single value object.
51 */
52 public static final class ParseResult {
53 /** The statements (rules, basically) from the parsed file. */
54 public final List<Statement> statements;
55
56 /** The comments from the parsed file. */
57 public final List<Comment> comments;
58
Lukacs Berkid9e733d2015-09-18 08:18:11 +000059 /** Represents every statement in the file. */
60 public final Location location;
61
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010062 /** Whether the file contained any errors. */
63 public final boolean containsErrors;
64
Lukacs Berkid9e733d2015-09-18 08:18:11 +000065 public ParseResult(List<Statement> statements, List<Comment> comments, Location location,
66 boolean containsErrors) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010067 // No need to copy here; when the object is created, the parser instance is just about to go
68 // out of scope and be garbage collected.
69 this.statements = Preconditions.checkNotNull(statements);
70 this.comments = Preconditions.checkNotNull(comments);
Lukacs Berkid9e733d2015-09-18 08:18:11 +000071 this.location = location;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010072 this.containsErrors = containsErrors;
73 }
74 }
75
Laurent Le Brun9be852e2015-05-28 08:44:51 +000076 /**
77 * ParsingMode is used to select which features the parser should accept.
78 */
79 public enum ParsingMode {
80 /** Used for parsing BUILD files */
81 BUILD,
82 /** Used for parsing .bzl files */
83 SKYLARK,
Laurent Le Brun9be852e2015-05-28 08:44:51 +000084 }
85
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010086 private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
Googlercc0d9952015-08-10 12:01:34 +000087 EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010088
89 private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000090 EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010091
92 private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000093 EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010094
Laurent Le Brun56093892015-03-20 13:01:58 +000095 private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000096 EnumSet.of(
97 TokenKind.EOF,
98 TokenKind.NEWLINE,
Laurent Le Brun29ad8622015-09-18 10:45:07 +000099 TokenKind.EQUALS,
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000100 TokenKind.RBRACE,
101 TokenKind.RBRACKET,
102 TokenKind.RPAREN,
103 TokenKind.SEMI);
Laurent Le Brun56093892015-03-20 13:01:58 +0000104
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000105 private static final EnumSet<TokenKind> BLOCK_STARTING_SET =
106 EnumSet.of(
107 TokenKind.CLASS,
108 TokenKind.DEF,
109 TokenKind.ELSE,
110 TokenKind.FOR,
111 TokenKind.IF,
112 TokenKind.TRY);
113
114 private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET =
115 EnumSet.of(
116 TokenKind.COLON,
117 TokenKind.COMMA,
118 TokenKind.EOF,
119 TokenKind.FOR,
120 TokenKind.MINUS,
121 TokenKind.PERCENT,
122 TokenKind.PLUS,
123 TokenKind.RBRACKET,
124 TokenKind.RPAREN,
125 TokenKind.SLASH);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100126
Florian Weikert1f004e52015-10-16 09:43:48 +0000127 /**
128 * Keywords that are forbidden in both Skylark and BUILD parsing modes.
129 *
130 * <p>(Mapping: token -> human-readable string description)
131 */
132 private static final ImmutableMap<TokenKind, String> ILLEGAL_BLOCK_KEYWORDS =
133 ImmutableMap.of(TokenKind.CLASS, "Class definition", TokenKind.TRY, "Try statement");
134
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100135 private Token token; // current lookahead token
136 private Token pushedToken = null; // used to implement LL(2)
Laurent Le Bruna3c25a62016-10-26 10:59:09 +0000137 private int loopCount; // break/continue keywords can be used only inside a loop
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100138
139 private static final boolean DEBUGGING = false;
140
141 private final Lexer lexer;
142 private final EventHandler eventHandler;
143 private final List<Comment> comments;
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000144 private final ParsingMode parsingMode;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100145
146 private static final Map<TokenKind, Operator> binaryOperators =
147 new ImmutableMap.Builder<TokenKind, Operator>()
148 .put(TokenKind.AND, Operator.AND)
149 .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
150 .put(TokenKind.GREATER, Operator.GREATER)
151 .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
152 .put(TokenKind.IN, Operator.IN)
153 .put(TokenKind.LESS, Operator.LESS)
154 .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
155 .put(TokenKind.MINUS, Operator.MINUS)
156 .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000157 .put(TokenKind.NOT_IN, Operator.NOT_IN)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100158 .put(TokenKind.OR, Operator.OR)
159 .put(TokenKind.PERCENT, Operator.PERCENT)
Laurent Le Brun8a528262015-04-15 14:23:35 +0000160 .put(TokenKind.SLASH, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200161 .put(TokenKind.SLASH_SLASH, Operator.FLOOR_DIVIDE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100162 .put(TokenKind.PLUS, Operator.PLUS)
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000163 .put(TokenKind.PIPE, Operator.PIPE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100164 .put(TokenKind.STAR, Operator.MULT)
165 .build();
166
Googler13151752016-06-02 18:37:13 +0000167 // TODO(bazel-team): add support for |=
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100168 private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
169 new ImmutableMap.Builder<TokenKind, Operator>()
Googler13151752016-06-02 18:37:13 +0000170 .put(TokenKind.PLUS_EQUALS, Operator.PLUS)
171 .put(TokenKind.MINUS_EQUALS, Operator.MINUS)
172 .put(TokenKind.STAR_EQUALS, Operator.MULT)
173 .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200174 .put(TokenKind.SLASH_SLASH_EQUALS, Operator.FLOOR_DIVIDE)
Googler13151752016-06-02 18:37:13 +0000175 .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT)
176 .build();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100177
178 /** Highest precedence goes last.
179 * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
180 **/
181 private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
182 EnumSet.of(Operator.OR),
183 EnumSet.of(Operator.AND),
184 EnumSet.of(Operator.NOT),
185 EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000186 Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN),
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000187 EnumSet.of(Operator.PIPE),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100188 EnumSet.of(Operator.MINUS, Operator.PLUS),
laurentlb094bb262017-05-19 21:18:25 +0200189 EnumSet.of(Operator.DIVIDE, Operator.FLOOR_DIVIDE, Operator.MULT, Operator.PERCENT));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100190
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000191 private final Iterator<Token> tokens;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100192 private int errorsCount;
193 private boolean recoveryMode; // stop reporting errors until next statement
194
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000195 private Parser(Lexer lexer, EventHandler eventHandler, ParsingMode parsingMode) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100196 this.lexer = lexer;
197 this.eventHandler = eventHandler;
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000198 this.parsingMode = parsingMode;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100199 this.tokens = lexer.getTokens().iterator();
Francois-Rene Rideauc673a822015-03-02 19:52:39 +0000200 this.comments = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100201 nextToken();
202 }
203
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000204 private static Location locationFromStatements(Lexer lexer, List<Statement> statements) {
205 if (!statements.isEmpty()) {
206 return lexer.createLocation(
207 statements.get(0).getLocation().getStartOffset(),
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000208 Iterables.getLast(statements).getLocation().getEndOffset());
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000209 } else {
210 return Location.fromPathFragment(lexer.getFilename());
211 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100212 }
213
214 /**
brandjon540aac62017-06-12 23:08:09 +0200215 * Entry-point for parsing a file with comments.
216 *
217 * @param input the input to parse
218 * @param eventHandler a reporter for parsing errors
219 * @param parsingMode if set to {@link ParsingMode#BUILD}, restricts the parser to just the
220 * features present in the Build language
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100221 */
brandjon540aac62017-06-12 23:08:09 +0200222 public static ParseResult parseFile(
223 ParserInputSource input, EventHandler eventHandler, ParsingMode parsingMode) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000224 Lexer lexer = new Lexer(input, eventHandler);
brandjon540aac62017-06-12 23:08:09 +0200225 Parser parser = new Parser(lexer, eventHandler, parsingMode);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100226 List<Statement> statements = parser.parseFileInput();
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000227 return new ParseResult(
228 statements,
229 parser.comments,
230 locationFromStatements(lexer, statements),
231 parser.errorsCount > 0 || lexer.containsErrors());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100232 }
233
brandjon540aac62017-06-12 23:08:09 +0200234 /** Convenience method for {@code parseFile} with the Build language. */
235 public static ParseResult parseFile(ParserInputSource input, EventHandler eventHandler) {
236 return parseFile(input, eventHandler, BUILD);
237 }
238
239 /** Convenience method for {@code parseFile} with Skylark. */
240 public static ParseResult parseFileForSkylark(
241 ParserInputSource input, EventHandler eventHandler) {
242 return parseFile(input, eventHandler, SKYLARK);
243 }
244
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100245 /**
brandjon540aac62017-06-12 23:08:09 +0200246 * Entry-point for parsing an expression. The expression may be followed by newline tokens.
247 *
248 * @param input the input to parse
249 * @param eventHandler a reporter for parsing errors
250 * @param parsingMode if set to {@link ParsingMode#BUILD}, restricts the parser to just the
251 * features present in the Build language
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100252 */
253 @VisibleForTesting
brandjon540aac62017-06-12 23:08:09 +0200254 public static Expression parseExpression(
255 ParserInputSource input, EventHandler eventHandler, ParsingMode parsingMode) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000256 Lexer lexer = new Lexer(input, eventHandler);
brandjon540aac62017-06-12 23:08:09 +0200257 Parser parser = new Parser(lexer, eventHandler, parsingMode);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100258 Expression result = parser.parseExpression();
259 while (parser.token.kind == TokenKind.NEWLINE) {
260 parser.nextToken();
261 }
262 parser.expect(TokenKind.EOF);
263 return result;
264 }
265
brandjon540aac62017-06-12 23:08:09 +0200266 /** Convenience method for {@code parseExpression} with the Build language. */
267 @VisibleForTesting
268 public static Expression parseExpression(ParserInputSource input, EventHandler eventHandler) {
269 return parseExpression(input, eventHandler, BUILD);
270 }
271
272 /** Convenience method for {@code parseExpression} with Skylark. */
273 @VisibleForTesting
274 public static Expression parseExpressionForSkylark(
275 ParserInputSource input, EventHandler eventHandler) {
276 return parseExpression(input, eventHandler, SKYLARK);
277 }
278
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100279 private void reportError(Location location, String message) {
280 errorsCount++;
281 // Limit the number of reported errors to avoid spamming output.
282 if (errorsCount <= 5) {
283 eventHandler.handle(Event.error(location, message));
284 }
285 }
286
Laurent Le Brun72329862015-03-23 14:20:03 +0000287 private void syntaxError(Token token, String message) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100288 if (!recoveryMode) {
289 String msg = token.kind == TokenKind.INDENT
290 ? "indentation error"
Laurent Le Brun72329862015-03-23 14:20:03 +0000291 : "syntax error at '" + token + "': " + message;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100292 reportError(lexer.createLocation(token.left, token.right), msg);
293 recoveryMode = true;
294 }
295 }
296
Laurent Le Brun9060e162015-04-02 10:07:28 +0000297 /**
298 * Consumes the current token. If it is not of the specified (expected)
299 * kind, reports a syntax error.
300 */
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100301 private boolean expect(TokenKind kind) {
302 boolean expected = token.kind == kind;
303 if (!expected) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000304 syntaxError(token, "expected " + kind.getPrettyName());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100305 }
306 nextToken();
307 return expected;
308 }
309
310 /**
Laurent Le Brun9060e162015-04-02 10:07:28 +0000311 * Same as expect, but stop the recovery mode if the token was expected.
312 */
313 private void expectAndRecover(TokenKind kind) {
314 if (expect(kind)) {
315 recoveryMode = false;
316 }
317 }
318
319 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100320 * Consume tokens past the first token that has a kind that is in the set of
321 * teminatingTokens.
322 * @param terminatingTokens
323 * @return the end offset of the terminating token.
324 */
325 private int syncPast(EnumSet<TokenKind> terminatingTokens) {
326 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
327 while (!terminatingTokens.contains(token.kind)) {
328 nextToken();
329 }
330 int end = token.right;
331 // read past the synchronization token
332 nextToken();
333 return end;
334 }
335
336 /**
337 * Consume tokens until we reach the first token that has a kind that is in
338 * the set of teminatingTokens.
339 * @param terminatingTokens
340 * @return the end offset of the terminating token.
341 */
342 private int syncTo(EnumSet<TokenKind> terminatingTokens) {
343 // EOF must be in the set to prevent an infinite loop
344 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
345 // read past the problematic token
346 int previous = token.right;
347 nextToken();
348 int current = previous;
349 while (!terminatingTokens.contains(token.kind)) {
350 nextToken();
351 previous = current;
352 current = token.right;
353 }
354 return previous;
355 }
356
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000357 // Keywords that exist in Python and that we don't parse.
358 private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS =
Googler768cbc42015-08-28 12:52:14 +0000359 EnumSet.of(TokenKind.AS, TokenKind.ASSERT,
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000360 TokenKind.DEL, TokenKind.EXCEPT, TokenKind.FINALLY, TokenKind.FROM, TokenKind.GLOBAL,
361 TokenKind.IMPORT, TokenKind.IS, TokenKind.LAMBDA, TokenKind.NONLOCAL, TokenKind.RAISE,
362 TokenKind.TRY, TokenKind.WITH, TokenKind.WHILE, TokenKind.YIELD);
363
364 private void checkForbiddenKeywords(Token token) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000365 if (!FORBIDDEN_KEYWORDS.contains(token.kind)) {
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000366 return;
367 }
368 String error;
369 switch (token.kind) {
370 case ASSERT: error = "'assert' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000371 case DEL:
372 error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list";
373 break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000374 case IMPORT: error = "'import' not supported, use 'load' instead"; break;
375 case IS: error = "'is' not supported, use '==' instead"; break;
376 case LAMBDA: error = "'lambda' not supported, declare a function instead"; break;
377 case RAISE: error = "'raise' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000378 case TRY: error = "'try' not supported, all exceptions are fatal"; break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000379 case WHILE: error = "'while' not supported, use 'for' instead"; break;
380 default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break;
381 }
382 reportError(lexer.createLocation(token.left, token.right), error);
383 }
384
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100385 private void nextToken() {
386 if (pushedToken != null) {
387 token = pushedToken;
388 pushedToken = null;
389 } else {
390 if (token == null || token.kind != TokenKind.EOF) {
391 token = tokens.next();
392 // transparently handle comment tokens
393 while (token.kind == TokenKind.COMMENT) {
394 makeComment(token);
395 token = tokens.next();
396 }
397 }
398 }
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000399 checkForbiddenKeywords(token);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100400 if (DEBUGGING) {
401 System.err.print(token);
402 }
403 }
404
405 private void pushToken(Token tokenToPush) {
406 if (pushedToken != null) {
407 throw new IllegalStateException("Exceeded LL(2) lookahead!");
408 }
409 pushedToken = token;
410 token = tokenToPush;
411 }
412
413 // create an error expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000414 private Identifier makeErrorExpression(int start, int end) {
415 return setLocation(new Identifier("$error$"), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100416 }
417
418 // Convenience wrapper around ASTNode.setLocation that returns the node.
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000419 private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) {
420 return ASTNode.<NODE>setLocation(location, node);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100421 }
422
423 // Another convenience wrapper method around ASTNode.setLocation
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000424 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, int endOffset) {
425 return setLocation(node, lexer.createLocation(startOffset, endOffset));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100426 }
427
428 // Convenience method that uses end offset from the last node.
429 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000430 Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node");
431 Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100432 return setLocation(node, startOffset, lastNode.getLocation().getEndOffset());
433 }
434
435 // create a funcall expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000436 private Expression makeFuncallExpression(Expression receiver, Identifier function,
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000437 List<Argument.Passed> args,
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100438 int start, int end) {
439 if (function.getLocation() == null) {
440 function = setLocation(function, start, end);
441 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100442 return setLocation(new FuncallExpression(receiver, function, args), start, end);
443 }
444
Laurent Le Brun56093892015-03-20 13:01:58 +0000445 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100446 // | expr
Laurent Le Brunb3266382015-05-27 16:14:43 +0000447 // | *args (only in Skylark mode)
448 // | **kwargs (only in Skylark mode)
449 // To keep BUILD files declarative and easy to process, *args and **kwargs
450 // arguments are allowed only in Skylark mode.
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000451 private Argument.Passed parseFuncallArgument() {
452 final int start = token.left;
453 // parse **expr
454 if (token.kind == TokenKind.STAR_STAR) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000455 if (parsingMode != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000456 reportError(
457 lexer.createLocation(token.left, token.right),
458 "**kwargs arguments are not allowed in BUILD files");
459 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000460 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000461 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000462 return setLocation(new Argument.StarStar(expr), start, expr);
463 }
464 // parse *expr
465 if (token.kind == TokenKind.STAR) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000466 if (parsingMode != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000467 reportError(
468 lexer.createLocation(token.left, token.right),
469 "*args arguments are not allowed in BUILD files");
470 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000471 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000472 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000473 return setLocation(new Argument.Star(expr), start, expr);
474 }
475 // parse keyword = expr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100476 if (token.kind == TokenKind.IDENTIFIER) {
477 Token identToken = token;
478 String name = (String) token.value;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100479 nextToken();
480 if (token.kind == TokenKind.EQUALS) { // it's a named argument
481 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000482 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000483 return setLocation(new Argument.Keyword(name, expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100484 } else { // oops, back up!
485 pushToken(identToken);
486 }
487 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100488 // parse a positional argument
Laurent Le Brun56093892015-03-20 13:01:58 +0000489 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000490 return setLocation(new Argument.Positional(expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100491 }
492
Laurent Le Brun56093892015-03-20 13:01:58 +0000493 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100494 // | IDENTIFIER
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000495 private Parameter<Expression, Expression> parseFunctionParameter() {
496 // TODO(bazel-team): optionally support type annotations
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100497 int start = token.left;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000498 if (token.kind == TokenKind.STAR_STAR) { // kwarg
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100499 nextToken();
Florian Weikert6f864c32015-07-23 11:26:39 +0000500 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000501 return setLocation(new Parameter.StarStar<Expression, Expression>(
502 ident.getName()), start, ident);
503 } else if (token.kind == TokenKind.STAR) { // stararg
504 int end = token.right;
505 nextToken();
506 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000507 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000508 return setLocation(new Parameter.Star<Expression, Expression>(ident.getName()),
509 start, ident);
510 } else {
511 return setLocation(new Parameter.Star<Expression, Expression>(null), start, end);
512 }
513 } else {
Florian Weikert6f864c32015-07-23 11:26:39 +0000514 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000515 if (token.kind == TokenKind.EQUALS) { // there's a default value
516 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000517 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000518 return setLocation(new Parameter.Optional<Expression, Expression>(
519 ident.getName(), expr), start, expr);
520 } else {
521 return setLocation(new Parameter.Mandatory<Expression, Expression>(
522 ident.getName()), start, ident);
523 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100524 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100525 }
526
527 // funcall_suffix ::= '(' arg_list? ')'
Florian Weikert6f864c32015-07-23 11:26:39 +0000528 private Expression parseFuncallSuffix(int start, Expression receiver, Identifier function) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000529 List<Argument.Passed> args = Collections.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100530 expect(TokenKind.LPAREN);
531 int end;
532 if (token.kind == TokenKind.RPAREN) {
533 end = token.right;
534 nextToken(); // RPAREN
535 } else {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000536 args = parseFuncallArguments(); // (includes optional trailing comma)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100537 end = token.right;
538 expect(TokenKind.RPAREN);
539 }
540 return makeFuncallExpression(receiver, function, args, start, end);
541 }
542
543 // selector_suffix ::= '.' IDENTIFIER
544 // |'.' IDENTIFIER funcall_suffix
545 private Expression parseSelectorSuffix(int start, Expression receiver) {
546 expect(TokenKind.DOT);
547 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000548 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100549 if (token.kind == TokenKind.LPAREN) {
550 return parseFuncallSuffix(start, receiver, ident);
551 } else {
552 return setLocation(new DotExpression(receiver, ident), start, token.right);
553 }
554 } else {
Laurent Le Brun72329862015-03-23 14:20:03 +0000555 syntaxError(token, "expected identifier after dot");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100556 int end = syncTo(EXPR_TERMINATOR_SET);
557 return makeErrorExpression(start, end);
558 }
559 }
560
561 // arg_list ::= ( (arg ',')* arg ','? )?
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000562 private List<Argument.Passed> parseFuncallArguments() {
563 List<Argument.Passed> arguments =
564 parseFunctionArguments(new Supplier<Argument.Passed>() {
565 @Override public Argument.Passed get() {
566 return parseFuncallArgument();
567 }
568 });
569 try {
570 Argument.validateFuncallArguments(arguments);
571 } catch (Argument.ArgumentException e) {
572 reportError(lexer.createLocation(token.left, token.right), e.getMessage());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100573 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000574 return arguments;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100575 }
576
Laurent Le Brun56093892015-03-20 13:01:58 +0000577 // expr_list parses a comma-separated list of expression. It assumes that the
578 // first expression was already parsed, so it starts with a comma.
579 // It is used to parse tuples and list elements.
580 // expr_list ::= ( ',' expr )* ','?
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000581 private List<Expression> parseExprList(boolean trailingColonAllowed) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100582 List<Expression> list = new ArrayList<>();
583 // terminating tokens for an expression list
Laurent Le Brun56093892015-03-20 13:01:58 +0000584 while (token.kind == TokenKind.COMMA) {
585 expect(TokenKind.COMMA);
586 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000587 if (!trailingColonAllowed) {
588 reportError(
589 lexer.createLocation(token.left, token.right),
590 "Trailing comma is allowed only in parenthesized tuples.");
591 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100592 break;
593 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000594 list.add(parseNonTupleExpression());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100595 }
596 return list;
597 }
598
599 // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )?
600 private List<DictionaryEntryLiteral> parseDictEntryList() {
601 List<DictionaryEntryLiteral> list = new ArrayList<>();
602 // the terminating token for a dict entry list
603 while (token.kind != TokenKind.RBRACE) {
604 list.add(parseDictEntry());
605 if (token.kind == TokenKind.COMMA) {
606 nextToken();
607 } else {
608 break;
609 }
610 }
611 return list;
612 }
613
Laurent Le Brun56093892015-03-20 13:01:58 +0000614 // dict_entry ::= nontupleexpr ':' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100615 private DictionaryEntryLiteral parseDictEntry() {
616 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000617 Expression key = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100618 expect(TokenKind.COLON);
Laurent Le Brun56093892015-03-20 13:01:58 +0000619 Expression value = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100620 return setLocation(new DictionaryEntryLiteral(key, value), start, value);
621 }
622
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000623 /**
624 * Parse a String literal value, e.g. "str".
625 */
626 private StringLiteral parseStringLiteral() {
627 Preconditions.checkState(token.kind == TokenKind.STRING);
628 int end = token.right;
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000629 StringLiteral literal =
Michajlo Matijkiw8c539ea2017-02-22 23:02:46 +0000630 setLocation(new StringLiteral((String) token.value), token.left, end);
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000631
632 nextToken();
633 if (token.kind == TokenKind.STRING) {
634 reportError(lexer.createLocation(end, token.left),
635 "Implicit string concatenation is forbidden, use the + operator");
636 }
637 return literal;
638 }
639
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100640 // primary ::= INTEGER
641 // | STRING
642 // | STRING '.' IDENTIFIER funcall_suffix
643 // | IDENTIFIER
644 // | IDENTIFIER funcall_suffix
645 // | IDENTIFIER '.' selector_suffix
646 // | list_expression
647 // | '(' ')' // a tuple with zero elements
648 // | '(' expr ')' // a parenthesized expression
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100649 // | dict_expression
650 // | '-' primary_with_suffix
651 private Expression parsePrimary() {
652 int start = token.left;
653 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000654 case INT:
655 {
656 IntegerLiteral literal = new IntegerLiteral((Integer) token.value);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100657 setLocation(literal, start, token.right);
658 nextToken();
659 return literal;
660 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000661 case STRING:
662 return parseStringLiteral();
663 case IDENTIFIER:
664 {
665 Identifier ident = parseIdent();
666 if (token.kind == TokenKind.LPAREN) { // it's a function application
667 return parseFuncallSuffix(start, null, ident);
668 } else {
669 return ident;
670 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100671 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000672 case LBRACKET: // it's a list
673 return parseListMaker();
674 case LBRACE: // it's a dictionary
675 return parseDictExpression();
676 case LPAREN:
677 {
678 nextToken();
679 // check for the empty tuple literal
680 if (token.kind == TokenKind.RPAREN) {
681 ListLiteral literal = ListLiteral.makeTuple(Collections.<Expression>emptyList());
682 setLocation(literal, start, token.right);
683 nextToken();
684 return literal;
685 }
686 // parse the first expression
687 Expression expression = parseExpression(true);
688 setLocation(expression, start, token.right);
689 if (token.kind == TokenKind.RPAREN) {
690 nextToken();
691 return expression;
692 }
693 expect(TokenKind.RPAREN);
694 int end = syncTo(EXPR_TERMINATOR_SET);
695 return makeErrorExpression(start, end);
696 }
697 case MINUS:
698 {
699 nextToken();
700 List<Argument.Passed> args = new ArrayList<>();
701 Expression expr = parsePrimaryWithSuffix();
702 args.add(setLocation(new Argument.Positional(expr), start, expr));
703 return makeFuncallExpression(null, new Identifier("-"), args, start, token.right);
704 }
705 default:
706 {
707 syntaxError(token, "expected expression");
708 int end = syncTo(EXPR_TERMINATOR_SET);
709 return makeErrorExpression(start, end);
710 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100711 }
712 }
713
714 // primary_with_suffix ::= primary selector_suffix*
715 // | primary substring_suffix
716 private Expression parsePrimaryWithSuffix() {
717 int start = token.left;
718 Expression receiver = parsePrimary();
719 while (true) {
720 if (token.kind == TokenKind.DOT) {
721 receiver = parseSelectorSuffix(start, receiver);
722 } else if (token.kind == TokenKind.LBRACKET) {
723 receiver = parseSubstringSuffix(start, receiver);
724 } else {
725 break;
726 }
727 }
728 return receiver;
729 }
730
Florian Weikerte3421962015-12-17 12:46:08 +0000731 // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100732 private Expression parseSubstringSuffix(int start, Expression receiver) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100733 Expression startExpr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100734
735 expect(TokenKind.LBRACKET);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100736 if (token.kind == TokenKind.COLON) {
Florian Weikerte3421962015-12-17 12:46:08 +0000737 startExpr = setLocation(new Identifier("None"), token.left, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100738 } else {
Laurent Le Brun6824d862015-09-11 13:51:41 +0000739 startExpr = parseExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100740 }
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000741 // This is an index/key access
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100742 if (token.kind == TokenKind.RBRACKET) {
743 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000744 return setLocation(new IndexExpression(receiver, startExpr), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100745 }
Laurent Le Bruneeef30f2015-03-16 15:12:35 +0000746 // This is a slice (or substring)
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000747 Expression endExpr = parseSliceArgument(new Identifier("None"));
748 Expression stepExpr = parseSliceArgument(new IntegerLiteral(1));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100749 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000750 return setLocation(new SliceExpression(receiver, startExpr, endExpr, stepExpr),
751 start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100752 }
753
Florian Weikerte3421962015-12-17 12:46:08 +0000754 /**
755 * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice
756 * operation. If no such expression is found, this method returns an argument that represents
757 * {@code defaultValue}.
758 */
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000759 private Expression parseSliceArgument(Expression defaultValue) {
Florian Weikerte3421962015-12-17 12:46:08 +0000760 Expression explicitArg = getSliceEndOrStepExpression();
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000761 if (explicitArg == null) {
762 return setLocation(defaultValue, token.left, token.right);
763 }
764 return explicitArg;
Florian Weikerte3421962015-12-17 12:46:08 +0000765 }
766
767 private Expression getSliceEndOrStepExpression() {
768 // There has to be a colon before any end or slice argument.
769 // However, if the next token thereafter is another colon or a right bracket, no argument value
770 // was specified.
771 if (token.kind == TokenKind.COLON) {
772 expect(TokenKind.COLON);
773 if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) {
774 return parseNonTupleExpression();
775 }
776 }
777 return null;
778 }
779
Laurent Le Brun185392d2015-03-20 14:41:25 +0000780 // Equivalent to 'exprlist' rule in Python grammar.
781 // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','?
782 private Expression parseForLoopVariables() {
783 // We cannot reuse parseExpression because it would parse the 'in' operator.
784 // e.g. "for i in e: pass" -> we want to parse only "i" here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100785 int start = token.left;
Laurent Le Brun185392d2015-03-20 14:41:25 +0000786 Expression e1 = parsePrimaryWithSuffix();
787 if (token.kind != TokenKind.COMMA) {
788 return e1;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100789 }
790
Laurent Le Brun185392d2015-03-20 14:41:25 +0000791 // It's a tuple
792 List<Expression> tuple = new ArrayList<>();
793 tuple.add(e1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100794 while (token.kind == TokenKind.COMMA) {
Laurent Le Brun185392d2015-03-20 14:41:25 +0000795 expect(TokenKind.COMMA);
796 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
797 break;
798 }
799 tuple.add(parsePrimaryWithSuffix());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100800 }
Laurent Le Brun185392d2015-03-20 14:41:25 +0000801 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100802 }
803
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000804 // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix
805 // | 'IF' expr comprehension_suffix
806 // | ']'
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000807 private Expression parseComprehensionSuffix(
brandjon296cd492017-05-15 16:17:16 +0200808 AbstractComprehension.AbstractBuilder comprehensionBuilder, TokenKind closingBracket) {
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000809 while (true) {
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000810 if (token.kind == TokenKind.FOR) {
811 nextToken();
812 Expression loopVar = parseForLoopVariables();
813 expect(TokenKind.IN);
814 // The expression cannot be a ternary expression ('x if y else z') due to
815 // conflicts in Python grammar ('if' is used by the comprehension).
816 Expression listExpression = parseNonTupleExpression(0);
brandjon296cd492017-05-15 16:17:16 +0200817 comprehensionBuilder.addFor(loopVar, listExpression);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000818 } else if (token.kind == TokenKind.IF) {
819 nextToken();
laurentlbc3a1af62017-06-16 14:37:43 +0200820 // [x for x in li if 1, 2] # parse error
821 // [x for x in li if (1, 2)] # ok
822 comprehensionBuilder.addIf(parseNonTupleExpression(0));
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000823 } else if (token.kind == closingBracket) {
824 nextToken();
brandjon296cd492017-05-15 16:17:16 +0200825 return comprehensionBuilder.build();
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000826 } else {
827 syntaxError(token, "expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'");
828 syncPast(LIST_TERMINATOR_SET);
829 return makeErrorExpression(token.left, token.right);
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000830 }
831 }
832 }
833
Laurent Le Brun56093892015-03-20 13:01:58 +0000834 // list_maker ::= '[' ']'
835 // |'[' expr ']'
836 // |'[' expr expr_list ']'
837 // |'[' expr ('FOR' loop_variables 'IN' expr)+ ']'
838 private Expression parseListMaker() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100839 int start = token.left;
840 expect(TokenKind.LBRACKET);
841 if (token.kind == TokenKind.RBRACKET) { // empty List
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000842 ListLiteral literal = ListLiteral.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100843 setLocation(literal, start, token.right);
844 nextToken();
845 return literal;
846 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000847 Expression expression = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100848 Preconditions.checkNotNull(expression,
849 "null element in list in AST at %s:%s", token.left, token.right);
850 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000851 case RBRACKET: // singleton List
852 {
853 ListLiteral literal = ListLiteral.makeList(Collections.singletonList(expression));
854 setLocation(literal, start, token.right);
855 nextToken();
856 return literal;
857 }
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000858 case FOR:
859 { // list comprehension
860 Expression result =
brandjon296cd492017-05-15 16:17:16 +0200861 parseComprehensionSuffix(
862 new ListComprehension.Builder().setOutputExpression(expression),
863 TokenKind.RBRACKET);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000864 return setLocation(result, start, token.right);
865 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000866 case COMMA:
867 {
868 List<Expression> list = parseExprList(true);
869 Preconditions.checkState(
870 !list.contains(null),
871 "null element in list in AST at %s:%s",
872 token.left,
873 token.right);
874 list.add(0, expression);
875 if (token.kind == TokenKind.RBRACKET) {
876 ListLiteral literal = ListLiteral.makeList(list);
877 setLocation(literal, start, token.right);
878 nextToken();
879 return literal;
880 }
881 expect(TokenKind.RBRACKET);
882 int end = syncPast(LIST_TERMINATOR_SET);
883 return makeErrorExpression(start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100884 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000885 default:
886 {
887 syntaxError(token, "expected ',', 'for' or ']'");
888 int end = syncPast(LIST_TERMINATOR_SET);
889 return makeErrorExpression(start, end);
890 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100891 }
892 }
893
894 // dict_expression ::= '{' '}'
895 // |'{' dict_entry_list '}'
896 // |'{' dict_entry 'FOR' loop_variables 'IN' expr '}'
897 private Expression parseDictExpression() {
898 int start = token.left;
899 expect(TokenKind.LBRACE);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000900 if (token.kind == TokenKind.RBRACE) { // empty Dict
901 DictionaryLiteral literal = DictionaryLiteral.emptyDict();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100902 setLocation(literal, start, token.right);
903 nextToken();
904 return literal;
905 }
906 DictionaryEntryLiteral entry = parseDictEntry();
907 if (token.kind == TokenKind.FOR) {
908 // Dict comprehension
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000909 Expression result = parseComprehensionSuffix(
brandjon296cd492017-05-15 16:17:16 +0200910 new DictComprehension.Builder()
911 .setKeyExpression(entry.getKey())
912 .setValueExpression(entry.getValue()),
913 TokenKind.RBRACE);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000914 return setLocation(result, start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100915 }
916 List<DictionaryEntryLiteral> entries = new ArrayList<>();
917 entries.add(entry);
918 if (token.kind == TokenKind.COMMA) {
919 expect(TokenKind.COMMA);
920 entries.addAll(parseDictEntryList());
921 }
922 if (token.kind == TokenKind.RBRACE) {
923 DictionaryLiteral literal = new DictionaryLiteral(entries);
924 setLocation(literal, start, token.right);
925 nextToken();
926 return literal;
927 }
Laurent Le Brun72329862015-03-23 14:20:03 +0000928 expect(TokenKind.RBRACE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100929 int end = syncPast(DICT_TERMINATOR_SET);
930 return makeErrorExpression(start, end);
931 }
932
Florian Weikert6f864c32015-07-23 11:26:39 +0000933 private Identifier parseIdent() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100934 if (token.kind != TokenKind.IDENTIFIER) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000935 expect(TokenKind.IDENTIFIER);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100936 return makeErrorExpression(token.left, token.right);
937 }
Florian Weikert6f864c32015-07-23 11:26:39 +0000938 Identifier ident = new Identifier(((String) token.value));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100939 setLocation(ident, token.left, token.right);
940 nextToken();
941 return ident;
942 }
943
944 // binop_expression ::= binop_expression OP binop_expression
945 // | parsePrimaryWithSuffix
946 // This function takes care of precedence between operators (see operatorPrecedence for
947 // the order), and it assumes left-to-right associativity.
948 private Expression parseBinOpExpression(int prec) {
949 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000950 Expression expr = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100951 // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is
952 // limited to number of different precedence levels (operatorPrecedence.size()).
laurentlb1fcea382017-06-19 16:02:42 +0200953 Operator lastOp = null;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100954 for (;;) {
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000955
956 if (token.kind == TokenKind.NOT) {
957 // If NOT appears when we expect a binary operator, it must be followed by IN.
958 // Since the code expects every operator to be a single token, we push a NOT_IN token.
959 expect(TokenKind.NOT);
960 expect(TokenKind.IN);
961 pushToken(new Token(TokenKind.NOT_IN, token.left, token.right));
962 }
963
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100964 if (!binaryOperators.containsKey(token.kind)) {
965 return expr;
966 }
967 Operator operator = binaryOperators.get(token.kind);
968 if (!operatorPrecedence.get(prec).contains(operator)) {
969 return expr;
970 }
laurentlb1fcea382017-06-19 16:02:42 +0200971
972 // Operator '==' and other operators of the same precedence (e.g. '<', 'in')
973 // are not associative.
974 if (lastOp != null && operatorPrecedence.get(prec).contains(Operator.EQUALS_EQUALS)) {
975 reportError(
976 lexer.createLocation(token.left, token.right),
977 String.format("Operator '%s' is not associative with operator '%s'. Use parens.",
978 lastOp, operator));
979 }
980
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100981 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000982 Expression secondary = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100983 expr = optimizeBinOpExpression(operator, expr, secondary);
984 setLocation(expr, start, secondary);
laurentlb1fcea382017-06-19 16:02:42 +0200985 lastOp = operator;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100986 }
987 }
988
989 // Optimize binary expressions.
990 // string literal + string literal can be concatenated into one string literal
991 // so we don't have to do the expensive string concatenation at runtime.
992 private Expression optimizeBinOpExpression(
993 Operator operator, Expression expr, Expression secondary) {
994 if (operator == Operator.PLUS) {
995 if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
996 StringLiteral left = (StringLiteral) expr;
997 StringLiteral right = (StringLiteral) secondary;
Michajlo Matijkiw8c539ea2017-02-22 23:02:46 +0000998 return new StringLiteral(left.getValue() + right.getValue());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100999 }
1000 }
1001 return new BinaryOperatorExpression(operator, expr, secondary);
1002 }
1003
1004 private Expression parseExpression() {
Laurent Le Brunb639ca82017-01-17 11:18:23 +00001005 return parseExpression(false);
1006 }
1007
1008 // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of
1009 // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity:
1010 // e.g. fct(x, y) vs fct((x, y))
1011 //
1012 // Tuples can have a trailing comma only when insideParens is true. This prevents bugs
1013 // where a one-element tuple is surprisingly created:
1014 // e.g. foo = f(x),
1015 private Expression parseExpression(boolean insideParens) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001016 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +00001017 Expression expression = parseNonTupleExpression();
1018 if (token.kind != TokenKind.COMMA) {
1019 return expression;
1020 }
1021
1022 // It's a tuple
Laurent Le Brunb639ca82017-01-17 11:18:23 +00001023 List<Expression> tuple = parseExprList(insideParens);
Laurent Le Brun56093892015-03-20 13:01:58 +00001024 tuple.add(0, expression); // add the first expression to the front of the tuple
1025 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
1026 }
1027
1028 // Equivalent to 'test' rule in Python grammar.
1029 private Expression parseNonTupleExpression() {
1030 int start = token.left;
1031 Expression expr = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001032 if (token.kind == TokenKind.IF) {
1033 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +00001034 Expression condition = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001035 if (token.kind == TokenKind.ELSE) {
1036 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +00001037 Expression elseClause = parseNonTupleExpression();
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001038 return setLocation(new ConditionalExpression(expr, condition, elseClause),
1039 start, elseClause);
1040 } else {
1041 reportError(lexer.createLocation(start, token.left),
1042 "missing else clause in conditional expression or semicolon before if");
1043 return expr; // Try to recover from error: drop the if and the expression after it. Ouch.
1044 }
1045 }
1046 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001047 }
1048
Laurent Le Brun56093892015-03-20 13:01:58 +00001049 private Expression parseNonTupleExpression(int prec) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001050 if (prec >= operatorPrecedence.size()) {
1051 return parsePrimaryWithSuffix();
1052 }
1053 if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) {
1054 return parseNotExpression(prec);
1055 }
1056 return parseBinOpExpression(prec);
1057 }
1058
1059 // not_expr :== 'not' expr
1060 private Expression parseNotExpression(int prec) {
1061 int start = token.left;
1062 expect(TokenKind.NOT);
Laurent Le Brun56093892015-03-20 13:01:58 +00001063 Expression expression = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001064 NotExpression notExpression = new NotExpression(expression);
1065 return setLocation(notExpression, start, token.right);
1066 }
1067
1068 // file_input ::= ('\n' | stmt)* EOF
1069 private List<Statement> parseFileInput() {
Googler768cbc42015-08-28 12:52:14 +00001070 long startTime = Profiler.nanoTimeMaybe();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001071 List<Statement> list = new ArrayList<>();
1072 while (token.kind != TokenKind.EOF) {
1073 if (token.kind == TokenKind.NEWLINE) {
Laurent Le Brun9060e162015-04-02 10:07:28 +00001074 expectAndRecover(TokenKind.NEWLINE);
1075 } else if (recoveryMode) {
1076 // If there was a parse error, we want to recover here
1077 // before starting a new top-level statement.
1078 syncTo(STATEMENT_TERMINATOR_SET);
1079 recoveryMode = false;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001080 } else {
1081 parseTopLevelStatement(list);
1082 }
1083 }
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001084 Profiler.instance().logSimpleTask(startTime, ProfilerTask.SKYLARK_PARSER, "");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001085 return list;
1086 }
1087
Florian Weikert9d659ad2015-07-23 14:44:36 +00001088 // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)* COMMA? ')'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001089 private void parseLoad(List<Statement> list) {
1090 int start = token.left;
1091 if (token.kind != TokenKind.STRING) {
1092 expect(TokenKind.STRING);
1093 return;
1094 }
Googler768cbc42015-08-28 12:52:14 +00001095
John Field9201fda2015-12-30 19:30:34 +00001096 StringLiteral importString = parseStringLiteral();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001097 expect(TokenKind.COMMA);
1098
Florian Weikert9d659ad2015-07-23 14:44:36 +00001099 Map<Identifier, String> symbols = new HashMap<>();
1100 parseLoadSymbol(symbols); // At least one symbol is required
1101
Laurent Le Brun73a98492015-03-17 15:46:19 +00001102 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001103 expect(TokenKind.COMMA);
Laurent Le Brun59f587a2015-03-16 14:51:36 +00001104 if (token.kind == TokenKind.RPAREN) {
1105 break;
1106 }
Florian Weikert9d659ad2015-07-23 14:44:36 +00001107
1108 parseLoadSymbol(symbols);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001109 }
1110 expect(TokenKind.RPAREN);
Googler768cbc42015-08-28 12:52:14 +00001111
Laurent Le Brun7b1708c2016-10-13 10:05:12 +00001112 LoadStatement stmt = new LoadStatement(importString, symbols);
Miguel Alcon Pinto927f3b22016-08-22 14:21:30 +00001113 list.add(setLocation(stmt, start, token.left));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001114 }
1115
Florian Weikert9d659ad2015-07-23 14:44:36 +00001116 /**
1117 * Parses the next symbol argument of a load statement and puts it into the output map.
1118 *
1119 * <p> The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING).
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001120 * If no alias is used, "name" and "declared" will be identical. "Declared" refers to the
1121 * original name in the Bazel file that should be loaded, while "name" will be the key of the
1122 * entry in the map.
Florian Weikert9d659ad2015-07-23 14:44:36 +00001123 */
1124 private void parseLoadSymbol(Map<Identifier, String> symbols) {
Vladimir Moskva8d610c62016-09-15 14:36:41 +00001125 Token nameToken;
1126 Token declaredToken;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001127
1128 if (token.kind == TokenKind.STRING) {
1129 nameToken = token;
1130 declaredToken = nameToken;
1131 } else {
1132 if (token.kind != TokenKind.IDENTIFIER) {
1133 syntaxError(token, "Expected either a literal string or an identifier");
1134 }
1135
1136 nameToken = token;
1137
1138 expect(TokenKind.IDENTIFIER);
1139 expect(TokenKind.EQUALS);
1140
1141 declaredToken = token;
1142 }
1143
1144 expect(TokenKind.STRING);
1145
1146 try {
1147 Identifier identifier = new Identifier(nameToken.value.toString());
1148
1149 if (symbols.containsKey(identifier)) {
1150 syntaxError(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001151 nameToken, String.format("Identifier '%s' is used more than once",
1152 identifier.getName()));
Florian Weikert9d659ad2015-07-23 14:44:36 +00001153 } else {
1154 symbols.put(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001155 setLocation(identifier, nameToken.left, nameToken.right),
1156 declaredToken.value.toString());
Florian Weikert9d659ad2015-07-23 14:44:36 +00001157 }
1158 } catch (NullPointerException npe) {
1159 // This means that the value of at least one token is null. In this case, the previous
1160 // expect() call has already logged an error.
1161 }
1162 }
1163
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001164 private void parseTopLevelStatement(List<Statement> list) {
1165 // In Python grammar, there is no "top-level statement" and imports are
1166 // considered as "small statements". We are a bit stricter than Python here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001167 // Check if there is an include
1168 if (token.kind == TokenKind.IDENTIFIER) {
1169 Token identToken = token;
Florian Weikert6f864c32015-07-23 11:26:39 +00001170 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001171
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001172 if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001173 expect(TokenKind.LPAREN);
1174 parseLoad(list);
1175 return;
1176 }
1177 pushToken(identToken); // push the ident back to parse it as a statement
1178 }
1179 parseStatement(list, true);
1180 }
1181
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001182 // small_stmt | 'pass'
1183 private void parseSmallStatementOrPass(List<Statement> list) {
1184 if (token.kind == TokenKind.PASS) {
1185 // Skip the token, don't add it to the list.
1186 // It has no existence in the AST.
1187 expect(TokenKind.PASS);
1188 } else {
1189 list.add(parseSmallStatement());
1190 }
1191 }
1192
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001193 // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE
1194 private void parseSimpleStatement(List<Statement> list) {
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001195 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001196
1197 while (token.kind == TokenKind.SEMI) {
1198 nextToken();
1199 if (token.kind == TokenKind.NEWLINE) {
1200 break;
1201 }
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001202 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001203 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001204 expectAndRecover(TokenKind.NEWLINE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001205 }
1206
1207 // small_stmt ::= assign_stmt
1208 // | expr
1209 // | RETURN expr
Florian Weikert917ceaa2015-06-10 13:54:26 +00001210 // | flow_stmt
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001211 // assign_stmt ::= expr ('=' | augassign) expr
Vladimir Moskva71536642016-12-19 13:51:57 +00001212 // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=')
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001213 // Note that these are in Python, but not implemented here (at least for now):
Vladimir Moskva71536642016-12-19 13:51:57 +00001214 // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//='
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001215 // Semantic difference from Python:
1216 // In Skylark, x += y is simple syntactic sugar for x = x + y.
1217 // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined
1218 // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects
1219 // the original list (it doesn't do that on tuples); if no such method is defined it falls back
1220 // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect.
1221 // Note also that there is a special casing to translate 'ident[key] = value'
1222 // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like
1223 // dictionary assignment syntax.
1224 private Statement parseSmallStatement() {
1225 int start = token.left;
1226 if (token.kind == TokenKind.RETURN) {
1227 return parseReturnStatement();
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001228 } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) {
Florian Weikert917ceaa2015-06-10 13:54:26 +00001229 return parseFlowStatement(token.kind);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001230 }
1231 Expression expression = parseExpression();
1232 if (token.kind == TokenKind.EQUALS) {
1233 nextToken();
1234 Expression rvalue = parseExpression();
laurentlb094bb262017-05-19 21:18:25 +02001235 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001236 new AssignmentStatement(new LValue(expression), rvalue),
1237 start, rvalue);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001238 } else if (augmentedAssignmentMethods.containsKey(token.kind)) {
1239 Operator operator = augmentedAssignmentMethods.get(token.kind);
1240 nextToken();
1241 Expression operand = parseExpression();
1242 int end = operand.getLocation().getEndOffset();
Vladimir Moskva71536642016-12-19 13:51:57 +00001243 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001244 new AugmentedAssignmentStatement(operator, new LValue(expression), operand),
1245 start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001246 } else {
1247 return setLocation(new ExpressionStatement(expression), start, expression);
1248 }
1249 }
1250
1251 // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]?
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001252 private IfStatement parseIfStatement() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001253 int start = token.left;
1254 List<ConditionalStatements> thenBlocks = new ArrayList<>();
1255 thenBlocks.add(parseConditionalStatements(TokenKind.IF));
1256 while (token.kind == TokenKind.ELIF) {
1257 thenBlocks.add(parseConditionalStatements(TokenKind.ELIF));
1258 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001259 List<Statement> elseBlock;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001260 if (token.kind == TokenKind.ELSE) {
1261 expect(TokenKind.ELSE);
1262 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001263 elseBlock = parseSuite();
1264 } else {
1265 elseBlock = ImmutableList.of();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001266 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001267 return setLocation(new IfStatement(thenBlocks, elseBlock), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001268 }
1269
1270 // cond_stmts ::= [EL]IF expr ':' suite
1271 private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) {
1272 int start = token.left;
1273 expect(tokenKind);
Laurent Le Brun56093892015-03-20 13:01:58 +00001274 Expression expr = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001275 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001276 List<Statement> thenBlock = parseSuite();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001277 ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock);
1278 return setLocation(stmt, start, token.right);
1279 }
1280
1281 // for_stmt ::= FOR IDENTIFIER IN expr ':' suite
1282 private void parseForStatement(List<Statement> list) {
1283 int start = token.left;
1284 expect(TokenKind.FOR);
Laurent Le Brun185392d2015-03-20 14:41:25 +00001285 Expression loopVar = parseForLoopVariables();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001286 expect(TokenKind.IN);
1287 Expression collection = parseExpression();
1288 expect(TokenKind.COLON);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001289 enterLoop();
1290 try {
1291 List<Statement> block = parseSuite();
brandjon540aac62017-06-12 23:08:09 +02001292 Statement stmt = new ForStatement(new LValue(loopVar), collection, block);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001293 list.add(setLocation(stmt, start, token.right));
1294 } finally {
1295 exitLoop();
1296 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001297 }
1298
1299 // def foo(bar1, bar2):
1300 private void parseFunctionDefStatement(List<Statement> list) {
1301 int start = token.left;
1302 expect(TokenKind.DEF);
Florian Weikert6f864c32015-07-23 11:26:39 +00001303 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001304 expect(TokenKind.LPAREN);
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001305 List<Parameter<Expression, Expression>> params = parseParameters();
1306 FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001307 expect(TokenKind.RPAREN);
1308 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001309 List<Statement> block = parseSuite();
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001310 FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001311 list.add(setLocation(stmt, start, token.right));
1312 }
1313
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001314 private FunctionSignature.WithValues<Expression, Expression> functionSignature(
1315 List<Parameter<Expression, Expression>> parameters) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001316 try {
1317 return FunctionSignature.WithValues.<Expression, Expression>of(parameters);
1318 } catch (FunctionSignature.SignatureException e) {
1319 reportError(e.getParameter().getLocation(), e.getMessage());
1320 // return bogus empty signature
1321 return FunctionSignature.WithValues.<Expression, Expression>create(FunctionSignature.of());
1322 }
1323 }
1324
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001325 private List<Parameter<Expression, Expression>> parseParameters() {
1326 return parseFunctionArguments(
1327 new Supplier<Parameter<Expression, Expression>>() {
1328 @Override public Parameter<Expression, Expression> get() {
1329 return parseFunctionParameter();
1330 }
1331 });
1332 }
1333
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001334 /**
1335 * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter,
1336 * as returned by the Supplier parseArgument (that, taking no argument, must be closed over
1337 * the mutable input data structures).
1338 *
1339 * <p>This parser does minimal validation: it ensures the proper python use of the comma (that
1340 * can terminate before a star but not after) and the fact that a **kwarg must appear last.
1341 * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as
1342 * all positional preceding keyword arguments in a call, nor does it check the more subtle
1343 * constraints for Parameter-s. This validation must happen afterwards in an appropriate method.
1344 */
1345 private <V extends Argument> ImmutableList<V>
1346 parseFunctionArguments(Supplier<V> parseArgument) {
1347 boolean hasArg = false;
1348 boolean hasStar = false;
1349 boolean hasStarStar = false;
1350 ArrayList<V> arguments = new ArrayList<>();
1351
1352 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
1353 if (hasStarStar) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001354 reportError(lexer.createLocation(token.left, token.right),
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001355 "unexpected tokens after kwarg");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001356 break;
1357 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001358 if (hasArg) {
1359 expect(TokenKind.COMMA);
1360 }
1361 if (token.kind == TokenKind.RPAREN && !hasStar) {
1362 // list can end with a COMMA if there is neither * nor **
1363 break;
1364 }
1365 V arg = parseArgument.get();
1366 hasArg = true;
1367 if (arg.isStar()) {
1368 hasStar = true;
1369 } else if (arg.isStarStar()) {
1370 hasStarStar = true;
1371 }
1372 arguments.add(arg);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001373 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001374 return ImmutableList.copyOf(arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001375 }
1376
Laurent Le Brun5f674452015-03-17 19:29:13 +00001377 // suite is typically what follows a colon (e.g. after def or for).
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001378 // suite ::= simple_stmt
1379 // | NEWLINE INDENT stmt+ OUTDENT
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001380 private List<Statement> parseSuite() {
1381 List<Statement> list = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001382 if (token.kind == TokenKind.NEWLINE) {
1383 expect(TokenKind.NEWLINE);
1384 if (token.kind != TokenKind.INDENT) {
1385 reportError(lexer.createLocation(token.left, token.right),
1386 "expected an indented block");
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001387 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001388 }
1389 expect(TokenKind.INDENT);
1390 while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) {
1391 parseStatement(list, false);
1392 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001393 expectAndRecover(TokenKind.OUTDENT);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001394 } else {
Laurent Le Brun5f674452015-03-17 19:29:13 +00001395 parseSimpleStatement(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001396 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001397 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001398 }
1399
1400 // skipSuite does not check that the code is syntactically correct, it
1401 // just skips based on indentation levels.
1402 private void skipSuite() {
1403 if (token.kind == TokenKind.NEWLINE) {
1404 expect(TokenKind.NEWLINE);
1405 if (token.kind != TokenKind.INDENT) {
1406 reportError(lexer.createLocation(token.left, token.right),
1407 "expected an indented block");
1408 return;
1409 }
1410 expect(TokenKind.INDENT);
1411
1412 // Don't try to parse all the Python syntax, just skip the block
1413 // until the corresponding outdent token.
1414 int depth = 1;
1415 while (depth > 0) {
1416 // Because of the way the lexer works, this should never happen
1417 Preconditions.checkState(token.kind != TokenKind.EOF);
1418
1419 if (token.kind == TokenKind.INDENT) {
1420 depth++;
1421 }
1422 if (token.kind == TokenKind.OUTDENT) {
1423 depth--;
1424 }
1425 nextToken();
1426 }
1427
1428 } else {
1429 // the block ends at the newline token
1430 // e.g. if x == 3: print "three"
1431 syncTo(STATEMENT_TERMINATOR_SET);
1432 }
1433 }
1434
1435 // stmt ::= simple_stmt
1436 // | compound_stmt
1437 private void parseStatement(List<Statement> list, boolean isTopLevel) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001438 if (token.kind == TokenKind.DEF && parsingMode == SKYLARK) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001439 if (!isTopLevel) {
1440 reportError(lexer.createLocation(token.left, token.right),
1441 "nested functions are not allowed. Move the function to top-level");
1442 }
1443 parseFunctionDefStatement(list);
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001444 } else if (token.kind == TokenKind.IF && parsingMode == SKYLARK) {
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001445 list.add(parseIfStatement());
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001446 } else if (token.kind == TokenKind.FOR && parsingMode == SKYLARK) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001447 if (isTopLevel) {
Yue Gan4866e152016-04-07 13:07:08 +00001448 reportError(
1449 lexer.createLocation(token.left, token.right),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001450 "for loops are not allowed on top-level. Put it into a function");
1451 }
1452 parseForStatement(list);
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +00001453 } else if (BLOCK_STARTING_SET.contains(token.kind)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001454 skipBlock();
1455 } else {
1456 parseSimpleStatement(list);
1457 }
1458 }
1459
Florian Weikert917ceaa2015-06-10 13:54:26 +00001460 // flow_stmt ::= break_stmt | continue_stmt
1461 private FlowStatement parseFlowStatement(TokenKind kind) {
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001462 int start = token.left;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001463 int end = token.right;
Florian Weikert917ceaa2015-06-10 13:54:26 +00001464 expect(kind);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001465 if (loopCount == 0) {
1466 reportError(
1467 lexer.createLocation(start, end),
1468 kind.getPrettyName() + " statement must be inside a for loop");
1469 }
Laurent Le Brun7d6a3812015-10-26 12:07:12 +00001470 FlowStatement.Kind flowKind =
1471 kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001472 return setLocation(new FlowStatement(flowKind), start, end);
Florian Weikert917ceaa2015-06-10 13:54:26 +00001473 }
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001474
Googlercc0d9952015-08-10 12:01:34 +00001475 // return_stmt ::= RETURN [expr]
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001476 private ReturnStatement parseReturnStatement() {
1477 int start = token.left;
Googlercc0d9952015-08-10 12:01:34 +00001478 int end = token.right;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001479 expect(TokenKind.RETURN);
Googler768cbc42015-08-28 12:52:14 +00001480
Googlercc0d9952015-08-10 12:01:34 +00001481 Expression expression;
1482 if (STATEMENT_TERMINATOR_SET.contains(token.kind)) {
1483 // this None makes the AST not correspond to the source exactly anymore
1484 expression = new Identifier("None");
1485 setLocation(expression, start, end);
1486 } else {
1487 expression = parseExpression();
1488 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001489 return setLocation(new ReturnStatement(expression), start, expression);
1490 }
1491
Florian Weikert1f004e52015-10-16 09:43:48 +00001492 // block ::= ('if' | 'for' | 'class' | 'try' | 'def') expr ':' suite
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001493 private void skipBlock() {
1494 int start = token.left;
1495 Token blockToken = token;
1496 syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001497 if (blockToken.kind == TokenKind.ELSE) {
Yue Gan4866e152016-04-07 13:07:08 +00001498 reportError(
1499 lexer.createLocation(blockToken.left, blockToken.right),
1500 "syntax error at 'else': not allowed here.");
Laurent Le Brunb566c7d2016-10-07 16:31:03 +00001501 } else {
Florian Weikert1f004e52015-10-16 09:43:48 +00001502 String msg =
1503 ILLEGAL_BLOCK_KEYWORDS.containsKey(blockToken.kind)
1504 ? String.format("%ss are not supported.", ILLEGAL_BLOCK_KEYWORDS.get(blockToken.kind))
1505 : "This is not supported in BUILD files. Move the block to a .bzl file and load it";
Laurent Le Brunb13a4382015-06-30 14:20:45 +00001506 reportError(
1507 lexer.createLocation(start, token.right),
Florian Weikert1f004e52015-10-16 09:43:48 +00001508 String.format("syntax error at '%s': %s", blockToken, msg));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001509 }
1510 expect(TokenKind.COLON);
1511 skipSuite();
1512 }
1513
1514 // create a comment node
1515 private void makeComment(Token token) {
1516 comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
1517 }
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001518
1519 private void enterLoop() {
1520 loopCount++;
1521 }
1522
1523 private void exitLoop() {
1524 Preconditions.checkState(loopCount > 0);
1525 loopCount--;
1526 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001527}