blob: e4999077776d3d6803ffdc4aa5270058ceec2d44 [file] [log] [blame]
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00001// Copyright 2014 The Bazel Authors. All rights reserved.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package com.google.devtools.build.lib.syntax;
16
Laurent Le Brun9be852e2015-05-28 08:44:51 +000017import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.BUILD;
Laurent Le Brun9be852e2015-05-28 08:44:51 +000018import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.SKYLARK;
19
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010020import com.google.common.annotations.VisibleForTesting;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +000021import com.google.common.base.Supplier;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010022import com.google.common.collect.ImmutableList;
23import com.google.common.collect.ImmutableMap;
Laurent Le Brune51a4d22016-10-11 18:04:16 +000024import com.google.common.collect.Iterables;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010025import com.google.devtools.build.lib.events.Event;
26import com.google.devtools.build.lib.events.EventHandler;
27import com.google.devtools.build.lib.events.Location;
Googler768cbc42015-08-28 12:52:14 +000028import com.google.devtools.build.lib.profiler.Profiler;
29import com.google.devtools.build.lib.profiler.ProfilerTask;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010030import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral;
31import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements;
Mark Schaller6df81792015-12-10 18:47:47 +000032import com.google.devtools.build.lib.util.Preconditions;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010033import java.util.ArrayList;
34import java.util.Collections;
35import java.util.EnumSet;
Florian Weikert9d659ad2015-07-23 14:44:36 +000036import java.util.HashMap;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010037import java.util.Iterator;
38import java.util.List;
39import java.util.Map;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010040
Laurent Le Brun494eca92015-09-03 13:27:06 +000041
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010042/**
43 * Recursive descent parser for LL(2) BUILD language.
44 * Loosely based on Python 2 grammar.
45 * See https://docs.python.org/2/reference/grammar.html
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010046 */
Han-Wen Nienhuysceae8c52015-09-22 16:24:45 +000047@VisibleForTesting
48public class Parser {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010049
50 /**
51 * Combines the parser result into a single value object.
52 */
53 public static final class ParseResult {
54 /** The statements (rules, basically) from the parsed file. */
55 public final List<Statement> statements;
56
57 /** The comments from the parsed file. */
58 public final List<Comment> comments;
59
Lukacs Berkid9e733d2015-09-18 08:18:11 +000060 /** Represents every statement in the file. */
61 public final Location location;
62
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010063 /** Whether the file contained any errors. */
64 public final boolean containsErrors;
65
Lukacs Berkid9e733d2015-09-18 08:18:11 +000066 public ParseResult(List<Statement> statements, List<Comment> comments, Location location,
67 boolean containsErrors) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010068 // No need to copy here; when the object is created, the parser instance is just about to go
69 // out of scope and be garbage collected.
70 this.statements = Preconditions.checkNotNull(statements);
71 this.comments = Preconditions.checkNotNull(comments);
Lukacs Berkid9e733d2015-09-18 08:18:11 +000072 this.location = location;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010073 this.containsErrors = containsErrors;
74 }
75 }
76
/** Selects which language features the parser should accept. */
public enum ParsingMode {
  /** Used for parsing BUILD files */
  BUILD,

  /** Used for parsing .bzl files */
  SKYLARK
}
86
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010087 private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
Googlercc0d9952015-08-10 12:01:34 +000088 EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010089
90 private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000091 EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010092
93 private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000094 EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010095
Laurent Le Brun56093892015-03-20 13:01:58 +000096 private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000097 EnumSet.of(
98 TokenKind.EOF,
99 TokenKind.NEWLINE,
Laurent Le Brun29ad8622015-09-18 10:45:07 +0000100 TokenKind.EQUALS,
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000101 TokenKind.RBRACE,
102 TokenKind.RBRACKET,
103 TokenKind.RPAREN,
104 TokenKind.SEMI);
Laurent Le Brun56093892015-03-20 13:01:58 +0000105
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000106 private static final EnumSet<TokenKind> BLOCK_STARTING_SET =
107 EnumSet.of(
108 TokenKind.CLASS,
109 TokenKind.DEF,
110 TokenKind.ELSE,
111 TokenKind.FOR,
112 TokenKind.IF,
113 TokenKind.TRY);
114
115 private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET =
116 EnumSet.of(
117 TokenKind.COLON,
118 TokenKind.COMMA,
119 TokenKind.EOF,
120 TokenKind.FOR,
121 TokenKind.MINUS,
122 TokenKind.PERCENT,
123 TokenKind.PLUS,
124 TokenKind.RBRACKET,
125 TokenKind.RPAREN,
126 TokenKind.SLASH);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100127
Florian Weikert1f004e52015-10-16 09:43:48 +0000128 /**
129 * Keywords that are forbidden in both Skylark and BUILD parsing modes.
130 *
131 * <p>(Mapping: token -> human-readable string description)
132 */
133 private static final ImmutableMap<TokenKind, String> ILLEGAL_BLOCK_KEYWORDS =
134 ImmutableMap.of(TokenKind.CLASS, "Class definition", TokenKind.TRY, "Try statement");
135
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100136 private Token token; // current lookahead token
137 private Token pushedToken = null; // used to implement LL(2)
Laurent Le Bruna3c25a62016-10-26 10:59:09 +0000138 private int loopCount; // break/continue keywords can be used only inside a loop
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100139
140 private static final boolean DEBUGGING = false;
141
142 private final Lexer lexer;
143 private final EventHandler eventHandler;
144 private final List<Comment> comments;
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000145 private final ParsingMode parsingMode;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100146
147 private static final Map<TokenKind, Operator> binaryOperators =
148 new ImmutableMap.Builder<TokenKind, Operator>()
149 .put(TokenKind.AND, Operator.AND)
150 .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
151 .put(TokenKind.GREATER, Operator.GREATER)
152 .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
153 .put(TokenKind.IN, Operator.IN)
154 .put(TokenKind.LESS, Operator.LESS)
155 .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
156 .put(TokenKind.MINUS, Operator.MINUS)
157 .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000158 .put(TokenKind.NOT_IN, Operator.NOT_IN)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100159 .put(TokenKind.OR, Operator.OR)
160 .put(TokenKind.PERCENT, Operator.PERCENT)
Laurent Le Brun8a528262015-04-15 14:23:35 +0000161 .put(TokenKind.SLASH, Operator.DIVIDE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100162 .put(TokenKind.PLUS, Operator.PLUS)
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000163 .put(TokenKind.PIPE, Operator.PIPE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100164 .put(TokenKind.STAR, Operator.MULT)
165 .build();
166
Googler13151752016-06-02 18:37:13 +0000167 // TODO(bazel-team): add support for |=
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100168 private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
169 new ImmutableMap.Builder<TokenKind, Operator>()
Googler13151752016-06-02 18:37:13 +0000170 .put(TokenKind.PLUS_EQUALS, Operator.PLUS)
171 .put(TokenKind.MINUS_EQUALS, Operator.MINUS)
172 .put(TokenKind.STAR_EQUALS, Operator.MULT)
173 .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE)
174 .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT)
175 .build();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100176
177 /** Highest precedence goes last.
178 * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
179 **/
180 private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
181 EnumSet.of(Operator.OR),
182 EnumSet.of(Operator.AND),
183 EnumSet.of(Operator.NOT),
184 EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000185 Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN),
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000186 EnumSet.of(Operator.PIPE),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100187 EnumSet.of(Operator.MINUS, Operator.PLUS),
Laurent Le Brun8a528262015-04-15 14:23:35 +0000188 EnumSet.of(Operator.DIVIDE, Operator.MULT, Operator.PERCENT));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100189
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000190 private final Iterator<Token> tokens;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100191 private int errorsCount;
192 private boolean recoveryMode; // stop reporting errors until next statement
193
/**
 * Creates a parser over the tokens of {@code lexer} and reads the first token so that
 * {@code token} is valid as soon as construction finishes.
 */
private Parser(Lexer lexer, EventHandler eventHandler, ParsingMode parsingMode) {
  this.parsingMode = parsingMode;
  this.eventHandler = eventHandler;
  this.lexer = lexer;
  this.comments = new ArrayList<>();
  this.tokens = lexer.getTokens().iterator();
  // Must come last: nextToken() relies on the fields assigned above.
  nextToken();
}
202
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000203 private static Location locationFromStatements(Lexer lexer, List<Statement> statements) {
204 if (!statements.isEmpty()) {
205 return lexer.createLocation(
206 statements.get(0).getLocation().getStartOffset(),
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000207 Iterables.getLast(statements).getLocation().getEndOffset());
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000208 } else {
209 return Location.fromPathFragment(lexer.getFilename());
210 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100211 }
212
213 /**
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000214 * Entry-point to parser that parses a build file with comments. All errors encountered during
215 * parsing are reported via "reporter".
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100216 */
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000217 public static ParseResult parseFile(ParserInputSource input, EventHandler eventHandler) {
218 Lexer lexer = new Lexer(input, eventHandler);
219 Parser parser = new Parser(lexer, eventHandler, BUILD);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100220 List<Statement> statements = parser.parseFileInput();
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000221 return new ParseResult(statements, parser.comments, locationFromStatements(lexer, statements),
222 parser.errorsCount > 0 || lexer.containsErrors());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100223 }
224
225 /**
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000226 * Entry-point to parser that parses a build file with comments. All errors encountered during
227 * parsing are reported via "reporter". Enable Skylark extensions that are not part of the core
228 * BUILD language.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100229 */
230 public static ParseResult parseFileForSkylark(
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000231 ParserInputSource input, EventHandler eventHandler) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000232 Lexer lexer = new Lexer(input, eventHandler);
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000233 Parser parser = new Parser(lexer, eventHandler, SKYLARK);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100234 List<Statement> statements = parser.parseFileInput();
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000235 return new ParseResult(
236 statements,
237 parser.comments,
238 locationFromStatements(lexer, statements),
239 parser.errorsCount > 0 || lexer.containsErrors());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100240 }
241
242 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100243 * Entry-point to parser that parses an expression. All errors encountered
244 * during parsing are reported via "reporter". The expression may be followed
245 * by newline tokens.
246 */
247 @VisibleForTesting
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000248 public static Expression parseExpression(ParserInputSource input, EventHandler eventHandler) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000249 Lexer lexer = new Lexer(input, eventHandler);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100250 Parser parser = new Parser(lexer, eventHandler, null);
251 Expression result = parser.parseExpression();
252 while (parser.token.kind == TokenKind.NEWLINE) {
253 parser.nextToken();
254 }
255 parser.expect(TokenKind.EOF);
256 return result;
257 }
258
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100259 private void reportError(Location location, String message) {
260 errorsCount++;
261 // Limit the number of reported errors to avoid spamming output.
262 if (errorsCount <= 5) {
263 eventHandler.handle(Event.error(location, message));
264 }
265 }
266
Laurent Le Brun72329862015-03-23 14:20:03 +0000267 private void syntaxError(Token token, String message) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100268 if (!recoveryMode) {
269 String msg = token.kind == TokenKind.INDENT
270 ? "indentation error"
Laurent Le Brun72329862015-03-23 14:20:03 +0000271 : "syntax error at '" + token + "': " + message;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100272 reportError(lexer.createLocation(token.left, token.right), msg);
273 recoveryMode = true;
274 }
275 }
276
Laurent Le Brun9060e162015-04-02 10:07:28 +0000277 /**
278 * Consumes the current token. If it is not of the specified (expected)
279 * kind, reports a syntax error.
280 */
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100281 private boolean expect(TokenKind kind) {
282 boolean expected = token.kind == kind;
283 if (!expected) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000284 syntaxError(token, "expected " + kind.getPrettyName());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100285 }
286 nextToken();
287 return expected;
288 }
289
290 /**
Laurent Le Brun9060e162015-04-02 10:07:28 +0000291 * Same as expect, but stop the recovery mode if the token was expected.
292 */
293 private void expectAndRecover(TokenKind kind) {
294 if (expect(kind)) {
295 recoveryMode = false;
296 }
297 }
298
299 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100300 * Consume tokens past the first token that has a kind that is in the set of
301 * teminatingTokens.
302 * @param terminatingTokens
303 * @return the end offset of the terminating token.
304 */
305 private int syncPast(EnumSet<TokenKind> terminatingTokens) {
306 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
307 while (!terminatingTokens.contains(token.kind)) {
308 nextToken();
309 }
310 int end = token.right;
311 // read past the synchronization token
312 nextToken();
313 return end;
314 }
315
316 /**
317 * Consume tokens until we reach the first token that has a kind that is in
318 * the set of teminatingTokens.
319 * @param terminatingTokens
320 * @return the end offset of the terminating token.
321 */
322 private int syncTo(EnumSet<TokenKind> terminatingTokens) {
323 // EOF must be in the set to prevent an infinite loop
324 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
325 // read past the problematic token
326 int previous = token.right;
327 nextToken();
328 int current = previous;
329 while (!terminatingTokens.contains(token.kind)) {
330 nextToken();
331 previous = current;
332 current = token.right;
333 }
334 return previous;
335 }
336
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000337 // Keywords that exist in Python and that we don't parse.
338 private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS =
Googler768cbc42015-08-28 12:52:14 +0000339 EnumSet.of(TokenKind.AS, TokenKind.ASSERT,
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000340 TokenKind.DEL, TokenKind.EXCEPT, TokenKind.FINALLY, TokenKind.FROM, TokenKind.GLOBAL,
341 TokenKind.IMPORT, TokenKind.IS, TokenKind.LAMBDA, TokenKind.NONLOCAL, TokenKind.RAISE,
342 TokenKind.TRY, TokenKind.WITH, TokenKind.WHILE, TokenKind.YIELD);
343
344 private void checkForbiddenKeywords(Token token) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000345 if (!FORBIDDEN_KEYWORDS.contains(token.kind)) {
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000346 return;
347 }
348 String error;
349 switch (token.kind) {
350 case ASSERT: error = "'assert' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000351 case DEL:
352 error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list";
353 break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000354 case IMPORT: error = "'import' not supported, use 'load' instead"; break;
355 case IS: error = "'is' not supported, use '==' instead"; break;
356 case LAMBDA: error = "'lambda' not supported, declare a function instead"; break;
357 case RAISE: error = "'raise' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000358 case TRY: error = "'try' not supported, all exceptions are fatal"; break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000359 case WHILE: error = "'while' not supported, use 'for' instead"; break;
360 default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break;
361 }
362 reportError(lexer.createLocation(token.left, token.right), error);
363 }
364
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100365 private void nextToken() {
366 if (pushedToken != null) {
367 token = pushedToken;
368 pushedToken = null;
369 } else {
370 if (token == null || token.kind != TokenKind.EOF) {
371 token = tokens.next();
372 // transparently handle comment tokens
373 while (token.kind == TokenKind.COMMENT) {
374 makeComment(token);
375 token = tokens.next();
376 }
377 }
378 }
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000379 checkForbiddenKeywords(token);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100380 if (DEBUGGING) {
381 System.err.print(token);
382 }
383 }
384
385 private void pushToken(Token tokenToPush) {
386 if (pushedToken != null) {
387 throw new IllegalStateException("Exceeded LL(2) lookahead!");
388 }
389 pushedToken = token;
390 token = tokenToPush;
391 }
392
393 // create an error expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000394 private Identifier makeErrorExpression(int start, int end) {
395 return setLocation(new Identifier("$error$"), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100396 }
397
398 // Convenience wrapper around ASTNode.setLocation that returns the node.
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000399 private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) {
400 return ASTNode.<NODE>setLocation(location, node);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100401 }
402
403 // Another convenience wrapper method around ASTNode.setLocation
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000404 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, int endOffset) {
405 return setLocation(node, lexer.createLocation(startOffset, endOffset));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100406 }
407
408 // Convenience method that uses end offset from the last node.
409 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000410 Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node");
411 Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100412 return setLocation(node, startOffset, lastNode.getLocation().getEndOffset());
413 }
414
415 // create a funcall expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000416 private Expression makeFuncallExpression(Expression receiver, Identifier function,
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000417 List<Argument.Passed> args,
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100418 int start, int end) {
419 if (function.getLocation() == null) {
420 function = setLocation(function, start, end);
421 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100422 return setLocation(new FuncallExpression(receiver, function, args), start, end);
423 }
424
Laurent Le Brun56093892015-03-20 13:01:58 +0000425 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100426 // | expr
Laurent Le Brunb3266382015-05-27 16:14:43 +0000427 // | *args (only in Skylark mode)
428 // | **kwargs (only in Skylark mode)
429 // To keep BUILD files declarative and easy to process, *args and **kwargs
430 // arguments are allowed only in Skylark mode.
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000431 private Argument.Passed parseFuncallArgument() {
432 final int start = token.left;
433 // parse **expr
434 if (token.kind == TokenKind.STAR_STAR) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000435 if (parsingMode != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000436 reportError(
437 lexer.createLocation(token.left, token.right),
438 "**kwargs arguments are not allowed in BUILD files");
439 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000440 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000441 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000442 return setLocation(new Argument.StarStar(expr), start, expr);
443 }
444 // parse *expr
445 if (token.kind == TokenKind.STAR) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +0000446 if (parsingMode != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000447 reportError(
448 lexer.createLocation(token.left, token.right),
449 "*args arguments are not allowed in BUILD files");
450 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000451 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000452 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000453 return setLocation(new Argument.Star(expr), start, expr);
454 }
455 // parse keyword = expr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100456 if (token.kind == TokenKind.IDENTIFIER) {
457 Token identToken = token;
458 String name = (String) token.value;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100459 nextToken();
460 if (token.kind == TokenKind.EQUALS) { // it's a named argument
461 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000462 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000463 return setLocation(new Argument.Keyword(name, expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100464 } else { // oops, back up!
465 pushToken(identToken);
466 }
467 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100468 // parse a positional argument
Laurent Le Brun56093892015-03-20 13:01:58 +0000469 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000470 return setLocation(new Argument.Positional(expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100471 }
472
Laurent Le Brun56093892015-03-20 13:01:58 +0000473 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100474 // | IDENTIFIER
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000475 private Parameter<Expression, Expression> parseFunctionParameter() {
476 // TODO(bazel-team): optionally support type annotations
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100477 int start = token.left;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000478 if (token.kind == TokenKind.STAR_STAR) { // kwarg
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100479 nextToken();
Florian Weikert6f864c32015-07-23 11:26:39 +0000480 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000481 return setLocation(new Parameter.StarStar<Expression, Expression>(
482 ident.getName()), start, ident);
483 } else if (token.kind == TokenKind.STAR) { // stararg
484 int end = token.right;
485 nextToken();
486 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000487 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000488 return setLocation(new Parameter.Star<Expression, Expression>(ident.getName()),
489 start, ident);
490 } else {
491 return setLocation(new Parameter.Star<Expression, Expression>(null), start, end);
492 }
493 } else {
Florian Weikert6f864c32015-07-23 11:26:39 +0000494 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000495 if (token.kind == TokenKind.EQUALS) { // there's a default value
496 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000497 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000498 return setLocation(new Parameter.Optional<Expression, Expression>(
499 ident.getName(), expr), start, expr);
500 } else {
501 return setLocation(new Parameter.Mandatory<Expression, Expression>(
502 ident.getName()), start, ident);
503 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100504 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100505 }
506
507 // funcall_suffix ::= '(' arg_list? ')'
Florian Weikert6f864c32015-07-23 11:26:39 +0000508 private Expression parseFuncallSuffix(int start, Expression receiver, Identifier function) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000509 List<Argument.Passed> args = Collections.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100510 expect(TokenKind.LPAREN);
511 int end;
512 if (token.kind == TokenKind.RPAREN) {
513 end = token.right;
514 nextToken(); // RPAREN
515 } else {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000516 args = parseFuncallArguments(); // (includes optional trailing comma)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100517 end = token.right;
518 expect(TokenKind.RPAREN);
519 }
520 return makeFuncallExpression(receiver, function, args, start, end);
521 }
522
523 // selector_suffix ::= '.' IDENTIFIER
524 // |'.' IDENTIFIER funcall_suffix
525 private Expression parseSelectorSuffix(int start, Expression receiver) {
526 expect(TokenKind.DOT);
527 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000528 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100529 if (token.kind == TokenKind.LPAREN) {
530 return parseFuncallSuffix(start, receiver, ident);
531 } else {
532 return setLocation(new DotExpression(receiver, ident), start, token.right);
533 }
534 } else {
Laurent Le Brun72329862015-03-23 14:20:03 +0000535 syntaxError(token, "expected identifier after dot");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100536 int end = syncTo(EXPR_TERMINATOR_SET);
537 return makeErrorExpression(start, end);
538 }
539 }
540
541 // arg_list ::= ( (arg ',')* arg ','? )?
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000542 private List<Argument.Passed> parseFuncallArguments() {
543 List<Argument.Passed> arguments =
544 parseFunctionArguments(new Supplier<Argument.Passed>() {
545 @Override public Argument.Passed get() {
546 return parseFuncallArgument();
547 }
548 });
549 try {
550 Argument.validateFuncallArguments(arguments);
551 } catch (Argument.ArgumentException e) {
552 reportError(lexer.createLocation(token.left, token.right), e.getMessage());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100553 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000554 return arguments;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100555 }
556
Laurent Le Brun56093892015-03-20 13:01:58 +0000557 // expr_list parses a comma-separated list of expression. It assumes that the
558 // first expression was already parsed, so it starts with a comma.
559 // It is used to parse tuples and list elements.
560 // expr_list ::= ( ',' expr )* ','?
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000561 private List<Expression> parseExprList(boolean trailingColonAllowed) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100562 List<Expression> list = new ArrayList<>();
563 // terminating tokens for an expression list
Laurent Le Brun56093892015-03-20 13:01:58 +0000564 while (token.kind == TokenKind.COMMA) {
565 expect(TokenKind.COMMA);
566 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000567 if (!trailingColonAllowed) {
568 reportError(
569 lexer.createLocation(token.left, token.right),
570 "Trailing comma is allowed only in parenthesized tuples.");
571 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100572 break;
573 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000574 list.add(parseNonTupleExpression());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100575 }
576 return list;
577 }
578
579 // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )?
580 private List<DictionaryEntryLiteral> parseDictEntryList() {
581 List<DictionaryEntryLiteral> list = new ArrayList<>();
582 // the terminating token for a dict entry list
583 while (token.kind != TokenKind.RBRACE) {
584 list.add(parseDictEntry());
585 if (token.kind == TokenKind.COMMA) {
586 nextToken();
587 } else {
588 break;
589 }
590 }
591 return list;
592 }
593
Laurent Le Brun56093892015-03-20 13:01:58 +0000594 // dict_entry ::= nontupleexpr ':' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100595 private DictionaryEntryLiteral parseDictEntry() {
596 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000597 Expression key = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100598 expect(TokenKind.COLON);
Laurent Le Brun56093892015-03-20 13:01:58 +0000599 Expression value = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100600 return setLocation(new DictionaryEntryLiteral(key, value), start, value);
601 }
602
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000603 /**
604 * Parse a String literal value, e.g. "str".
605 */
606 private StringLiteral parseStringLiteral() {
607 Preconditions.checkState(token.kind == TokenKind.STRING);
608 int end = token.right;
609 char quoteChar = lexer.charAt(token.left);
610 StringLiteral literal =
611 setLocation(new StringLiteral((String) token.value, quoteChar), token.left, end);
612
613 nextToken();
614 if (token.kind == TokenKind.STRING) {
615 reportError(lexer.createLocation(end, token.left),
616 "Implicit string concatenation is forbidden, use the + operator");
617 }
618 return literal;
619 }
620
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100621 // primary ::= INTEGER
622 // | STRING
623 // | STRING '.' IDENTIFIER funcall_suffix
624 // | IDENTIFIER
625 // | IDENTIFIER funcall_suffix
626 // | IDENTIFIER '.' selector_suffix
627 // | list_expression
628 // | '(' ')' // a tuple with zero elements
629 // | '(' expr ')' // a parenthesized expression
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100630 // | dict_expression
631 // | '-' primary_with_suffix
632 private Expression parsePrimary() {
633 int start = token.left;
634 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000635 case INT:
636 {
637 IntegerLiteral literal = new IntegerLiteral((Integer) token.value);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100638 setLocation(literal, start, token.right);
639 nextToken();
640 return literal;
641 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000642 case STRING:
643 return parseStringLiteral();
644 case IDENTIFIER:
645 {
646 Identifier ident = parseIdent();
647 if (token.kind == TokenKind.LPAREN) { // it's a function application
648 return parseFuncallSuffix(start, null, ident);
649 } else {
650 return ident;
651 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100652 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000653 case LBRACKET: // it's a list
654 return parseListMaker();
655 case LBRACE: // it's a dictionary
656 return parseDictExpression();
657 case LPAREN:
658 {
659 nextToken();
660 // check for the empty tuple literal
661 if (token.kind == TokenKind.RPAREN) {
662 ListLiteral literal = ListLiteral.makeTuple(Collections.<Expression>emptyList());
663 setLocation(literal, start, token.right);
664 nextToken();
665 return literal;
666 }
667 // parse the first expression
668 Expression expression = parseExpression(true);
669 setLocation(expression, start, token.right);
670 if (token.kind == TokenKind.RPAREN) {
671 nextToken();
672 return expression;
673 }
674 expect(TokenKind.RPAREN);
675 int end = syncTo(EXPR_TERMINATOR_SET);
676 return makeErrorExpression(start, end);
677 }
678 case MINUS:
679 {
680 nextToken();
681 List<Argument.Passed> args = new ArrayList<>();
682 Expression expr = parsePrimaryWithSuffix();
683 args.add(setLocation(new Argument.Positional(expr), start, expr));
684 return makeFuncallExpression(null, new Identifier("-"), args, start, token.right);
685 }
686 default:
687 {
688 syntaxError(token, "expected expression");
689 int end = syncTo(EXPR_TERMINATOR_SET);
690 return makeErrorExpression(start, end);
691 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100692 }
693 }
694
695 // primary_with_suffix ::= primary selector_suffix*
696 // | primary substring_suffix
697 private Expression parsePrimaryWithSuffix() {
698 int start = token.left;
699 Expression receiver = parsePrimary();
700 while (true) {
701 if (token.kind == TokenKind.DOT) {
702 receiver = parseSelectorSuffix(start, receiver);
703 } else if (token.kind == TokenKind.LBRACKET) {
704 receiver = parseSubstringSuffix(start, receiver);
705 } else {
706 break;
707 }
708 }
709 return receiver;
710 }
711
Florian Weikerte3421962015-12-17 12:46:08 +0000712 // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100713 private Expression parseSubstringSuffix(int start, Expression receiver) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100714 Expression startExpr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100715
716 expect(TokenKind.LBRACKET);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100717 if (token.kind == TokenKind.COLON) {
Florian Weikerte3421962015-12-17 12:46:08 +0000718 startExpr = setLocation(new Identifier("None"), token.left, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100719 } else {
Laurent Le Brun6824d862015-09-11 13:51:41 +0000720 startExpr = parseExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100721 }
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000722 // This is an index/key access
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100723 if (token.kind == TokenKind.RBRACKET) {
724 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000725 return setLocation(new IndexExpression(receiver, startExpr), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100726 }
Laurent Le Bruneeef30f2015-03-16 15:12:35 +0000727 // This is a slice (or substring)
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000728 Expression endExpr = parseSliceArgument(new Identifier("None"));
729 Expression stepExpr = parseSliceArgument(new IntegerLiteral(1));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100730 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000731 return setLocation(new SliceExpression(receiver, startExpr, endExpr, stepExpr),
732 start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100733 }
734
Florian Weikerte3421962015-12-17 12:46:08 +0000735 /**
736 * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice
737 * operation. If no such expression is found, this method returns an argument that represents
738 * {@code defaultValue}.
739 */
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000740 private Expression parseSliceArgument(Expression defaultValue) {
Florian Weikerte3421962015-12-17 12:46:08 +0000741 Expression explicitArg = getSliceEndOrStepExpression();
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000742 if (explicitArg == null) {
743 return setLocation(defaultValue, token.left, token.right);
744 }
745 return explicitArg;
Florian Weikerte3421962015-12-17 12:46:08 +0000746 }
747
748 private Expression getSliceEndOrStepExpression() {
749 // There has to be a colon before any end or slice argument.
750 // However, if the next token thereafter is another colon or a right bracket, no argument value
751 // was specified.
752 if (token.kind == TokenKind.COLON) {
753 expect(TokenKind.COLON);
754 if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) {
755 return parseNonTupleExpression();
756 }
757 }
758 return null;
759 }
760
Laurent Le Brun185392d2015-03-20 14:41:25 +0000761 // Equivalent to 'exprlist' rule in Python grammar.
762 // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','?
763 private Expression parseForLoopVariables() {
764 // We cannot reuse parseExpression because it would parse the 'in' operator.
765 // e.g. "for i in e: pass" -> we want to parse only "i" here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100766 int start = token.left;
Laurent Le Brun185392d2015-03-20 14:41:25 +0000767 Expression e1 = parsePrimaryWithSuffix();
768 if (token.kind != TokenKind.COMMA) {
769 return e1;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100770 }
771
Laurent Le Brun185392d2015-03-20 14:41:25 +0000772 // It's a tuple
773 List<Expression> tuple = new ArrayList<>();
774 tuple.add(e1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100775 while (token.kind == TokenKind.COMMA) {
Laurent Le Brun185392d2015-03-20 14:41:25 +0000776 expect(TokenKind.COMMA);
777 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
778 break;
779 }
780 tuple.add(parsePrimaryWithSuffix());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100781 }
Laurent Le Brun185392d2015-03-20 14:41:25 +0000782 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100783 }
784
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000785 // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix
786 // | 'IF' expr comprehension_suffix
787 // | ']'
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000788 private Expression parseComprehensionSuffix(
789 AbstractComprehension comprehension, TokenKind closingBracket) {
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000790 while (true) {
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000791 if (token.kind == TokenKind.FOR) {
792 nextToken();
793 Expression loopVar = parseForLoopVariables();
794 expect(TokenKind.IN);
795 // The expression cannot be a ternary expression ('x if y else z') due to
796 // conflicts in Python grammar ('if' is used by the comprehension).
797 Expression listExpression = parseNonTupleExpression(0);
798 comprehension.addFor(loopVar, listExpression);
799 } else if (token.kind == TokenKind.IF) {
800 nextToken();
801 comprehension.addIf(parseExpression());
802 } else if (token.kind == closingBracket) {
803 nextToken();
804 return comprehension;
805 } else {
806 syntaxError(token, "expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'");
807 syncPast(LIST_TERMINATOR_SET);
808 return makeErrorExpression(token.left, token.right);
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000809 }
810 }
811 }
812
Laurent Le Brun56093892015-03-20 13:01:58 +0000813 // list_maker ::= '[' ']'
814 // |'[' expr ']'
815 // |'[' expr expr_list ']'
816 // |'[' expr ('FOR' loop_variables 'IN' expr)+ ']'
817 private Expression parseListMaker() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100818 int start = token.left;
819 expect(TokenKind.LBRACKET);
820 if (token.kind == TokenKind.RBRACKET) { // empty List
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000821 ListLiteral literal = ListLiteral.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100822 setLocation(literal, start, token.right);
823 nextToken();
824 return literal;
825 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000826 Expression expression = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100827 Preconditions.checkNotNull(expression,
828 "null element in list in AST at %s:%s", token.left, token.right);
829 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000830 case RBRACKET: // singleton List
831 {
832 ListLiteral literal = ListLiteral.makeList(Collections.singletonList(expression));
833 setLocation(literal, start, token.right);
834 nextToken();
835 return literal;
836 }
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000837 case FOR:
838 { // list comprehension
839 Expression result =
840 parseComprehensionSuffix(new ListComprehension(expression), TokenKind.RBRACKET);
841 return setLocation(result, start, token.right);
842 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000843 case COMMA:
844 {
845 List<Expression> list = parseExprList(true);
846 Preconditions.checkState(
847 !list.contains(null),
848 "null element in list in AST at %s:%s",
849 token.left,
850 token.right);
851 list.add(0, expression);
852 if (token.kind == TokenKind.RBRACKET) {
853 ListLiteral literal = ListLiteral.makeList(list);
854 setLocation(literal, start, token.right);
855 nextToken();
856 return literal;
857 }
858 expect(TokenKind.RBRACKET);
859 int end = syncPast(LIST_TERMINATOR_SET);
860 return makeErrorExpression(start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100861 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000862 default:
863 {
864 syntaxError(token, "expected ',', 'for' or ']'");
865 int end = syncPast(LIST_TERMINATOR_SET);
866 return makeErrorExpression(start, end);
867 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100868 }
869 }
870
871 // dict_expression ::= '{' '}'
872 // |'{' dict_entry_list '}'
873 // |'{' dict_entry 'FOR' loop_variables 'IN' expr '}'
874 private Expression parseDictExpression() {
875 int start = token.left;
876 expect(TokenKind.LBRACE);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000877 if (token.kind == TokenKind.RBRACE) { // empty Dict
878 DictionaryLiteral literal = DictionaryLiteral.emptyDict();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100879 setLocation(literal, start, token.right);
880 nextToken();
881 return literal;
882 }
883 DictionaryEntryLiteral entry = parseDictEntry();
884 if (token.kind == TokenKind.FOR) {
885 // Dict comprehension
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000886 Expression result = parseComprehensionSuffix(
887 new DictComprehension(entry.getKey(), entry.getValue()), TokenKind.RBRACE);
888 return setLocation(result, start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100889 }
890 List<DictionaryEntryLiteral> entries = new ArrayList<>();
891 entries.add(entry);
892 if (token.kind == TokenKind.COMMA) {
893 expect(TokenKind.COMMA);
894 entries.addAll(parseDictEntryList());
895 }
896 if (token.kind == TokenKind.RBRACE) {
897 DictionaryLiteral literal = new DictionaryLiteral(entries);
898 setLocation(literal, start, token.right);
899 nextToken();
900 return literal;
901 }
Laurent Le Brun72329862015-03-23 14:20:03 +0000902 expect(TokenKind.RBRACE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100903 int end = syncPast(DICT_TERMINATOR_SET);
904 return makeErrorExpression(start, end);
905 }
906
Florian Weikert6f864c32015-07-23 11:26:39 +0000907 private Identifier parseIdent() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100908 if (token.kind != TokenKind.IDENTIFIER) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000909 expect(TokenKind.IDENTIFIER);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100910 return makeErrorExpression(token.left, token.right);
911 }
Florian Weikert6f864c32015-07-23 11:26:39 +0000912 Identifier ident = new Identifier(((String) token.value));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100913 setLocation(ident, token.left, token.right);
914 nextToken();
915 return ident;
916 }
917
918 // binop_expression ::= binop_expression OP binop_expression
919 // | parsePrimaryWithSuffix
920 // This function takes care of precedence between operators (see operatorPrecedence for
921 // the order), and it assumes left-to-right associativity.
922 private Expression parseBinOpExpression(int prec) {
923 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000924 Expression expr = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100925 // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is
926 // limited to number of different precedence levels (operatorPrecedence.size()).
927 for (;;) {
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000928
929 if (token.kind == TokenKind.NOT) {
930 // If NOT appears when we expect a binary operator, it must be followed by IN.
931 // Since the code expects every operator to be a single token, we push a NOT_IN token.
932 expect(TokenKind.NOT);
933 expect(TokenKind.IN);
934 pushToken(new Token(TokenKind.NOT_IN, token.left, token.right));
935 }
936
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100937 if (!binaryOperators.containsKey(token.kind)) {
938 return expr;
939 }
940 Operator operator = binaryOperators.get(token.kind);
941 if (!operatorPrecedence.get(prec).contains(operator)) {
942 return expr;
943 }
944 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000945 Expression secondary = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100946 expr = optimizeBinOpExpression(operator, expr, secondary);
947 setLocation(expr, start, secondary);
948 }
949 }
950
951 // Optimize binary expressions.
952 // string literal + string literal can be concatenated into one string literal
953 // so we don't have to do the expensive string concatenation at runtime.
954 private Expression optimizeBinOpExpression(
955 Operator operator, Expression expr, Expression secondary) {
956 if (operator == Operator.PLUS) {
957 if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
958 StringLiteral left = (StringLiteral) expr;
959 StringLiteral right = (StringLiteral) secondary;
960 if (left.getQuoteChar() == right.getQuoteChar()) {
961 return new StringLiteral(left.getValue() + right.getValue(), left.getQuoteChar());
962 }
963 }
964 }
965 return new BinaryOperatorExpression(operator, expr, secondary);
966 }
967
968 private Expression parseExpression() {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000969 return parseExpression(false);
970 }
971
972 // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of
973 // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity:
974 // e.g. fct(x, y) vs fct((x, y))
975 //
976 // Tuples can have a trailing comma only when insideParens is true. This prevents bugs
977 // where a one-element tuple is surprisingly created:
978 // e.g. foo = f(x),
979 private Expression parseExpression(boolean insideParens) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000980 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000981 Expression expression = parseNonTupleExpression();
982 if (token.kind != TokenKind.COMMA) {
983 return expression;
984 }
985
986 // It's a tuple
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000987 List<Expression> tuple = parseExprList(insideParens);
Laurent Le Brun56093892015-03-20 13:01:58 +0000988 tuple.add(0, expression); // add the first expression to the front of the tuple
989 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
990 }
991
992 // Equivalent to 'test' rule in Python grammar.
993 private Expression parseNonTupleExpression() {
994 int start = token.left;
995 Expression expr = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000996 if (token.kind == TokenKind.IF) {
997 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000998 Expression condition = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000999 if (token.kind == TokenKind.ELSE) {
1000 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +00001001 Expression elseClause = parseNonTupleExpression();
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001002 return setLocation(new ConditionalExpression(expr, condition, elseClause),
1003 start, elseClause);
1004 } else {
1005 reportError(lexer.createLocation(start, token.left),
1006 "missing else clause in conditional expression or semicolon before if");
1007 return expr; // Try to recover from error: drop the if and the expression after it. Ouch.
1008 }
1009 }
1010 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001011 }
1012
Laurent Le Brun56093892015-03-20 13:01:58 +00001013 private Expression parseNonTupleExpression(int prec) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001014 if (prec >= operatorPrecedence.size()) {
1015 return parsePrimaryWithSuffix();
1016 }
1017 if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) {
1018 return parseNotExpression(prec);
1019 }
1020 return parseBinOpExpression(prec);
1021 }
1022
1023 // not_expr :== 'not' expr
1024 private Expression parseNotExpression(int prec) {
1025 int start = token.left;
1026 expect(TokenKind.NOT);
Laurent Le Brun56093892015-03-20 13:01:58 +00001027 Expression expression = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001028 NotExpression notExpression = new NotExpression(expression);
1029 return setLocation(notExpression, start, token.right);
1030 }
1031
1032 // file_input ::= ('\n' | stmt)* EOF
1033 private List<Statement> parseFileInput() {
Googler768cbc42015-08-28 12:52:14 +00001034 long startTime = Profiler.nanoTimeMaybe();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001035 List<Statement> list = new ArrayList<>();
1036 while (token.kind != TokenKind.EOF) {
1037 if (token.kind == TokenKind.NEWLINE) {
Laurent Le Brun9060e162015-04-02 10:07:28 +00001038 expectAndRecover(TokenKind.NEWLINE);
1039 } else if (recoveryMode) {
1040 // If there was a parse error, we want to recover here
1041 // before starting a new top-level statement.
1042 syncTo(STATEMENT_TERMINATOR_SET);
1043 recoveryMode = false;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001044 } else {
1045 parseTopLevelStatement(list);
1046 }
1047 }
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001048 Profiler.instance().logSimpleTask(startTime, ProfilerTask.SKYLARK_PARSER, "");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001049 return list;
1050 }
1051
Florian Weikert9d659ad2015-07-23 14:44:36 +00001052 // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)* COMMA? ')'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001053 private void parseLoad(List<Statement> list) {
1054 int start = token.left;
1055 if (token.kind != TokenKind.STRING) {
1056 expect(TokenKind.STRING);
1057 return;
1058 }
Googler768cbc42015-08-28 12:52:14 +00001059
John Field9201fda2015-12-30 19:30:34 +00001060 StringLiteral importString = parseStringLiteral();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001061 expect(TokenKind.COMMA);
1062
Florian Weikert9d659ad2015-07-23 14:44:36 +00001063 Map<Identifier, String> symbols = new HashMap<>();
1064 parseLoadSymbol(symbols); // At least one symbol is required
1065
Laurent Le Brun73a98492015-03-17 15:46:19 +00001066 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001067 expect(TokenKind.COMMA);
Laurent Le Brun59f587a2015-03-16 14:51:36 +00001068 if (token.kind == TokenKind.RPAREN) {
1069 break;
1070 }
Florian Weikert9d659ad2015-07-23 14:44:36 +00001071
1072 parseLoadSymbol(symbols);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001073 }
1074 expect(TokenKind.RPAREN);
Googler768cbc42015-08-28 12:52:14 +00001075
Laurent Le Brun7b1708c2016-10-13 10:05:12 +00001076 LoadStatement stmt = new LoadStatement(importString, symbols);
Miguel Alcon Pinto927f3b22016-08-22 14:21:30 +00001077 list.add(setLocation(stmt, start, token.left));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001078 }
1079
Florian Weikert9d659ad2015-07-23 14:44:36 +00001080 /**
1081 * Parses the next symbol argument of a load statement and puts it into the output map.
1082 *
1083 * <p> The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING).
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001084 * If no alias is used, "name" and "declared" will be identical. "Declared" refers to the
1085 * original name in the Bazel file that should be loaded, while "name" will be the key of the
1086 * entry in the map.
Florian Weikert9d659ad2015-07-23 14:44:36 +00001087 */
1088 private void parseLoadSymbol(Map<Identifier, String> symbols) {
Vladimir Moskva8d610c62016-09-15 14:36:41 +00001089 Token nameToken;
1090 Token declaredToken;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001091
1092 if (token.kind == TokenKind.STRING) {
1093 nameToken = token;
1094 declaredToken = nameToken;
1095 } else {
1096 if (token.kind != TokenKind.IDENTIFIER) {
1097 syntaxError(token, "Expected either a literal string or an identifier");
1098 }
1099
1100 nameToken = token;
1101
1102 expect(TokenKind.IDENTIFIER);
1103 expect(TokenKind.EQUALS);
1104
1105 declaredToken = token;
1106 }
1107
1108 expect(TokenKind.STRING);
1109
1110 try {
1111 Identifier identifier = new Identifier(nameToken.value.toString());
1112
1113 if (symbols.containsKey(identifier)) {
1114 syntaxError(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001115 nameToken, String.format("Identifier '%s' is used more than once",
1116 identifier.getName()));
Florian Weikert9d659ad2015-07-23 14:44:36 +00001117 } else {
1118 symbols.put(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001119 setLocation(identifier, nameToken.left, nameToken.right),
1120 declaredToken.value.toString());
Florian Weikert9d659ad2015-07-23 14:44:36 +00001121 }
1122 } catch (NullPointerException npe) {
1123 // This means that the value of at least one token is null. In this case, the previous
1124 // expect() call has already logged an error.
1125 }
1126 }
1127
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001128 private void parseTopLevelStatement(List<Statement> list) {
1129 // In Python grammar, there is no "top-level statement" and imports are
1130 // considered as "small statements". We are a bit stricter than Python here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001131 // Check if there is an include
1132 if (token.kind == TokenKind.IDENTIFIER) {
1133 Token identToken = token;
Florian Weikert6f864c32015-07-23 11:26:39 +00001134 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001135
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001136 if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001137 expect(TokenKind.LPAREN);
1138 parseLoad(list);
1139 return;
1140 }
1141 pushToken(identToken); // push the ident back to parse it as a statement
1142 }
1143 parseStatement(list, true);
1144 }
1145
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001146 // small_stmt | 'pass'
1147 private void parseSmallStatementOrPass(List<Statement> list) {
1148 if (token.kind == TokenKind.PASS) {
1149 // Skip the token, don't add it to the list.
1150 // It has no existence in the AST.
1151 expect(TokenKind.PASS);
1152 } else {
1153 list.add(parseSmallStatement());
1154 }
1155 }
1156
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001157 // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE
1158 private void parseSimpleStatement(List<Statement> list) {
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001159 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001160
1161 while (token.kind == TokenKind.SEMI) {
1162 nextToken();
1163 if (token.kind == TokenKind.NEWLINE) {
1164 break;
1165 }
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001166 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001167 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001168 expectAndRecover(TokenKind.NEWLINE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001169 }
1170
1171 // small_stmt ::= assign_stmt
1172 // | expr
1173 // | RETURN expr
Florian Weikert917ceaa2015-06-10 13:54:26 +00001174 // | flow_stmt
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001175 // assign_stmt ::= expr ('=' | augassign) expr
Vladimir Moskva71536642016-12-19 13:51:57 +00001176 // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=')
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001177 // Note that these are in Python, but not implemented here (at least for now):
Vladimir Moskva71536642016-12-19 13:51:57 +00001178 // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//='
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001179 // Semantic difference from Python:
1180 // In Skylark, x += y is simple syntactic sugar for x = x + y.
1181 // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined
1182 // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects
1183 // the original list (it doesn't do that on tuples); if no such method is defined it falls back
1184 // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect.
1185 // Note also that there is a special casing to translate 'ident[key] = value'
1186 // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like
1187 // dictionary assignment syntax.
1188 private Statement parseSmallStatement() {
1189 int start = token.left;
1190 if (token.kind == TokenKind.RETURN) {
1191 return parseReturnStatement();
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001192 } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) {
Florian Weikert917ceaa2015-06-10 13:54:26 +00001193 return parseFlowStatement(token.kind);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001194 }
1195 Expression expression = parseExpression();
1196 if (token.kind == TokenKind.EQUALS) {
1197 nextToken();
1198 Expression rvalue = parseExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001199 return setLocation(new AssignmentStatement(expression, rvalue), start, rvalue);
1200 } else if (augmentedAssignmentMethods.containsKey(token.kind)) {
1201 Operator operator = augmentedAssignmentMethods.get(token.kind);
1202 nextToken();
1203 Expression operand = parseExpression();
1204 int end = operand.getLocation().getEndOffset();
Vladimir Moskva71536642016-12-19 13:51:57 +00001205 return setLocation(
1206 new AugmentedAssignmentStatement(operator, expression, operand), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001207 } else {
1208 return setLocation(new ExpressionStatement(expression), start, expression);
1209 }
1210 }
1211
1212 // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]?
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001213 private IfStatement parseIfStatement() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001214 int start = token.left;
1215 List<ConditionalStatements> thenBlocks = new ArrayList<>();
1216 thenBlocks.add(parseConditionalStatements(TokenKind.IF));
1217 while (token.kind == TokenKind.ELIF) {
1218 thenBlocks.add(parseConditionalStatements(TokenKind.ELIF));
1219 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001220 List<Statement> elseBlock;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001221 if (token.kind == TokenKind.ELSE) {
1222 expect(TokenKind.ELSE);
1223 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001224 elseBlock = parseSuite();
1225 } else {
1226 elseBlock = ImmutableList.of();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001227 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001228 return setLocation(new IfStatement(thenBlocks, elseBlock), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001229 }
1230
1231 // cond_stmts ::= [EL]IF expr ':' suite
1232 private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) {
1233 int start = token.left;
1234 expect(tokenKind);
Laurent Le Brun56093892015-03-20 13:01:58 +00001235 Expression expr = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001236 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001237 List<Statement> thenBlock = parseSuite();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001238 ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock);
1239 return setLocation(stmt, start, token.right);
1240 }
1241
1242 // for_stmt ::= FOR IDENTIFIER IN expr ':' suite
1243 private void parseForStatement(List<Statement> list) {
1244 int start = token.left;
1245 expect(TokenKind.FOR);
Laurent Le Brun185392d2015-03-20 14:41:25 +00001246 Expression loopVar = parseForLoopVariables();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001247 expect(TokenKind.IN);
1248 Expression collection = parseExpression();
1249 expect(TokenKind.COLON);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001250 enterLoop();
1251 try {
1252 List<Statement> block = parseSuite();
1253 Statement stmt = new ForStatement(loopVar, collection, block);
1254 list.add(setLocation(stmt, start, token.right));
1255 } finally {
1256 exitLoop();
1257 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001258 }
1259
1260 // def foo(bar1, bar2):
1261 private void parseFunctionDefStatement(List<Statement> list) {
1262 int start = token.left;
1263 expect(TokenKind.DEF);
Florian Weikert6f864c32015-07-23 11:26:39 +00001264 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001265 expect(TokenKind.LPAREN);
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001266 List<Parameter<Expression, Expression>> params = parseParameters();
1267 FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001268 expect(TokenKind.RPAREN);
1269 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001270 List<Statement> block = parseSuite();
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001271 FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001272 list.add(setLocation(stmt, start, token.right));
1273 }
1274
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001275 private FunctionSignature.WithValues<Expression, Expression> functionSignature(
1276 List<Parameter<Expression, Expression>> parameters) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001277 try {
1278 return FunctionSignature.WithValues.<Expression, Expression>of(parameters);
1279 } catch (FunctionSignature.SignatureException e) {
1280 reportError(e.getParameter().getLocation(), e.getMessage());
1281 // return bogus empty signature
1282 return FunctionSignature.WithValues.<Expression, Expression>create(FunctionSignature.of());
1283 }
1284 }
1285
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001286 private List<Parameter<Expression, Expression>> parseParameters() {
1287 return parseFunctionArguments(
1288 new Supplier<Parameter<Expression, Expression>>() {
1289 @Override public Parameter<Expression, Expression> get() {
1290 return parseFunctionParameter();
1291 }
1292 });
1293 }
1294
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001295 /**
1296 * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter,
1297 * as returned by the Supplier parseArgument (that, taking no argument, must be closed over
1298 * the mutable input data structures).
1299 *
1300 * <p>This parser does minimal validation: it ensures the proper python use of the comma (that
1301 * can terminate before a star but not after) and the fact that a **kwarg must appear last.
1302 * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as
1303 * all positional preceding keyword arguments in a call, nor does it check the more subtle
1304 * constraints for Parameter-s. This validation must happen afterwards in an appropriate method.
1305 */
1306 private <V extends Argument> ImmutableList<V>
1307 parseFunctionArguments(Supplier<V> parseArgument) {
1308 boolean hasArg = false;
1309 boolean hasStar = false;
1310 boolean hasStarStar = false;
1311 ArrayList<V> arguments = new ArrayList<>();
1312
1313 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
1314 if (hasStarStar) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001315 reportError(lexer.createLocation(token.left, token.right),
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001316 "unexpected tokens after kwarg");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001317 break;
1318 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001319 if (hasArg) {
1320 expect(TokenKind.COMMA);
1321 }
1322 if (token.kind == TokenKind.RPAREN && !hasStar) {
1323 // list can end with a COMMA if there is neither * nor **
1324 break;
1325 }
1326 V arg = parseArgument.get();
1327 hasArg = true;
1328 if (arg.isStar()) {
1329 hasStar = true;
1330 } else if (arg.isStarStar()) {
1331 hasStarStar = true;
1332 }
1333 arguments.add(arg);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001334 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001335 return ImmutableList.copyOf(arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001336 }
1337
Laurent Le Brun5f674452015-03-17 19:29:13 +00001338 // suite is typically what follows a colon (e.g. after def or for).
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001339 // suite ::= simple_stmt
1340 // | NEWLINE INDENT stmt+ OUTDENT
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001341 private List<Statement> parseSuite() {
1342 List<Statement> list = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001343 if (token.kind == TokenKind.NEWLINE) {
1344 expect(TokenKind.NEWLINE);
1345 if (token.kind != TokenKind.INDENT) {
1346 reportError(lexer.createLocation(token.left, token.right),
1347 "expected an indented block");
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001348 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001349 }
1350 expect(TokenKind.INDENT);
1351 while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) {
1352 parseStatement(list, false);
1353 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001354 expectAndRecover(TokenKind.OUTDENT);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001355 } else {
Laurent Le Brun5f674452015-03-17 19:29:13 +00001356 parseSimpleStatement(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001357 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001358 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001359 }
1360
1361 // skipSuite does not check that the code is syntactically correct, it
1362 // just skips based on indentation levels.
1363 private void skipSuite() {
1364 if (token.kind == TokenKind.NEWLINE) {
1365 expect(TokenKind.NEWLINE);
1366 if (token.kind != TokenKind.INDENT) {
1367 reportError(lexer.createLocation(token.left, token.right),
1368 "expected an indented block");
1369 return;
1370 }
1371 expect(TokenKind.INDENT);
1372
1373 // Don't try to parse all the Python syntax, just skip the block
1374 // until the corresponding outdent token.
1375 int depth = 1;
1376 while (depth > 0) {
1377 // Because of the way the lexer works, this should never happen
1378 Preconditions.checkState(token.kind != TokenKind.EOF);
1379
1380 if (token.kind == TokenKind.INDENT) {
1381 depth++;
1382 }
1383 if (token.kind == TokenKind.OUTDENT) {
1384 depth--;
1385 }
1386 nextToken();
1387 }
1388
1389 } else {
1390 // the block ends at the newline token
1391 // e.g. if x == 3: print "three"
1392 syncTo(STATEMENT_TERMINATOR_SET);
1393 }
1394 }
1395
1396 // stmt ::= simple_stmt
1397 // | compound_stmt
1398 private void parseStatement(List<Statement> list, boolean isTopLevel) {
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001399 if (token.kind == TokenKind.DEF && parsingMode == SKYLARK) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001400 if (!isTopLevel) {
1401 reportError(lexer.createLocation(token.left, token.right),
1402 "nested functions are not allowed. Move the function to top-level");
1403 }
1404 parseFunctionDefStatement(list);
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001405 } else if (token.kind == TokenKind.IF && parsingMode == SKYLARK) {
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001406 list.add(parseIfStatement());
Laurent Le Brun9be852e2015-05-28 08:44:51 +00001407 } else if (token.kind == TokenKind.FOR && parsingMode == SKYLARK) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001408 if (isTopLevel) {
Yue Gan4866e152016-04-07 13:07:08 +00001409 reportError(
1410 lexer.createLocation(token.left, token.right),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001411 "for loops are not allowed on top-level. Put it into a function");
1412 }
1413 parseForStatement(list);
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +00001414 } else if (BLOCK_STARTING_SET.contains(token.kind)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001415 skipBlock();
1416 } else {
1417 parseSimpleStatement(list);
1418 }
1419 }
1420
Florian Weikert917ceaa2015-06-10 13:54:26 +00001421 // flow_stmt ::= break_stmt | continue_stmt
1422 private FlowStatement parseFlowStatement(TokenKind kind) {
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001423 int start = token.left;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001424 int end = token.right;
Florian Weikert917ceaa2015-06-10 13:54:26 +00001425 expect(kind);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001426 if (loopCount == 0) {
1427 reportError(
1428 lexer.createLocation(start, end),
1429 kind.getPrettyName() + " statement must be inside a for loop");
1430 }
Laurent Le Brun7d6a3812015-10-26 12:07:12 +00001431 FlowStatement.Kind flowKind =
1432 kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001433 return setLocation(new FlowStatement(flowKind), start, end);
Florian Weikert917ceaa2015-06-10 13:54:26 +00001434 }
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001435
Googlercc0d9952015-08-10 12:01:34 +00001436 // return_stmt ::= RETURN [expr]
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001437 private ReturnStatement parseReturnStatement() {
1438 int start = token.left;
Googlercc0d9952015-08-10 12:01:34 +00001439 int end = token.right;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001440 expect(TokenKind.RETURN);
Googler768cbc42015-08-28 12:52:14 +00001441
Googlercc0d9952015-08-10 12:01:34 +00001442 Expression expression;
1443 if (STATEMENT_TERMINATOR_SET.contains(token.kind)) {
1444 // this None makes the AST not correspond to the source exactly anymore
1445 expression = new Identifier("None");
1446 setLocation(expression, start, end);
1447 } else {
1448 expression = parseExpression();
1449 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001450 return setLocation(new ReturnStatement(expression), start, expression);
1451 }
1452
Florian Weikert1f004e52015-10-16 09:43:48 +00001453 // block ::= ('if' | 'for' | 'class' | 'try' | 'def') expr ':' suite
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001454 private void skipBlock() {
1455 int start = token.left;
1456 Token blockToken = token;
1457 syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001458 if (blockToken.kind == TokenKind.ELSE) {
Yue Gan4866e152016-04-07 13:07:08 +00001459 reportError(
1460 lexer.createLocation(blockToken.left, blockToken.right),
1461 "syntax error at 'else': not allowed here.");
Laurent Le Brunb566c7d2016-10-07 16:31:03 +00001462 } else {
Florian Weikert1f004e52015-10-16 09:43:48 +00001463 String msg =
1464 ILLEGAL_BLOCK_KEYWORDS.containsKey(blockToken.kind)
1465 ? String.format("%ss are not supported.", ILLEGAL_BLOCK_KEYWORDS.get(blockToken.kind))
1466 : "This is not supported in BUILD files. Move the block to a .bzl file and load it";
Laurent Le Brunb13a4382015-06-30 14:20:45 +00001467 reportError(
1468 lexer.createLocation(start, token.right),
Florian Weikert1f004e52015-10-16 09:43:48 +00001469 String.format("syntax error at '%s': %s", blockToken, msg));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001470 }
1471 expect(TokenKind.COLON);
1472 skipSuite();
1473 }
1474
1475 // create a comment node
1476 private void makeComment(Token token) {
1477 comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
1478 }
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001479
1480 private void enterLoop() {
1481 loopCount++;
1482 }
1483
1484 private void exitLoop() {
1485 Preconditions.checkState(loopCount > 0);
1486 loopCount--;
1487 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001488}