blob: df379548546dfde54b37dc481823b9d39b665627 [file] [log] [blame]
Damien Martin-Guillerezf88f4d82015-09-25 13:56:55 +00001// Copyright 2014 The Bazel Authors. All rights reserved.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package com.google.devtools.build.lib.syntax;
16
brandjon733a97d2017-06-27 17:11:27 +020017import static com.google.devtools.build.lib.syntax.Parser.Dialect.SKYLARK;
Laurent Le Brun9be852e2015-05-28 08:44:51 +000018
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010019import com.google.common.annotations.VisibleForTesting;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +000020import com.google.common.base.Supplier;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010021import com.google.common.collect.ImmutableList;
22import com.google.common.collect.ImmutableMap;
Laurent Le Brune51a4d22016-10-11 18:04:16 +000023import com.google.common.collect.Iterables;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010024import com.google.devtools.build.lib.events.Event;
25import com.google.devtools.build.lib.events.EventHandler;
26import com.google.devtools.build.lib.events.Location;
Googler768cbc42015-08-28 12:52:14 +000027import com.google.devtools.build.lib.profiler.Profiler;
28import com.google.devtools.build.lib.profiler.ProfilerTask;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010029import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral;
30import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements;
Mark Schaller6df81792015-12-10 18:47:47 +000031import com.google.devtools.build.lib.util.Preconditions;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010032import java.util.ArrayList;
33import java.util.Collections;
34import java.util.EnumSet;
Florian Weikert9d659ad2015-07-23 14:44:36 +000035import java.util.HashMap;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010036import java.util.Iterator;
37import java.util.List;
38import java.util.Map;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010039
40/**
41 * Recursive descent parser for LL(2) BUILD language.
42 * Loosely based on Python 2 grammar.
43 * See https://docs.python.org/2/reference/grammar.html
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010044 */
Han-Wen Nienhuysceae8c52015-09-22 16:24:45 +000045@VisibleForTesting
46public class Parser {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010047
48 /**
49 * Combines the parser result into a single value object.
50 */
51 public static final class ParseResult {
52 /** The statements (rules, basically) from the parsed file. */
53 public final List<Statement> statements;
54
55 /** The comments from the parsed file. */
56 public final List<Comment> comments;
57
Lukacs Berkid9e733d2015-09-18 08:18:11 +000058 /** Represents every statement in the file. */
59 public final Location location;
60
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010061 /** Whether the file contained any errors. */
62 public final boolean containsErrors;
63
Lukacs Berkid9e733d2015-09-18 08:18:11 +000064 public ParseResult(List<Statement> statements, List<Comment> comments, Location location,
65 boolean containsErrors) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010066 // No need to copy here; when the object is created, the parser instance is just about to go
67 // out of scope and be garbage collected.
68 this.statements = Preconditions.checkNotNull(statements);
69 this.comments = Preconditions.checkNotNull(comments);
Lukacs Berkid9e733d2015-09-18 08:18:11 +000070 this.location = location;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010071 this.containsErrors = containsErrors;
72 }
73 }
74
/** Selects whether the parser rejects features that are prohibited for BUILD files. */
// TODO(brandjon): Instead of using an enum to control what features are allowed, factor these
// restrictions into a separate visitor that can be outside the core Skylark parser. This will
// reduce parser complexity and help keep Bazel-specific knowledge out of the interpreter.
public enum Dialect {
  /** Dialect for BUILD files. */
  BUILD,

  /** Dialect for .bzl and other Skylark files; every language feature is allowed. */
  SKYLARK
}
85
/** Selects which constructs are allowed, depending on whether we're at the top level. */
public enum ParsingLevel {
  /** Parsing at the top level. */
  TOP_LEVEL,

  /** Parsing below the top level. */
  LOCAL_LEVEL
}
91
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010092 private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
Googlercc0d9952015-08-10 12:01:34 +000093 EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010094
95 private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000096 EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +010097
98 private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +000099 EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100100
Laurent Le Brun56093892015-03-20 13:01:58 +0000101 private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET =
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000102 EnumSet.of(
103 TokenKind.EOF,
104 TokenKind.NEWLINE,
Laurent Le Brun29ad8622015-09-18 10:45:07 +0000105 TokenKind.EQUALS,
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000106 TokenKind.RBRACE,
107 TokenKind.RBRACKET,
108 TokenKind.RPAREN,
109 TokenKind.SEMI);
Laurent Le Brun56093892015-03-20 13:01:58 +0000110
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +0000111 private static final EnumSet<TokenKind> BLOCK_STARTING_SET =
112 EnumSet.of(
113 TokenKind.CLASS,
114 TokenKind.DEF,
115 TokenKind.ELSE,
116 TokenKind.FOR,
117 TokenKind.IF,
118 TokenKind.TRY);
119
120 private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET =
121 EnumSet.of(
122 TokenKind.COLON,
123 TokenKind.COMMA,
124 TokenKind.EOF,
125 TokenKind.FOR,
126 TokenKind.MINUS,
127 TokenKind.PERCENT,
128 TokenKind.PLUS,
129 TokenKind.RBRACKET,
130 TokenKind.RPAREN,
131 TokenKind.SLASH);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100132
Florian Weikert1f004e52015-10-16 09:43:48 +0000133 /**
134 * Keywords that are forbidden in both Skylark and BUILD parsing modes.
135 *
136 * <p>(Mapping: token -> human-readable string description)
137 */
138 private static final ImmutableMap<TokenKind, String> ILLEGAL_BLOCK_KEYWORDS =
139 ImmutableMap.of(TokenKind.CLASS, "Class definition", TokenKind.TRY, "Try statement");
140
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100141 private Token token; // current lookahead token
142 private Token pushedToken = null; // used to implement LL(2)
Laurent Le Bruna3c25a62016-10-26 10:59:09 +0000143 private int loopCount; // break/continue keywords can be used only inside a loop
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100144
145 private static final boolean DEBUGGING = false;
146
147 private final Lexer lexer;
148 private final EventHandler eventHandler;
149 private final List<Comment> comments;
brandjon733a97d2017-06-27 17:11:27 +0200150 private final Dialect dialect;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100151
152 private static final Map<TokenKind, Operator> binaryOperators =
153 new ImmutableMap.Builder<TokenKind, Operator>()
154 .put(TokenKind.AND, Operator.AND)
155 .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
156 .put(TokenKind.GREATER, Operator.GREATER)
157 .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
158 .put(TokenKind.IN, Operator.IN)
159 .put(TokenKind.LESS, Operator.LESS)
160 .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
161 .put(TokenKind.MINUS, Operator.MINUS)
162 .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000163 .put(TokenKind.NOT_IN, Operator.NOT_IN)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100164 .put(TokenKind.OR, Operator.OR)
165 .put(TokenKind.PERCENT, Operator.PERCENT)
Laurent Le Brun8a528262015-04-15 14:23:35 +0000166 .put(TokenKind.SLASH, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200167 .put(TokenKind.SLASH_SLASH, Operator.FLOOR_DIVIDE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100168 .put(TokenKind.PLUS, Operator.PLUS)
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000169 .put(TokenKind.PIPE, Operator.PIPE)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100170 .put(TokenKind.STAR, Operator.MULT)
171 .build();
172
Googler13151752016-06-02 18:37:13 +0000173 // TODO(bazel-team): add support for |=
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100174 private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
175 new ImmutableMap.Builder<TokenKind, Operator>()
Googler13151752016-06-02 18:37:13 +0000176 .put(TokenKind.PLUS_EQUALS, Operator.PLUS)
177 .put(TokenKind.MINUS_EQUALS, Operator.MINUS)
178 .put(TokenKind.STAR_EQUALS, Operator.MULT)
179 .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE)
laurentlb094bb262017-05-19 21:18:25 +0200180 .put(TokenKind.SLASH_SLASH_EQUALS, Operator.FLOOR_DIVIDE)
Googler13151752016-06-02 18:37:13 +0000181 .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT)
182 .build();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100183
184 /** Highest precedence goes last.
185 * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
186 **/
187 private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
188 EnumSet.of(Operator.OR),
189 EnumSet.of(Operator.AND),
190 EnumSet.of(Operator.NOT),
191 EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000192 Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN),
Laurent Le Brun092f13b2015-08-24 14:50:00 +0000193 EnumSet.of(Operator.PIPE),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100194 EnumSet.of(Operator.MINUS, Operator.PLUS),
laurentlb094bb262017-05-19 21:18:25 +0200195 EnumSet.of(Operator.DIVIDE, Operator.FLOOR_DIVIDE, Operator.MULT, Operator.PERCENT));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100196
// Token stream obtained from the lexer.
private final Iterator<Token> tokens;
// Number of errors reported through reportError(), including ones that were not printed.
private int errorsCount;
// stop reporting errors until next statement
private boolean recoveryMode;

/**
 * Creates a parser over the tokens of {@code lexer} and loads the first lookahead token.
 *
 * @param lexer source of tokens and of locations
 * @param eventHandler receives the reported errors
 * @param dialect BUILD or SKYLARK
 */
private Parser(Lexer lexer, EventHandler eventHandler, Dialect dialect) {
  this.dialect = dialect;
  this.eventHandler = eventHandler;
  this.lexer = lexer;
  this.comments = new ArrayList<>();
  this.tokens = lexer.getTokens().iterator();
  // Must come last: it reads from 'tokens' and may record comments.
  nextToken();
}
209
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000210 private static Location locationFromStatements(Lexer lexer, List<Statement> statements) {
211 if (!statements.isEmpty()) {
212 return lexer.createLocation(
213 statements.get(0).getLocation().getStartOffset(),
Laurent Le Brune51a4d22016-10-11 18:04:16 +0000214 Iterables.getLast(statements).getLocation().getEndOffset());
Lukacs Berkid9e733d2015-09-18 08:18:11 +0000215 } else {
216 return Location.fromPathFragment(lexer.getFilename());
217 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100218 }
219
220 /**
brandjon733a97d2017-06-27 17:11:27 +0200221 * Main entry point for parsing a file.
brandjon540aac62017-06-12 23:08:09 +0200222 *
223 * @param input the input to parse
224 * @param eventHandler a reporter for parsing errors
brandjon733a97d2017-06-27 17:11:27 +0200225 * @param dialect may restrict the parser to Build-language features
226 * @see BuildFileAST#parseBuildString
227 * @see BuildFileAST#parseSkylarkString
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100228 */
brandjon540aac62017-06-12 23:08:09 +0200229 public static ParseResult parseFile(
brandjon733a97d2017-06-27 17:11:27 +0200230 ParserInputSource input, EventHandler eventHandler, Dialect dialect) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000231 Lexer lexer = new Lexer(input, eventHandler);
brandjon733a97d2017-06-27 17:11:27 +0200232 Parser parser = new Parser(lexer, eventHandler, dialect);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100233 List<Statement> statements = parser.parseFileInput();
Laurent Le Brun8c8857d2016-08-04 10:22:16 +0000234 return new ParseResult(
235 statements,
236 parser.comments,
237 locationFromStatements(lexer, statements),
238 parser.errorsCount > 0 || lexer.containsErrors());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100239 }
240
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100241 /**
brandjon733a97d2017-06-27 17:11:27 +0200242 * Parses a sequence of statements, possibly followed by newline tokens.
brandjon540aac62017-06-12 23:08:09 +0200243 *
brandjon733a97d2017-06-27 17:11:27 +0200244 * <p>{@code load()} statements are not permitted. Use {@code parsingLevel} to control whether
245 * function definitions, for statements, etc., are allowed.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100246 */
brandjon733a97d2017-06-27 17:11:27 +0200247 public static List<Statement> parseStatements(
248 ParserInputSource input, EventHandler eventHandler,
249 ParsingLevel parsingLevel, Dialect dialect) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000250 Lexer lexer = new Lexer(input, eventHandler);
brandjon733a97d2017-06-27 17:11:27 +0200251 Parser parser = new Parser(lexer, eventHandler, dialect);
252 List<Statement> result = new ArrayList<>();
253 parser.parseStatement(result, parsingLevel);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100254 while (parser.token.kind == TokenKind.NEWLINE) {
255 parser.nextToken();
256 }
257 parser.expect(TokenKind.EOF);
258 return result;
259 }
260
brandjon733a97d2017-06-27 17:11:27 +0200261 /**
262 * Convenience wrapper for {@link #parseStatements} where exactly one statement is expected.
263 *
264 * @throws IllegalArgumentException if the number of parsed statements was not exactly one
265 */
266 public static Statement parseStatement(
267 ParserInputSource input, EventHandler eventHandler,
268 ParsingLevel parsingLevel, Dialect dialect) {
269 List<Statement> stmts = parseStatements(
270 input, eventHandler, parsingLevel, dialect);
271 return Iterables.getOnlyElement(stmts);
brandjon540aac62017-06-12 23:08:09 +0200272 }
273
brandjon733a97d2017-06-27 17:11:27 +0200274 /** Parses an expression, possibly followed by newline tokens. */
275 public static Expression parseExpression(
276 ParserInputSource input, EventHandler eventHandler, Dialect dialect) {
277 Lexer lexer = new Lexer(input, eventHandler);
278 Parser parser = new Parser(lexer, eventHandler, dialect);
279 Expression result = parser.parseExpression();
280 while (parser.token.kind == TokenKind.NEWLINE) {
281 parser.nextToken();
282 }
283 parser.expect(TokenKind.EOF);
284 return result;
brandjon540aac62017-06-12 23:08:09 +0200285 }
286
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100287 private void reportError(Location location, String message) {
288 errorsCount++;
289 // Limit the number of reported errors to avoid spamming output.
290 if (errorsCount <= 5) {
291 eventHandler.handle(Event.error(location, message));
292 }
293 }
294
Laurent Le Brun72329862015-03-23 14:20:03 +0000295 private void syntaxError(Token token, String message) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100296 if (!recoveryMode) {
297 String msg = token.kind == TokenKind.INDENT
298 ? "indentation error"
Laurent Le Brun72329862015-03-23 14:20:03 +0000299 : "syntax error at '" + token + "': " + message;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100300 reportError(lexer.createLocation(token.left, token.right), msg);
301 recoveryMode = true;
302 }
303 }
304
Laurent Le Brun9060e162015-04-02 10:07:28 +0000305 /**
306 * Consumes the current token. If it is not of the specified (expected)
307 * kind, reports a syntax error.
308 */
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100309 private boolean expect(TokenKind kind) {
310 boolean expected = token.kind == kind;
311 if (!expected) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000312 syntaxError(token, "expected " + kind.getPrettyName());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100313 }
314 nextToken();
315 return expected;
316 }
317
318 /**
Laurent Le Brun9060e162015-04-02 10:07:28 +0000319 * Same as expect, but stop the recovery mode if the token was expected.
320 */
321 private void expectAndRecover(TokenKind kind) {
322 if (expect(kind)) {
323 recoveryMode = false;
324 }
325 }
326
327 /**
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100328 * Consume tokens past the first token that has a kind that is in the set of
329 * teminatingTokens.
330 * @param terminatingTokens
331 * @return the end offset of the terminating token.
332 */
333 private int syncPast(EnumSet<TokenKind> terminatingTokens) {
334 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
335 while (!terminatingTokens.contains(token.kind)) {
336 nextToken();
337 }
338 int end = token.right;
339 // read past the synchronization token
340 nextToken();
341 return end;
342 }
343
344 /**
345 * Consume tokens until we reach the first token that has a kind that is in
346 * the set of teminatingTokens.
347 * @param terminatingTokens
348 * @return the end offset of the terminating token.
349 */
350 private int syncTo(EnumSet<TokenKind> terminatingTokens) {
351 // EOF must be in the set to prevent an infinite loop
352 Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
353 // read past the problematic token
354 int previous = token.right;
355 nextToken();
356 int current = previous;
357 while (!terminatingTokens.contains(token.kind)) {
358 nextToken();
359 previous = current;
360 current = token.right;
361 }
362 return previous;
363 }
364
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000365 // Keywords that exist in Python and that we don't parse.
366 private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS =
Googler768cbc42015-08-28 12:52:14 +0000367 EnumSet.of(TokenKind.AS, TokenKind.ASSERT,
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000368 TokenKind.DEL, TokenKind.EXCEPT, TokenKind.FINALLY, TokenKind.FROM, TokenKind.GLOBAL,
369 TokenKind.IMPORT, TokenKind.IS, TokenKind.LAMBDA, TokenKind.NONLOCAL, TokenKind.RAISE,
370 TokenKind.TRY, TokenKind.WITH, TokenKind.WHILE, TokenKind.YIELD);
371
372 private void checkForbiddenKeywords(Token token) {
Laurent Le Brunb566c7d2016-10-07 16:31:03 +0000373 if (!FORBIDDEN_KEYWORDS.contains(token.kind)) {
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000374 return;
375 }
376 String error;
377 switch (token.kind) {
378 case ASSERT: error = "'assert' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000379 case DEL:
380 error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list";
381 break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000382 case IMPORT: error = "'import' not supported, use 'load' instead"; break;
383 case IS: error = "'is' not supported, use '==' instead"; break;
384 case LAMBDA: error = "'lambda' not supported, declare a function instead"; break;
385 case RAISE: error = "'raise' not supported, use 'fail' instead"; break;
Laurent Le Brun44ad7fa2016-10-11 12:09:05 +0000386 case TRY: error = "'try' not supported, all exceptions are fatal"; break;
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000387 case WHILE: error = "'while' not supported, use 'for' instead"; break;
388 default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break;
389 }
390 reportError(lexer.createLocation(token.left, token.right), error);
391 }
392
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100393 private void nextToken() {
394 if (pushedToken != null) {
395 token = pushedToken;
396 pushedToken = null;
397 } else {
398 if (token == null || token.kind != TokenKind.EOF) {
399 token = tokens.next();
400 // transparently handle comment tokens
401 while (token.kind == TokenKind.COMMENT) {
402 makeComment(token);
403 token = tokens.next();
404 }
405 }
406 }
Laurent Le Brun0ddcba22015-03-23 16:48:01 +0000407 checkForbiddenKeywords(token);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100408 if (DEBUGGING) {
409 System.err.print(token);
410 }
411 }
412
413 private void pushToken(Token tokenToPush) {
414 if (pushedToken != null) {
415 throw new IllegalStateException("Exceeded LL(2) lookahead!");
416 }
417 pushedToken = token;
418 token = tokenToPush;
419 }
420
421 // create an error expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000422 private Identifier makeErrorExpression(int start, int end) {
423 return setLocation(new Identifier("$error$"), start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100424 }
425
426 // Convenience wrapper around ASTNode.setLocation that returns the node.
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000427 private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) {
428 return ASTNode.<NODE>setLocation(location, node);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100429 }
430
431 // Another convenience wrapper method around ASTNode.setLocation
Francois-Rene Rideauedf7bdb2015-03-02 17:12:45 +0000432 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, int endOffset) {
433 return setLocation(node, lexer.createLocation(startOffset, endOffset));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100434 }
435
436 // Convenience method that uses end offset from the last node.
437 private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +0000438 Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node");
439 Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100440 return setLocation(node, startOffset, lastNode.getLocation().getEndOffset());
441 }
442
443 // create a funcall expression
Florian Weikert6f864c32015-07-23 11:26:39 +0000444 private Expression makeFuncallExpression(Expression receiver, Identifier function,
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000445 List<Argument.Passed> args,
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100446 int start, int end) {
447 if (function.getLocation() == null) {
448 function = setLocation(function, start, end);
449 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100450 return setLocation(new FuncallExpression(receiver, function, args), start, end);
451 }
452
Laurent Le Brun56093892015-03-20 13:01:58 +0000453 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100454 // | expr
Laurent Le Brunb3266382015-05-27 16:14:43 +0000455 // | *args (only in Skylark mode)
456 // | **kwargs (only in Skylark mode)
457 // To keep BUILD files declarative and easy to process, *args and **kwargs
458 // arguments are allowed only in Skylark mode.
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000459 private Argument.Passed parseFuncallArgument() {
460 final int start = token.left;
461 // parse **expr
462 if (token.kind == TokenKind.STAR_STAR) {
brandjon733a97d2017-06-27 17:11:27 +0200463 if (dialect != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000464 reportError(
465 lexer.createLocation(token.left, token.right),
466 "**kwargs arguments are not allowed in BUILD files");
467 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000468 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000469 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000470 return setLocation(new Argument.StarStar(expr), start, expr);
471 }
472 // parse *expr
473 if (token.kind == TokenKind.STAR) {
brandjon733a97d2017-06-27 17:11:27 +0200474 if (dialect != SKYLARK) {
Laurent Le Brunb3266382015-05-27 16:14:43 +0000475 reportError(
476 lexer.createLocation(token.left, token.right),
477 "*args arguments are not allowed in BUILD files");
478 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000479 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000480 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000481 return setLocation(new Argument.Star(expr), start, expr);
482 }
483 // parse keyword = expr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100484 if (token.kind == TokenKind.IDENTIFIER) {
485 Token identToken = token;
486 String name = (String) token.value;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100487 nextToken();
488 if (token.kind == TokenKind.EQUALS) { // it's a named argument
489 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000490 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000491 return setLocation(new Argument.Keyword(name, expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100492 } else { // oops, back up!
493 pushToken(identToken);
494 }
495 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100496 // parse a positional argument
Laurent Le Brun56093892015-03-20 13:01:58 +0000497 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000498 return setLocation(new Argument.Positional(expr), start, expr);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100499 }
500
Laurent Le Brun56093892015-03-20 13:01:58 +0000501 // arg ::= IDENTIFIER '=' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100502 // | IDENTIFIER
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000503 private Parameter<Expression, Expression> parseFunctionParameter() {
504 // TODO(bazel-team): optionally support type annotations
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100505 int start = token.left;
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000506 if (token.kind == TokenKind.STAR_STAR) { // kwarg
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100507 nextToken();
Florian Weikert6f864c32015-07-23 11:26:39 +0000508 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000509 return setLocation(new Parameter.StarStar<Expression, Expression>(
510 ident.getName()), start, ident);
511 } else if (token.kind == TokenKind.STAR) { // stararg
512 int end = token.right;
513 nextToken();
514 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000515 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000516 return setLocation(new Parameter.Star<Expression, Expression>(ident.getName()),
517 start, ident);
518 } else {
519 return setLocation(new Parameter.Star<Expression, Expression>(null), start, end);
520 }
521 } else {
Florian Weikert6f864c32015-07-23 11:26:39 +0000522 Identifier ident = parseIdent();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000523 if (token.kind == TokenKind.EQUALS) { // there's a default value
524 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000525 Expression expr = parseNonTupleExpression();
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000526 return setLocation(new Parameter.Optional<Expression, Expression>(
527 ident.getName(), expr), start, expr);
528 } else {
529 return setLocation(new Parameter.Mandatory<Expression, Expression>(
530 ident.getName()), start, ident);
531 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100532 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100533 }
534
535 // funcall_suffix ::= '(' arg_list? ')'
Florian Weikert6f864c32015-07-23 11:26:39 +0000536 private Expression parseFuncallSuffix(int start, Expression receiver, Identifier function) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000537 List<Argument.Passed> args = Collections.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100538 expect(TokenKind.LPAREN);
539 int end;
540 if (token.kind == TokenKind.RPAREN) {
541 end = token.right;
542 nextToken(); // RPAREN
543 } else {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000544 args = parseFuncallArguments(); // (includes optional trailing comma)
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100545 end = token.right;
546 expect(TokenKind.RPAREN);
547 }
548 return makeFuncallExpression(receiver, function, args, start, end);
549 }
550
551 // selector_suffix ::= '.' IDENTIFIER
552 // |'.' IDENTIFIER funcall_suffix
553 private Expression parseSelectorSuffix(int start, Expression receiver) {
554 expect(TokenKind.DOT);
555 if (token.kind == TokenKind.IDENTIFIER) {
Florian Weikert6f864c32015-07-23 11:26:39 +0000556 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100557 if (token.kind == TokenKind.LPAREN) {
558 return parseFuncallSuffix(start, receiver, ident);
559 } else {
560 return setLocation(new DotExpression(receiver, ident), start, token.right);
561 }
562 } else {
Laurent Le Brun72329862015-03-23 14:20:03 +0000563 syntaxError(token, "expected identifier after dot");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100564 int end = syncTo(EXPR_TERMINATOR_SET);
565 return makeErrorExpression(start, end);
566 }
567 }
568
569 // arg_list ::= ( (arg ',')* arg ','? )?
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000570 private List<Argument.Passed> parseFuncallArguments() {
571 List<Argument.Passed> arguments =
572 parseFunctionArguments(new Supplier<Argument.Passed>() {
573 @Override public Argument.Passed get() {
574 return parseFuncallArgument();
575 }
576 });
577 try {
578 Argument.validateFuncallArguments(arguments);
579 } catch (Argument.ArgumentException e) {
580 reportError(lexer.createLocation(token.left, token.right), e.getMessage());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100581 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +0000582 return arguments;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100583 }
584
Laurent Le Brun56093892015-03-20 13:01:58 +0000585 // expr_list parses a comma-separated list of expression. It assumes that the
586 // first expression was already parsed, so it starts with a comma.
587 // It is used to parse tuples and list elements.
588 // expr_list ::= ( ',' expr )* ','?
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000589 private List<Expression> parseExprList(boolean trailingColonAllowed) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100590 List<Expression> list = new ArrayList<>();
591 // terminating tokens for an expression list
Laurent Le Brun56093892015-03-20 13:01:58 +0000592 while (token.kind == TokenKind.COMMA) {
593 expect(TokenKind.COMMA);
594 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000595 if (!trailingColonAllowed) {
596 reportError(
597 lexer.createLocation(token.left, token.right),
598 "Trailing comma is allowed only in parenthesized tuples.");
599 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100600 break;
601 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000602 list.add(parseNonTupleExpression());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100603 }
604 return list;
605 }
606
607 // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )?
608 private List<DictionaryEntryLiteral> parseDictEntryList() {
609 List<DictionaryEntryLiteral> list = new ArrayList<>();
610 // the terminating token for a dict entry list
611 while (token.kind != TokenKind.RBRACE) {
612 list.add(parseDictEntry());
613 if (token.kind == TokenKind.COMMA) {
614 nextToken();
615 } else {
616 break;
617 }
618 }
619 return list;
620 }
621
Laurent Le Brun56093892015-03-20 13:01:58 +0000622 // dict_entry ::= nontupleexpr ':' nontupleexpr
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100623 private DictionaryEntryLiteral parseDictEntry() {
624 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000625 Expression key = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100626 expect(TokenKind.COLON);
Laurent Le Brun56093892015-03-20 13:01:58 +0000627 Expression value = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100628 return setLocation(new DictionaryEntryLiteral(key, value), start, value);
629 }
630
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000631 /**
632 * Parse a String literal value, e.g. "str".
633 */
634 private StringLiteral parseStringLiteral() {
635 Preconditions.checkState(token.kind == TokenKind.STRING);
636 int end = token.right;
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000637 StringLiteral literal =
Michajlo Matijkiw8c539ea2017-02-22 23:02:46 +0000638 setLocation(new StringLiteral((String) token.value), token.left, end);
Laurent Le Brun4aa29122015-09-10 11:31:30 +0000639
640 nextToken();
641 if (token.kind == TokenKind.STRING) {
642 reportError(lexer.createLocation(end, token.left),
643 "Implicit string concatenation is forbidden, use the + operator");
644 }
645 return literal;
646 }
647
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100648 // primary ::= INTEGER
649 // | STRING
650 // | STRING '.' IDENTIFIER funcall_suffix
651 // | IDENTIFIER
652 // | IDENTIFIER funcall_suffix
653 // | IDENTIFIER '.' selector_suffix
654 // | list_expression
655 // | '(' ')' // a tuple with zero elements
656 // | '(' expr ')' // a parenthesized expression
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100657 // | dict_expression
658 // | '-' primary_with_suffix
659 private Expression parsePrimary() {
660 int start = token.left;
661 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000662 case INT:
663 {
664 IntegerLiteral literal = new IntegerLiteral((Integer) token.value);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100665 setLocation(literal, start, token.right);
666 nextToken();
667 return literal;
668 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000669 case STRING:
670 return parseStringLiteral();
671 case IDENTIFIER:
672 {
673 Identifier ident = parseIdent();
674 if (token.kind == TokenKind.LPAREN) { // it's a function application
675 return parseFuncallSuffix(start, null, ident);
676 } else {
677 return ident;
678 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100679 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000680 case LBRACKET: // it's a list
681 return parseListMaker();
682 case LBRACE: // it's a dictionary
683 return parseDictExpression();
684 case LPAREN:
685 {
686 nextToken();
687 // check for the empty tuple literal
688 if (token.kind == TokenKind.RPAREN) {
689 ListLiteral literal = ListLiteral.makeTuple(Collections.<Expression>emptyList());
690 setLocation(literal, start, token.right);
691 nextToken();
692 return literal;
693 }
694 // parse the first expression
695 Expression expression = parseExpression(true);
696 setLocation(expression, start, token.right);
697 if (token.kind == TokenKind.RPAREN) {
698 nextToken();
699 return expression;
700 }
701 expect(TokenKind.RPAREN);
702 int end = syncTo(EXPR_TERMINATOR_SET);
703 return makeErrorExpression(start, end);
704 }
705 case MINUS:
706 {
707 nextToken();
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000708 Expression expr = parsePrimaryWithSuffix();
brandjonf2ed8582017-06-27 15:05:35 +0200709 UnaryOperatorExpression minus = new UnaryOperatorExpression(UnaryOperator.MINUS, expr);
710 return setLocation(minus, start, expr);
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000711 }
712 default:
713 {
714 syntaxError(token, "expected expression");
715 int end = syncTo(EXPR_TERMINATOR_SET);
716 return makeErrorExpression(start, end);
717 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100718 }
719 }
720
721 // primary_with_suffix ::= primary selector_suffix*
722 // | primary substring_suffix
723 private Expression parsePrimaryWithSuffix() {
724 int start = token.left;
725 Expression receiver = parsePrimary();
726 while (true) {
727 if (token.kind == TokenKind.DOT) {
728 receiver = parseSelectorSuffix(start, receiver);
729 } else if (token.kind == TokenKind.LBRACKET) {
730 receiver = parseSubstringSuffix(start, receiver);
731 } else {
732 break;
733 }
734 }
735 return receiver;
736 }
737
Florian Weikerte3421962015-12-17 12:46:08 +0000738 // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100739 private Expression parseSubstringSuffix(int start, Expression receiver) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100740 Expression startExpr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100741
742 expect(TokenKind.LBRACKET);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100743 if (token.kind == TokenKind.COLON) {
Florian Weikerte3421962015-12-17 12:46:08 +0000744 startExpr = setLocation(new Identifier("None"), token.left, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100745 } else {
Laurent Le Brun6824d862015-09-11 13:51:41 +0000746 startExpr = parseExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100747 }
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000748 // This is an index/key access
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100749 if (token.kind == TokenKind.RBRACKET) {
750 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000751 return setLocation(new IndexExpression(receiver, startExpr), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100752 }
Laurent Le Bruneeef30f2015-03-16 15:12:35 +0000753 // This is a slice (or substring)
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000754 Expression endExpr = parseSliceArgument(new Identifier("None"));
755 Expression stepExpr = parseSliceArgument(new IntegerLiteral(1));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100756 expect(TokenKind.RBRACKET);
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000757 return setLocation(new SliceExpression(receiver, startExpr, endExpr, stepExpr),
758 start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100759 }
760
Florian Weikerte3421962015-12-17 12:46:08 +0000761 /**
762 * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice
763 * operation. If no such expression is found, this method returns an argument that represents
764 * {@code defaultValue}.
765 */
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000766 private Expression parseSliceArgument(Expression defaultValue) {
Florian Weikerte3421962015-12-17 12:46:08 +0000767 Expression explicitArg = getSliceEndOrStepExpression();
Vladimir Moskva8d610c62016-09-15 14:36:41 +0000768 if (explicitArg == null) {
769 return setLocation(defaultValue, token.left, token.right);
770 }
771 return explicitArg;
Florian Weikerte3421962015-12-17 12:46:08 +0000772 }
773
774 private Expression getSliceEndOrStepExpression() {
775 // There has to be a colon before any end or slice argument.
776 // However, if the next token thereafter is another colon or a right bracket, no argument value
777 // was specified.
778 if (token.kind == TokenKind.COLON) {
779 expect(TokenKind.COLON);
780 if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) {
781 return parseNonTupleExpression();
782 }
783 }
784 return null;
785 }
786
Laurent Le Brun185392d2015-03-20 14:41:25 +0000787 // Equivalent to 'exprlist' rule in Python grammar.
788 // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','?
789 private Expression parseForLoopVariables() {
790 // We cannot reuse parseExpression because it would parse the 'in' operator.
791 // e.g. "for i in e: pass" -> we want to parse only "i" here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100792 int start = token.left;
Laurent Le Brun185392d2015-03-20 14:41:25 +0000793 Expression e1 = parsePrimaryWithSuffix();
794 if (token.kind != TokenKind.COMMA) {
795 return e1;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100796 }
797
Laurent Le Brun185392d2015-03-20 14:41:25 +0000798 // It's a tuple
799 List<Expression> tuple = new ArrayList<>();
800 tuple.add(e1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100801 while (token.kind == TokenKind.COMMA) {
Laurent Le Brun185392d2015-03-20 14:41:25 +0000802 expect(TokenKind.COMMA);
803 if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) {
804 break;
805 }
806 tuple.add(parsePrimaryWithSuffix());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100807 }
Laurent Le Brun185392d2015-03-20 14:41:25 +0000808 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100809 }
810
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000811 // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix
812 // | 'IF' expr comprehension_suffix
813 // | ']'
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000814 private Expression parseComprehensionSuffix(
brandjon296cd492017-05-15 16:17:16 +0200815 AbstractComprehension.AbstractBuilder comprehensionBuilder, TokenKind closingBracket) {
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000816 while (true) {
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000817 if (token.kind == TokenKind.FOR) {
818 nextToken();
819 Expression loopVar = parseForLoopVariables();
820 expect(TokenKind.IN);
821 // The expression cannot be a ternary expression ('x if y else z') due to
822 // conflicts in Python grammar ('if' is used by the comprehension).
823 Expression listExpression = parseNonTupleExpression(0);
brandjon296cd492017-05-15 16:17:16 +0200824 comprehensionBuilder.addFor(loopVar, listExpression);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000825 } else if (token.kind == TokenKind.IF) {
826 nextToken();
laurentlbc3a1af62017-06-16 14:37:43 +0200827 // [x for x in li if 1, 2] # parse error
828 // [x for x in li if (1, 2)] # ok
829 comprehensionBuilder.addIf(parseNonTupleExpression(0));
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000830 } else if (token.kind == closingBracket) {
831 nextToken();
brandjon296cd492017-05-15 16:17:16 +0200832 return comprehensionBuilder.build();
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000833 } else {
834 syntaxError(token, "expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'");
835 syncPast(LIST_TERMINATOR_SET);
836 return makeErrorExpression(token.left, token.right);
Laurent Le Brun443aaae2015-04-21 19:49:49 +0000837 }
838 }
839 }
840
Laurent Le Brun56093892015-03-20 13:01:58 +0000841 // list_maker ::= '[' ']'
842 // |'[' expr ']'
843 // |'[' expr expr_list ']'
844 // |'[' expr ('FOR' loop_variables 'IN' expr)+ ']'
845 private Expression parseListMaker() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100846 int start = token.left;
847 expect(TokenKind.LBRACKET);
848 if (token.kind == TokenKind.RBRACKET) { // empty List
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000849 ListLiteral literal = ListLiteral.emptyList();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100850 setLocation(literal, start, token.right);
851 nextToken();
852 return literal;
853 }
Laurent Le Brun56093892015-03-20 13:01:58 +0000854 Expression expression = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100855 Preconditions.checkNotNull(expression,
856 "null element in list in AST at %s:%s", token.left, token.right);
857 switch (token.kind) {
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000858 case RBRACKET: // singleton List
859 {
860 ListLiteral literal = ListLiteral.makeList(Collections.singletonList(expression));
861 setLocation(literal, start, token.right);
862 nextToken();
863 return literal;
864 }
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000865 case FOR:
866 { // list comprehension
867 Expression result =
brandjon296cd492017-05-15 16:17:16 +0200868 parseComprehensionSuffix(
869 new ListComprehension.Builder().setOutputExpression(expression),
870 TokenKind.RBRACKET);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000871 return setLocation(result, start, token.right);
872 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000873 case COMMA:
874 {
875 List<Expression> list = parseExprList(true);
876 Preconditions.checkState(
877 !list.contains(null),
878 "null element in list in AST at %s:%s",
879 token.left,
880 token.right);
881 list.add(0, expression);
882 if (token.kind == TokenKind.RBRACKET) {
883 ListLiteral literal = ListLiteral.makeList(list);
884 setLocation(literal, start, token.right);
885 nextToken();
886 return literal;
887 }
888 expect(TokenKind.RBRACKET);
889 int end = syncPast(LIST_TERMINATOR_SET);
890 return makeErrorExpression(start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100891 }
Laurent Le Brunb639ca82017-01-17 11:18:23 +0000892 default:
893 {
894 syntaxError(token, "expected ',', 'for' or ']'");
895 int end = syncPast(LIST_TERMINATOR_SET);
896 return makeErrorExpression(start, end);
897 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100898 }
899 }
900
901 // dict_expression ::= '{' '}'
902 // |'{' dict_entry_list '}'
903 // |'{' dict_entry 'FOR' loop_variables 'IN' expr '}'
904 private Expression parseDictExpression() {
905 int start = token.left;
906 expect(TokenKind.LBRACE);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +0000907 if (token.kind == TokenKind.RBRACE) { // empty Dict
908 DictionaryLiteral literal = DictionaryLiteral.emptyDict();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100909 setLocation(literal, start, token.right);
910 nextToken();
911 return literal;
912 }
913 DictionaryEntryLiteral entry = parseDictEntry();
914 if (token.kind == TokenKind.FOR) {
915 // Dict comprehension
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000916 Expression result = parseComprehensionSuffix(
brandjon296cd492017-05-15 16:17:16 +0200917 new DictComprehension.Builder()
918 .setKeyExpression(entry.getKey())
919 .setValueExpression(entry.getValue()),
920 TokenKind.RBRACE);
Florian Weikertffd8a5a2015-09-18 11:51:01 +0000921 return setLocation(result, start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100922 }
923 List<DictionaryEntryLiteral> entries = new ArrayList<>();
924 entries.add(entry);
925 if (token.kind == TokenKind.COMMA) {
926 expect(TokenKind.COMMA);
927 entries.addAll(parseDictEntryList());
928 }
929 if (token.kind == TokenKind.RBRACE) {
930 DictionaryLiteral literal = new DictionaryLiteral(entries);
931 setLocation(literal, start, token.right);
932 nextToken();
933 return literal;
934 }
Laurent Le Brun72329862015-03-23 14:20:03 +0000935 expect(TokenKind.RBRACE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100936 int end = syncPast(DICT_TERMINATOR_SET);
937 return makeErrorExpression(start, end);
938 }
939
Florian Weikert6f864c32015-07-23 11:26:39 +0000940 private Identifier parseIdent() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100941 if (token.kind != TokenKind.IDENTIFIER) {
Laurent Le Brun72329862015-03-23 14:20:03 +0000942 expect(TokenKind.IDENTIFIER);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100943 return makeErrorExpression(token.left, token.right);
944 }
Florian Weikert6f864c32015-07-23 11:26:39 +0000945 Identifier ident = new Identifier(((String) token.value));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100946 setLocation(ident, token.left, token.right);
947 nextToken();
948 return ident;
949 }
950
951 // binop_expression ::= binop_expression OP binop_expression
952 // | parsePrimaryWithSuffix
953 // This function takes care of precedence between operators (see operatorPrecedence for
954 // the order), and it assumes left-to-right associativity.
955 private Expression parseBinOpExpression(int prec) {
956 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +0000957 Expression expr = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100958 // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is
959 // limited to number of different precedence levels (operatorPrecedence.size()).
laurentlb1fcea382017-06-19 16:02:42 +0200960 Operator lastOp = null;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100961 for (;;) {
Laurent Le Brune3f4ed72015-05-08 14:47:26 +0000962
963 if (token.kind == TokenKind.NOT) {
964 // If NOT appears when we expect a binary operator, it must be followed by IN.
965 // Since the code expects every operator to be a single token, we push a NOT_IN token.
966 expect(TokenKind.NOT);
967 expect(TokenKind.IN);
968 pushToken(new Token(TokenKind.NOT_IN, token.left, token.right));
969 }
970
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100971 if (!binaryOperators.containsKey(token.kind)) {
972 return expr;
973 }
974 Operator operator = binaryOperators.get(token.kind);
975 if (!operatorPrecedence.get(prec).contains(operator)) {
976 return expr;
977 }
laurentlb1fcea382017-06-19 16:02:42 +0200978
979 // Operator '==' and other operators of the same precedence (e.g. '<', 'in')
980 // are not associative.
981 if (lastOp != null && operatorPrecedence.get(prec).contains(Operator.EQUALS_EQUALS)) {
982 reportError(
983 lexer.createLocation(token.left, token.right),
984 String.format("Operator '%s' is not associative with operator '%s'. Use parens.",
985 lastOp, operator));
986 }
987
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100988 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +0000989 Expression secondary = parseNonTupleExpression(prec + 1);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100990 expr = optimizeBinOpExpression(operator, expr, secondary);
991 setLocation(expr, start, secondary);
laurentlb1fcea382017-06-19 16:02:42 +0200992 lastOp = operator;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +0100993 }
994 }
995
996 // Optimize binary expressions.
997 // string literal + string literal can be concatenated into one string literal
998 // so we don't have to do the expensive string concatenation at runtime.
999 private Expression optimizeBinOpExpression(
1000 Operator operator, Expression expr, Expression secondary) {
1001 if (operator == Operator.PLUS) {
1002 if (expr instanceof StringLiteral && secondary instanceof StringLiteral) {
1003 StringLiteral left = (StringLiteral) expr;
1004 StringLiteral right = (StringLiteral) secondary;
Michajlo Matijkiw8c539ea2017-02-22 23:02:46 +00001005 return new StringLiteral(left.getValue() + right.getValue());
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001006 }
1007 }
1008 return new BinaryOperatorExpression(operator, expr, secondary);
1009 }
1010
1011 private Expression parseExpression() {
Laurent Le Brunb639ca82017-01-17 11:18:23 +00001012 return parseExpression(false);
1013 }
1014
1015 // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of
1016 // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity:
1017 // e.g. fct(x, y) vs fct((x, y))
1018 //
1019 // Tuples can have a trailing comma only when insideParens is true. This prevents bugs
1020 // where a one-element tuple is surprisingly created:
1021 // e.g. foo = f(x),
1022 private Expression parseExpression(boolean insideParens) {
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001023 int start = token.left;
Laurent Le Brun56093892015-03-20 13:01:58 +00001024 Expression expression = parseNonTupleExpression();
1025 if (token.kind != TokenKind.COMMA) {
1026 return expression;
1027 }
1028
1029 // It's a tuple
Laurent Le Brunb639ca82017-01-17 11:18:23 +00001030 List<Expression> tuple = parseExprList(insideParens);
Laurent Le Brun56093892015-03-20 13:01:58 +00001031 tuple.add(0, expression); // add the first expression to the front of the tuple
1032 return setLocation(ListLiteral.makeTuple(tuple), start, token.right);
1033 }
1034
1035 // Equivalent to 'test' rule in Python grammar.
1036 private Expression parseNonTupleExpression() {
1037 int start = token.left;
1038 Expression expr = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001039 if (token.kind == TokenKind.IF) {
1040 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +00001041 Expression condition = parseNonTupleExpression(0);
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001042 if (token.kind == TokenKind.ELSE) {
1043 nextToken();
Laurent Le Brun56093892015-03-20 13:01:58 +00001044 Expression elseClause = parseNonTupleExpression();
Francois-Rene Rideau6fc5ee72015-03-12 20:55:17 +00001045 return setLocation(new ConditionalExpression(expr, condition, elseClause),
1046 start, elseClause);
1047 } else {
1048 reportError(lexer.createLocation(start, token.left),
1049 "missing else clause in conditional expression or semicolon before if");
1050 return expr; // Try to recover from error: drop the if and the expression after it. Ouch.
1051 }
1052 }
1053 return expr;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001054 }
1055
Laurent Le Brun56093892015-03-20 13:01:58 +00001056 private Expression parseNonTupleExpression(int prec) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001057 if (prec >= operatorPrecedence.size()) {
1058 return parsePrimaryWithSuffix();
1059 }
1060 if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) {
1061 return parseNotExpression(prec);
1062 }
1063 return parseBinOpExpression(prec);
1064 }
1065
1066 // not_expr :== 'not' expr
1067 private Expression parseNotExpression(int prec) {
1068 int start = token.left;
1069 expect(TokenKind.NOT);
Laurent Le Brun56093892015-03-20 13:01:58 +00001070 Expression expression = parseNonTupleExpression(prec + 1);
brandjonf2ed8582017-06-27 15:05:35 +02001071 UnaryOperatorExpression notExpression =
1072 new UnaryOperatorExpression(UnaryOperator.NOT, expression);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001073 return setLocation(notExpression, start, token.right);
1074 }
1075
1076 // file_input ::= ('\n' | stmt)* EOF
1077 private List<Statement> parseFileInput() {
Googler768cbc42015-08-28 12:52:14 +00001078 long startTime = Profiler.nanoTimeMaybe();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001079 List<Statement> list = new ArrayList<>();
1080 while (token.kind != TokenKind.EOF) {
1081 if (token.kind == TokenKind.NEWLINE) {
Laurent Le Brun9060e162015-04-02 10:07:28 +00001082 expectAndRecover(TokenKind.NEWLINE);
1083 } else if (recoveryMode) {
1084 // If there was a parse error, we want to recover here
1085 // before starting a new top-level statement.
1086 syncTo(STATEMENT_TERMINATOR_SET);
1087 recoveryMode = false;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001088 } else {
1089 parseTopLevelStatement(list);
1090 }
1091 }
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001092 Profiler.instance().logSimpleTask(startTime, ProfilerTask.SKYLARK_PARSER, "");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001093 return list;
1094 }
1095
Florian Weikert9d659ad2015-07-23 14:44:36 +00001096 // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)* COMMA? ')'
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001097 private void parseLoad(List<Statement> list) {
1098 int start = token.left;
1099 if (token.kind != TokenKind.STRING) {
1100 expect(TokenKind.STRING);
1101 return;
1102 }
Googler768cbc42015-08-28 12:52:14 +00001103
John Field9201fda2015-12-30 19:30:34 +00001104 StringLiteral importString = parseStringLiteral();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001105 expect(TokenKind.COMMA);
1106
Florian Weikert9d659ad2015-07-23 14:44:36 +00001107 Map<Identifier, String> symbols = new HashMap<>();
1108 parseLoadSymbol(symbols); // At least one symbol is required
1109
Laurent Le Brun73a98492015-03-17 15:46:19 +00001110 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001111 expect(TokenKind.COMMA);
Laurent Le Brun59f587a2015-03-16 14:51:36 +00001112 if (token.kind == TokenKind.RPAREN) {
1113 break;
1114 }
Florian Weikert9d659ad2015-07-23 14:44:36 +00001115
1116 parseLoadSymbol(symbols);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001117 }
1118 expect(TokenKind.RPAREN);
Googler768cbc42015-08-28 12:52:14 +00001119
Laurent Le Brun7b1708c2016-10-13 10:05:12 +00001120 LoadStatement stmt = new LoadStatement(importString, symbols);
Miguel Alcon Pinto927f3b22016-08-22 14:21:30 +00001121 list.add(setLocation(stmt, start, token.left));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001122 }
1123
Florian Weikert9d659ad2015-07-23 14:44:36 +00001124 /**
1125 * Parses the next symbol argument of a load statement and puts it into the output map.
1126 *
1127 * <p> The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING).
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001128 * If no alias is used, "name" and "declared" will be identical. "Declared" refers to the
1129 * original name in the Bazel file that should be loaded, while "name" will be the key of the
1130 * entry in the map.
Florian Weikert9d659ad2015-07-23 14:44:36 +00001131 */
1132 private void parseLoadSymbol(Map<Identifier, String> symbols) {
Vladimir Moskva8d610c62016-09-15 14:36:41 +00001133 Token nameToken;
1134 Token declaredToken;
Florian Weikert9d659ad2015-07-23 14:44:36 +00001135
1136 if (token.kind == TokenKind.STRING) {
1137 nameToken = token;
1138 declaredToken = nameToken;
1139 } else {
1140 if (token.kind != TokenKind.IDENTIFIER) {
1141 syntaxError(token, "Expected either a literal string or an identifier");
1142 }
1143
1144 nameToken = token;
1145
1146 expect(TokenKind.IDENTIFIER);
1147 expect(TokenKind.EQUALS);
1148
1149 declaredToken = token;
1150 }
1151
1152 expect(TokenKind.STRING);
1153
1154 try {
1155 Identifier identifier = new Identifier(nameToken.value.toString());
1156
1157 if (symbols.containsKey(identifier)) {
1158 syntaxError(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001159 nameToken, String.format("Identifier '%s' is used more than once",
1160 identifier.getName()));
Florian Weikert9d659ad2015-07-23 14:44:36 +00001161 } else {
1162 symbols.put(
Jon Brandveinee8b7aa2016-07-28 15:01:26 +00001163 setLocation(identifier, nameToken.left, nameToken.right),
1164 declaredToken.value.toString());
Florian Weikert9d659ad2015-07-23 14:44:36 +00001165 }
1166 } catch (NullPointerException npe) {
1167 // This means that the value of at least one token is null. In this case, the previous
1168 // expect() call has already logged an error.
1169 }
1170 }
1171
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001172 private void parseTopLevelStatement(List<Statement> list) {
1173 // In Python grammar, there is no "top-level statement" and imports are
1174 // considered as "small statements". We are a bit stricter than Python here.
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001175 // Check if there is an include
1176 if (token.kind == TokenKind.IDENTIFIER) {
1177 Token identToken = token;
Florian Weikert6f864c32015-07-23 11:26:39 +00001178 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001179
Lukacs Berkid9e733d2015-09-18 08:18:11 +00001180 if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001181 expect(TokenKind.LPAREN);
1182 parseLoad(list);
1183 return;
1184 }
1185 pushToken(identToken); // push the ident back to parse it as a statement
1186 }
brandjon733a97d2017-06-27 17:11:27 +02001187 parseStatement(list, ParsingLevel.TOP_LEVEL);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001188 }
1189
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001190 // small_stmt | 'pass'
1191 private void parseSmallStatementOrPass(List<Statement> list) {
1192 if (token.kind == TokenKind.PASS) {
1193 // Skip the token, don't add it to the list.
1194 // It has no existence in the AST.
1195 expect(TokenKind.PASS);
1196 } else {
1197 list.add(parseSmallStatement());
1198 }
1199 }
1200
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001201 // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE
1202 private void parseSimpleStatement(List<Statement> list) {
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001203 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001204
1205 while (token.kind == TokenKind.SEMI) {
1206 nextToken();
1207 if (token.kind == TokenKind.NEWLINE) {
1208 break;
1209 }
Laurent Le Brun0942ee92015-03-17 20:22:16 +00001210 parseSmallStatementOrPass(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001211 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001212 expectAndRecover(TokenKind.NEWLINE);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001213 }
1214
1215 // small_stmt ::= assign_stmt
1216 // | expr
1217 // | RETURN expr
Florian Weikert917ceaa2015-06-10 13:54:26 +00001218 // | flow_stmt
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001219 // assign_stmt ::= expr ('=' | augassign) expr
Vladimir Moskva71536642016-12-19 13:51:57 +00001220 // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=')
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001221 // Note that these are in Python, but not implemented here (at least for now):
Vladimir Moskva71536642016-12-19 13:51:57 +00001222 // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//='
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001223 // Semantic difference from Python:
1224 // In Skylark, x += y is simple syntactic sugar for x = x + y.
1225 // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined
1226 // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects
1227 // the original list (it doesn't do that on tuples); if no such method is defined it falls back
1228 // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect.
1229 // Note also that there is a special casing to translate 'ident[key] = value'
1230 // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like
1231 // dictionary assignment syntax.
1232 private Statement parseSmallStatement() {
1233 int start = token.left;
1234 if (token.kind == TokenKind.RETURN) {
1235 return parseReturnStatement();
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001236 } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) {
Florian Weikert917ceaa2015-06-10 13:54:26 +00001237 return parseFlowStatement(token.kind);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001238 }
1239 Expression expression = parseExpression();
1240 if (token.kind == TokenKind.EQUALS) {
1241 nextToken();
1242 Expression rvalue = parseExpression();
laurentlb094bb262017-05-19 21:18:25 +02001243 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001244 new AssignmentStatement(new LValue(expression), rvalue),
1245 start, rvalue);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001246 } else if (augmentedAssignmentMethods.containsKey(token.kind)) {
1247 Operator operator = augmentedAssignmentMethods.get(token.kind);
1248 nextToken();
1249 Expression operand = parseExpression();
1250 int end = operand.getLocation().getEndOffset();
Vladimir Moskva71536642016-12-19 13:51:57 +00001251 return setLocation(
brandjon540aac62017-06-12 23:08:09 +02001252 new AugmentedAssignmentStatement(operator, new LValue(expression), operand),
1253 start, end);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001254 } else {
1255 return setLocation(new ExpressionStatement(expression), start, expression);
1256 }
1257 }
1258
1259 // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]?
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001260 private IfStatement parseIfStatement() {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001261 int start = token.left;
1262 List<ConditionalStatements> thenBlocks = new ArrayList<>();
1263 thenBlocks.add(parseConditionalStatements(TokenKind.IF));
1264 while (token.kind == TokenKind.ELIF) {
1265 thenBlocks.add(parseConditionalStatements(TokenKind.ELIF));
1266 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001267 List<Statement> elseBlock;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001268 if (token.kind == TokenKind.ELSE) {
1269 expect(TokenKind.ELSE);
1270 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001271 elseBlock = parseSuite();
1272 } else {
1273 elseBlock = ImmutableList.of();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001274 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001275 return setLocation(new IfStatement(thenBlocks, elseBlock), start, token.right);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001276 }
1277
1278 // cond_stmts ::= [EL]IF expr ':' suite
1279 private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) {
1280 int start = token.left;
1281 expect(tokenKind);
Laurent Le Brun56093892015-03-20 13:01:58 +00001282 Expression expr = parseNonTupleExpression();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001283 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001284 List<Statement> thenBlock = parseSuite();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001285 ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock);
1286 return setLocation(stmt, start, token.right);
1287 }
1288
1289 // for_stmt ::= FOR IDENTIFIER IN expr ':' suite
1290 private void parseForStatement(List<Statement> list) {
1291 int start = token.left;
1292 expect(TokenKind.FOR);
Laurent Le Brun185392d2015-03-20 14:41:25 +00001293 Expression loopVar = parseForLoopVariables();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001294 expect(TokenKind.IN);
1295 Expression collection = parseExpression();
1296 expect(TokenKind.COLON);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001297 enterLoop();
1298 try {
1299 List<Statement> block = parseSuite();
brandjon540aac62017-06-12 23:08:09 +02001300 Statement stmt = new ForStatement(new LValue(loopVar), collection, block);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001301 list.add(setLocation(stmt, start, token.right));
1302 } finally {
1303 exitLoop();
1304 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001305 }
1306
1307 // def foo(bar1, bar2):
1308 private void parseFunctionDefStatement(List<Statement> list) {
1309 int start = token.left;
1310 expect(TokenKind.DEF);
Florian Weikert6f864c32015-07-23 11:26:39 +00001311 Identifier ident = parseIdent();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001312 expect(TokenKind.LPAREN);
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001313 List<Parameter<Expression, Expression>> params = parseParameters();
1314 FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001315 expect(TokenKind.RPAREN);
1316 expect(TokenKind.COLON);
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001317 List<Statement> block = parseSuite();
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001318 FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001319 list.add(setLocation(stmt, start, token.right));
1320 }
1321
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001322 private FunctionSignature.WithValues<Expression, Expression> functionSignature(
1323 List<Parameter<Expression, Expression>> parameters) {
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001324 try {
1325 return FunctionSignature.WithValues.<Expression, Expression>of(parameters);
1326 } catch (FunctionSignature.SignatureException e) {
1327 reportError(e.getParameter().getLocation(), e.getMessage());
1328 // return bogus empty signature
1329 return FunctionSignature.WithValues.<Expression, Expression>create(FunctionSignature.of());
1330 }
1331 }
1332
Laurent Le Brun4baefdc2015-09-04 11:27:46 +00001333 private List<Parameter<Expression, Expression>> parseParameters() {
1334 return parseFunctionArguments(
1335 new Supplier<Parameter<Expression, Expression>>() {
1336 @Override public Parameter<Expression, Expression> get() {
1337 return parseFunctionParameter();
1338 }
1339 });
1340 }
1341
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001342 /**
1343 * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter,
1344 * as returned by the Supplier parseArgument (that, taking no argument, must be closed over
1345 * the mutable input data structures).
1346 *
1347 * <p>This parser does minimal validation: it ensures the proper python use of the comma (that
1348 * can terminate before a star but not after) and the fact that a **kwarg must appear last.
1349 * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as
1350 * all positional preceding keyword arguments in a call, nor does it check the more subtle
1351 * constraints for Parameter-s. This validation must happen afterwards in an appropriate method.
1352 */
1353 private <V extends Argument> ImmutableList<V>
1354 parseFunctionArguments(Supplier<V> parseArgument) {
1355 boolean hasArg = false;
1356 boolean hasStar = false;
1357 boolean hasStarStar = false;
1358 ArrayList<V> arguments = new ArrayList<>();
1359
1360 while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) {
1361 if (hasStarStar) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001362 reportError(lexer.createLocation(token.left, token.right),
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001363 "unexpected tokens after kwarg");
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001364 break;
1365 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001366 if (hasArg) {
1367 expect(TokenKind.COMMA);
1368 }
1369 if (token.kind == TokenKind.RPAREN && !hasStar) {
1370 // list can end with a COMMA if there is neither * nor **
1371 break;
1372 }
1373 V arg = parseArgument.get();
1374 hasArg = true;
1375 if (arg.isStar()) {
1376 hasStar = true;
1377 } else if (arg.isStarStar()) {
1378 hasStarStar = true;
1379 }
1380 arguments.add(arg);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001381 }
Francois-Rene Rideau5dcdbf92015-02-19 18:36:17 +00001382 return ImmutableList.copyOf(arguments);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001383 }
1384
Laurent Le Brun5f674452015-03-17 19:29:13 +00001385 // suite is typically what follows a colon (e.g. after def or for).
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001386 // suite ::= simple_stmt
1387 // | NEWLINE INDENT stmt+ OUTDENT
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001388 private List<Statement> parseSuite() {
1389 List<Statement> list = new ArrayList<>();
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001390 if (token.kind == TokenKind.NEWLINE) {
1391 expect(TokenKind.NEWLINE);
1392 if (token.kind != TokenKind.INDENT) {
1393 reportError(lexer.createLocation(token.left, token.right),
1394 "expected an indented block");
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001395 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001396 }
1397 expect(TokenKind.INDENT);
1398 while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) {
brandjon733a97d2017-06-27 17:11:27 +02001399 parseStatement(list, ParsingLevel.LOCAL_LEVEL);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001400 }
Laurent Le Brun9060e162015-04-02 10:07:28 +00001401 expectAndRecover(TokenKind.OUTDENT);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001402 } else {
Laurent Le Brun5f674452015-03-17 19:29:13 +00001403 parseSimpleStatement(list);
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001404 }
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001405 return list;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001406 }
1407
1408 // skipSuite does not check that the code is syntactically correct, it
1409 // just skips based on indentation levels.
1410 private void skipSuite() {
1411 if (token.kind == TokenKind.NEWLINE) {
1412 expect(TokenKind.NEWLINE);
1413 if (token.kind != TokenKind.INDENT) {
1414 reportError(lexer.createLocation(token.left, token.right),
1415 "expected an indented block");
1416 return;
1417 }
1418 expect(TokenKind.INDENT);
1419
1420 // Don't try to parse all the Python syntax, just skip the block
1421 // until the corresponding outdent token.
1422 int depth = 1;
1423 while (depth > 0) {
1424 // Because of the way the lexer works, this should never happen
1425 Preconditions.checkState(token.kind != TokenKind.EOF);
1426
1427 if (token.kind == TokenKind.INDENT) {
1428 depth++;
1429 }
1430 if (token.kind == TokenKind.OUTDENT) {
1431 depth--;
1432 }
1433 nextToken();
1434 }
1435
1436 } else {
1437 // the block ends at the newline token
1438 // e.g. if x == 3: print "three"
1439 syncTo(STATEMENT_TERMINATOR_SET);
1440 }
1441 }
1442
1443 // stmt ::= simple_stmt
1444 // | compound_stmt
brandjon733a97d2017-06-27 17:11:27 +02001445 private void parseStatement(List<Statement> list, ParsingLevel parsingLevel) {
1446 if (token.kind == TokenKind.DEF && dialect == SKYLARK) {
1447 if (parsingLevel == ParsingLevel.LOCAL_LEVEL) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001448 reportError(lexer.createLocation(token.left, token.right),
1449 "nested functions are not allowed. Move the function to top-level");
1450 }
1451 parseFunctionDefStatement(list);
brandjon733a97d2017-06-27 17:11:27 +02001452 } else if (token.kind == TokenKind.IF && dialect == SKYLARK) {
Francois-Rene Rideaucbebd632015-02-11 16:56:37 +00001453 list.add(parseIfStatement());
brandjon733a97d2017-06-27 17:11:27 +02001454 } else if (token.kind == TokenKind.FOR && dialect == SKYLARK) {
1455 if (parsingLevel == ParsingLevel.TOP_LEVEL) {
Yue Gan4866e152016-04-07 13:07:08 +00001456 reportError(
1457 lexer.createLocation(token.left, token.right),
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001458 "for loops are not allowed on top-level. Put it into a function");
1459 }
1460 parseForStatement(list);
Laurent Le Brun3bc8e9a2015-09-10 11:00:37 +00001461 } else if (BLOCK_STARTING_SET.contains(token.kind)) {
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001462 skipBlock();
1463 } else {
1464 parseSimpleStatement(list);
1465 }
1466 }
1467
Florian Weikert917ceaa2015-06-10 13:54:26 +00001468 // flow_stmt ::= break_stmt | continue_stmt
1469 private FlowStatement parseFlowStatement(TokenKind kind) {
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001470 int start = token.left;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001471 int end = token.right;
Florian Weikert917ceaa2015-06-10 13:54:26 +00001472 expect(kind);
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001473 if (loopCount == 0) {
1474 reportError(
1475 lexer.createLocation(start, end),
1476 kind.getPrettyName() + " statement must be inside a for loop");
1477 }
Laurent Le Brun7d6a3812015-10-26 12:07:12 +00001478 FlowStatement.Kind flowKind =
1479 kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE;
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001480 return setLocation(new FlowStatement(flowKind), start, end);
Florian Weikert917ceaa2015-06-10 13:54:26 +00001481 }
Laurent Le Brund412c8f2015-06-16 11:12:54 +00001482
Googlercc0d9952015-08-10 12:01:34 +00001483 // return_stmt ::= RETURN [expr]
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001484 private ReturnStatement parseReturnStatement() {
1485 int start = token.left;
Googlercc0d9952015-08-10 12:01:34 +00001486 int end = token.right;
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001487 expect(TokenKind.RETURN);
Googler768cbc42015-08-28 12:52:14 +00001488
Googlercc0d9952015-08-10 12:01:34 +00001489 Expression expression;
1490 if (STATEMENT_TERMINATOR_SET.contains(token.kind)) {
1491 // this None makes the AST not correspond to the source exactly anymore
1492 expression = new Identifier("None");
1493 setLocation(expression, start, end);
1494 } else {
1495 expression = parseExpression();
1496 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001497 return setLocation(new ReturnStatement(expression), start, expression);
1498 }
1499
Florian Weikert1f004e52015-10-16 09:43:48 +00001500 // block ::= ('if' | 'for' | 'class' | 'try' | 'def') expr ':' suite
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001501 private void skipBlock() {
1502 int start = token.left;
1503 Token blockToken = token;
1504 syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001505 if (blockToken.kind == TokenKind.ELSE) {
Yue Gan4866e152016-04-07 13:07:08 +00001506 reportError(
1507 lexer.createLocation(blockToken.left, blockToken.right),
1508 "syntax error at 'else': not allowed here.");
Laurent Le Brunb566c7d2016-10-07 16:31:03 +00001509 } else {
Florian Weikert1f004e52015-10-16 09:43:48 +00001510 String msg =
1511 ILLEGAL_BLOCK_KEYWORDS.containsKey(blockToken.kind)
1512 ? String.format("%ss are not supported.", ILLEGAL_BLOCK_KEYWORDS.get(blockToken.kind))
1513 : "This is not supported in BUILD files. Move the block to a .bzl file and load it";
Laurent Le Brunb13a4382015-06-30 14:20:45 +00001514 reportError(
1515 lexer.createLocation(start, token.right),
Florian Weikert1f004e52015-10-16 09:43:48 +00001516 String.format("syntax error at '%s': %s", blockToken, msg));
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001517 }
1518 expect(TokenKind.COLON);
1519 skipSuite();
1520 }
1521
1522 // create a comment node
1523 private void makeComment(Token token) {
1524 comments.add(setLocation(new Comment((String) token.value), token.left, token.right));
1525 }
Laurent Le Bruna3c25a62016-10-26 10:59:09 +00001526
1527 private void enterLoop() {
1528 loopCount++;
1529 }
1530
1531 private void exitLoop() {
1532 Preconditions.checkState(loopCount > 0);
1533 loopCount--;
1534 }
Han-Wen Nienhuysd08b27f2015-02-25 16:45:20 +01001535}