Damien Martin-Guillerez | f88f4d8 | 2015-09-25 13:56:55 +0000 | [diff] [blame] | 1 | // Copyright 2014 The Bazel Authors. All rights reserved. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package com.google.devtools.build.lib.syntax; |
| 16 | |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 17 | import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.BUILD; |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 18 | import static com.google.devtools.build.lib.syntax.Parser.ParsingMode.SKYLARK; |
| 19 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 20 | import com.google.common.annotations.VisibleForTesting; |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 21 | import com.google.common.base.Supplier; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 22 | import com.google.common.collect.ImmutableList; |
| 23 | import com.google.common.collect.ImmutableMap; |
Laurent Le Brun | e51a4d2 | 2016-10-11 18:04:16 +0000 | [diff] [blame] | 24 | import com.google.common.collect.Iterables; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 25 | import com.google.devtools.build.lib.events.Event; |
| 26 | import com.google.devtools.build.lib.events.EventHandler; |
| 27 | import com.google.devtools.build.lib.events.Location; |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 28 | import com.google.devtools.build.lib.profiler.Profiler; |
| 29 | import com.google.devtools.build.lib.profiler.ProfilerTask; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 30 | import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral; |
| 31 | import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements; |
Mark Schaller | 6df8179 | 2015-12-10 18:47:47 +0000 | [diff] [blame] | 32 | import com.google.devtools.build.lib.util.Preconditions; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 33 | import java.util.ArrayList; |
| 34 | import java.util.Collections; |
| 35 | import java.util.EnumSet; |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 36 | import java.util.HashMap; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 37 | import java.util.Iterator; |
| 38 | import java.util.List; |
| 39 | import java.util.Map; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 40 | |
Laurent Le Brun | 494eca9 | 2015-09-03 13:27:06 +0000 | [diff] [blame] | 41 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 42 | /** |
| 43 | * Recursive descent parser for LL(2) BUILD language. |
| 44 | * Loosely based on Python 2 grammar. |
| 45 | * See https://docs.python.org/2/reference/grammar.html |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 46 | */ |
Han-Wen Nienhuys | ceae8c5 | 2015-09-22 16:24:45 +0000 | [diff] [blame] | 47 | @VisibleForTesting |
| 48 | public class Parser { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 49 | |
| 50 | /** |
| 51 | * Combines the parser result into a single value object. |
| 52 | */ |
| 53 | public static final class ParseResult { |
| 54 | /** The statements (rules, basically) from the parsed file. */ |
| 55 | public final List<Statement> statements; |
| 56 | |
| 57 | /** The comments from the parsed file. */ |
| 58 | public final List<Comment> comments; |
| 59 | |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 60 | /** Represents every statement in the file. */ |
| 61 | public final Location location; |
| 62 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 63 | /** Whether the file contained any errors. */ |
| 64 | public final boolean containsErrors; |
| 65 | |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 66 | public ParseResult(List<Statement> statements, List<Comment> comments, Location location, |
| 67 | boolean containsErrors) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 68 | // No need to copy here; when the object is created, the parser instance is just about to go |
| 69 | // out of scope and be garbage collected. |
| 70 | this.statements = Preconditions.checkNotNull(statements); |
| 71 | this.comments = Preconditions.checkNotNull(comments); |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 72 | this.location = location; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 73 | this.containsErrors = containsErrors; |
| 74 | } |
| 75 | } |
| 76 | |
/** Selects which language features the parser should accept. */
public enum ParsingMode {
  /** Mode used when parsing BUILD files. */
  BUILD,

  /** Mode used when parsing .bzl files. */
  SKYLARK
}
| 86 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 87 | private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET = |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 88 | EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 89 | |
| 90 | private static final EnumSet<TokenKind> LIST_TERMINATOR_SET = |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 91 | EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 92 | |
| 93 | private static final EnumSet<TokenKind> DICT_TERMINATOR_SET = |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 94 | EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 95 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 96 | private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET = |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 97 | EnumSet.of( |
| 98 | TokenKind.EOF, |
| 99 | TokenKind.NEWLINE, |
Laurent Le Brun | 29ad862 | 2015-09-18 10:45:07 +0000 | [diff] [blame] | 100 | TokenKind.EQUALS, |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 101 | TokenKind.RBRACE, |
| 102 | TokenKind.RBRACKET, |
| 103 | TokenKind.RPAREN, |
| 104 | TokenKind.SEMI); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 105 | |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 106 | private static final EnumSet<TokenKind> BLOCK_STARTING_SET = |
| 107 | EnumSet.of( |
| 108 | TokenKind.CLASS, |
| 109 | TokenKind.DEF, |
| 110 | TokenKind.ELSE, |
| 111 | TokenKind.FOR, |
| 112 | TokenKind.IF, |
| 113 | TokenKind.TRY); |
| 114 | |
| 115 | private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET = |
| 116 | EnumSet.of( |
| 117 | TokenKind.COLON, |
| 118 | TokenKind.COMMA, |
| 119 | TokenKind.EOF, |
| 120 | TokenKind.FOR, |
| 121 | TokenKind.MINUS, |
| 122 | TokenKind.PERCENT, |
| 123 | TokenKind.PLUS, |
| 124 | TokenKind.RBRACKET, |
| 125 | TokenKind.RPAREN, |
| 126 | TokenKind.SLASH); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 127 | |
Florian Weikert | 1f004e5 | 2015-10-16 09:43:48 +0000 | [diff] [blame] | 128 | /** |
| 129 | * Keywords that are forbidden in both Skylark and BUILD parsing modes. |
| 130 | * |
| 131 | * <p>(Mapping: token -> human-readable string description) |
| 132 | */ |
| 133 | private static final ImmutableMap<TokenKind, String> ILLEGAL_BLOCK_KEYWORDS = |
| 134 | ImmutableMap.of(TokenKind.CLASS, "Class definition", TokenKind.TRY, "Try statement"); |
| 135 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 136 | private Token token; // current lookahead token |
| 137 | private Token pushedToken = null; // used to implement LL(2) |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 138 | private int loopCount; // break/continue keywords can be used only inside a loop |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 139 | |
| 140 | private static final boolean DEBUGGING = false; |
| 141 | |
| 142 | private final Lexer lexer; |
| 143 | private final EventHandler eventHandler; |
| 144 | private final List<Comment> comments; |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 145 | private final ParsingMode parsingMode; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 146 | |
| 147 | private static final Map<TokenKind, Operator> binaryOperators = |
| 148 | new ImmutableMap.Builder<TokenKind, Operator>() |
| 149 | .put(TokenKind.AND, Operator.AND) |
| 150 | .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS) |
| 151 | .put(TokenKind.GREATER, Operator.GREATER) |
| 152 | .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS) |
| 153 | .put(TokenKind.IN, Operator.IN) |
| 154 | .put(TokenKind.LESS, Operator.LESS) |
| 155 | .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS) |
| 156 | .put(TokenKind.MINUS, Operator.MINUS) |
| 157 | .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS) |
Laurent Le Brun | e3f4ed7 | 2015-05-08 14:47:26 +0000 | [diff] [blame] | 158 | .put(TokenKind.NOT_IN, Operator.NOT_IN) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 159 | .put(TokenKind.OR, Operator.OR) |
| 160 | .put(TokenKind.PERCENT, Operator.PERCENT) |
Laurent Le Brun | 8a52826 | 2015-04-15 14:23:35 +0000 | [diff] [blame] | 161 | .put(TokenKind.SLASH, Operator.DIVIDE) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 162 | .put(TokenKind.PLUS, Operator.PLUS) |
Laurent Le Brun | 092f13b | 2015-08-24 14:50:00 +0000 | [diff] [blame] | 163 | .put(TokenKind.PIPE, Operator.PIPE) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 164 | .put(TokenKind.STAR, Operator.MULT) |
| 165 | .build(); |
| 166 | |
Googler | 1315175 | 2016-06-02 18:37:13 +0000 | [diff] [blame] | 167 | // TODO(bazel-team): add support for |= |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 168 | private static final Map<TokenKind, Operator> augmentedAssignmentMethods = |
| 169 | new ImmutableMap.Builder<TokenKind, Operator>() |
Googler | 1315175 | 2016-06-02 18:37:13 +0000 | [diff] [blame] | 170 | .put(TokenKind.PLUS_EQUALS, Operator.PLUS) |
| 171 | .put(TokenKind.MINUS_EQUALS, Operator.MINUS) |
| 172 | .put(TokenKind.STAR_EQUALS, Operator.MULT) |
| 173 | .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE) |
| 174 | .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT) |
| 175 | .build(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 176 | |
| 177 | /** Highest precedence goes last. |
| 178 | * Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence |
| 179 | **/ |
| 180 | private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of( |
| 181 | EnumSet.of(Operator.OR), |
| 182 | EnumSet.of(Operator.AND), |
| 183 | EnumSet.of(Operator.NOT), |
| 184 | EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS, |
Laurent Le Brun | e3f4ed7 | 2015-05-08 14:47:26 +0000 | [diff] [blame] | 185 | Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN), |
Laurent Le Brun | 092f13b | 2015-08-24 14:50:00 +0000 | [diff] [blame] | 186 | EnumSet.of(Operator.PIPE), |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 187 | EnumSet.of(Operator.MINUS, Operator.PLUS), |
Laurent Le Brun | 8a52826 | 2015-04-15 14:23:35 +0000 | [diff] [blame] | 188 | EnumSet.of(Operator.DIVIDE, Operator.MULT, Operator.PERCENT)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 189 | |
Laurent Le Brun | e51a4d2 | 2016-10-11 18:04:16 +0000 | [diff] [blame] | 190 | private final Iterator<Token> tokens; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 191 | private int errorsCount; |
| 192 | private boolean recoveryMode; // stop reporting errors until next statement |
| 193 | |
/**
 * Creates a parser that reads the tokens of {@code lexer} and immediately advances to the
 * first token.
 *
 * @param lexer the lexer whose tokens are parsed; also used to create locations
 * @param eventHandler receiver of the error events reported by this parser
 * @param parsingMode selects which features the parser accepts
 */
private Parser(Lexer lexer, EventHandler eventHandler, ParsingMode parsingMode) {
  this.parsingMode = parsingMode;
  this.eventHandler = eventHandler;
  this.lexer = lexer;
  this.comments = new ArrayList<>();
  this.tokens = lexer.getTokens().iterator();
  // Prime the lookahead. This must come last, since nextToken() reads the fields set above.
  nextToken();
}
| 202 | |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 203 | private static Location locationFromStatements(Lexer lexer, List<Statement> statements) { |
| 204 | if (!statements.isEmpty()) { |
| 205 | return lexer.createLocation( |
| 206 | statements.get(0).getLocation().getStartOffset(), |
Laurent Le Brun | e51a4d2 | 2016-10-11 18:04:16 +0000 | [diff] [blame] | 207 | Iterables.getLast(statements).getLocation().getEndOffset()); |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 208 | } else { |
| 209 | return Location.fromPathFragment(lexer.getFilename()); |
| 210 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 211 | } |
| 212 | |
| 213 | /** |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 214 | * Entry-point to parser that parses a build file with comments. All errors encountered during |
| 215 | * parsing are reported via "reporter". |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 216 | */ |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 217 | public static ParseResult parseFile(ParserInputSource input, EventHandler eventHandler) { |
| 218 | Lexer lexer = new Lexer(input, eventHandler); |
| 219 | Parser parser = new Parser(lexer, eventHandler, BUILD); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 220 | List<Statement> statements = parser.parseFileInput(); |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 221 | return new ParseResult(statements, parser.comments, locationFromStatements(lexer, statements), |
| 222 | parser.errorsCount > 0 || lexer.containsErrors()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 223 | } |
| 224 | |
| 225 | /** |
Laurent Le Brun | 8c8857d | 2016-08-04 10:22:16 +0000 | [diff] [blame] | 226 | * Entry-point to parser that parses a build file with comments. All errors encountered during |
| 227 | * parsing are reported via "reporter". Enable Skylark extensions that are not part of the core |
| 228 | * BUILD language. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 229 | */ |
| 230 | public static ParseResult parseFileForSkylark( |
Laurent Le Brun | 8c8857d | 2016-08-04 10:22:16 +0000 | [diff] [blame] | 231 | ParserInputSource input, EventHandler eventHandler) { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 232 | Lexer lexer = new Lexer(input, eventHandler); |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 233 | Parser parser = new Parser(lexer, eventHandler, SKYLARK); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 234 | List<Statement> statements = parser.parseFileInput(); |
Laurent Le Brun | 8c8857d | 2016-08-04 10:22:16 +0000 | [diff] [blame] | 235 | return new ParseResult( |
| 236 | statements, |
| 237 | parser.comments, |
| 238 | locationFromStatements(lexer, statements), |
| 239 | parser.errorsCount > 0 || lexer.containsErrors()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 240 | } |
| 241 | |
| 242 | /** |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 243 | * Entry-point to parser that parses an expression. All errors encountered |
| 244 | * during parsing are reported via "reporter". The expression may be followed |
| 245 | * by newline tokens. |
| 246 | */ |
| 247 | @VisibleForTesting |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 248 | public static Expression parseExpression(ParserInputSource input, EventHandler eventHandler) { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 249 | Lexer lexer = new Lexer(input, eventHandler); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 250 | Parser parser = new Parser(lexer, eventHandler, null); |
| 251 | Expression result = parser.parseExpression(); |
| 252 | while (parser.token.kind == TokenKind.NEWLINE) { |
| 253 | parser.nextToken(); |
| 254 | } |
| 255 | parser.expect(TokenKind.EOF); |
| 256 | return result; |
| 257 | } |
| 258 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 259 | private void reportError(Location location, String message) { |
| 260 | errorsCount++; |
| 261 | // Limit the number of reported errors to avoid spamming output. |
| 262 | if (errorsCount <= 5) { |
| 263 | eventHandler.handle(Event.error(location, message)); |
| 264 | } |
| 265 | } |
| 266 | |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 267 | private void syntaxError(Token token, String message) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 268 | if (!recoveryMode) { |
| 269 | String msg = token.kind == TokenKind.INDENT |
| 270 | ? "indentation error" |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 271 | : "syntax error at '" + token + "': " + message; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 272 | reportError(lexer.createLocation(token.left, token.right), msg); |
| 273 | recoveryMode = true; |
| 274 | } |
| 275 | } |
| 276 | |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 277 | /** |
| 278 | * Consumes the current token. If it is not of the specified (expected) |
| 279 | * kind, reports a syntax error. |
| 280 | */ |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 281 | private boolean expect(TokenKind kind) { |
| 282 | boolean expected = token.kind == kind; |
| 283 | if (!expected) { |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 284 | syntaxError(token, "expected " + kind.getPrettyName()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 285 | } |
| 286 | nextToken(); |
| 287 | return expected; |
| 288 | } |
| 289 | |
| 290 | /** |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 291 | * Same as expect, but stop the recovery mode if the token was expected. |
| 292 | */ |
| 293 | private void expectAndRecover(TokenKind kind) { |
| 294 | if (expect(kind)) { |
| 295 | recoveryMode = false; |
| 296 | } |
| 297 | } |
| 298 | |
| 299 | /** |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 300 | * Consume tokens past the first token that has a kind that is in the set of |
| 301 | * teminatingTokens. |
| 302 | * @param terminatingTokens |
| 303 | * @return the end offset of the terminating token. |
| 304 | */ |
| 305 | private int syncPast(EnumSet<TokenKind> terminatingTokens) { |
| 306 | Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF)); |
| 307 | while (!terminatingTokens.contains(token.kind)) { |
| 308 | nextToken(); |
| 309 | } |
| 310 | int end = token.right; |
| 311 | // read past the synchronization token |
| 312 | nextToken(); |
| 313 | return end; |
| 314 | } |
| 315 | |
| 316 | /** |
| 317 | * Consume tokens until we reach the first token that has a kind that is in |
| 318 | * the set of teminatingTokens. |
| 319 | * @param terminatingTokens |
| 320 | * @return the end offset of the terminating token. |
| 321 | */ |
| 322 | private int syncTo(EnumSet<TokenKind> terminatingTokens) { |
| 323 | // EOF must be in the set to prevent an infinite loop |
| 324 | Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF)); |
| 325 | // read past the problematic token |
| 326 | int previous = token.right; |
| 327 | nextToken(); |
| 328 | int current = previous; |
| 329 | while (!terminatingTokens.contains(token.kind)) { |
| 330 | nextToken(); |
| 331 | previous = current; |
| 332 | current = token.right; |
| 333 | } |
| 334 | return previous; |
| 335 | } |
| 336 | |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 337 | // Keywords that exist in Python and that we don't parse. |
| 338 | private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS = |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 339 | EnumSet.of(TokenKind.AS, TokenKind.ASSERT, |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 340 | TokenKind.DEL, TokenKind.EXCEPT, TokenKind.FINALLY, TokenKind.FROM, TokenKind.GLOBAL, |
| 341 | TokenKind.IMPORT, TokenKind.IS, TokenKind.LAMBDA, TokenKind.NONLOCAL, TokenKind.RAISE, |
| 342 | TokenKind.TRY, TokenKind.WITH, TokenKind.WHILE, TokenKind.YIELD); |
| 343 | |
| 344 | private void checkForbiddenKeywords(Token token) { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 345 | if (!FORBIDDEN_KEYWORDS.contains(token.kind)) { |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 346 | return; |
| 347 | } |
| 348 | String error; |
| 349 | switch (token.kind) { |
| 350 | case ASSERT: error = "'assert' not supported, use 'fail' instead"; break; |
Laurent Le Brun | 44ad7fa | 2016-10-11 12:09:05 +0000 | [diff] [blame] | 351 | case DEL: |
| 352 | error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list"; |
| 353 | break; |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 354 | case IMPORT: error = "'import' not supported, use 'load' instead"; break; |
| 355 | case IS: error = "'is' not supported, use '==' instead"; break; |
| 356 | case LAMBDA: error = "'lambda' not supported, declare a function instead"; break; |
| 357 | case RAISE: error = "'raise' not supported, use 'fail' instead"; break; |
Laurent Le Brun | 44ad7fa | 2016-10-11 12:09:05 +0000 | [diff] [blame] | 358 | case TRY: error = "'try' not supported, all exceptions are fatal"; break; |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 359 | case WHILE: error = "'while' not supported, use 'for' instead"; break; |
| 360 | default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break; |
| 361 | } |
| 362 | reportError(lexer.createLocation(token.left, token.right), error); |
| 363 | } |
| 364 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 365 | private void nextToken() { |
| 366 | if (pushedToken != null) { |
| 367 | token = pushedToken; |
| 368 | pushedToken = null; |
| 369 | } else { |
| 370 | if (token == null || token.kind != TokenKind.EOF) { |
| 371 | token = tokens.next(); |
| 372 | // transparently handle comment tokens |
| 373 | while (token.kind == TokenKind.COMMENT) { |
| 374 | makeComment(token); |
| 375 | token = tokens.next(); |
| 376 | } |
| 377 | } |
| 378 | } |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 379 | checkForbiddenKeywords(token); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 380 | if (DEBUGGING) { |
| 381 | System.err.print(token); |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | private void pushToken(Token tokenToPush) { |
| 386 | if (pushedToken != null) { |
| 387 | throw new IllegalStateException("Exceeded LL(2) lookahead!"); |
| 388 | } |
| 389 | pushedToken = token; |
| 390 | token = tokenToPush; |
| 391 | } |
| 392 | |
| 393 | // create an error expression |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 394 | private Identifier makeErrorExpression(int start, int end) { |
| 395 | return setLocation(new Identifier("$error$"), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 396 | } |
| 397 | |
| 398 | // Convenience wrapper around ASTNode.setLocation that returns the node. |
Francois-Rene Rideau | edf7bdb | 2015-03-02 17:12:45 +0000 | [diff] [blame] | 399 | private <NODE extends ASTNode> NODE setLocation(NODE node, Location location) { |
| 400 | return ASTNode.<NODE>setLocation(location, node); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 401 | } |
| 402 | |
| 403 | // Another convenience wrapper method around ASTNode.setLocation |
Francois-Rene Rideau | edf7bdb | 2015-03-02 17:12:45 +0000 | [diff] [blame] | 404 | private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, int endOffset) { |
| 405 | return setLocation(node, lexer.createLocation(startOffset, endOffset)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 406 | } |
| 407 | |
| 408 | // Convenience method that uses end offset from the last node. |
| 409 | private <NODE extends ASTNode> NODE setLocation(NODE node, int startOffset, ASTNode lastNode) { |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 410 | Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node"); |
| 411 | Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 412 | return setLocation(node, startOffset, lastNode.getLocation().getEndOffset()); |
| 413 | } |
| 414 | |
| 415 | // create a funcall expression |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 416 | private Expression makeFuncallExpression(Expression receiver, Identifier function, |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 417 | List<Argument.Passed> args, |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 418 | int start, int end) { |
| 419 | if (function.getLocation() == null) { |
| 420 | function = setLocation(function, start, end); |
| 421 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 422 | return setLocation(new FuncallExpression(receiver, function, args), start, end); |
| 423 | } |
| 424 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 425 | // arg ::= IDENTIFIER '=' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 426 | // | expr |
Laurent Le Brun | b326638 | 2015-05-27 16:14:43 +0000 | [diff] [blame] | 427 | // | *args (only in Skylark mode) |
| 428 | // | **kwargs (only in Skylark mode) |
| 429 | // To keep BUILD files declarative and easy to process, *args and **kwargs |
| 430 | // arguments are allowed only in Skylark mode. |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 431 | private Argument.Passed parseFuncallArgument() { |
| 432 | final int start = token.left; |
| 433 | // parse **expr |
| 434 | if (token.kind == TokenKind.STAR_STAR) { |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 435 | if (parsingMode != SKYLARK) { |
Laurent Le Brun | b326638 | 2015-05-27 16:14:43 +0000 | [diff] [blame] | 436 | reportError( |
| 437 | lexer.createLocation(token.left, token.right), |
| 438 | "**kwargs arguments are not allowed in BUILD files"); |
| 439 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 440 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 441 | Expression expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 442 | return setLocation(new Argument.StarStar(expr), start, expr); |
| 443 | } |
| 444 | // parse *expr |
| 445 | if (token.kind == TokenKind.STAR) { |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 446 | if (parsingMode != SKYLARK) { |
Laurent Le Brun | b326638 | 2015-05-27 16:14:43 +0000 | [diff] [blame] | 447 | reportError( |
| 448 | lexer.createLocation(token.left, token.right), |
| 449 | "*args arguments are not allowed in BUILD files"); |
| 450 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 451 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 452 | Expression expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 453 | return setLocation(new Argument.Star(expr), start, expr); |
| 454 | } |
| 455 | // parse keyword = expr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 456 | if (token.kind == TokenKind.IDENTIFIER) { |
| 457 | Token identToken = token; |
| 458 | String name = (String) token.value; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 459 | nextToken(); |
| 460 | if (token.kind == TokenKind.EQUALS) { // it's a named argument |
| 461 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 462 | Expression expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 463 | return setLocation(new Argument.Keyword(name, expr), start, expr); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 464 | } else { // oops, back up! |
| 465 | pushToken(identToken); |
| 466 | } |
| 467 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 468 | // parse a positional argument |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 469 | Expression expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 470 | return setLocation(new Argument.Positional(expr), start, expr); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 471 | } |
| 472 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 473 | // arg ::= IDENTIFIER '=' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 474 | // | IDENTIFIER |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 475 | private Parameter<Expression, Expression> parseFunctionParameter() { |
| 476 | // TODO(bazel-team): optionally support type annotations |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 477 | int start = token.left; |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 478 | if (token.kind == TokenKind.STAR_STAR) { // kwarg |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 479 | nextToken(); |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 480 | Identifier ident = parseIdent(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 481 | return setLocation(new Parameter.StarStar<Expression, Expression>( |
| 482 | ident.getName()), start, ident); |
| 483 | } else if (token.kind == TokenKind.STAR) { // stararg |
| 484 | int end = token.right; |
| 485 | nextToken(); |
| 486 | if (token.kind == TokenKind.IDENTIFIER) { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 487 | Identifier ident = parseIdent(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 488 | return setLocation(new Parameter.Star<Expression, Expression>(ident.getName()), |
| 489 | start, ident); |
| 490 | } else { |
| 491 | return setLocation(new Parameter.Star<Expression, Expression>(null), start, end); |
| 492 | } |
| 493 | } else { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 494 | Identifier ident = parseIdent(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 495 | if (token.kind == TokenKind.EQUALS) { // there's a default value |
| 496 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 497 | Expression expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 498 | return setLocation(new Parameter.Optional<Expression, Expression>( |
| 499 | ident.getName(), expr), start, expr); |
| 500 | } else { |
| 501 | return setLocation(new Parameter.Mandatory<Expression, Expression>( |
| 502 | ident.getName()), start, ident); |
| 503 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 504 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 505 | } |
| 506 | |
| 507 | // funcall_suffix ::= '(' arg_list? ')' |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 508 | private Expression parseFuncallSuffix(int start, Expression receiver, Identifier function) { |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 509 | List<Argument.Passed> args = Collections.emptyList(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 510 | expect(TokenKind.LPAREN); |
| 511 | int end; |
| 512 | if (token.kind == TokenKind.RPAREN) { |
| 513 | end = token.right; |
| 514 | nextToken(); // RPAREN |
| 515 | } else { |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 516 | args = parseFuncallArguments(); // (includes optional trailing comma) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 517 | end = token.right; |
| 518 | expect(TokenKind.RPAREN); |
| 519 | } |
| 520 | return makeFuncallExpression(receiver, function, args, start, end); |
| 521 | } |
| 522 | |
| 523 | // selector_suffix ::= '.' IDENTIFIER |
| 524 | // |'.' IDENTIFIER funcall_suffix |
| 525 | private Expression parseSelectorSuffix(int start, Expression receiver) { |
| 526 | expect(TokenKind.DOT); |
| 527 | if (token.kind == TokenKind.IDENTIFIER) { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 528 | Identifier ident = parseIdent(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 529 | if (token.kind == TokenKind.LPAREN) { |
| 530 | return parseFuncallSuffix(start, receiver, ident); |
| 531 | } else { |
| 532 | return setLocation(new DotExpression(receiver, ident), start, token.right); |
| 533 | } |
| 534 | } else { |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 535 | syntaxError(token, "expected identifier after dot"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 536 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 537 | return makeErrorExpression(start, end); |
| 538 | } |
| 539 | } |
| 540 | |
| 541 | // arg_list ::= ( (arg ',')* arg ','? )? |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 542 | private List<Argument.Passed> parseFuncallArguments() { |
| 543 | List<Argument.Passed> arguments = |
| 544 | parseFunctionArguments(new Supplier<Argument.Passed>() { |
| 545 | @Override public Argument.Passed get() { |
| 546 | return parseFuncallArgument(); |
| 547 | } |
| 548 | }); |
| 549 | try { |
| 550 | Argument.validateFuncallArguments(arguments); |
| 551 | } catch (Argument.ArgumentException e) { |
| 552 | reportError(lexer.createLocation(token.left, token.right), e.getMessage()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 553 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 554 | return arguments; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 555 | } |
| 556 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 557 | // expr_list parses a comma-separated list of expression. It assumes that the |
| 558 | // first expression was already parsed, so it starts with a comma. |
| 559 | // It is used to parse tuples and list elements. |
| 560 | // expr_list ::= ( ',' expr )* ','? |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 561 | private List<Expression> parseExprList(boolean trailingColonAllowed) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 562 | List<Expression> list = new ArrayList<>(); |
| 563 | // terminating tokens for an expression list |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 564 | while (token.kind == TokenKind.COMMA) { |
| 565 | expect(TokenKind.COMMA); |
| 566 | if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 567 | if (!trailingColonAllowed) { |
| 568 | reportError( |
| 569 | lexer.createLocation(token.left, token.right), |
| 570 | "Trailing comma is allowed only in parenthesized tuples."); |
| 571 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 572 | break; |
| 573 | } |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 574 | list.add(parseNonTupleExpression()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 575 | } |
| 576 | return list; |
| 577 | } |
| 578 | |
| 579 | // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )? |
| 580 | private List<DictionaryEntryLiteral> parseDictEntryList() { |
| 581 | List<DictionaryEntryLiteral> list = new ArrayList<>(); |
| 582 | // the terminating token for a dict entry list |
| 583 | while (token.kind != TokenKind.RBRACE) { |
| 584 | list.add(parseDictEntry()); |
| 585 | if (token.kind == TokenKind.COMMA) { |
| 586 | nextToken(); |
| 587 | } else { |
| 588 | break; |
| 589 | } |
| 590 | } |
| 591 | return list; |
| 592 | } |
| 593 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 594 | // dict_entry ::= nontupleexpr ':' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 595 | private DictionaryEntryLiteral parseDictEntry() { |
| 596 | int start = token.left; |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 597 | Expression key = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 598 | expect(TokenKind.COLON); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 599 | Expression value = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 600 | return setLocation(new DictionaryEntryLiteral(key, value), start, value); |
| 601 | } |
| 602 | |
Laurent Le Brun | 4aa2912 | 2015-09-10 11:31:30 +0000 | [diff] [blame] | 603 | /** |
| 604 | * Parse a String literal value, e.g. "str". |
| 605 | */ |
| 606 | private StringLiteral parseStringLiteral() { |
| 607 | Preconditions.checkState(token.kind == TokenKind.STRING); |
| 608 | int end = token.right; |
| 609 | char quoteChar = lexer.charAt(token.left); |
| 610 | StringLiteral literal = |
| 611 | setLocation(new StringLiteral((String) token.value, quoteChar), token.left, end); |
| 612 | |
| 613 | nextToken(); |
| 614 | if (token.kind == TokenKind.STRING) { |
| 615 | reportError(lexer.createLocation(end, token.left), |
| 616 | "Implicit string concatenation is forbidden, use the + operator"); |
| 617 | } |
| 618 | return literal; |
| 619 | } |
| 620 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 621 | // primary ::= INTEGER |
| 622 | // | STRING |
| 623 | // | STRING '.' IDENTIFIER funcall_suffix |
| 624 | // | IDENTIFIER |
| 625 | // | IDENTIFIER funcall_suffix |
| 626 | // | IDENTIFIER '.' selector_suffix |
| 627 | // | list_expression |
| 628 | // | '(' ')' // a tuple with zero elements |
| 629 | // | '(' expr ')' // a parenthesized expression |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 630 | // | dict_expression |
| 631 | // | '-' primary_with_suffix |
| 632 | private Expression parsePrimary() { |
| 633 | int start = token.left; |
| 634 | switch (token.kind) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 635 | case INT: |
| 636 | { |
| 637 | IntegerLiteral literal = new IntegerLiteral((Integer) token.value); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 638 | setLocation(literal, start, token.right); |
| 639 | nextToken(); |
| 640 | return literal; |
| 641 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 642 | case STRING: |
| 643 | return parseStringLiteral(); |
| 644 | case IDENTIFIER: |
| 645 | { |
| 646 | Identifier ident = parseIdent(); |
| 647 | if (token.kind == TokenKind.LPAREN) { // it's a function application |
| 648 | return parseFuncallSuffix(start, null, ident); |
| 649 | } else { |
| 650 | return ident; |
| 651 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 652 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 653 | case LBRACKET: // it's a list |
| 654 | return parseListMaker(); |
| 655 | case LBRACE: // it's a dictionary |
| 656 | return parseDictExpression(); |
| 657 | case LPAREN: |
| 658 | { |
| 659 | nextToken(); |
| 660 | // check for the empty tuple literal |
| 661 | if (token.kind == TokenKind.RPAREN) { |
| 662 | ListLiteral literal = ListLiteral.makeTuple(Collections.<Expression>emptyList()); |
| 663 | setLocation(literal, start, token.right); |
| 664 | nextToken(); |
| 665 | return literal; |
| 666 | } |
| 667 | // parse the first expression |
| 668 | Expression expression = parseExpression(true); |
| 669 | setLocation(expression, start, token.right); |
| 670 | if (token.kind == TokenKind.RPAREN) { |
| 671 | nextToken(); |
| 672 | return expression; |
| 673 | } |
| 674 | expect(TokenKind.RPAREN); |
| 675 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 676 | return makeErrorExpression(start, end); |
| 677 | } |
| 678 | case MINUS: |
| 679 | { |
| 680 | nextToken(); |
| 681 | List<Argument.Passed> args = new ArrayList<>(); |
| 682 | Expression expr = parsePrimaryWithSuffix(); |
| 683 | args.add(setLocation(new Argument.Positional(expr), start, expr)); |
| 684 | return makeFuncallExpression(null, new Identifier("-"), args, start, token.right); |
| 685 | } |
| 686 | default: |
| 687 | { |
| 688 | syntaxError(token, "expected expression"); |
| 689 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 690 | return makeErrorExpression(start, end); |
| 691 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 692 | } |
| 693 | } |
| 694 | |
| 695 | // primary_with_suffix ::= primary selector_suffix* |
| 696 | // | primary substring_suffix |
| 697 | private Expression parsePrimaryWithSuffix() { |
| 698 | int start = token.left; |
| 699 | Expression receiver = parsePrimary(); |
| 700 | while (true) { |
| 701 | if (token.kind == TokenKind.DOT) { |
| 702 | receiver = parseSelectorSuffix(start, receiver); |
| 703 | } else if (token.kind == TokenKind.LBRACKET) { |
| 704 | receiver = parseSubstringSuffix(start, receiver); |
| 705 | } else { |
| 706 | break; |
| 707 | } |
| 708 | } |
| 709 | return receiver; |
| 710 | } |
| 711 | |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 712 | // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 713 | private Expression parseSubstringSuffix(int start, Expression receiver) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 714 | Expression startExpr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 715 | |
| 716 | expect(TokenKind.LBRACKET); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 717 | if (token.kind == TokenKind.COLON) { |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 718 | startExpr = setLocation(new Identifier("None"), token.left, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 719 | } else { |
Laurent Le Brun | 6824d86 | 2015-09-11 13:51:41 +0000 | [diff] [blame] | 720 | startExpr = parseExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 721 | } |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 722 | // This is an index/key access |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 723 | if (token.kind == TokenKind.RBRACKET) { |
| 724 | expect(TokenKind.RBRACKET); |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 725 | return setLocation(new IndexExpression(receiver, startExpr), start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 726 | } |
Laurent Le Brun | eeef30f | 2015-03-16 15:12:35 +0000 | [diff] [blame] | 727 | // This is a slice (or substring) |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 728 | Expression endExpr = parseSliceArgument(new Identifier("None")); |
| 729 | Expression stepExpr = parseSliceArgument(new IntegerLiteral(1)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 730 | expect(TokenKind.RBRACKET); |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 731 | return setLocation(new SliceExpression(receiver, startExpr, endExpr, stepExpr), |
| 732 | start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 733 | } |
| 734 | |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 735 | /** |
| 736 | * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice |
| 737 | * operation. If no such expression is found, this method returns an argument that represents |
| 738 | * {@code defaultValue}. |
| 739 | */ |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 740 | private Expression parseSliceArgument(Expression defaultValue) { |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 741 | Expression explicitArg = getSliceEndOrStepExpression(); |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 742 | if (explicitArg == null) { |
| 743 | return setLocation(defaultValue, token.left, token.right); |
| 744 | } |
| 745 | return explicitArg; |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 746 | } |
| 747 | |
| 748 | private Expression getSliceEndOrStepExpression() { |
| 749 | // There has to be a colon before any end or slice argument. |
| 750 | // However, if the next token thereafter is another colon or a right bracket, no argument value |
| 751 | // was specified. |
| 752 | if (token.kind == TokenKind.COLON) { |
| 753 | expect(TokenKind.COLON); |
| 754 | if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) { |
| 755 | return parseNonTupleExpression(); |
| 756 | } |
| 757 | } |
| 758 | return null; |
| 759 | } |
| 760 | |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 761 | // Equivalent to 'exprlist' rule in Python grammar. |
| 762 | // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','? |
| 763 | private Expression parseForLoopVariables() { |
| 764 | // We cannot reuse parseExpression because it would parse the 'in' operator. |
| 765 | // e.g. "for i in e: pass" -> we want to parse only "i" here. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 766 | int start = token.left; |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 767 | Expression e1 = parsePrimaryWithSuffix(); |
| 768 | if (token.kind != TokenKind.COMMA) { |
| 769 | return e1; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 770 | } |
| 771 | |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 772 | // It's a tuple |
| 773 | List<Expression> tuple = new ArrayList<>(); |
| 774 | tuple.add(e1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 775 | while (token.kind == TokenKind.COMMA) { |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 776 | expect(TokenKind.COMMA); |
| 777 | if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) { |
| 778 | break; |
| 779 | } |
| 780 | tuple.add(parsePrimaryWithSuffix()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 781 | } |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 782 | return setLocation(ListLiteral.makeTuple(tuple), start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 783 | } |
| 784 | |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 785 | // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix |
| 786 | // | 'IF' expr comprehension_suffix |
| 787 | // | ']' |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 788 | private Expression parseComprehensionSuffix( |
| 789 | AbstractComprehension comprehension, TokenKind closingBracket) { |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 790 | while (true) { |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 791 | if (token.kind == TokenKind.FOR) { |
| 792 | nextToken(); |
| 793 | Expression loopVar = parseForLoopVariables(); |
| 794 | expect(TokenKind.IN); |
| 795 | // The expression cannot be a ternary expression ('x if y else z') due to |
| 796 | // conflicts in Python grammar ('if' is used by the comprehension). |
| 797 | Expression listExpression = parseNonTupleExpression(0); |
| 798 | comprehension.addFor(loopVar, listExpression); |
| 799 | } else if (token.kind == TokenKind.IF) { |
| 800 | nextToken(); |
| 801 | comprehension.addIf(parseExpression()); |
| 802 | } else if (token.kind == closingBracket) { |
| 803 | nextToken(); |
| 804 | return comprehension; |
| 805 | } else { |
| 806 | syntaxError(token, "expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'"); |
| 807 | syncPast(LIST_TERMINATOR_SET); |
| 808 | return makeErrorExpression(token.left, token.right); |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 809 | } |
| 810 | } |
| 811 | } |
| 812 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 813 | // list_maker ::= '[' ']' |
| 814 | // |'[' expr ']' |
| 815 | // |'[' expr expr_list ']' |
| 816 | // |'[' expr ('FOR' loop_variables 'IN' expr)+ ']' |
| 817 | private Expression parseListMaker() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 818 | int start = token.left; |
| 819 | expect(TokenKind.LBRACKET); |
| 820 | if (token.kind == TokenKind.RBRACKET) { // empty List |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 821 | ListLiteral literal = ListLiteral.emptyList(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 822 | setLocation(literal, start, token.right); |
| 823 | nextToken(); |
| 824 | return literal; |
| 825 | } |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 826 | Expression expression = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 827 | Preconditions.checkNotNull(expression, |
| 828 | "null element in list in AST at %s:%s", token.left, token.right); |
| 829 | switch (token.kind) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 830 | case RBRACKET: // singleton List |
| 831 | { |
| 832 | ListLiteral literal = ListLiteral.makeList(Collections.singletonList(expression)); |
| 833 | setLocation(literal, start, token.right); |
| 834 | nextToken(); |
| 835 | return literal; |
| 836 | } |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 837 | case FOR: |
| 838 | { // list comprehension |
| 839 | Expression result = |
| 840 | parseComprehensionSuffix(new ListComprehension(expression), TokenKind.RBRACKET); |
| 841 | return setLocation(result, start, token.right); |
| 842 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 843 | case COMMA: |
| 844 | { |
| 845 | List<Expression> list = parseExprList(true); |
| 846 | Preconditions.checkState( |
| 847 | !list.contains(null), |
| 848 | "null element in list in AST at %s:%s", |
| 849 | token.left, |
| 850 | token.right); |
| 851 | list.add(0, expression); |
| 852 | if (token.kind == TokenKind.RBRACKET) { |
| 853 | ListLiteral literal = ListLiteral.makeList(list); |
| 854 | setLocation(literal, start, token.right); |
| 855 | nextToken(); |
| 856 | return literal; |
| 857 | } |
| 858 | expect(TokenKind.RBRACKET); |
| 859 | int end = syncPast(LIST_TERMINATOR_SET); |
| 860 | return makeErrorExpression(start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 861 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 862 | default: |
| 863 | { |
| 864 | syntaxError(token, "expected ',', 'for' or ']'"); |
| 865 | int end = syncPast(LIST_TERMINATOR_SET); |
| 866 | return makeErrorExpression(start, end); |
| 867 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 868 | } |
| 869 | } |
| 870 | |
| 871 | // dict_expression ::= '{' '}' |
| 872 | // |'{' dict_entry_list '}' |
| 873 | // |'{' dict_entry 'FOR' loop_variables 'IN' expr '}' |
| 874 | private Expression parseDictExpression() { |
| 875 | int start = token.left; |
| 876 | expect(TokenKind.LBRACE); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 877 | if (token.kind == TokenKind.RBRACE) { // empty Dict |
| 878 | DictionaryLiteral literal = DictionaryLiteral.emptyDict(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 879 | setLocation(literal, start, token.right); |
| 880 | nextToken(); |
| 881 | return literal; |
| 882 | } |
| 883 | DictionaryEntryLiteral entry = parseDictEntry(); |
| 884 | if (token.kind == TokenKind.FOR) { |
| 885 | // Dict comprehension |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 886 | Expression result = parseComprehensionSuffix( |
| 887 | new DictComprehension(entry.getKey(), entry.getValue()), TokenKind.RBRACE); |
| 888 | return setLocation(result, start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 889 | } |
| 890 | List<DictionaryEntryLiteral> entries = new ArrayList<>(); |
| 891 | entries.add(entry); |
| 892 | if (token.kind == TokenKind.COMMA) { |
| 893 | expect(TokenKind.COMMA); |
| 894 | entries.addAll(parseDictEntryList()); |
| 895 | } |
| 896 | if (token.kind == TokenKind.RBRACE) { |
| 897 | DictionaryLiteral literal = new DictionaryLiteral(entries); |
| 898 | setLocation(literal, start, token.right); |
| 899 | nextToken(); |
| 900 | return literal; |
| 901 | } |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 902 | expect(TokenKind.RBRACE); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 903 | int end = syncPast(DICT_TERMINATOR_SET); |
| 904 | return makeErrorExpression(start, end); |
| 905 | } |
| 906 | |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 907 | private Identifier parseIdent() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 908 | if (token.kind != TokenKind.IDENTIFIER) { |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 909 | expect(TokenKind.IDENTIFIER); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 910 | return makeErrorExpression(token.left, token.right); |
| 911 | } |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 912 | Identifier ident = new Identifier(((String) token.value)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 913 | setLocation(ident, token.left, token.right); |
| 914 | nextToken(); |
| 915 | return ident; |
| 916 | } |
| 917 | |
| 918 | // binop_expression ::= binop_expression OP binop_expression |
| 919 | // | parsePrimaryWithSuffix |
| 920 | // This function takes care of precedence between operators (see operatorPrecedence for |
| 921 | // the order), and it assumes left-to-right associativity. |
| 922 | private Expression parseBinOpExpression(int prec) { |
| 923 | int start = token.left; |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 924 | Expression expr = parseNonTupleExpression(prec + 1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 925 | // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is |
| 926 | // limited to number of different precedence levels (operatorPrecedence.size()). |
| 927 | for (;;) { |
Laurent Le Brun | e3f4ed7 | 2015-05-08 14:47:26 +0000 | [diff] [blame] | 928 | |
| 929 | if (token.kind == TokenKind.NOT) { |
| 930 | // If NOT appears when we expect a binary operator, it must be followed by IN. |
| 931 | // Since the code expects every operator to be a single token, we push a NOT_IN token. |
| 932 | expect(TokenKind.NOT); |
| 933 | expect(TokenKind.IN); |
| 934 | pushToken(new Token(TokenKind.NOT_IN, token.left, token.right)); |
| 935 | } |
| 936 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 937 | if (!binaryOperators.containsKey(token.kind)) { |
| 938 | return expr; |
| 939 | } |
| 940 | Operator operator = binaryOperators.get(token.kind); |
| 941 | if (!operatorPrecedence.get(prec).contains(operator)) { |
| 942 | return expr; |
| 943 | } |
| 944 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 945 | Expression secondary = parseNonTupleExpression(prec + 1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 946 | expr = optimizeBinOpExpression(operator, expr, secondary); |
| 947 | setLocation(expr, start, secondary); |
| 948 | } |
| 949 | } |
| 950 | |
| 951 | // Optimize binary expressions. |
| 952 | // string literal + string literal can be concatenated into one string literal |
| 953 | // so we don't have to do the expensive string concatenation at runtime. |
| 954 | private Expression optimizeBinOpExpression( |
| 955 | Operator operator, Expression expr, Expression secondary) { |
| 956 | if (operator == Operator.PLUS) { |
| 957 | if (expr instanceof StringLiteral && secondary instanceof StringLiteral) { |
| 958 | StringLiteral left = (StringLiteral) expr; |
| 959 | StringLiteral right = (StringLiteral) secondary; |
| 960 | if (left.getQuoteChar() == right.getQuoteChar()) { |
| 961 | return new StringLiteral(left.getValue() + right.getValue(), left.getQuoteChar()); |
| 962 | } |
| 963 | } |
| 964 | } |
| 965 | return new BinaryOperatorExpression(operator, expr, secondary); |
| 966 | } |
| 967 | |
| 968 | private Expression parseExpression() { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 969 | return parseExpression(false); |
| 970 | } |
| 971 | |
| 972 | // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of |
| 973 | // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity: |
| 974 | // e.g. fct(x, y) vs fct((x, y)) |
| 975 | // |
| 976 | // Tuples can have a trailing comma only when insideParens is true. This prevents bugs |
| 977 | // where a one-element tuple is surprisingly created: |
| 978 | // e.g. foo = f(x), |
| 979 | private Expression parseExpression(boolean insideParens) { |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 980 | int start = token.left; |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 981 | Expression expression = parseNonTupleExpression(); |
| 982 | if (token.kind != TokenKind.COMMA) { |
| 983 | return expression; |
| 984 | } |
| 985 | |
| 986 | // It's a tuple |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 987 | List<Expression> tuple = parseExprList(insideParens); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 988 | tuple.add(0, expression); // add the first expression to the front of the tuple |
| 989 | return setLocation(ListLiteral.makeTuple(tuple), start, token.right); |
| 990 | } |
| 991 | |
| 992 | // Equivalent to 'test' rule in Python grammar. |
| 993 | private Expression parseNonTupleExpression() { |
| 994 | int start = token.left; |
| 995 | Expression expr = parseNonTupleExpression(0); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 996 | if (token.kind == TokenKind.IF) { |
| 997 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 998 | Expression condition = parseNonTupleExpression(0); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 999 | if (token.kind == TokenKind.ELSE) { |
| 1000 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 1001 | Expression elseClause = parseNonTupleExpression(); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 1002 | return setLocation(new ConditionalExpression(expr, condition, elseClause), |
| 1003 | start, elseClause); |
| 1004 | } else { |
| 1005 | reportError(lexer.createLocation(start, token.left), |
| 1006 | "missing else clause in conditional expression or semicolon before if"); |
| 1007 | return expr; // Try to recover from error: drop the if and the expression after it. Ouch. |
| 1008 | } |
| 1009 | } |
| 1010 | return expr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1011 | } |
| 1012 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 1013 | private Expression parseNonTupleExpression(int prec) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1014 | if (prec >= operatorPrecedence.size()) { |
| 1015 | return parsePrimaryWithSuffix(); |
| 1016 | } |
| 1017 | if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) { |
| 1018 | return parseNotExpression(prec); |
| 1019 | } |
| 1020 | return parseBinOpExpression(prec); |
| 1021 | } |
| 1022 | |
| 1023 | // not_expr :== 'not' expr |
| 1024 | private Expression parseNotExpression(int prec) { |
| 1025 | int start = token.left; |
| 1026 | expect(TokenKind.NOT); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 1027 | Expression expression = parseNonTupleExpression(prec + 1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1028 | NotExpression notExpression = new NotExpression(expression); |
| 1029 | return setLocation(notExpression, start, token.right); |
| 1030 | } |
| 1031 | |
| 1032 | // file_input ::= ('\n' | stmt)* EOF |
| 1033 | private List<Statement> parseFileInput() { |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1034 | long startTime = Profiler.nanoTimeMaybe(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1035 | List<Statement> list = new ArrayList<>(); |
| 1036 | while (token.kind != TokenKind.EOF) { |
| 1037 | if (token.kind == TokenKind.NEWLINE) { |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1038 | expectAndRecover(TokenKind.NEWLINE); |
| 1039 | } else if (recoveryMode) { |
| 1040 | // If there was a parse error, we want to recover here |
| 1041 | // before starting a new top-level statement. |
| 1042 | syncTo(STATEMENT_TERMINATOR_SET); |
| 1043 | recoveryMode = false; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1044 | } else { |
| 1045 | parseTopLevelStatement(list); |
| 1046 | } |
| 1047 | } |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 1048 | Profiler.instance().logSimpleTask(startTime, ProfilerTask.SKYLARK_PARSER, ""); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1049 | return list; |
| 1050 | } |
| 1051 | |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1052 | // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)* COMMA? ')' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1053 | private void parseLoad(List<Statement> list) { |
| 1054 | int start = token.left; |
| 1055 | if (token.kind != TokenKind.STRING) { |
| 1056 | expect(TokenKind.STRING); |
| 1057 | return; |
| 1058 | } |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1059 | |
John Field | 9201fda | 2015-12-30 19:30:34 +0000 | [diff] [blame] | 1060 | StringLiteral importString = parseStringLiteral(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1061 | expect(TokenKind.COMMA); |
| 1062 | |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1063 | Map<Identifier, String> symbols = new HashMap<>(); |
| 1064 | parseLoadSymbol(symbols); // At least one symbol is required |
| 1065 | |
Laurent Le Brun | 73a9849 | 2015-03-17 15:46:19 +0000 | [diff] [blame] | 1066 | while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1067 | expect(TokenKind.COMMA); |
Laurent Le Brun | 59f587a | 2015-03-16 14:51:36 +0000 | [diff] [blame] | 1068 | if (token.kind == TokenKind.RPAREN) { |
| 1069 | break; |
| 1070 | } |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1071 | |
| 1072 | parseLoadSymbol(symbols); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1073 | } |
| 1074 | expect(TokenKind.RPAREN); |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1075 | |
Laurent Le Brun | 7b1708c | 2016-10-13 10:05:12 +0000 | [diff] [blame] | 1076 | LoadStatement stmt = new LoadStatement(importString, symbols); |
Miguel Alcon Pinto | 927f3b2 | 2016-08-22 14:21:30 +0000 | [diff] [blame] | 1077 | list.add(setLocation(stmt, start, token.left)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1078 | } |
| 1079 | |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1080 | /** |
| 1081 | * Parses the next symbol argument of a load statement and puts it into the output map. |
| 1082 | * |
| 1083 | * <p> The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING). |
Jon Brandvein | ee8b7aa | 2016-07-28 15:01:26 +0000 | [diff] [blame] | 1084 | * If no alias is used, "name" and "declared" will be identical. "Declared" refers to the |
| 1085 | * original name in the Bazel file that should be loaded, while "name" will be the key of the |
| 1086 | * entry in the map. |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1087 | */ |
| 1088 | private void parseLoadSymbol(Map<Identifier, String> symbols) { |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 1089 | Token nameToken; |
| 1090 | Token declaredToken; |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1091 | |
| 1092 | if (token.kind == TokenKind.STRING) { |
| 1093 | nameToken = token; |
| 1094 | declaredToken = nameToken; |
| 1095 | } else { |
| 1096 | if (token.kind != TokenKind.IDENTIFIER) { |
| 1097 | syntaxError(token, "Expected either a literal string or an identifier"); |
| 1098 | } |
| 1099 | |
| 1100 | nameToken = token; |
| 1101 | |
| 1102 | expect(TokenKind.IDENTIFIER); |
| 1103 | expect(TokenKind.EQUALS); |
| 1104 | |
| 1105 | declaredToken = token; |
| 1106 | } |
| 1107 | |
| 1108 | expect(TokenKind.STRING); |
| 1109 | |
| 1110 | try { |
| 1111 | Identifier identifier = new Identifier(nameToken.value.toString()); |
| 1112 | |
| 1113 | if (symbols.containsKey(identifier)) { |
| 1114 | syntaxError( |
Jon Brandvein | ee8b7aa | 2016-07-28 15:01:26 +0000 | [diff] [blame] | 1115 | nameToken, String.format("Identifier '%s' is used more than once", |
| 1116 | identifier.getName())); |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1117 | } else { |
| 1118 | symbols.put( |
Jon Brandvein | ee8b7aa | 2016-07-28 15:01:26 +0000 | [diff] [blame] | 1119 | setLocation(identifier, nameToken.left, nameToken.right), |
| 1120 | declaredToken.value.toString()); |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1121 | } |
| 1122 | } catch (NullPointerException npe) { |
| 1123 | // This means that the value of at least one token is null. In this case, the previous |
| 1124 | // expect() call has already logged an error. |
| 1125 | } |
| 1126 | } |
| 1127 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1128 | private void parseTopLevelStatement(List<Statement> list) { |
| 1129 | // In Python grammar, there is no "top-level statement" and imports are |
| 1130 | // considered as "small statements". We are a bit stricter than Python here. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1131 | // Check if there is an include |
| 1132 | if (token.kind == TokenKind.IDENTIFIER) { |
| 1133 | Token identToken = token; |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 1134 | Identifier ident = parseIdent(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1135 | |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 1136 | if (ident.getName().equals("load") && token.kind == TokenKind.LPAREN) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1137 | expect(TokenKind.LPAREN); |
| 1138 | parseLoad(list); |
| 1139 | return; |
| 1140 | } |
| 1141 | pushToken(identToken); // push the ident back to parse it as a statement |
| 1142 | } |
| 1143 | parseStatement(list, true); |
| 1144 | } |
| 1145 | |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1146 | // small_stmt | 'pass' |
| 1147 | private void parseSmallStatementOrPass(List<Statement> list) { |
| 1148 | if (token.kind == TokenKind.PASS) { |
| 1149 | // Skip the token, don't add it to the list. |
| 1150 | // It has no existence in the AST. |
| 1151 | expect(TokenKind.PASS); |
| 1152 | } else { |
| 1153 | list.add(parseSmallStatement()); |
| 1154 | } |
| 1155 | } |
| 1156 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1157 | // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE |
| 1158 | private void parseSimpleStatement(List<Statement> list) { |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1159 | parseSmallStatementOrPass(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1160 | |
| 1161 | while (token.kind == TokenKind.SEMI) { |
| 1162 | nextToken(); |
| 1163 | if (token.kind == TokenKind.NEWLINE) { |
| 1164 | break; |
| 1165 | } |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1166 | parseSmallStatementOrPass(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1167 | } |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1168 | expectAndRecover(TokenKind.NEWLINE); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1169 | } |
| 1170 | |
| 1171 | // small_stmt ::= assign_stmt |
| 1172 | // | expr |
| 1173 | // | RETURN expr |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1174 | // | flow_stmt |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1175 | // assign_stmt ::= expr ('=' | augassign) expr |
Vladimir Moskva | 7153664 | 2016-12-19 13:51:57 +0000 | [diff] [blame] | 1176 | // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=') |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1177 | // Note that these are in Python, but not implemented here (at least for now): |
Vladimir Moskva | 7153664 | 2016-12-19 13:51:57 +0000 | [diff] [blame] | 1178 | // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' | '//=' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1179 | // Semantic difference from Python: |
| 1180 | // In Skylark, x += y is simple syntactic sugar for x = x + y. |
| 1181 | // In Python, x += y is more or less equivalent to x = x + y, but if a method is defined |
| 1182 | // on x.__iadd__(y), then it takes precedence, and in the case of lists it side-effects |
| 1183 | // the original list (it doesn't do that on tuples); if no such method is defined it falls back |
| 1184 | // to the x.__add__(y) method that backs x + y. In Skylark, we don't support this side-effect. |
| 1185 | // Note also that there is a special casing to translate 'ident[key] = value' |
| 1186 | // to 'ident = ident + {key: value}'. This is needed to support the pure version of Python-like |
| 1187 | // dictionary assignment syntax. |
| 1188 | private Statement parseSmallStatement() { |
| 1189 | int start = token.left; |
| 1190 | if (token.kind == TokenKind.RETURN) { |
| 1191 | return parseReturnStatement(); |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1192 | } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) { |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1193 | return parseFlowStatement(token.kind); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1194 | } |
| 1195 | Expression expression = parseExpression(); |
| 1196 | if (token.kind == TokenKind.EQUALS) { |
| 1197 | nextToken(); |
| 1198 | Expression rvalue = parseExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1199 | return setLocation(new AssignmentStatement(expression, rvalue), start, rvalue); |
| 1200 | } else if (augmentedAssignmentMethods.containsKey(token.kind)) { |
| 1201 | Operator operator = augmentedAssignmentMethods.get(token.kind); |
| 1202 | nextToken(); |
| 1203 | Expression operand = parseExpression(); |
| 1204 | int end = operand.getLocation().getEndOffset(); |
Vladimir Moskva | 7153664 | 2016-12-19 13:51:57 +0000 | [diff] [blame] | 1205 | return setLocation( |
| 1206 | new AugmentedAssignmentStatement(operator, expression, operand), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1207 | } else { |
| 1208 | return setLocation(new ExpressionStatement(expression), start, expression); |
| 1209 | } |
| 1210 | } |
| 1211 | |
| 1212 | // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]? |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1213 | private IfStatement parseIfStatement() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1214 | int start = token.left; |
| 1215 | List<ConditionalStatements> thenBlocks = new ArrayList<>(); |
| 1216 | thenBlocks.add(parseConditionalStatements(TokenKind.IF)); |
| 1217 | while (token.kind == TokenKind.ELIF) { |
| 1218 | thenBlocks.add(parseConditionalStatements(TokenKind.ELIF)); |
| 1219 | } |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1220 | List<Statement> elseBlock; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1221 | if (token.kind == TokenKind.ELSE) { |
| 1222 | expect(TokenKind.ELSE); |
| 1223 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1224 | elseBlock = parseSuite(); |
| 1225 | } else { |
| 1226 | elseBlock = ImmutableList.of(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1227 | } |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1228 | return setLocation(new IfStatement(thenBlocks, elseBlock), start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1229 | } |
| 1230 | |
| 1231 | // cond_stmts ::= [EL]IF expr ':' suite |
| 1232 | private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) { |
| 1233 | int start = token.left; |
| 1234 | expect(tokenKind); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 1235 | Expression expr = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1236 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1237 | List<Statement> thenBlock = parseSuite(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1238 | ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock); |
| 1239 | return setLocation(stmt, start, token.right); |
| 1240 | } |
| 1241 | |
| 1242 | // for_stmt ::= FOR IDENTIFIER IN expr ':' suite |
| 1243 | private void parseForStatement(List<Statement> list) { |
| 1244 | int start = token.left; |
| 1245 | expect(TokenKind.FOR); |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 1246 | Expression loopVar = parseForLoopVariables(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1247 | expect(TokenKind.IN); |
| 1248 | Expression collection = parseExpression(); |
| 1249 | expect(TokenKind.COLON); |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1250 | enterLoop(); |
| 1251 | try { |
| 1252 | List<Statement> block = parseSuite(); |
| 1253 | Statement stmt = new ForStatement(loopVar, collection, block); |
| 1254 | list.add(setLocation(stmt, start, token.right)); |
| 1255 | } finally { |
| 1256 | exitLoop(); |
| 1257 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1258 | } |
| 1259 | |
| 1260 | // def foo(bar1, bar2): |
| 1261 | private void parseFunctionDefStatement(List<Statement> list) { |
| 1262 | int start = token.left; |
| 1263 | expect(TokenKind.DEF); |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 1264 | Identifier ident = parseIdent(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1265 | expect(TokenKind.LPAREN); |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1266 | List<Parameter<Expression, Expression>> params = parseParameters(); |
| 1267 | FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1268 | expect(TokenKind.RPAREN); |
| 1269 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1270 | List<Statement> block = parseSuite(); |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1271 | FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1272 | list.add(setLocation(stmt, start, token.right)); |
| 1273 | } |
| 1274 | |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1275 | private FunctionSignature.WithValues<Expression, Expression> functionSignature( |
| 1276 | List<Parameter<Expression, Expression>> parameters) { |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1277 | try { |
| 1278 | return FunctionSignature.WithValues.<Expression, Expression>of(parameters); |
| 1279 | } catch (FunctionSignature.SignatureException e) { |
| 1280 | reportError(e.getParameter().getLocation(), e.getMessage()); |
| 1281 | // return bogus empty signature |
| 1282 | return FunctionSignature.WithValues.<Expression, Expression>create(FunctionSignature.of()); |
| 1283 | } |
| 1284 | } |
| 1285 | |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1286 | private List<Parameter<Expression, Expression>> parseParameters() { |
| 1287 | return parseFunctionArguments( |
| 1288 | new Supplier<Parameter<Expression, Expression>>() { |
| 1289 | @Override public Parameter<Expression, Expression> get() { |
| 1290 | return parseFunctionParameter(); |
| 1291 | } |
| 1292 | }); |
| 1293 | } |
| 1294 | |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1295 | /** |
| 1296 | * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter, |
| 1297 | * as returned by the Supplier parseArgument (that, taking no argument, must be closed over |
| 1298 | * the mutable input data structures). |
| 1299 | * |
| 1300 | * <p>This parser does minimal validation: it ensures the proper python use of the comma (that |
| 1301 | * can terminate before a star but not after) and the fact that a **kwarg must appear last. |
| 1302 | * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as |
| 1303 | * all positional preceding keyword arguments in a call, nor does it check the more subtle |
| 1304 | * constraints for Parameter-s. This validation must happen afterwards in an appropriate method. |
| 1305 | */ |
| 1306 | private <V extends Argument> ImmutableList<V> |
| 1307 | parseFunctionArguments(Supplier<V> parseArgument) { |
| 1308 | boolean hasArg = false; |
| 1309 | boolean hasStar = false; |
| 1310 | boolean hasStarStar = false; |
| 1311 | ArrayList<V> arguments = new ArrayList<>(); |
| 1312 | |
| 1313 | while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) { |
| 1314 | if (hasStarStar) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1315 | reportError(lexer.createLocation(token.left, token.right), |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1316 | "unexpected tokens after kwarg"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1317 | break; |
| 1318 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1319 | if (hasArg) { |
| 1320 | expect(TokenKind.COMMA); |
| 1321 | } |
| 1322 | if (token.kind == TokenKind.RPAREN && !hasStar) { |
| 1323 | // list can end with a COMMA if there is neither * nor ** |
| 1324 | break; |
| 1325 | } |
| 1326 | V arg = parseArgument.get(); |
| 1327 | hasArg = true; |
| 1328 | if (arg.isStar()) { |
| 1329 | hasStar = true; |
| 1330 | } else if (arg.isStarStar()) { |
| 1331 | hasStarStar = true; |
| 1332 | } |
| 1333 | arguments.add(arg); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1334 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1335 | return ImmutableList.copyOf(arguments); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1336 | } |
| 1337 | |
Laurent Le Brun | 5f67445 | 2015-03-17 19:29:13 +0000 | [diff] [blame] | 1338 | // suite is typically what follows a colon (e.g. after def or for). |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1339 | // suite ::= simple_stmt |
| 1340 | // | NEWLINE INDENT stmt+ OUTDENT |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1341 | private List<Statement> parseSuite() { |
| 1342 | List<Statement> list = new ArrayList<>(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1343 | if (token.kind == TokenKind.NEWLINE) { |
| 1344 | expect(TokenKind.NEWLINE); |
| 1345 | if (token.kind != TokenKind.INDENT) { |
| 1346 | reportError(lexer.createLocation(token.left, token.right), |
| 1347 | "expected an indented block"); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1348 | return list; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1349 | } |
| 1350 | expect(TokenKind.INDENT); |
| 1351 | while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) { |
| 1352 | parseStatement(list, false); |
| 1353 | } |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1354 | expectAndRecover(TokenKind.OUTDENT); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1355 | } else { |
Laurent Le Brun | 5f67445 | 2015-03-17 19:29:13 +0000 | [diff] [blame] | 1356 | parseSimpleStatement(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1357 | } |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1358 | return list; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1359 | } |
| 1360 | |
| 1361 | // skipSuite does not check that the code is syntactically correct, it |
| 1362 | // just skips based on indentation levels. |
| 1363 | private void skipSuite() { |
| 1364 | if (token.kind == TokenKind.NEWLINE) { |
| 1365 | expect(TokenKind.NEWLINE); |
| 1366 | if (token.kind != TokenKind.INDENT) { |
| 1367 | reportError(lexer.createLocation(token.left, token.right), |
| 1368 | "expected an indented block"); |
| 1369 | return; |
| 1370 | } |
| 1371 | expect(TokenKind.INDENT); |
| 1372 | |
| 1373 | // Don't try to parse all the Python syntax, just skip the block |
| 1374 | // until the corresponding outdent token. |
| 1375 | int depth = 1; |
| 1376 | while (depth > 0) { |
| 1377 | // Because of the way the lexer works, this should never happen |
| 1378 | Preconditions.checkState(token.kind != TokenKind.EOF); |
| 1379 | |
| 1380 | if (token.kind == TokenKind.INDENT) { |
| 1381 | depth++; |
| 1382 | } |
| 1383 | if (token.kind == TokenKind.OUTDENT) { |
| 1384 | depth--; |
| 1385 | } |
| 1386 | nextToken(); |
| 1387 | } |
| 1388 | |
| 1389 | } else { |
| 1390 | // the block ends at the newline token |
| 1391 | // e.g. if x == 3: print "three" |
| 1392 | syncTo(STATEMENT_TERMINATOR_SET); |
| 1393 | } |
| 1394 | } |
| 1395 | |
| 1396 | // stmt ::= simple_stmt |
| 1397 | // | compound_stmt |
| 1398 | private void parseStatement(List<Statement> list, boolean isTopLevel) { |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 1399 | if (token.kind == TokenKind.DEF && parsingMode == SKYLARK) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1400 | if (!isTopLevel) { |
| 1401 | reportError(lexer.createLocation(token.left, token.right), |
| 1402 | "nested functions are not allowed. Move the function to top-level"); |
| 1403 | } |
| 1404 | parseFunctionDefStatement(list); |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 1405 | } else if (token.kind == TokenKind.IF && parsingMode == SKYLARK) { |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1406 | list.add(parseIfStatement()); |
Laurent Le Brun | 9be852e | 2015-05-28 08:44:51 +0000 | [diff] [blame] | 1407 | } else if (token.kind == TokenKind.FOR && parsingMode == SKYLARK) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1408 | if (isTopLevel) { |
Yue Gan | 4866e15 | 2016-04-07 13:07:08 +0000 | [diff] [blame] | 1409 | reportError( |
| 1410 | lexer.createLocation(token.left, token.right), |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1411 | "for loops are not allowed on top-level. Put it into a function"); |
| 1412 | } |
| 1413 | parseForStatement(list); |
Laurent Le Brun | 3bc8e9a | 2015-09-10 11:00:37 +0000 | [diff] [blame] | 1414 | } else if (BLOCK_STARTING_SET.contains(token.kind)) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1415 | skipBlock(); |
| 1416 | } else { |
| 1417 | parseSimpleStatement(list); |
| 1418 | } |
| 1419 | } |
| 1420 | |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1421 | // flow_stmt ::= break_stmt | continue_stmt |
| 1422 | private FlowStatement parseFlowStatement(TokenKind kind) { |
Laurent Le Brun | d412c8f | 2015-06-16 11:12:54 +0000 | [diff] [blame] | 1423 | int start = token.left; |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1424 | int end = token.right; |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1425 | expect(kind); |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1426 | if (loopCount == 0) { |
| 1427 | reportError( |
| 1428 | lexer.createLocation(start, end), |
| 1429 | kind.getPrettyName() + " statement must be inside a for loop"); |
| 1430 | } |
Laurent Le Brun | 7d6a381 | 2015-10-26 12:07:12 +0000 | [diff] [blame] | 1431 | FlowStatement.Kind flowKind = |
| 1432 | kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE; |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1433 | return setLocation(new FlowStatement(flowKind), start, end); |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1434 | } |
Laurent Le Brun | d412c8f | 2015-06-16 11:12:54 +0000 | [diff] [blame] | 1435 | |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1436 | // return_stmt ::= RETURN [expr] |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1437 | private ReturnStatement parseReturnStatement() { |
| 1438 | int start = token.left; |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1439 | int end = token.right; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1440 | expect(TokenKind.RETURN); |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1441 | |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1442 | Expression expression; |
| 1443 | if (STATEMENT_TERMINATOR_SET.contains(token.kind)) { |
| 1444 | // this None makes the AST not correspond to the source exactly anymore |
| 1445 | expression = new Identifier("None"); |
| 1446 | setLocation(expression, start, end); |
| 1447 | } else { |
| 1448 | expression = parseExpression(); |
| 1449 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1450 | return setLocation(new ReturnStatement(expression), start, expression); |
| 1451 | } |
| 1452 | |
Florian Weikert | 1f004e5 | 2015-10-16 09:43:48 +0000 | [diff] [blame] | 1453 | // block ::= ('if' | 'for' | 'class' | 'try' | 'def') expr ':' suite |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1454 | private void skipBlock() { |
| 1455 | int start = token.left; |
| 1456 | Token blockToken = token; |
| 1457 | syncTo(EnumSet.of(TokenKind.COLON, TokenKind.EOF)); // skip over expression or name |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1458 | if (blockToken.kind == TokenKind.ELSE) { |
Yue Gan | 4866e15 | 2016-04-07 13:07:08 +0000 | [diff] [blame] | 1459 | reportError( |
| 1460 | lexer.createLocation(blockToken.left, blockToken.right), |
| 1461 | "syntax error at 'else': not allowed here."); |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 1462 | } else { |
Florian Weikert | 1f004e5 | 2015-10-16 09:43:48 +0000 | [diff] [blame] | 1463 | String msg = |
| 1464 | ILLEGAL_BLOCK_KEYWORDS.containsKey(blockToken.kind) |
| 1465 | ? String.format("%ss are not supported.", ILLEGAL_BLOCK_KEYWORDS.get(blockToken.kind)) |
| 1466 | : "This is not supported in BUILD files. Move the block to a .bzl file and load it"; |
Laurent Le Brun | b13a438 | 2015-06-30 14:20:45 +0000 | [diff] [blame] | 1467 | reportError( |
| 1468 | lexer.createLocation(start, token.right), |
Florian Weikert | 1f004e5 | 2015-10-16 09:43:48 +0000 | [diff] [blame] | 1469 | String.format("syntax error at '%s': %s", blockToken, msg)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1470 | } |
| 1471 | expect(TokenKind.COLON); |
| 1472 | skipSuite(); |
| 1473 | } |
| 1474 | |
| 1475 | // create a comment node |
| 1476 | private void makeComment(Token token) { |
| 1477 | comments.add(setLocation(new Comment((String) token.value), token.left, token.right)); |
| 1478 | } |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1479 | |
| 1480 | private void enterLoop() { |
| 1481 | loopCount++; |
| 1482 | } |
| 1483 | |
| 1484 | private void exitLoop() { |
| 1485 | Preconditions.checkState(loopCount > 0); |
| 1486 | loopCount--; |
| 1487 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1488 | } |