Damien Martin-Guillerez | f88f4d8 | 2015-09-25 13:56:55 +0000 | [diff] [blame] | 1 | // Copyright 2014 The Bazel Authors. All rights reserved. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | package com.google.devtools.build.lib.syntax; |
| 16 | |
| 17 | import com.google.common.annotations.VisibleForTesting; |
tomlu | a155b53 | 2017-11-08 20:12:47 +0100 | [diff] [blame] | 18 | import com.google.common.base.Preconditions; |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 19 | import com.google.common.base.Supplier; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 20 | import com.google.common.collect.ImmutableList; |
| 21 | import com.google.common.collect.ImmutableMap; |
nharmata | 6dbfafe | 2019-02-05 08:55:07 -0800 | [diff] [blame] | 22 | import com.google.common.collect.Interner; |
Laurent Le Brun | e51a4d2 | 2016-10-11 18:04:16 +0000 | [diff] [blame] | 23 | import com.google.common.collect.Iterables; |
nharmata | 6dbfafe | 2019-02-05 08:55:07 -0800 | [diff] [blame] | 24 | import com.google.devtools.build.lib.concurrent.BlazeInterners; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 25 | import com.google.devtools.build.lib.events.Event; |
| 26 | import com.google.devtools.build.lib.events.EventHandler; |
| 27 | import com.google.devtools.build.lib.events.Location; |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 28 | import com.google.devtools.build.lib.profiler.Profiler; |
| 29 | import com.google.devtools.build.lib.profiler.ProfilerTask; |
twerth | ee91e23 | 2018-07-09 02:33:27 -0700 | [diff] [blame] | 30 | import com.google.devtools.build.lib.profiler.SilentCloseable; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 31 | import com.google.devtools.build.lib.syntax.DictionaryLiteral.DictionaryEntryLiteral; |
| 32 | import com.google.devtools.build.lib.syntax.IfStatement.ConditionalStatements; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 33 | import java.util.ArrayList; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 34 | import java.util.EnumSet; |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 35 | import java.util.HashSet; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 36 | import java.util.List; |
| 37 | import java.util.Map; |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 38 | import java.util.Set; |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 39 | import javax.annotation.Nullable; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 40 | |
| 41 | /** |
| 42 | * Recursive descent parser for LL(2) BUILD language. |
| 43 | * Loosely based on Python 2 grammar. |
| 44 | * See https://docs.python.org/2/reference/grammar.html |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 45 | */ |
Han-Wen Nienhuys | ceae8c5 | 2015-09-22 16:24:45 +0000 | [diff] [blame] | 46 | @VisibleForTesting |
| 47 | public class Parser { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 48 | |
/**
 * Immutable bundle of everything a parse produces: the statements, the comments, an overall
 * location, and an error flag.
 */
public static final class ParseResult {
  /** The statements (rules, basically) from the parsed file. Never null. */
  public final List<Statement> statements;

  /** The comments from the parsed file. Never null. */
  public final List<Comment> comments;

  /**
   * Location covering the parsed statements. {@code Parser.parseFile} passes a span running from
   * the start of the first statement to the end of the last one, or a location built from the
   * file name when there are no statements. Not null-checked by the constructor.
   */
  public final Location location;

  /**
   * Whether the file contained any errors. {@code Parser.parseFile} sets this when either the
   * parser or the lexer reported at least one error.
   */
  public final boolean containsErrors;

  /**
   * Creates a result that stores the given lists by reference (no defensive copy).
   *
   * @param statements the parsed statements; must not be null
   * @param comments the comments found in the input; must not be null
   * @param location the location to associate with the whole result
   * @param containsErrors whether any error was reported while parsing
   * @throws NullPointerException if {@code statements} or {@code comments} is null
   */
  public ParseResult(List<Statement> statements, List<Comment> comments, Location location,
      boolean containsErrors) {
    // No need to copy here; when the object is created, the parser instance is just about to go
    // out of scope and be garbage collected.
    this.statements = Preconditions.checkNotNull(statements);
    this.comments = Preconditions.checkNotNull(comments);
    this.location = location;
    this.containsErrors = containsErrors;
  }
}
| 75 | |
/** Used to select what constructs are allowed based on whether we're at the top level. */
public enum ParsingLevel {
  /** Top of the file: a {@code for} statement here is reported as an error. */
  TOP_LEVEL,
  /** Nested scope: a {@code def} statement here is reported as an error. */
  LOCAL_LEVEL
}
| 81 | |
// Token-kind sets that mark where a construct ends. Every set contains EOF, which syncTo() and
// syncPast() require of the set they are given (otherwise they could loop forever).
// NOTE(review): the use sites are outside this part of the file; judging by the names these are
// the resynchronization points for statements, lists, dicts and expressions — confirm.

private static final EnumSet<TokenKind> STATEMENT_TERMINATOR_SET =
    EnumSet.of(TokenKind.EOF, TokenKind.NEWLINE, TokenKind.SEMI);

private static final EnumSet<TokenKind> LIST_TERMINATOR_SET =
    EnumSet.of(TokenKind.EOF, TokenKind.RBRACKET, TokenKind.SEMI);

private static final EnumSet<TokenKind> DICT_TERMINATOR_SET =
    EnumSet.of(TokenKind.EOF, TokenKind.RBRACE, TokenKind.SEMI);

private static final EnumSet<TokenKind> EXPR_LIST_TERMINATOR_SET =
    EnumSet.of(
        TokenKind.EOF,
        TokenKind.NEWLINE,
        TokenKind.EQUALS,
        TokenKind.RBRACE,
        TokenKind.RBRACKET,
        TokenKind.RPAREN,
        TokenKind.SEMI);

private static final EnumSet<TokenKind> EXPR_TERMINATOR_SET =
    EnumSet.of(
        TokenKind.COLON,
        TokenKind.COMMA,
        TokenKind.EOF,
        TokenKind.FOR,
        TokenKind.MINUS,
        TokenKind.PERCENT,
        TokenKind.PLUS,
        TokenKind.RBRACKET,
        TokenKind.RPAREN,
        TokenKind.SLASH);
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 113 | |
/**
 * Current lookahead token. May be mutated by the parser. Null until the constructor's first call
 * to {@code nextToken()}, which fills it from the lexer.
 */
private Token token;

/** When true, {@code nextToken()} prints every token it sees to {@code System.err}. */
private static final boolean DEBUGGING = false;

/** Source of tokens and factory for locations built from character offsets. */
private final Lexer lexer;

/** Receives the error events produced by {@code reportError}. */
private final EventHandler eventHandler;
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 121 | |
/** Maps the token kind of each binary operator to the {@link Operator} it denotes. */
private static final Map<TokenKind, Operator> binaryOperators =
    new ImmutableMap.Builder<TokenKind, Operator>()
        .put(TokenKind.AND, Operator.AND)
        .put(TokenKind.EQUALS_EQUALS, Operator.EQUALS_EQUALS)
        .put(TokenKind.GREATER, Operator.GREATER)
        .put(TokenKind.GREATER_EQUALS, Operator.GREATER_EQUALS)
        .put(TokenKind.IN, Operator.IN)
        .put(TokenKind.LESS, Operator.LESS)
        .put(TokenKind.LESS_EQUALS, Operator.LESS_EQUALS)
        .put(TokenKind.MINUS, Operator.MINUS)
        .put(TokenKind.NOT_EQUALS, Operator.NOT_EQUALS)
        .put(TokenKind.NOT_IN, Operator.NOT_IN)
        .put(TokenKind.OR, Operator.OR)
        .put(TokenKind.PERCENT, Operator.PERCENT)
        .put(TokenKind.SLASH, Operator.DIVIDE)
        .put(TokenKind.SLASH_SLASH, Operator.FLOOR_DIVIDE)
        .put(TokenKind.PLUS, Operator.PLUS)
        .put(TokenKind.PIPE, Operator.PIPE)
        .put(TokenKind.STAR, Operator.MULT)
        .build();

/**
 * Maps each augmented-assignment token kind (e.g. {@code PLUS_EQUALS}) to the binary
 * {@link Operator} it applies (e.g. {@code PLUS}).
 */
private static final Map<TokenKind, Operator> augmentedAssignmentMethods =
    new ImmutableMap.Builder<TokenKind, Operator>()
        .put(TokenKind.PLUS_EQUALS, Operator.PLUS)
        .put(TokenKind.MINUS_EQUALS, Operator.MINUS)
        .put(TokenKind.STAR_EQUALS, Operator.MULT)
        .put(TokenKind.SLASH_EQUALS, Operator.DIVIDE)
        .put(TokenKind.SLASH_SLASH_EQUALS, Operator.FLOOR_DIVIDE)
        .put(TokenKind.PERCENT_EQUALS, Operator.PERCENT)
        .build();

/**
 * Operator precedence levels, lowest first: highest precedence goes last. Operators in the same
 * set share a level, so the order of this list is significant.
 *
 * <p>Based on: http://docs.python.org/2/reference/expressions.html#operator-precedence
 */
private static final List<EnumSet<Operator>> operatorPrecedence = ImmutableList.of(
    EnumSet.of(Operator.OR),
    EnumSet.of(Operator.AND),
    EnumSet.of(Operator.NOT),
    EnumSet.of(Operator.EQUALS_EQUALS, Operator.NOT_EQUALS, Operator.LESS, Operator.LESS_EQUALS,
        Operator.GREATER, Operator.GREATER_EQUALS, Operator.IN, Operator.NOT_IN),
    EnumSet.of(Operator.PIPE),
    EnumSet.of(Operator.MINUS, Operator.PLUS),
    EnumSet.of(Operator.DIVIDE, Operator.FLOOR_DIVIDE, Operator.MULT, Operator.PERCENT));
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 165 | |
/** Number of errors passed to {@code reportError}, including those it did not forward. */
private int errorsCount;

/**
 * Set by {@code syntaxError} after it reports an error and cleared by {@code expectAndRecover};
 * while set, further syntax errors are suppressed.
 */
private boolean recoveryMode; // stop reporting errors until next statement

// Per-parser strong interner for strings.
// NOTE(review): its use sites are outside this part of the file — confirm what gets interned.
private final Interner<String> stringInterner = BlazeInterners.newStrongInterner();
| 170 | |
/**
 * Creates a parser reading from {@code lexer} and reporting to {@code eventHandler}, and loads
 * the first token so that {@code token} is non-null before any parse method runs.
 */
private Parser(Lexer lexer, EventHandler eventHandler) {
  this.lexer = lexer;
  this.eventHandler = eventHandler;
  // Prime the lookahead.
  nextToken();
}
| 176 | |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 177 | private static Location locationFromStatements(Lexer lexer, List<Statement> statements) { |
| 178 | if (!statements.isEmpty()) { |
| 179 | return lexer.createLocation( |
| 180 | statements.get(0).getLocation().getStartOffset(), |
Laurent Le Brun | e51a4d2 | 2016-10-11 18:04:16 +0000 | [diff] [blame] | 181 | Iterables.getLast(statements).getLocation().getEndOffset()); |
Lukacs Berki | d9e733d | 2015-09-18 08:18:11 +0000 | [diff] [blame] | 182 | } else { |
| 183 | return Location.fromPathFragment(lexer.getFilename()); |
| 184 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 185 | } |
| 186 | |
| 187 | /** |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 188 | * Main entry point for parsing a file. |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 189 | * |
| 190 | * @param input the input to parse |
| 191 | * @param eventHandler a reporter for parsing errors |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 192 | * @see BuildFileAST#parseBuildString |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 193 | */ |
laurentlb | 17d975e | 2017-09-01 17:49:23 +0200 | [diff] [blame] | 194 | public static ParseResult parseFile(ParserInputSource input, EventHandler eventHandler) { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 195 | Lexer lexer = new Lexer(input, eventHandler); |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 196 | Parser parser = new Parser(lexer, eventHandler); |
twerth | ee91e23 | 2018-07-09 02:33:27 -0700 | [diff] [blame] | 197 | List<Statement> statements; |
| 198 | try (SilentCloseable c = |
laurentlb | 3cdfd1a | 2018-11-09 04:55:08 -0800 | [diff] [blame] | 199 | Profiler.instance() |
| 200 | .profile(ProfilerTask.STARLARK_PARSER, input.getPath().getPathString())) { |
twerth | ee91e23 | 2018-07-09 02:33:27 -0700 | [diff] [blame] | 201 | statements = parser.parseFileInput(); |
| 202 | } |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 203 | boolean errors = parser.errorsCount > 0 || lexer.containsErrors(); |
Laurent Le Brun | 8c8857d | 2016-08-04 10:22:16 +0000 | [diff] [blame] | 204 | return new ParseResult( |
laurentlb | 17f8d4e | 2018-05-24 07:32:52 -0700 | [diff] [blame] | 205 | statements, lexer.getComments(), locationFromStatements(lexer, statements), errors); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 206 | } |
| 207 | |
| 208 | /** |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 209 | * Parses a sequence of statements, possibly followed by newline tokens. |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 210 | * |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 211 | * <p>{@code load()} statements are not permitted. Use {@code parsingLevel} to control whether |
| 212 | * function definitions, for statements, etc., are allowed. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 213 | */ |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 214 | public static List<Statement> parseStatements( |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 215 | ParserInputSource input, EventHandler eventHandler, ParsingLevel parsingLevel) { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 216 | Lexer lexer = new Lexer(input, eventHandler); |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 217 | Parser parser = new Parser(lexer, eventHandler); |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 218 | List<Statement> result = new ArrayList<>(); |
| 219 | parser.parseStatement(result, parsingLevel); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 220 | while (parser.token.kind == TokenKind.NEWLINE) { |
| 221 | parser.nextToken(); |
| 222 | } |
| 223 | parser.expect(TokenKind.EOF); |
| 224 | return result; |
| 225 | } |
| 226 | |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 227 | /** |
| 228 | * Convenience wrapper for {@link #parseStatements} where exactly one statement is expected. |
| 229 | * |
| 230 | * @throws IllegalArgumentException if the number of parsed statements was not exactly one |
| 231 | */ |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 232 | @VisibleForTesting |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 233 | public static Statement parseStatement( |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 234 | ParserInputSource input, EventHandler eventHandler, ParsingLevel parsingLevel) { |
| 235 | List<Statement> stmts = parseStatements(input, eventHandler, parsingLevel); |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 236 | return Iterables.getOnlyElement(stmts); |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 237 | } |
| 238 | |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 239 | // stmt ::= simple_stmt |
| 240 | // | def_stmt |
| 241 | // | for_stmt |
| 242 | // | if_stmt |
| 243 | private void parseStatement(List<Statement> list, ParsingLevel parsingLevel) { |
| 244 | if (token.kind == TokenKind.DEF) { |
| 245 | if (parsingLevel == ParsingLevel.LOCAL_LEVEL) { |
| 246 | reportError( |
| 247 | lexer.createLocation(token.left, token.right), |
| 248 | "nested functions are not allowed. Move the function to top-level"); |
| 249 | } |
| 250 | parseFunctionDefStatement(list); |
| 251 | } else if (token.kind == TokenKind.IF) { |
| 252 | list.add(parseIfStatement()); |
| 253 | } else if (token.kind == TokenKind.FOR) { |
| 254 | if (parsingLevel == ParsingLevel.TOP_LEVEL) { |
| 255 | reportError( |
| 256 | lexer.createLocation(token.left, token.right), |
| 257 | "for loops are not allowed on top-level. Put it into a function"); |
| 258 | } |
| 259 | parseForStatement(list); |
| 260 | } else { |
| 261 | parseSimpleStatement(list); |
| 262 | } |
| 263 | } |
| 264 | |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 265 | /** Parses an expression, possibly followed by newline tokens. */ |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 266 | @VisibleForTesting |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 267 | public static Expression parseExpression(ParserInputSource input, EventHandler eventHandler) { |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 268 | Lexer lexer = new Lexer(input, eventHandler); |
laurentlb | ab58a92 | 2017-08-22 16:45:28 +0200 | [diff] [blame] | 269 | Parser parser = new Parser(lexer, eventHandler); |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 270 | Expression result = parser.parseExpression(); |
| 271 | while (parser.token.kind == TokenKind.NEWLINE) { |
| 272 | parser.nextToken(); |
| 273 | } |
| 274 | parser.expect(TokenKind.EOF); |
| 275 | return result; |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 276 | } |
| 277 | |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 278 | private Expression parseExpression() { |
| 279 | return parseExpression(false); |
| 280 | } |
| 281 | |
| 282 | // Equivalent to 'testlist' rule in Python grammar. It can parse every kind of |
| 283 | // expression. In many cases, we need to use parseNonTupleExpression to avoid ambiguity: |
| 284 | // e.g. fct(x, y) vs fct((x, y)) |
| 285 | // |
| 286 | // Tuples can have a trailing comma only when insideParens is true. This prevents bugs |
| 287 | // where a one-element tuple is surprisingly created: |
| 288 | // e.g. foo = f(x), |
| 289 | private Expression parseExpression(boolean insideParens) { |
| 290 | int start = token.left; |
| 291 | Expression expression = parseNonTupleExpression(); |
| 292 | if (token.kind != TokenKind.COMMA) { |
| 293 | return expression; |
| 294 | } |
| 295 | |
| 296 | // It's a tuple |
| 297 | List<Expression> tuple = parseExprList(insideParens); |
| 298 | tuple.add(0, expression); // add the first expression to the front of the tuple |
| 299 | return setLocation(ListLiteral.makeTuple(tuple), start, Iterables.getLast(tuple)); |
| 300 | } |
| 301 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 302 | private void reportError(Location location, String message) { |
| 303 | errorsCount++; |
| 304 | // Limit the number of reported errors to avoid spamming output. |
| 305 | if (errorsCount <= 5) { |
| 306 | eventHandler.handle(Event.error(location, message)); |
| 307 | } |
| 308 | } |
| 309 | |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 310 | private void syntaxError(String message) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 311 | if (!recoveryMode) { |
| 312 | String msg = token.kind == TokenKind.INDENT |
| 313 | ? "indentation error" |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 314 | : "syntax error at '" + token + "': " + message; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 315 | reportError(lexer.createLocation(token.left, token.right), msg); |
| 316 | recoveryMode = true; |
| 317 | } |
| 318 | } |
| 319 | |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 320 | /** |
| 321 | * Consumes the current token. If it is not of the specified (expected) |
| 322 | * kind, reports a syntax error. |
| 323 | */ |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 324 | private boolean expect(TokenKind kind) { |
| 325 | boolean expected = token.kind == kind; |
| 326 | if (!expected) { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 327 | syntaxError("expected " + kind.getPrettyName()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 328 | } |
| 329 | nextToken(); |
| 330 | return expected; |
| 331 | } |
| 332 | |
| 333 | /** |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 334 | * Same as expect, but stop the recovery mode if the token was expected. |
| 335 | */ |
| 336 | private void expectAndRecover(TokenKind kind) { |
| 337 | if (expect(kind)) { |
| 338 | recoveryMode = false; |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | /** |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 343 | * Consume tokens past the first token that has a kind that is in the set of |
brandjon | fe29c724 | 2018-02-22 16:24:24 -0800 | [diff] [blame] | 344 | * terminatingTokens. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 345 | * @param terminatingTokens |
| 346 | * @return the end offset of the terminating token. |
| 347 | */ |
| 348 | private int syncPast(EnumSet<TokenKind> terminatingTokens) { |
| 349 | Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF)); |
| 350 | while (!terminatingTokens.contains(token.kind)) { |
| 351 | nextToken(); |
| 352 | } |
| 353 | int end = token.right; |
| 354 | // read past the synchronization token |
| 355 | nextToken(); |
| 356 | return end; |
| 357 | } |
| 358 | |
/**
 * Consume tokens until we reach the first token that has a kind that is in
 * the set of terminatingTokens. The current (problematic) token is always consumed; the
 * terminating token itself is left as the current token.
 *
 * @param terminatingTokens the kinds to stop at; must contain EOF so the scan always ends
 * @return an end offset recorded before the terminating token was reached, not the
 *     terminator's own end offset. If at most one token is skipped after the problematic one,
 *     this is the problematic token's end offset; otherwise it is the end offset of the last
 *     token skipped before the terminator.
 * @throws IllegalStateException if {@code terminatingTokens} does not contain EOF
 */
private int syncTo(EnumSet<TokenKind> terminatingTokens) {
  // EOF must be in the set to prevent an infinite loop
  Preconditions.checkState(terminatingTokens.contains(TokenKind.EOF));
  // read past the problematic token
  int previous = token.right;
  nextToken();
  // NOTE(review): 'current' starts at the problematic token's end rather than at the end of the
  // token just read, so that token's end offset is never recorded — confirm this is intended.
  int current = previous;
  while (!terminatingTokens.contains(token.kind)) {
    nextToken();
    // 'previous' trails 'current' by one iteration, so on exit it holds the offset recorded
    // before the terminator was read.
    previous = current;
    current = token.right;
  }
  return previous;
}
| 379 | |
// Keywords that exist in Python and that we don't parse. checkForbiddenKeywords() reports an
// error whenever the current token has one of these kinds.
private static final EnumSet<TokenKind> FORBIDDEN_KEYWORDS =
    EnumSet.of(
        TokenKind.AS,
        TokenKind.ASSERT,
        TokenKind.CLASS,
        TokenKind.DEL,
        TokenKind.EXCEPT,
        TokenKind.FINALLY,
        TokenKind.FROM,
        TokenKind.GLOBAL,
        TokenKind.IMPORT,
        TokenKind.IS,
        TokenKind.LAMBDA,
        TokenKind.NONLOCAL,
        TokenKind.RAISE,
        TokenKind.TRY,
        TokenKind.WITH,
        TokenKind.WHILE,
        TokenKind.YIELD);
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 400 | |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 401 | private void checkForbiddenKeywords() { |
Laurent Le Brun | b566c7d | 2016-10-07 16:31:03 +0000 | [diff] [blame] | 402 | if (!FORBIDDEN_KEYWORDS.contains(token.kind)) { |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 403 | return; |
| 404 | } |
| 405 | String error; |
| 406 | switch (token.kind) { |
| 407 | case ASSERT: error = "'assert' not supported, use 'fail' instead"; break; |
Laurent Le Brun | 44ad7fa | 2016-10-11 12:09:05 +0000 | [diff] [blame] | 408 | case DEL: |
| 409 | error = "'del' not supported, use '.pop()' to delete an item from a dictionary or a list"; |
| 410 | break; |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 411 | case IMPORT: error = "'import' not supported, use 'load' instead"; break; |
| 412 | case IS: error = "'is' not supported, use '==' instead"; break; |
| 413 | case LAMBDA: error = "'lambda' not supported, declare a function instead"; break; |
| 414 | case RAISE: error = "'raise' not supported, use 'fail' instead"; break; |
Laurent Le Brun | 44ad7fa | 2016-10-11 12:09:05 +0000 | [diff] [blame] | 415 | case TRY: error = "'try' not supported, all exceptions are fatal"; break; |
Laurent Le Brun | 0ddcba2 | 2015-03-23 16:48:01 +0000 | [diff] [blame] | 416 | case WHILE: error = "'while' not supported, use 'for' instead"; break; |
| 417 | default: error = "keyword '" + token.kind.getPrettyName() + "' not supported"; break; |
| 418 | } |
| 419 | reportError(lexer.createLocation(token.left, token.right), error); |
| 420 | } |
| 421 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 422 | private void nextToken() { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 423 | if (token == null || token.kind != TokenKind.EOF) { |
| 424 | token = lexer.nextToken(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 425 | } |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 426 | checkForbiddenKeywords(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 427 | if (DEBUGGING) { |
| 428 | System.err.print(token); |
| 429 | } |
| 430 | } |
| 431 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 432 | // create an error expression |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 433 | private Identifier makeErrorExpression(int start, int end) { |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 434 | return setLocation(Identifier.of("$error$"), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 435 | } |
| 436 | |
// Convenience wrapper method around ASTNode.setLocation: builds a location from the two
// offsets with the lexer and returns whatever ASTNode.setLocation returns for the node.
private <NodeT extends ASTNode> NodeT setLocation(NodeT node, int startOffset, int endOffset) {
  return ASTNode.setLocation(lexer.createLocation(startOffset, endOffset), node);
}
| 441 | |
| 442 | // Convenience method that uses end offset from the last node. |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 443 | private <NodeT extends ASTNode> NodeT setLocation(NodeT node, int startOffset, ASTNode lastNode) { |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 444 | Preconditions.checkNotNull(lastNode, "can't extract end offset from a null node"); |
| 445 | Preconditions.checkNotNull(lastNode.getLocation(), "lastNode doesn't have a location"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 446 | return setLocation(node, startOffset, lastNode.getLocation().getEndOffset()); |
| 447 | } |
| 448 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 449 | // arg ::= IDENTIFIER '=' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 450 | // | expr |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 451 | // | *args |
| 452 | // | **kwargs |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 453 | private Argument.Passed parseFuncallArgument() { |
| 454 | final int start = token.left; |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 455 | Expression expr; |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 456 | // parse **expr |
| 457 | if (token.kind == TokenKind.STAR_STAR) { |
| 458 | nextToken(); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 459 | expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 460 | return setLocation(new Argument.StarStar(expr), start, expr); |
| 461 | } |
| 462 | // parse *expr |
| 463 | if (token.kind == TokenKind.STAR) { |
| 464 | nextToken(); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 465 | expr = parseNonTupleExpression(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 466 | return setLocation(new Argument.Star(expr), start, expr); |
| 467 | } |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 468 | |
| 469 | expr = parseNonTupleExpression(); |
| 470 | if (expr instanceof Identifier) { |
| 471 | // parse a named argument |
| 472 | if (token.kind == TokenKind.EQUALS) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 473 | nextToken(); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 474 | Expression val = parseNonTupleExpression(); |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 475 | return setLocation(new Argument.Keyword(((Identifier) expr), val), start, val); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 476 | } |
| 477 | } |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 478 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 479 | // parse a positional argument |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 480 | return setLocation(new Argument.Positional(expr), start, expr); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 481 | } |
| 482 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 483 | // arg ::= IDENTIFIER '=' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 484 | // | IDENTIFIER |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 485 | private Parameter<Expression, Expression> parseFunctionParameter() { |
| 486 | // TODO(bazel-team): optionally support type annotations |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 487 | int start = token.left; |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 488 | if (token.kind == TokenKind.STAR_STAR) { // kwarg |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 489 | nextToken(); |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 490 | Identifier ident = parseIdent(); |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 491 | return setLocation(new Parameter.StarStar<>(ident), start, ident); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 492 | } else if (token.kind == TokenKind.STAR) { // stararg |
| 493 | int end = token.right; |
| 494 | nextToken(); |
| 495 | if (token.kind == TokenKind.IDENTIFIER) { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 496 | Identifier ident = parseIdent(); |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 497 | return setLocation(new Parameter.Star<>(ident), start, ident); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 498 | } else { |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 499 | return setLocation(new Parameter.Star<>(null), start, end); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 500 | } |
| 501 | } else { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 502 | Identifier ident = parseIdent(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 503 | if (token.kind == TokenKind.EQUALS) { // there's a default value |
| 504 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 505 | Expression expr = parseNonTupleExpression(); |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 506 | return setLocation(new Parameter.Optional<>(ident, expr), start, expr); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 507 | } else { |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 508 | return setLocation(new Parameter.Mandatory<>(ident), start, ident); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 509 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 510 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 511 | } |
| 512 | |
| 513 | // funcall_suffix ::= '(' arg_list? ')' |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 514 | private Expression parseFuncallSuffix(int start, Expression function) { |
michajlo | 5f39475 | 2017-10-06 23:51:10 +0200 | [diff] [blame] | 515 | ImmutableList<Argument.Passed> args = ImmutableList.of(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 516 | expect(TokenKind.LPAREN); |
| 517 | int end; |
| 518 | if (token.kind == TokenKind.RPAREN) { |
| 519 | end = token.right; |
| 520 | nextToken(); // RPAREN |
| 521 | } else { |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 522 | args = parseFuncallArguments(); // (includes optional trailing comma) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 523 | end = token.right; |
| 524 | expect(TokenKind.RPAREN); |
| 525 | } |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 526 | return setLocation(new FuncallExpression(function, args), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 527 | } |
| 528 | |
| 529 | // selector_suffix ::= '.' IDENTIFIER |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 530 | private Expression parseSelectorSuffix(int start, Expression receiver) { |
| 531 | expect(TokenKind.DOT); |
| 532 | if (token.kind == TokenKind.IDENTIFIER) { |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 533 | Identifier ident = parseIdent(); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 534 | return setLocation(new DotExpression(receiver, ident), start, ident); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 535 | } else { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 536 | syntaxError("expected identifier after dot"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 537 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 538 | return makeErrorExpression(start, end); |
| 539 | } |
| 540 | } |
| 541 | |
| 542 | // arg_list ::= ( (arg ',')* arg ','? )? |
michajlo | 5f39475 | 2017-10-06 23:51:10 +0200 | [diff] [blame] | 543 | private ImmutableList<Argument.Passed> parseFuncallArguments() { |
| 544 | ImmutableList<Argument.Passed> arguments = parseFunctionArguments(this::parseFuncallArgument); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 545 | try { |
laurentlb | 254a4be | 2019-03-26 16:35:29 -0700 | [diff] [blame^] | 546 | Argument.validateFuncallArguments(arguments); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 547 | } catch (Argument.ArgumentException e) { |
laurentlb | 2852b36 | 2018-11-06 11:36:45 -0800 | [diff] [blame] | 548 | reportError(e.getLocation(), e.getMessage()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 549 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 550 | return arguments; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 551 | } |
| 552 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 553 | // expr_list parses a comma-separated list of expression. It assumes that the |
| 554 | // first expression was already parsed, so it starts with a comma. |
| 555 | // It is used to parse tuples and list elements. |
| 556 | // expr_list ::= ( ',' expr )* ','? |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 557 | private List<Expression> parseExprList(boolean trailingColonAllowed) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 558 | List<Expression> list = new ArrayList<>(); |
| 559 | // terminating tokens for an expression list |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 560 | while (token.kind == TokenKind.COMMA) { |
| 561 | expect(TokenKind.COMMA); |
| 562 | if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 563 | if (!trailingColonAllowed) { |
| 564 | reportError( |
| 565 | lexer.createLocation(token.left, token.right), |
| 566 | "Trailing comma is allowed only in parenthesized tuples."); |
| 567 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 568 | break; |
| 569 | } |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 570 | list.add(parseNonTupleExpression()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 571 | } |
| 572 | return list; |
| 573 | } |
| 574 | |
| 575 | // dict_entry_list ::= ( (dict_entry ',')* dict_entry ','? )? |
| 576 | private List<DictionaryEntryLiteral> parseDictEntryList() { |
| 577 | List<DictionaryEntryLiteral> list = new ArrayList<>(); |
| 578 | // the terminating token for a dict entry list |
| 579 | while (token.kind != TokenKind.RBRACE) { |
| 580 | list.add(parseDictEntry()); |
| 581 | if (token.kind == TokenKind.COMMA) { |
| 582 | nextToken(); |
| 583 | } else { |
| 584 | break; |
| 585 | } |
| 586 | } |
| 587 | return list; |
| 588 | } |
| 589 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 590 | // dict_entry ::= nontupleexpr ':' nontupleexpr |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 591 | private DictionaryEntryLiteral parseDictEntry() { |
| 592 | int start = token.left; |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 593 | Expression key = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 594 | expect(TokenKind.COLON); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 595 | Expression value = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 596 | return setLocation(new DictionaryEntryLiteral(key, value), start, value); |
| 597 | } |
| 598 | |
Laurent Le Brun | 4aa2912 | 2015-09-10 11:31:30 +0000 | [diff] [blame] | 599 | /** |
| 600 | * Parse a String literal value, e.g. "str". |
| 601 | */ |
| 602 | private StringLiteral parseStringLiteral() { |
| 603 | Preconditions.checkState(token.kind == TokenKind.STRING); |
| 604 | int end = token.right; |
Laurent Le Brun | 4aa2912 | 2015-09-10 11:31:30 +0000 | [diff] [blame] | 605 | StringLiteral literal = |
nharmata | 6dbfafe | 2019-02-05 08:55:07 -0800 | [diff] [blame] | 606 | setLocation( |
| 607 | new StringLiteral(stringInterner.intern((String) token.value)), token.left, end); |
Laurent Le Brun | 4aa2912 | 2015-09-10 11:31:30 +0000 | [diff] [blame] | 608 | |
| 609 | nextToken(); |
| 610 | if (token.kind == TokenKind.STRING) { |
| 611 | reportError(lexer.createLocation(end, token.left), |
| 612 | "Implicit string concatenation is forbidden, use the + operator"); |
| 613 | } |
| 614 | return literal; |
| 615 | } |
| 616 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 617 | // primary ::= INTEGER |
| 618 | // | STRING |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 619 | // | IDENTIFIER |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 620 | // | list_expression |
| 621 | // | '(' ')' // a tuple with zero elements |
| 622 | // | '(' expr ')' // a parenthesized expression |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 623 | // | dict_expression |
| 624 | // | '-' primary_with_suffix |
| 625 | private Expression parsePrimary() { |
| 626 | int start = token.left; |
| 627 | switch (token.kind) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 628 | case INT: |
| 629 | { |
| 630 | IntegerLiteral literal = new IntegerLiteral((Integer) token.value); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 631 | setLocation(literal, start, token.right); |
| 632 | nextToken(); |
| 633 | return literal; |
| 634 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 635 | case STRING: |
| 636 | return parseStringLiteral(); |
| 637 | case IDENTIFIER: |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 638 | return parseIdent(); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 639 | case LBRACKET: // it's a list |
| 640 | return parseListMaker(); |
| 641 | case LBRACE: // it's a dictionary |
| 642 | return parseDictExpression(); |
| 643 | case LPAREN: |
| 644 | { |
| 645 | nextToken(); |
| 646 | // check for the empty tuple literal |
| 647 | if (token.kind == TokenKind.RPAREN) { |
laurentlb | e5894f0 | 2018-10-25 13:02:00 -0700 | [diff] [blame] | 648 | ListLiteral literal = ListLiteral.makeTuple(ImmutableList.of()); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 649 | setLocation(literal, start, token.right); |
| 650 | nextToken(); |
| 651 | return literal; |
| 652 | } |
| 653 | // parse the first expression |
| 654 | Expression expression = parseExpression(true); |
| 655 | setLocation(expression, start, token.right); |
| 656 | if (token.kind == TokenKind.RPAREN) { |
| 657 | nextToken(); |
| 658 | return expression; |
| 659 | } |
| 660 | expect(TokenKind.RPAREN); |
| 661 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 662 | return makeErrorExpression(start, end); |
| 663 | } |
| 664 | case MINUS: |
| 665 | { |
| 666 | nextToken(); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 667 | Expression expr = parsePrimaryWithSuffix(); |
brandjon | f2ed858 | 2017-06-27 15:05:35 +0200 | [diff] [blame] | 668 | UnaryOperatorExpression minus = new UnaryOperatorExpression(UnaryOperator.MINUS, expr); |
| 669 | return setLocation(minus, start, expr); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 670 | } |
| 671 | default: |
| 672 | { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 673 | syntaxError("expected expression"); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 674 | int end = syncTo(EXPR_TERMINATOR_SET); |
| 675 | return makeErrorExpression(start, end); |
| 676 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 677 | } |
| 678 | } |
| 679 | |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 680 | // primary_with_suffix ::= primary (selector_suffix | substring_suffix | funcall_suffix)* |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 681 | private Expression parsePrimaryWithSuffix() { |
| 682 | int start = token.left; |
| 683 | Expression receiver = parsePrimary(); |
| 684 | while (true) { |
| 685 | if (token.kind == TokenKind.DOT) { |
| 686 | receiver = parseSelectorSuffix(start, receiver); |
| 687 | } else if (token.kind == TokenKind.LBRACKET) { |
| 688 | receiver = parseSubstringSuffix(start, receiver); |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 689 | } else if (token.kind == TokenKind.LPAREN) { |
| 690 | receiver = parseFuncallSuffix(start, receiver); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 691 | } else { |
| 692 | break; |
| 693 | } |
| 694 | } |
| 695 | return receiver; |
| 696 | } |
| 697 | |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 698 | // substring_suffix ::= '[' expression? ':' expression? ':' expression? ']' |
fzaiser | e0f1333 | 2017-08-14 12:00:51 +0200 | [diff] [blame] | 699 | // | '[' expression? ':' expression? ']' |
| 700 | // | '[' expression ']' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 701 | private Expression parseSubstringSuffix(int start, Expression receiver) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 702 | Expression startExpr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 703 | |
| 704 | expect(TokenKind.LBRACKET); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 705 | if (token.kind == TokenKind.COLON) { |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 706 | startExpr = null; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 707 | } else { |
Laurent Le Brun | 6824d86 | 2015-09-11 13:51:41 +0000 | [diff] [blame] | 708 | startExpr = parseExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 709 | } |
Vladimir Moskva | 8d610c6 | 2016-09-15 14:36:41 +0000 | [diff] [blame] | 710 | // This is an index/key access |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 711 | if (token.kind == TokenKind.RBRACKET) { |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 712 | Expression expr = setLocation(new IndexExpression(receiver, startExpr), start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 713 | expect(TokenKind.RBRACKET); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 714 | return expr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 715 | } |
Laurent Le Brun | eeef30f | 2015-03-16 15:12:35 +0000 | [diff] [blame] | 716 | // This is a slice (or substring) |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 717 | Expression endExpr = parseSliceArgument(); |
| 718 | Expression stepExpr = parseSliceArgument(); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 719 | Expression expr = |
| 720 | setLocation( |
| 721 | new SliceExpression(receiver, startExpr, endExpr, stepExpr), start, token.right); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 722 | expect(TokenKind.RBRACKET); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 723 | return expr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 724 | } |
| 725 | |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 726 | /** |
| 727 | * Parses {@code [':' [expr]]} which can either be the end or the step argument of a slice |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 728 | * operation. If no such expression is found, this method returns null. |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 729 | */ |
laurentlb | 9b96c0b | 2018-02-12 02:53:19 -0800 | [diff] [blame] | 730 | private @Nullable Expression parseSliceArgument() { |
Florian Weikert | e342196 | 2015-12-17 12:46:08 +0000 | [diff] [blame] | 731 | // There has to be a colon before any end or slice argument. |
| 732 | // However, if the next token thereafter is another colon or a right bracket, no argument value |
| 733 | // was specified. |
| 734 | if (token.kind == TokenKind.COLON) { |
| 735 | expect(TokenKind.COLON); |
| 736 | if (token.kind != TokenKind.COLON && token.kind != TokenKind.RBRACKET) { |
| 737 | return parseNonTupleExpression(); |
| 738 | } |
| 739 | } |
| 740 | return null; |
| 741 | } |
| 742 | |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 743 | // Equivalent to 'exprlist' rule in Python grammar. |
| 744 | // loop_variables ::= primary_with_suffix ( ',' primary_with_suffix )* ','? |
| 745 | private Expression parseForLoopVariables() { |
| 746 | // We cannot reuse parseExpression because it would parse the 'in' operator. |
| 747 | // e.g. "for i in e: pass" -> we want to parse only "i" here. |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 748 | int start = token.left; |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 749 | Expression e1 = parsePrimaryWithSuffix(); |
| 750 | if (token.kind != TokenKind.COMMA) { |
| 751 | return e1; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 752 | } |
| 753 | |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 754 | // It's a tuple |
| 755 | List<Expression> tuple = new ArrayList<>(); |
| 756 | tuple.add(e1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 757 | while (token.kind == TokenKind.COMMA) { |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 758 | expect(TokenKind.COMMA); |
| 759 | if (EXPR_LIST_TERMINATOR_SET.contains(token.kind)) { |
| 760 | break; |
| 761 | } |
| 762 | tuple.add(parsePrimaryWithSuffix()); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 763 | } |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 764 | return setLocation(ListLiteral.makeTuple(tuple), start, Iterables.getLast(tuple)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 765 | } |
| 766 | |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 767 | // comprehension_suffix ::= 'FOR' loop_variables 'IN' expr comprehension_suffix |
| 768 | // | 'IF' expr comprehension_suffix |
| 769 | // | ']' |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 770 | private Expression parseComprehensionSuffix( |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 771 | AbstractComprehension.AbstractBuilder comprehensionBuilder, |
| 772 | TokenKind closingBracket, |
| 773 | int comprehensionStartOffset) { |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 774 | while (true) { |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 775 | if (token.kind == TokenKind.FOR) { |
| 776 | nextToken(); |
brandjon | 990622b | 2017-07-11 19:56:45 +0200 | [diff] [blame] | 777 | Expression lhs = parseForLoopVariables(); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 778 | expect(TokenKind.IN); |
| 779 | // The expression cannot be a ternary expression ('x if y else z') due to |
| 780 | // conflicts in Python grammar ('if' is used by the comprehension). |
| 781 | Expression listExpression = parseNonTupleExpression(0); |
brandjon | 990622b | 2017-07-11 19:56:45 +0200 | [diff] [blame] | 782 | comprehensionBuilder.addFor(new LValue(lhs), listExpression); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 783 | } else if (token.kind == TokenKind.IF) { |
| 784 | nextToken(); |
laurentlb | c3a1af6 | 2017-06-16 14:37:43 +0200 | [diff] [blame] | 785 | // [x for x in li if 1, 2] # parse error |
| 786 | // [x for x in li if (1, 2)] # ok |
| 787 | comprehensionBuilder.addIf(parseNonTupleExpression(0)); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 788 | } else if (token.kind == closingBracket) { |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 789 | Expression expr = comprehensionBuilder.build(); |
| 790 | setLocation(expr, comprehensionStartOffset, token.right); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 791 | nextToken(); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 792 | return expr; |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 793 | } else { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 794 | syntaxError("expected '" + closingBracket.getPrettyName() + "', 'for' or 'if'"); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 795 | syncPast(LIST_TERMINATOR_SET); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 796 | return makeErrorExpression(comprehensionStartOffset, token.right); |
Laurent Le Brun | 443aaae | 2015-04-21 19:49:49 +0000 | [diff] [blame] | 797 | } |
| 798 | } |
| 799 | } |
| 800 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 801 | // list_maker ::= '[' ']' |
| 802 | // |'[' expr ']' |
| 803 | // |'[' expr expr_list ']' |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 804 | // |'[' expr comprehension_suffix ']' |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 805 | private Expression parseListMaker() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 806 | int start = token.left; |
| 807 | expect(TokenKind.LBRACKET); |
| 808 | if (token.kind == TokenKind.RBRACKET) { // empty List |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 809 | ListLiteral literal = ListLiteral.emptyList(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 810 | setLocation(literal, start, token.right); |
| 811 | nextToken(); |
| 812 | return literal; |
| 813 | } |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 814 | Expression expression = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 815 | Preconditions.checkNotNull(expression, |
| 816 | "null element in list in AST at %s:%s", token.left, token.right); |
| 817 | switch (token.kind) { |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 818 | case RBRACKET: // singleton List |
| 819 | { |
laurentlb | e5894f0 | 2018-10-25 13:02:00 -0700 | [diff] [blame] | 820 | ListLiteral literal = ListLiteral.makeList(ImmutableList.of(expression)); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 821 | setLocation(literal, start, token.right); |
| 822 | nextToken(); |
| 823 | return literal; |
| 824 | } |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 825 | case FOR: |
| 826 | { // list comprehension |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 827 | return parseComprehensionSuffix( |
| 828 | new ListComprehension.Builder().setOutputExpression(expression), |
| 829 | TokenKind.RBRACKET, |
| 830 | start); |
Florian Weikert | ffd8a5a | 2015-09-18 11:51:01 +0000 | [diff] [blame] | 831 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 832 | case COMMA: |
| 833 | { |
| 834 | List<Expression> list = parseExprList(true); |
| 835 | Preconditions.checkState( |
| 836 | !list.contains(null), |
| 837 | "null element in list in AST at %s:%s", |
| 838 | token.left, |
| 839 | token.right); |
| 840 | list.add(0, expression); |
| 841 | if (token.kind == TokenKind.RBRACKET) { |
| 842 | ListLiteral literal = ListLiteral.makeList(list); |
| 843 | setLocation(literal, start, token.right); |
| 844 | nextToken(); |
| 845 | return literal; |
| 846 | } |
| 847 | expect(TokenKind.RBRACKET); |
| 848 | int end = syncPast(LIST_TERMINATOR_SET); |
| 849 | return makeErrorExpression(start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 850 | } |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 851 | default: |
| 852 | { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 853 | syntaxError("expected ',', 'for' or ']'"); |
Laurent Le Brun | b639ca8 | 2017-01-17 11:18:23 +0000 | [diff] [blame] | 854 | int end = syncPast(LIST_TERMINATOR_SET); |
| 855 | return makeErrorExpression(start, end); |
| 856 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 857 | } |
| 858 | } |
| 859 | |
| 860 | // dict_expression ::= '{' '}' |
| 861 | // |'{' dict_entry_list '}' |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 862 | // |'{' dict_entry comprehension_suffix '}' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 863 | private Expression parseDictExpression() { |
| 864 | int start = token.left; |
| 865 | expect(TokenKind.LBRACE); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 866 | if (token.kind == TokenKind.RBRACE) { // empty Dict |
| 867 | DictionaryLiteral literal = DictionaryLiteral.emptyDict(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 868 | setLocation(literal, start, token.right); |
| 869 | nextToken(); |
| 870 | return literal; |
| 871 | } |
| 872 | DictionaryEntryLiteral entry = parseDictEntry(); |
| 873 | if (token.kind == TokenKind.FOR) { |
| 874 | // Dict comprehension |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 875 | return parseComprehensionSuffix( |
brandjon | 296cd49 | 2017-05-15 16:17:16 +0200 | [diff] [blame] | 876 | new DictComprehension.Builder() |
| 877 | .setKeyExpression(entry.getKey()) |
| 878 | .setValueExpression(entry.getValue()), |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 879 | TokenKind.RBRACE, |
| 880 | start); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 881 | } |
| 882 | List<DictionaryEntryLiteral> entries = new ArrayList<>(); |
| 883 | entries.add(entry); |
| 884 | if (token.kind == TokenKind.COMMA) { |
| 885 | expect(TokenKind.COMMA); |
| 886 | entries.addAll(parseDictEntryList()); |
| 887 | } |
| 888 | if (token.kind == TokenKind.RBRACE) { |
| 889 | DictionaryLiteral literal = new DictionaryLiteral(entries); |
| 890 | setLocation(literal, start, token.right); |
| 891 | nextToken(); |
| 892 | return literal; |
| 893 | } |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 894 | expect(TokenKind.RBRACE); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 895 | int end = syncPast(DICT_TERMINATOR_SET); |
| 896 | return makeErrorExpression(start, end); |
| 897 | } |
| 898 | |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 899 | private Identifier parseIdent() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 900 | if (token.kind != TokenKind.IDENTIFIER) { |
Laurent Le Brun | 7232986 | 2015-03-23 14:20:03 +0000 | [diff] [blame] | 901 | expect(TokenKind.IDENTIFIER); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 902 | return makeErrorExpression(token.left, token.right); |
| 903 | } |
Taras Tsugrii | 3694136 | 2018-06-08 16:31:53 -0700 | [diff] [blame] | 904 | Identifier ident = Identifier.of(((String) token.value)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 905 | setLocation(ident, token.left, token.right); |
| 906 | nextToken(); |
| 907 | return ident; |
| 908 | } |
| 909 | |
| 910 | // binop_expression ::= binop_expression OP binop_expression |
| 911 | // | parsePrimaryWithSuffix |
| 912 | // This function takes care of precedence between operators (see operatorPrecedence for |
| 913 | // the order), and it assumes left-to-right associativity. |
| 914 | private Expression parseBinOpExpression(int prec) { |
| 915 | int start = token.left; |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 916 | Expression expr = parseNonTupleExpression(prec + 1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 917 | // The loop is not strictly needed, but it prevents risks of stack overflow. Depth is |
| 918 | // limited to number of different precedence levels (operatorPrecedence.size()). |
laurentlb | 1fcea38 | 2017-06-19 16:02:42 +0200 | [diff] [blame] | 919 | Operator lastOp = null; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 920 | for (;;) { |
Laurent Le Brun | e3f4ed7 | 2015-05-08 14:47:26 +0000 | [diff] [blame] | 921 | |
| 922 | if (token.kind == TokenKind.NOT) { |
| 923 | // If NOT appears when we expect a binary operator, it must be followed by IN. |
| 924 | // Since the code expects every operator to be a single token, we push a NOT_IN token. |
| 925 | expect(TokenKind.NOT); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 926 | if (token.kind != TokenKind.IN) { |
| 927 | syntaxError("expected 'in'"); |
| 928 | } |
| 929 | token.kind = TokenKind.NOT_IN; |
Laurent Le Brun | e3f4ed7 | 2015-05-08 14:47:26 +0000 | [diff] [blame] | 930 | } |
| 931 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 932 | if (!binaryOperators.containsKey(token.kind)) { |
| 933 | return expr; |
| 934 | } |
| 935 | Operator operator = binaryOperators.get(token.kind); |
| 936 | if (!operatorPrecedence.get(prec).contains(operator)) { |
| 937 | return expr; |
| 938 | } |
laurentlb | 1fcea38 | 2017-06-19 16:02:42 +0200 | [diff] [blame] | 939 | |
| 940 | // Operator '==' and other operators of the same precedence (e.g. '<', 'in') |
| 941 | // are not associative. |
| 942 | if (lastOp != null && operatorPrecedence.get(prec).contains(Operator.EQUALS_EQUALS)) { |
| 943 | reportError( |
| 944 | lexer.createLocation(token.left, token.right), |
| 945 | String.format("Operator '%s' is not associative with operator '%s'. Use parens.", |
| 946 | lastOp, operator)); |
| 947 | } |
| 948 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 949 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 950 | Expression secondary = parseNonTupleExpression(prec + 1); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 951 | expr = optimizeBinOpExpression(operator, expr, secondary); |
| 952 | setLocation(expr, start, secondary); |
laurentlb | 1fcea38 | 2017-06-19 16:02:42 +0200 | [diff] [blame] | 953 | lastOp = operator; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 954 | } |
| 955 | } |
| 956 | |
| 957 | // Optimize binary expressions. |
| 958 | // string literal + string literal can be concatenated into one string literal |
| 959 | // so we don't have to do the expensive string concatenation at runtime. |
| 960 | private Expression optimizeBinOpExpression( |
| 961 | Operator operator, Expression expr, Expression secondary) { |
| 962 | if (operator == Operator.PLUS) { |
| 963 | if (expr instanceof StringLiteral && secondary instanceof StringLiteral) { |
| 964 | StringLiteral left = (StringLiteral) expr; |
| 965 | StringLiteral right = (StringLiteral) secondary; |
nharmata | 6dbfafe | 2019-02-05 08:55:07 -0800 | [diff] [blame] | 966 | return new StringLiteral(stringInterner.intern(left.getValue() + right.getValue())); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 967 | } |
| 968 | } |
| 969 | return new BinaryOperatorExpression(operator, expr, secondary); |
| 970 | } |
| 971 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 972 | // Equivalent to 'test' rule in Python grammar. |
| 973 | private Expression parseNonTupleExpression() { |
| 974 | int start = token.left; |
| 975 | Expression expr = parseNonTupleExpression(0); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 976 | if (token.kind == TokenKind.IF) { |
| 977 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 978 | Expression condition = parseNonTupleExpression(0); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 979 | if (token.kind == TokenKind.ELSE) { |
| 980 | nextToken(); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 981 | Expression elseClause = parseNonTupleExpression(); |
Francois-Rene Rideau | 6fc5ee7 | 2015-03-12 20:55:17 +0000 | [diff] [blame] | 982 | return setLocation(new ConditionalExpression(expr, condition, elseClause), |
| 983 | start, elseClause); |
| 984 | } else { |
| 985 | reportError(lexer.createLocation(start, token.left), |
| 986 | "missing else clause in conditional expression or semicolon before if"); |
| 987 | return expr; // Try to recover from error: drop the if and the expression after it. Ouch. |
| 988 | } |
| 989 | } |
| 990 | return expr; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 991 | } |
| 992 | |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 993 | private Expression parseNonTupleExpression(int prec) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 994 | if (prec >= operatorPrecedence.size()) { |
| 995 | return parsePrimaryWithSuffix(); |
| 996 | } |
| 997 | if (token.kind == TokenKind.NOT && operatorPrecedence.get(prec).contains(Operator.NOT)) { |
| 998 | return parseNotExpression(prec); |
| 999 | } |
| 1000 | return parseBinOpExpression(prec); |
| 1001 | } |
| 1002 | |
| 1003 | // not_expr :== 'not' expr |
| 1004 | private Expression parseNotExpression(int prec) { |
| 1005 | int start = token.left; |
| 1006 | expect(TokenKind.NOT); |
laurentlb | 7aa2c8e | 2018-10-18 10:09:30 -0700 | [diff] [blame] | 1007 | Expression expression = parseNonTupleExpression(prec); |
brandjon | f2ed858 | 2017-06-27 15:05:35 +0200 | [diff] [blame] | 1008 | UnaryOperatorExpression notExpression = |
| 1009 | new UnaryOperatorExpression(UnaryOperator.NOT, expression); |
fzaiser | aa8540d | 2017-09-26 06:01:30 -0400 | [diff] [blame] | 1010 | return setLocation(notExpression, start, expression); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1011 | } |
| 1012 | |
| 1013 | // file_input ::= ('\n' | stmt)* EOF |
| 1014 | private List<Statement> parseFileInput() { |
| 1015 | List<Statement> list = new ArrayList<>(); |
| 1016 | while (token.kind != TokenKind.EOF) { |
| 1017 | if (token.kind == TokenKind.NEWLINE) { |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1018 | expectAndRecover(TokenKind.NEWLINE); |
| 1019 | } else if (recoveryMode) { |
| 1020 | // If there was a parse error, we want to recover here |
| 1021 | // before starting a new top-level statement. |
| 1022 | syncTo(STATEMENT_TERMINATOR_SET); |
| 1023 | recoveryMode = false; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1024 | } else { |
| 1025 | parseTopLevelStatement(list); |
| 1026 | } |
| 1027 | } |
| 1028 | return list; |
| 1029 | } |
| 1030 | |
brandjon | fe29c724 | 2018-02-22 16:24:24 -0800 | [diff] [blame] | 1031 | // load '(' STRING (COMMA [IDENTIFIER EQUALS] STRING)+ COMMA? ')' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1032 | private void parseLoad(List<Statement> list) { |
| 1033 | int start = token.left; |
laurentlb | 2843ead | 2017-07-05 07:20:45 -0400 | [diff] [blame] | 1034 | expect(TokenKind.LOAD); |
| 1035 | expect(TokenKind.LPAREN); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1036 | if (token.kind != TokenKind.STRING) { |
| 1037 | expect(TokenKind.STRING); |
| 1038 | return; |
| 1039 | } |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1040 | |
John Field | 9201fda | 2015-12-30 19:30:34 +0000 | [diff] [blame] | 1041 | StringLiteral importString = parseStringLiteral(); |
brandjon | fe29c724 | 2018-02-22 16:24:24 -0800 | [diff] [blame] | 1042 | if (token.kind == TokenKind.RPAREN) { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1043 | syntaxError("expected at least one symbol to load"); |
brandjon | fe29c724 | 2018-02-22 16:24:24 -0800 | [diff] [blame] | 1044 | return; |
| 1045 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1046 | expect(TokenKind.COMMA); |
| 1047 | |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1048 | ImmutableList.Builder<LoadStatement.Binding> bindings = ImmutableList.builder(); |
| 1049 | // previousSymbols is used to detect duplicate symbols in the same statement. |
| 1050 | Set<String> previousSymbols = new HashSet<>(); |
| 1051 | |
| 1052 | parseLoadSymbol(bindings, previousSymbols); // At least one symbol is required |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1053 | |
Laurent Le Brun | 73a9849 | 2015-03-17 15:46:19 +0000 | [diff] [blame] | 1054 | while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1055 | expect(TokenKind.COMMA); |
Laurent Le Brun | 59f587a | 2015-03-16 14:51:36 +0000 | [diff] [blame] | 1056 | if (token.kind == TokenKind.RPAREN) { |
| 1057 | break; |
| 1058 | } |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1059 | |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1060 | parseLoadSymbol(bindings, previousSymbols); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1061 | } |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1062 | |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1063 | LoadStatement stmt = new LoadStatement(importString, bindings.build()); |
fzaiser | b5768af | 2017-10-09 15:16:50 +0200 | [diff] [blame] | 1064 | list.add(setLocation(stmt, start, token.right)); |
| 1065 | expect(TokenKind.RPAREN); |
brandjon | 09771fd | 2017-07-06 08:54:29 -0400 | [diff] [blame] | 1066 | expectAndRecover(TokenKind.NEWLINE); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1067 | } |
| 1068 | |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1069 | /** |
| 1070 | * Parses the next symbol argument of a load statement and puts it into the output map. |
| 1071 | * |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1072 | * <p>The symbol is either "name" (STRING) or name = "declared" (IDENTIFIER EQUALS STRING). If no |
| 1073 | * alias is used, "name" and "declared" will be identical. "Declared" refers to the original name |
| 1074 | * in the Bazel file that should be loaded, while "name" will be the key of the entry in the map. |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1075 | */ |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1076 | private void parseLoadSymbol( |
| 1077 | ImmutableList.Builder<LoadStatement.Binding> symbols, Set<String> previousSymbols) { |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1078 | if (token.kind != TokenKind.STRING && token.kind != TokenKind.IDENTIFIER) { |
| 1079 | syntaxError("expected either a literal string or an identifier"); |
| 1080 | return; |
| 1081 | } |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1082 | |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1083 | String name = (String) token.value; |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1084 | Identifier local = setLocation(Identifier.of(name), token.left, token.right); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1085 | |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1086 | if (previousSymbols.contains(local.getName())) { |
| 1087 | syntaxError(String.format("Identifier '%s' is used more than once", local.getName())); |
| 1088 | } |
| 1089 | previousSymbols.add(local.getName()); |
| 1090 | |
| 1091 | Identifier original; |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1092 | if (token.kind == TokenKind.STRING) { |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1093 | // load(..., "name") |
| 1094 | original = local; |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1095 | } else { |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1096 | // load(..., local = "orig") |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1097 | expect(TokenKind.IDENTIFIER); |
| 1098 | expect(TokenKind.EQUALS); |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1099 | if (token.kind != TokenKind.STRING) { |
| 1100 | syntaxError("expected string"); |
| 1101 | return; |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1102 | } |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1103 | original = setLocation(Identifier.of((String) token.value), token.left, token.right); |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1104 | } |
laurentlb | 566ef5a | 2018-05-22 10:35:06 -0700 | [diff] [blame] | 1105 | nextToken(); |
laurentlb | 14c0f40 | 2018-11-09 13:59:34 -0800 | [diff] [blame] | 1106 | symbols.add(new LoadStatement.Binding(local, original)); |
Florian Weikert | 9d659ad | 2015-07-23 14:44:36 +0000 | [diff] [blame] | 1107 | } |
| 1108 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1109 | private void parseTopLevelStatement(List<Statement> list) { |
laurentlb | 2843ead | 2017-07-05 07:20:45 -0400 | [diff] [blame] | 1110 | // Unlike Python imports, load statements can appear only at top-level. |
| 1111 | if (token.kind == TokenKind.LOAD) { |
| 1112 | parseLoad(list); |
| 1113 | } else { |
| 1114 | parseStatement(list, ParsingLevel.TOP_LEVEL); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1115 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1116 | } |
| 1117 | |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1118 | // small_stmt | 'pass' |
| 1119 | private void parseSmallStatementOrPass(List<Statement> list) { |
| 1120 | if (token.kind == TokenKind.PASS) { |
fzaiser | 9542913 | 2017-10-23 18:21:36 +0200 | [diff] [blame] | 1121 | list.add(setLocation(new PassStatement(), token.left, token.right)); |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1122 | expect(TokenKind.PASS); |
| 1123 | } else { |
| 1124 | list.add(parseSmallStatement()); |
| 1125 | } |
| 1126 | } |
| 1127 | |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1128 | // simple_stmt ::= small_stmt (';' small_stmt)* ';'? NEWLINE |
| 1129 | private void parseSimpleStatement(List<Statement> list) { |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1130 | parseSmallStatementOrPass(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1131 | |
| 1132 | while (token.kind == TokenKind.SEMI) { |
| 1133 | nextToken(); |
| 1134 | if (token.kind == TokenKind.NEWLINE) { |
| 1135 | break; |
| 1136 | } |
Laurent Le Brun | 0942ee9 | 2015-03-17 20:22:16 +0000 | [diff] [blame] | 1137 | parseSmallStatementOrPass(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1138 | } |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1139 | expectAndRecover(TokenKind.NEWLINE); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1140 | } |
| 1141 | |
| 1142 | // small_stmt ::= assign_stmt |
| 1143 | // | expr |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1144 | // | return_stmt |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1145 | // | flow_stmt |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1146 | // assign_stmt ::= expr ('=' | augassign) expr |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1147 | // augassign ::= ('+=' | '-=' | '*=' | '/=' | '%=' | '//=' ) |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1148 | // Note that these are in Python, but not implemented here (at least for now): |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1149 | // '&=' | '|=' | '^=' |'<<=' | '>>=' | '**=' |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1150 | private Statement parseSmallStatement() { |
| 1151 | int start = token.left; |
| 1152 | if (token.kind == TokenKind.RETURN) { |
| 1153 | return parseReturnStatement(); |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1154 | } else if (token.kind == TokenKind.BREAK || token.kind == TokenKind.CONTINUE) { |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1155 | return parseFlowStatement(token.kind); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1156 | } |
| 1157 | Expression expression = parseExpression(); |
| 1158 | if (token.kind == TokenKind.EQUALS) { |
| 1159 | nextToken(); |
| 1160 | Expression rvalue = parseExpression(); |
laurentlb | 094bb26 | 2017-05-19 21:18:25 +0200 | [diff] [blame] | 1161 | return setLocation( |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 1162 | new AssignmentStatement(new LValue(expression), rvalue), |
| 1163 | start, rvalue); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1164 | } else if (augmentedAssignmentMethods.containsKey(token.kind)) { |
| 1165 | Operator operator = augmentedAssignmentMethods.get(token.kind); |
| 1166 | nextToken(); |
| 1167 | Expression operand = parseExpression(); |
Vladimir Moskva | 7153664 | 2016-12-19 13:51:57 +0000 | [diff] [blame] | 1168 | return setLocation( |
brandjon | 540aac6 | 2017-06-12 23:08:09 +0200 | [diff] [blame] | 1169 | new AugmentedAssignmentStatement(operator, new LValue(expression), operand), |
fzaiser | 1a92d56 | 2017-10-24 15:37:50 +0200 | [diff] [blame] | 1170 | start, |
| 1171 | operand); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1172 | } else { |
| 1173 | return setLocation(new ExpressionStatement(expression), start, expression); |
| 1174 | } |
| 1175 | } |
| 1176 | |
| 1177 | // if_stmt ::= IF expr ':' suite [ELIF expr ':' suite]* [ELSE ':' suite]? |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1178 | private IfStatement parseIfStatement() { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1179 | int start = token.left; |
| 1180 | List<ConditionalStatements> thenBlocks = new ArrayList<>(); |
| 1181 | thenBlocks.add(parseConditionalStatements(TokenKind.IF)); |
| 1182 | while (token.kind == TokenKind.ELIF) { |
| 1183 | thenBlocks.add(parseConditionalStatements(TokenKind.ELIF)); |
| 1184 | } |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1185 | List<Statement> elseBlock; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1186 | if (token.kind == TokenKind.ELSE) { |
| 1187 | expect(TokenKind.ELSE); |
| 1188 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1189 | elseBlock = parseSuite(); |
| 1190 | } else { |
| 1191 | elseBlock = ImmutableList.of(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1192 | } |
fzaiser | 1a92d56 | 2017-10-24 15:37:50 +0200 | [diff] [blame] | 1193 | List<Statement> lastBlock = |
| 1194 | elseBlock.isEmpty() ? Iterables.getLast(thenBlocks).getStatements() : elseBlock; |
| 1195 | int end = |
| 1196 | lastBlock.isEmpty() |
| 1197 | ? token.left |
| 1198 | : Iterables.getLast(lastBlock).getLocation().getEndOffset(); |
| 1199 | return setLocation(new IfStatement(thenBlocks, elseBlock), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1200 | } |
| 1201 | |
| 1202 | // cond_stmts ::= [EL]IF expr ':' suite |
| 1203 | private ConditionalStatements parseConditionalStatements(TokenKind tokenKind) { |
| 1204 | int start = token.left; |
| 1205 | expect(tokenKind); |
Laurent Le Brun | 5609389 | 2015-03-20 13:01:58 +0000 | [diff] [blame] | 1206 | Expression expr = parseNonTupleExpression(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1207 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1208 | List<Statement> thenBlock = parseSuite(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1209 | ConditionalStatements stmt = new ConditionalStatements(expr, thenBlock); |
fzaiser | 1a92d56 | 2017-10-24 15:37:50 +0200 | [diff] [blame] | 1210 | int end = |
| 1211 | thenBlock.isEmpty() |
| 1212 | ? token.left |
| 1213 | : Iterables.getLast(thenBlock).getLocation().getEndOffset(); |
| 1214 | return setLocation(stmt, start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1215 | } |
| 1216 | |
| 1217 | // for_stmt ::= FOR IDENTIFIER IN expr ':' suite |
| 1218 | private void parseForStatement(List<Statement> list) { |
| 1219 | int start = token.left; |
| 1220 | expect(TokenKind.FOR); |
Laurent Le Brun | 185392d | 2015-03-20 14:41:25 +0000 | [diff] [blame] | 1221 | Expression loopVar = parseForLoopVariables(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1222 | expect(TokenKind.IN); |
| 1223 | Expression collection = parseExpression(); |
| 1224 | expect(TokenKind.COLON); |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1225 | List<Statement> block = parseSuite(); |
| 1226 | Statement stmt = new ForStatement(new LValue(loopVar), collection, block); |
fzaiser | 1a92d56 | 2017-10-24 15:37:50 +0200 | [diff] [blame] | 1227 | int end = block.isEmpty() ? token.left : Iterables.getLast(block).getLocation().getEndOffset(); |
| 1228 | list.add(setLocation(stmt, start, end)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1229 | } |
| 1230 | |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1231 | // def_stmt ::= DEF IDENTIFIER '(' arguments ')' ':' suite |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1232 | private void parseFunctionDefStatement(List<Statement> list) { |
| 1233 | int start = token.left; |
| 1234 | expect(TokenKind.DEF); |
Florian Weikert | 6f864c3 | 2015-07-23 11:26:39 +0000 | [diff] [blame] | 1235 | Identifier ident = parseIdent(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1236 | expect(TokenKind.LPAREN); |
laurentlb | d698367 | 2017-06-29 14:53:12 +0200 | [diff] [blame] | 1237 | List<Parameter<Expression, Expression>> params = |
laurentlb | 3d2a68c | 2017-06-30 00:32:04 +0200 | [diff] [blame] | 1238 | parseFunctionArguments(this::parseFunctionParameter); |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1239 | FunctionSignature.WithValues<Expression, Expression> signature = functionSignature(params); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1240 | expect(TokenKind.RPAREN); |
| 1241 | expect(TokenKind.COLON); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1242 | List<Statement> block = parseSuite(); |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1243 | FunctionDefStatement stmt = new FunctionDefStatement(ident, params, signature, block); |
fzaiser | 1a92d56 | 2017-10-24 15:37:50 +0200 | [diff] [blame] | 1244 | int end = block.isEmpty() ? token.left : Iterables.getLast(block).getLocation().getEndOffset(); |
| 1245 | list.add(setLocation(stmt, start, end)); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1246 | } |
| 1247 | |
Laurent Le Brun | 4baefdc | 2015-09-04 11:27:46 +0000 | [diff] [blame] | 1248 | private FunctionSignature.WithValues<Expression, Expression> functionSignature( |
| 1249 | List<Parameter<Expression, Expression>> parameters) { |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1250 | try { |
brandjon | 990622b | 2017-07-11 19:56:45 +0200 | [diff] [blame] | 1251 | return FunctionSignature.WithValues.of(parameters); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1252 | } catch (FunctionSignature.SignatureException e) { |
| 1253 | reportError(e.getParameter().getLocation(), e.getMessage()); |
| 1254 | // return bogus empty signature |
brandjon | 990622b | 2017-07-11 19:56:45 +0200 | [diff] [blame] | 1255 | return FunctionSignature.WithValues.create(FunctionSignature.of()); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1256 | } |
| 1257 | } |
| 1258 | |
| 1259 | /** |
| 1260 | * Parse a list of Argument-s. The arguments can be of class Argument.Passed or Parameter, |
| 1261 | * as returned by the Supplier parseArgument (that, taking no argument, must be closed over |
| 1262 | * the mutable input data structures). |
| 1263 | * |
| 1264 | * <p>This parser does minimal validation: it ensures the proper python use of the comma (that |
| 1265 | * can terminate before a star but not after) and the fact that a **kwarg must appear last. |
| 1266 | * It does NOT validate further ordering constraints for a {@code List<Argument.Passed>}, such as |
| 1267 | * all positional preceding keyword arguments in a call, nor does it check the more subtle |
| 1268 | * constraints for Parameter-s. This validation must happen afterwards in an appropriate method. |
| 1269 | */ |
| 1270 | private <V extends Argument> ImmutableList<V> |
| 1271 | parseFunctionArguments(Supplier<V> parseArgument) { |
| 1272 | boolean hasArg = false; |
| 1273 | boolean hasStar = false; |
| 1274 | boolean hasStarStar = false; |
michajlo | 77e8b03 | 2017-08-04 21:29:17 +0200 | [diff] [blame] | 1275 | ImmutableList.Builder<V> argumentsBuilder = ImmutableList.builder(); |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1276 | |
| 1277 | while (token.kind != TokenKind.RPAREN && token.kind != TokenKind.EOF) { |
| 1278 | if (hasStarStar) { |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1279 | reportError(lexer.createLocation(token.left, token.right), |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1280 | "unexpected tokens after kwarg"); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1281 | break; |
| 1282 | } |
Francois-Rene Rideau | 5dcdbf9 | 2015-02-19 18:36:17 +0000 | [diff] [blame] | 1283 | if (hasArg) { |
| 1284 | expect(TokenKind.COMMA); |
| 1285 | } |
| 1286 | if (token.kind == TokenKind.RPAREN && !hasStar) { |
| 1287 | // list can end with a COMMA if there is neither * nor ** |
| 1288 | break; |
| 1289 | } |
| 1290 | V arg = parseArgument.get(); |
| 1291 | hasArg = true; |
| 1292 | if (arg.isStar()) { |
| 1293 | hasStar = true; |
| 1294 | } else if (arg.isStarStar()) { |
| 1295 | hasStarStar = true; |
| 1296 | } |
michajlo | 77e8b03 | 2017-08-04 21:29:17 +0200 | [diff] [blame] | 1297 | argumentsBuilder.add(arg); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1298 | } |
michajlo | 77e8b03 | 2017-08-04 21:29:17 +0200 | [diff] [blame] | 1299 | return argumentsBuilder.build(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1300 | } |
| 1301 | |
Laurent Le Brun | 5f67445 | 2015-03-17 19:29:13 +0000 | [diff] [blame] | 1302 | // suite is typically what follows a colon (e.g. after def or for). |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1303 | // suite ::= simple_stmt |
| 1304 | // | NEWLINE INDENT stmt+ OUTDENT |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1305 | private List<Statement> parseSuite() { |
| 1306 | List<Statement> list = new ArrayList<>(); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1307 | if (token.kind == TokenKind.NEWLINE) { |
| 1308 | expect(TokenKind.NEWLINE); |
| 1309 | if (token.kind != TokenKind.INDENT) { |
| 1310 | reportError(lexer.createLocation(token.left, token.right), |
| 1311 | "expected an indented block"); |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1312 | return list; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1313 | } |
| 1314 | expect(TokenKind.INDENT); |
| 1315 | while (token.kind != TokenKind.OUTDENT && token.kind != TokenKind.EOF) { |
brandjon | 733a97d | 2017-06-27 17:11:27 +0200 | [diff] [blame] | 1316 | parseStatement(list, ParsingLevel.LOCAL_LEVEL); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1317 | } |
Laurent Le Brun | 9060e16 | 2015-04-02 10:07:28 +0000 | [diff] [blame] | 1318 | expectAndRecover(TokenKind.OUTDENT); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1319 | } else { |
Laurent Le Brun | 5f67445 | 2015-03-17 19:29:13 +0000 | [diff] [blame] | 1320 | parseSimpleStatement(list); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1321 | } |
Francois-Rene Rideau | cbebd63 | 2015-02-11 16:56:37 +0000 | [diff] [blame] | 1322 | return list; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1323 | } |
| 1324 | |
laurentlb | a9b9aea | 2017-09-04 17:39:09 +0200 | [diff] [blame] | 1325 | // flow_stmt ::= BREAK | CONTINUE |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1326 | private FlowStatement parseFlowStatement(TokenKind kind) { |
Laurent Le Brun | d412c8f | 2015-06-16 11:12:54 +0000 | [diff] [blame] | 1327 | int start = token.left; |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1328 | int end = token.right; |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1329 | expect(kind); |
Laurent Le Brun | 7d6a381 | 2015-10-26 12:07:12 +0000 | [diff] [blame] | 1330 | FlowStatement.Kind flowKind = |
| 1331 | kind == TokenKind.BREAK ? FlowStatement.Kind.BREAK : FlowStatement.Kind.CONTINUE; |
Laurent Le Brun | a3c25a6 | 2016-10-26 10:59:09 +0000 | [diff] [blame] | 1332 | return setLocation(new FlowStatement(flowKind), start, end); |
Florian Weikert | 917ceaa | 2015-06-10 13:54:26 +0000 | [diff] [blame] | 1333 | } |
Laurent Le Brun | d412c8f | 2015-06-16 11:12:54 +0000 | [diff] [blame] | 1334 | |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1335 | // return_stmt ::= RETURN [expr] |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1336 | private ReturnStatement parseReturnStatement() { |
| 1337 | int start = token.left; |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1338 | int end = token.right; |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1339 | expect(TokenKind.RETURN); |
Googler | 768cbc4 | 2015-08-28 12:52:14 +0000 | [diff] [blame] | 1340 | |
fzaiser | 317a269 | 2017-08-23 16:40:30 +0200 | [diff] [blame] | 1341 | Expression expression = null; |
| 1342 | if (!STATEMENT_TERMINATOR_SET.contains(token.kind)) { |
| 1343 | expression = parseExpression(); |
| 1344 | end = expression.getLocation().getEndOffset(); |
Googler | cc0d995 | 2015-08-10 12:01:34 +0000 | [diff] [blame] | 1345 | } |
fzaiser | 317a269 | 2017-08-23 16:40:30 +0200 | [diff] [blame] | 1346 | return setLocation(new ReturnStatement(expression), start, end); |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1347 | } |
Han-Wen Nienhuys | d08b27f | 2015-02-25 16:45:20 +0100 | [diff] [blame] | 1348 | } |