| // Copyright 2016 The Bazel Authors. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_ |
| #define THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_ 1 |
| |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include <memory> |
| #include <set> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "src/main/cpp/util/path_platform.h" |
| #include "src/tools/singlejar/diag.h" |
| #include "src/tools/singlejar/mapped_file.h" |
| |
| /* |
| * Tokenize command line containing indirect command line arguments. |
| * An '@' at the beginning of a command line argument indicates that |
| * the rest of the argument is the name of the file which should be |
| * read and tokenized as Bash does it: tokens are separated by the |
| * whitespace, quotes and double quotes can be used to have whitespace |
| * and the other quote inside the token, and backslash followed by |
| * newline is treated as empty string. |
| */ |
| |
| class ArgTokenStream { |
| /* This class is used as follows: |
| * |
| * int main(int argc, char* argv[]) { |
| * ArgTokenStream tokens(argc-1, argv+1); |
| * while (!tokens.AtEnd()) { |
| * if (tokens.MatchAndSet("--opt1", ...) || |
| * tokens.MatchAndSet("--opt2", ...) || |
| * ...) { |
| * continue; |
| * } |
| * // Process non-option argument or report an error. |
| * // ArgTokenStream::token() returns the current token. |
| * } |
| * } |
| */ |
| |
| private: |
| // Internal class to handle indirect command files. |
| class FileTokenStream { |
| public: |
| FileTokenStream(const char* filename) |
| : next_ptr_(nullptr), end_ptr_(nullptr) { |
| if (!mapped_file_.Open(filename)) { |
| diag_err(1, "Cannot open param file: %s", filename); |
| } |
| filename_ = filename; |
| next_ptr_ = static_cast<const unsigned char*>(mapped_file_.start()); |
| end_ptr_ = static_cast<const unsigned char*>(mapped_file_.end()); |
| next_char(); |
| } |
| |
| ~FileTokenStream() = default; |
| |
| // Assign next token to TOKEN, return true on success, false on EOF. |
| bool next_token(std::string* token) { |
| *token = ""; |
| while (IsAsciiSpace(current_char_)) { |
| next_char(); |
| } |
| if (current_char_ == EOF) { |
| return false; |
| } |
| for (;;) { |
| if (current_char_ == '\'' || current_char_ == '"') { |
| process_quoted(token); |
| if (IsAsciiSpace(current_char_)) { |
| next_char(); |
| return true; |
| } else { |
| next_char(); |
| } |
| } else if (current_char_ == '\\') { |
| next_char(); |
| if ((current_char_ != EOF)) { |
| token->push_back(current_char_); |
| next_char(); |
| } else { |
| diag_errx(1, "Expected character after \\, got EOF in %s", |
| filename_.c_str()); |
| } |
| } else if (current_char_ == EOF || IsAsciiSpace(current_char_)) { |
| next_char(); |
| return true; |
| } else { |
| token->push_back(current_char_); |
| next_char(); |
| } |
| } |
| } |
| |
| private: |
| // possibly marginally faster than ascii_isspace |
| static inline bool IsAsciiSpace(int c) { |
| return c == ' ' || (static_cast<unsigned int>(c) - 9 < 5); |
| } |
| |
| // Append the quoted string to the TOKEN. The quote character (which can be |
| // single or double quote) is in the current character. Everything up to the |
| // matching quote character is appended. |
| void process_quoted(std::string* token) { |
| char quote = current_char_; |
| next_char(); |
| while (current_char_ != quote) { |
| if (current_char_ == '\\' && quote == '"') { |
| // In the "-quoted token, \" stands for ", and \x |
| // is copied literally for any other x. |
| next_char(); |
| if (current_char_ == '"') { |
| token->push_back('"'); |
| next_char(); |
| } else if (current_char_ != EOF) { |
| token->push_back('\\'); |
| token->push_back(current_char_); |
| next_char(); |
| } else { |
| diag_errx(1, "No closing %c in %s", quote, filename_.c_str()); |
| } |
| } else if (current_char_ != EOF) { |
| token->push_back(current_char_); |
| next_char(); |
| } else { |
| diag_errx(1, "No closing %c in %s", quote, filename_.c_str()); |
| } |
| } |
| } |
| |
| // Get the next character from the input stream. Skip backslash followed |
| // by the newline. |
| void next_char() { |
| do { |
| current_char_ = raw_next(); |
| } while (eat_line_continuation()); |
| } |
| |
| bool eat_line_continuation() { |
| if (current_char_ == '\\' && peek_raw_next() == '\n') { |
| next_ptr_++; |
| return true; |
| } else if (current_char_ == '\\' && peek_raw_next() == '\r' && |
| peek_raw_next(1) == '\n') { |
| next_ptr_ += 2; |
| return true; |
| } |
| return false; |
| } |
| |
| int raw_next() { return next_ptr_ < end_ptr_ ? *next_ptr_++ : EOF; } |
| |
| int peek_raw_next(size_t offset = 0) { |
| return (next_ptr_ + offset) < end_ptr_ ? *(next_ptr_ + offset) : EOF; |
| } |
| |
| MappedFile mapped_file_; |
| std::string filename_; |
| const unsigned char* next_ptr_; |
| const unsigned char* end_ptr_; |
| int current_char_; |
| }; |
| |
| public: |
| // Constructor. Automatically reads the first token. |
| ArgTokenStream(int argc, const char* const* argv) |
| : argv_(argv), argv_end_(argv + argc) { |
| token_.reserve(1024); |
| next(); |
| } |
| |
| // Process --OPTION |
| // If the current token is --OPTION, set given FLAG to true, proceed to next |
| // token and return true |
| bool MatchAndSet(const char* option, bool* flag) { |
| if (token_.compare(option) != 0) { |
| return false; |
| } |
| *flag = true; |
| next(); |
| return true; |
| } |
| |
| // Process --OPTION OPTARG |
| // If the current token is --OPTION, set OPTARG to the next token, proceed to |
| // the next token after it and return true. |
| bool MatchAndSet(const char* option, std::string* optarg) { |
| if (token_.compare(option) != 0) { |
| return false; |
| } |
| next(); |
| if (AtEnd()) { |
| diag_errx(1, "%s requires argument", option); |
| } |
| *optarg = token_; |
| next(); |
| return true; |
| } |
| |
| // Process --OPTION OPTARG1 OPTARG2 ... |
| // If a current token is --OPTION, push_back all subsequent tokens up to the |
| // next option to the OPTARGS array, proceed to the next option and return |
| // true. |
| bool MatchAndSet(const char* option, std::vector<std::string>* optargs) { |
| if (token_.compare(option) != 0) { |
| return false; |
| } |
| next(); |
| while (!AtEnd() && '-' != token_.at(0)) { |
| optargs->push_back(token_); |
| next(); |
| } |
| return true; |
| } |
| |
| // Process --OPTION OPTARG1 OPTARG2 ... |
| // If a current token is --OPTION, insert all subsequent tokens up to the |
| // next option to the OPTARGS set, proceed to the next option and return |
| // true. |
| bool MatchAndSet(const char* option, std::set<std::string>* optargs) { |
| if (token_ != option) { |
| return false; |
| } |
| next(); |
| while (!AtEnd() && '-' != token_.at(0)) { |
| optargs->insert(token_); |
| next(); |
| } |
| return true; |
| } |
| |
| // Process --OPTION OPTARG1,OPTSUFF1 OPTARG2,OPTSUFF2 ... |
| // If a current token is --OPTION, push_back all subsequent tokens up to the |
| // next option to the OPTARGS array, splitting the OPTARG,OPTSUFF by a comma, |
| // proceed to the next option and return true. |
| bool MatchAndSet(const char* option, |
| std::vector<std::pair<std::string, std::string> >* optargs) { |
| if (token_.compare(option) != 0) { |
| return false; |
| } |
| next(); |
| while (!AtEnd() && '-' != token_.at(0)) { |
| size_t commapos = token_.find(','); |
| if (commapos == std::string::npos) { |
| optargs->push_back(std::pair<std::string, std::string>(token_, "")); |
| } else { |
| std::string first = token_.substr(0, commapos); |
| token_.erase(0, commapos + 1); |
| optargs->push_back(std::pair<std::string, std::string>(first, token_)); |
| } |
| |
| next(); |
| } |
| return true; |
| } |
| |
| // Current token. |
| const std::string& token() const { return token_; } |
| |
| // Read the next token. |
| void next() { |
| if (AtEnd()) { |
| return; |
| } |
| if (file_token_stream_.get() && token_from_file()) { |
| return; |
| } |
| while (argv_ < argv_end_) { |
| if (**argv_ != '@') { |
| token_ = *argv_++; |
| return; |
| } |
| file_token_stream_.reset(new FileTokenStream(*(argv_++) + 1)); |
| if (token_from_file()) { |
| return; |
| } |
| } |
| argv_++; |
| } |
| |
| // True if there are no more tokens. |
| bool AtEnd() const { return argv_ > argv_end_; } |
| |
| private: |
| bool token_from_file() { |
| if (file_token_stream_->next_token(&token_)) { |
| return true; |
| } |
| file_token_stream_.reset(nullptr); |
| return false; |
| } |
| std::unique_ptr<FileTokenStream> file_token_stream_; |
| const char* const* argv_; |
| const char* const* argv_end_; |
| std::string token_; |
| }; |
| |
| #endif // THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_ |