// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_
#define THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_ 1
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "src/tools/singlejar/diag.h"
/*
 * Tokenizes a command line that may contain indirect command line arguments.
 * An '@' at the beginning of a command line argument indicates that the rest
 * of the argument is the name of a file which should be read and tokenized
 * the way Bash does it: tokens are separated by whitespace, single and
 * double quotes can be used to keep whitespace and the other quote character
 * inside a token, and a backslash followed by a newline is treated as an
 * empty string (line continuation).
 */
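/*
 * For illustration only (the file name and its contents below are
 * hypothetical): if the command line contains the argument "@build.params"
 * and the file build.params contains
 *
 *   --output "my app.jar" --sources Foo.java \
 *   Bar.java
 *
 * then the stream yields the tokens
 *   --output, my app.jar, --sources, Foo.java, Bar.java
 * i.e. the double quotes keep the space inside one token and the
 * backslash-newline sequence disappears.
 */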
class ArgTokenStream {
/* This class is used as follows:
*
* int main(int argc, char* argv[]) {
* ArgTokenStream tokens(argc-1, argv+1);
* while (!tokens.AtEnd()) {
* if (tokens.MatchAndSet("--opt1", ...) ||
* tokens.MatchAndSet("--opt2", ...) ||
* ...) {
* continue;
* }
* // Process non-option argument or report an error.
* // ArgTokenStream::token() returns the current token.
* }
* }
*/
private:
// Internal class to handle indirect command files.
class FileTokenStream {
public:
explicit FileTokenStream(const char *filename) {
// TODO(laszlocsomor): use the fopen and related file handling API
// implementations from ProtoBuf, in order to support long paths:
// https://github.com/google/protobuf/blob/
// 47b7d2c7cadf74ceec90fc5042232819cd0dd557/
// src/google/protobuf/stubs/io_win32.cc
// Best would be to extract that library to a common location and use
// here, in ProtoBuf, and in Bazel itself.
if (!(fp_ = fopen(filename, "r"))) {
diag_err(1, "%s", filename);
}
filename_ = filename;
next_char();
}
~FileTokenStream() { close(); }
// Assign next token to TOKEN, return true on success, false on EOF.
bool next_token(std::string *token) {
if (!fp_) {
return false;
}
*token = "";
while (current_char_ != EOF && isspace(current_char_)) {
next_char();
}
if (current_char_ == EOF) {
close();
return false;
}
for (;;) {
if (current_char_ == '\'' || current_char_ == '"') {
process_quoted(token);
if (isspace(current_char_)) {
next_char();
return true;
} else {
next_char();
}
} else if (current_char_ == '\\') {
next_char();
if (current_char_ != EOF) {
token->push_back(current_char_);
next_char();
} else {
diag_errx(1, "Expected character after \\, got EOF in %s",
filename_.c_str());
}
} else if (current_char_ == EOF || isspace(current_char_)) {
next_char();
return true;
} else {
token->push_back(current_char_);
next_char();
}
}
}
private:
void close() {
if (fp_) {
fclose(fp_);
fp_ = nullptr;
}
filename_.clear();
}
// Append a quoted string to TOKEN. The opening quote character (either a
// single or a double quote) is the current character. Everything up to the
// matching closing quote is appended.
void process_quoted(std::string *token) {
char quote = current_char_;
next_char();
while (current_char_ != quote) {
if (current_char_ == '\\' && quote == '"') {
// In the "-quoted token, \" stands for ", and \x
// is copied literally for any other x.
next_char();
if (current_char_ == '"') {
token->push_back('"');
next_char();
} else if (current_char_ != EOF) {
token->push_back('\\');
token->push_back(current_char_);
next_char();
} else {
diag_errx(1, "No closing %c in %s", quote, filename_.c_str());
}
} else if (current_char_ != EOF) {
token->push_back(current_char_);
next_char();
} else {
diag_errx(1, "No closing %c in %s", quote, filename_.c_str());
}
}
}
// Get the next character from the input stream. A backslash followed by a
// newline (line continuation) is skipped.
void next_char() {
if (feof(fp_)) {
current_char_ = EOF;
return;
}
current_char_ = getc(fp_);
// Eat "\\\n" sequence.
while (current_char_ == '\\') {
int c = getc(fp_);
if (c == '\n') {
current_char_ = getc(fp_);
} else {
if (c != EOF) {
ungetc(c, fp_);
}
break;
}
}
}
FILE *fp_;
std::string filename_;
int current_char_;
};
public:
// Constructor. Automatically reads the first token.
ArgTokenStream(int argc, const char *const *argv)
: argv_(argv), argv_end_(argv + argc) {
next();
}
// Process --OPTION
// If the current token is --OPTION, set the given FLAG to true, proceed to
// the next token, and return true.
bool MatchAndSet(const char *option, bool *flag) {
if (token_.compare(option) != 0) {
return false;
}
*flag = true;
next();
return true;
}
// Process --OPTION OPTARG
// If the current token is --OPTION, set OPTARG to the next token, proceed to
// the token after it, and return true.
bool MatchAndSet(const char *option, std::string *optarg) {
if (token_.compare(option) != 0) {
return false;
}
next();
if (AtEnd()) {
diag_errx(1, "%s requires argument", option);
}
*optarg = token_;
next();
return true;
}
// Process --OPTION OPTARG1 OPTARG2 ...
// If the current token is --OPTION, append all subsequent tokens up to the
// next option (the next token starting with '-') to the OPTARGS array,
// proceed to that option, and return true.
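// For illustration (hypothetical option and file names): given the remaining
// tokens "--sources a.java b.java --normalize", the call
// MatchAndSet("--sources", &sources) appends "a.java" and "b.java" to
// sources, stops at "--normalize", and returns true.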
bool MatchAndSet(const char *option, std::vector<std::string> *optargs) {
if (token_.compare(option) != 0) {
return false;
}
next();
while (!AtEnd() && '-' != token_.at(0)) {
optargs->push_back(token_);
next();
}
return true;
}
// Process --OPTION OPTARG1,OPTSUFF1 OPTARG2,OPTSUFF2 ...
// If the current token is --OPTION, split each subsequent token up to the
// next option at its first comma into an (OPTARG, OPTSUFF) pair (OPTSUFF is
// empty if the token has no comma), append the pairs to the OPTARGS array,
// proceed to that option, and return true.
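// For illustration (hypothetical option and file names): given the remaining
// tokens "--resources foo.txt,res/foo.txt bar.txt --exclude x", the call
// MatchAndSet("--resources", &resources) appends the pairs
// ("foo.txt", "res/foo.txt") and ("bar.txt", "") to resources, stops at
// "--exclude", and returns true.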
bool MatchAndSet(const char *option,
std::vector<std::pair<std::string, std::string> > *optargs) {
if (token_.compare(option) != 0) {
return false;
}
next();
while (!AtEnd() && '-' != token_.at(0)) {
size_t commapos = token_.find(',');
if (commapos == std::string::npos) {
optargs->push_back(std::pair<std::string, std::string>(token_, ""));
} else {
std::string first = token_.substr(0, commapos);
token_.erase(0, commapos + 1);
optargs->push_back(std::pair<std::string, std::string>(first, token_));
}
next();
}
return true;
}
// Current token.
const std::string &token() const { return token_; }
// Advance to the next token, expanding '@file' arguments as needed.
void next() {
if (AtEnd()) {
return;
}
if (file_token_stream_.get() && token_from_file()) {
return;
}
while (argv_ < argv_end_) {
if (**argv_ != '@') {
token_ = *argv_++;
return;
}
file_token_stream_.reset(new FileTokenStream(*(argv_++) + 1));
if (token_from_file()) {
return;
}
}
argv_++;
}
// True if there are no more tokens.
bool AtEnd() const { return argv_ > argv_end_; }
private:
bool token_from_file() {
if (file_token_stream_->next_token(&token_)) {
return true;
}
file_token_stream_.reset(nullptr);
return false;
}
std::unique_ptr<FileTokenStream> file_token_stream_;
const char *const *argv_;
const char *const *argv_end_;
std::string token_;
};
#endif // THIRD_PARTY_BAZEL_SRC_TOOLS_SINGLEJAR_TOKEN_STREAM_H_