| // Copyright 2014 Google Inc. All rights reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| #include "src/main/cpp/util/strings.h" |
| |
| #include <stdarg.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| #include <cassert> |
| |
| #include "src/main/cpp/util/exit_code.h" |
| |
| using std::vector; |
| |
| namespace blaze_util { |
| |
// Characters treated as word separators by Tokenize() and GetNextToken().
static const char kSeparator[] = " \n\t\r";

// Character-class lookup table indexed by unsigned character value. Each entry
// is a bitmask built from the following flags:
//   0x01 alpha    0x04 alnum    0x08 space    0x10 punct
//   0x20 blank    0x40 cntrl    0x80 xdigit   (0x02 is currently unused)
// Only the 128 ASCII entries are spelled out; the remaining 128 slots carry no
// initializer and are therefore zero, i.e. they belong to no class.
//
// The table was generated by this Python code:
//   def Hex2(n):
//     return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
//   def IsPunct(ch):
//     return (ord(ch) >= 32 and ord(ch) < 127 and
//             not ch.isspace() and not ch.isalnum())
//   def IsBlank(ch):
//     return ch in ' \t'
//   def IsCntrl(ch):
//     return ord(ch) < 32 or ord(ch) == 127
//   def IsXDigit(ch):
//     return ch.isdigit() or ch.lower() in 'abcdef'
//   for i in range(128):
//     ch = chr(i)
//     mask = ((ch.isalpha() and 0x01 or 0) |
//             (ch.isalnum() and 0x04 or 0) |
//             (ch.isspace() and 0x08 or 0) |
//             (IsPunct(ch) and 0x10 or 0) |
//             (IsBlank(ch) and 0x20 or 0) |
//             (IsCntrl(ch) and 0x40 or 0) |
//             (IsXDigit(ch) and 0x80 or 0))
//     print Hex2(mask) + ',',
//     if i % 16 == 7:
//       print ' //', Hex2(i & 0x78)
//     elif i % 16 == 15:
//       print
const unsigned char kAsciiPropertyBits[256] = {
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x00 - 0x07
  0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,  // 0x08 - 0x0f
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x10 - 0x17
  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x18 - 0x1f
  0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x20 - 0x27
  0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x28 - 0x2f
  0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84,  // 0x30 - 0x37
  0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x38 - 0x3f
  0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x40 - 0x47
  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x48 - 0x4f
  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x50 - 0x57
  0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x58 - 0x5f
  0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x60 - 0x67
  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x68 - 0x6f
  0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x70 - 0x77
  0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,  // 0x78 - 0x7f
};
| |
| |
// Returns true iff 'haystack' begins with 'needle'. An empty needle is a
// prefix of every string.
bool starts_with(const string &haystack, const string &needle) {
  const size_t needle_size = needle.length();
  if (haystack.length() < needle_size) {
    return false;
  }
  // Compare only the leading 'needle_size' characters of the haystack.
  return haystack.compare(0, needle_size, needle) == 0;
}
| |
// Returns true iff 'haystack' finishes with 'needle'. An empty needle is a
// suffix of every string.
bool ends_with(const string &haystack, const string &needle) {
  const size_t haystack_size = haystack.length();
  const size_t needle_size = needle.length();
  if (haystack_size < needle_size) {
    return false;
  }
  // Compare only the trailing 'needle_size' characters of the haystack.
  return haystack.compare(haystack_size - needle_size, needle_size, needle) ==
         0;
}
| |
// Concatenates 'pieces' into '*output', placing 'delimeter' between each pair
// of adjacent pieces. The previous contents of '*output' are always replaced;
// in particular an empty 'pieces' yields an empty string rather than leaving
// stale data behind.
void JoinStrings(
    const vector<string> &pieces, const char delimeter, string *output) {
  // Build into a local so that '*output' may alias an element of 'pieces'.
  string joined;
  if (!pieces.empty()) {
    // One delimiter between each pair of pieces, plus the pieces themselves.
    size_t total_size = pieces.size() - 1;
    for (const auto &piece : pieces) {
      total_size += piece.length();
    }
    joined.reserve(total_size);
  }

  bool first = true;
  for (const auto &piece : pieces) {
    if (!first) {
      joined += delimeter;
    }
    joined += piece;
    first = false;
  }
  output->swap(joined);
}
| |
| vector<string> Split(const string &contents, const char delimeter) { |
| vector<string> result; |
| SplitStringUsing(contents, delimeter, &result); |
| return result; |
| } |
| |
// Splits 'contents' at every run of 'delimeter' characters and appends the
// non-empty pieces to '*result' (existing elements are kept). Leading,
// trailing and repeated delimiters never produce empty pieces.
void SplitStringUsing(
    const string &contents, const char delimeter, vector<string> *result) {
  assert(result);

  // Position of the first character of the next piece, if any.
  size_t piece_begin = contents.find_first_not_of(delimeter);
  while (piece_begin != string::npos) {
    const size_t piece_end = contents.find(delimeter, piece_begin);
    if (piece_end == string::npos) {
      // No further delimiter: the rest of the string is the final piece.
      result->push_back(contents.substr(piece_begin));
      return;
    }
    result->push_back(contents.substr(piece_begin, piece_end - piece_begin));
    // Skip the whole run of delimiters that follows this piece.
    piece_begin = contents.find_first_not_of(delimeter, piece_end);
  }
}
| |
| vector<string> SplitQuoted(const string &contents, const char delimeter) { |
| vector<string> result; |
| SplitQuotedStringUsing(contents, delimeter, &result); |
| return result; |
| } |
| |
// Splits 'contents' at 'delimeter' and appends the pieces to '*output', except
// that delimiters inside a single- or double-quoted section, or directly after
// a backslash, do not split. Pieces are copied verbatim: quote characters and
// backslashes are kept in the output. Delimiters at the very start of a piece
// are skipped, so runs of delimiters do not produce empty pieces.
void SplitQuotedStringUsing(const string &contents, const char delimeter,
                            std::vector<string> *output) {
  const size_t length = contents.length();
  size_t piece_start = 0;
  char open_quote = '\0';  // The active quote character, or '\0' if none.

  size_t pos = 0;
  while (pos < length) {
    const char current = contents[pos];
    if (pos == piece_start && current == delimeter) {
      // Delimiter before any piece content: just move the start forward.
      ++piece_start;
    } else if (current == '\\') {
      // Escape: the following character is never interpreted.
      ++pos;
    } else if (open_quote != '\0') {
      // Inside quotes only the matching quote character is significant.
      if (current == open_quote) {
        open_quote = '\0';
      }
    } else if (current == '"' || current == '\'') {
      open_quote = current;
    } else if (current == delimeter) {
      output->push_back(contents.substr(piece_start, pos - piece_start));
      piece_start = pos + 1;
    }
    ++pos;
  }

  // Whatever follows the last split point forms the final piece.
  if (piece_start < length) {
    output->push_back(contents.substr(piece_start));
  }
}
| |
// Replaces every non-overlapping occurrence of 'oldsub' in '*str' with
// 'newsub', scanning left to right. Text inserted by a replacement is never
// rescanned. An empty 'oldsub' matches nothing, so '*str' is left unchanged
// (searching for the empty string would otherwise match forever).
void Replace(const string &oldsub, const string &newsub, string *str) {
  if (oldsub.empty()) {
    return;
  }
  size_t match = str->find(oldsub);
  if (match == string::npos) {
    return;  // Nothing to do; avoid rebuilding the string.
  }

  // Rebuild the string in a single pass instead of erasing and inserting in
  // place, which would shift the tail once per match.
  string rebuilt;
  size_t copied_until = 0;  // Everything before this offset is in 'rebuilt'.
  do {
    rebuilt.append(*str, copied_until, match - copied_until);
    rebuilt += newsub;
    copied_until = match + oldsub.length();
    match = str->find(oldsub, copied_until);
  } while (match != string::npos);
  rebuilt.append(*str, copied_until, string::npos);

  str->swap(rebuilt);
}
| |
| void StripWhitespace(string *str) { |
| int str_length = str->length(); |
| |
| // Strip off leading whitespace. |
| int first = 0; |
| while (first < str_length && ascii_isspace(str->at(first))) { |
| ++first; |
| } |
| // If entire string is white space. |
| if (first == str_length) { |
| str->clear(); |
| return; |
| } |
| if (first > 0) { |
| str->erase(0, first); |
| str_length -= first; |
| } |
| |
| // Strip off trailing whitespace. |
| int last = str_length - 1; |
| while (last >= 0 && ascii_isspace(str->at(last))) { |
| --last; |
| } |
| if (last != (str_length - 1) && last >= 0) { |
| str->erase(last + 1, string::npos); |
| } |
| } |
| |
| static void GetNextToken(const string &str, const char &comment, |
| string::const_iterator *iter, vector<string> *words) { |
| string output; |
| auto last = *iter; |
| char quote = '\0'; |
| // While not a delimiter. |
| while (last != str.end() && (quote || strchr(kSeparator, *last) == nullptr)) { |
| // Absorb escapes. |
| if (*last == '\\') { |
| ++last; |
| if (last == str.end()) { |
| break; |
| } |
| output += *last++; |
| continue; |
| } |
| |
| if (quote) { |
| if (*last == quote) { |
| // Absorb closing quote. |
| quote = '\0'; |
| ++last; |
| } else { |
| output += *last++; |
| } |
| } else { |
| if (*last == comment) { |
| last = str.end(); |
| break; |
| } |
| if (*last == '\'' || *last == '"') { |
| // Absorb opening quote. |
| quote = *last++; |
| } else { |
| output += *last++; |
| } |
| } |
| } |
| |
| if (!output.empty()) { |
| words->push_back(output); |
| } |
| |
| *iter = last; |
| } |
| |
| void Tokenize(const string &str, const char &comment, vector<string> *words) { |
| assert(words); |
| words->clear(); |
| |
| string::const_iterator i = str.begin(); |
| while (i != str.end()) { |
| // Skip whitespace. |
| while (i != str.end() && strchr(kSeparator, *i) != nullptr) { |
| i++; |
| } |
| if (i != str.end() && *i == comment) { |
| break; |
| } |
| GetNextToken(str, comment, &i, words); |
| } |
| } |
| |
| |
| // Evaluate a format string and store the result in 'str'. |
| void StringPrintf(string *str, const char *format, ...) { |
| assert(str); |
| |
| // Determine the required buffer size. vsnpritnf won't account for the |
| // terminating '\0'. |
| va_list args; |
| va_start(args, format); |
| int output_size = vsnprintf(nullptr, 0, format, args); |
| if (output_size < 0) { |
| fprintf(stderr, "Fatal error formatting string: %d", output_size); |
| exit(blaze_exit_code::INTERNAL_ERROR); |
| } |
| va_end(args); |
| |
| // Allocate a buffer and format the input. |
| int buffer_size = output_size + sizeof '\0'; |
| char *buf = new char[buffer_size]; |
| va_start(args, format); |
| int print_result = vsnprintf(buf, buffer_size, format, args); |
| if (print_result < 0) { |
| fprintf(stderr, "Fatal error formatting string: %d", print_result); |
| exit(blaze_exit_code::INTERNAL_ERROR); |
| } |
| va_end(args); |
| |
| *str = buf; |
| delete[] buf; |
| } |
| |
// Converts every character of '*str' to lower case in place, using tolower().
void ToLower(string *str) {
  assert(str);
  for (char &ch : *str) {
    // tolower() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour.
    ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
  }
}
| |
| } // namespace blaze_util |