| /* |
| * |
| * Copyright 2015 gRPC authors. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| #include <grpc/support/port_platform.h> |
| |
| #include "src/core/lib/uri/uri_parser.h" |
| |
| #include <string.h> |
| |
| #include <grpc/slice_buffer.h> |
| #include <grpc/support/alloc.h> |
| #include <grpc/support/log.h> |
| #include <grpc/support/string_util.h> |
| |
| #include "src/core/lib/gpr/string.h" |
| #include "src/core/lib/slice/percent_encoding.h" |
| #include "src/core/lib/slice/slice_internal.h" |
| #include "src/core/lib/slice/slice_string_helpers.h" |
| |
| /** Sentinel size_t value (all bits set) used to mark an offset that has not been set. */ |
| #define NOT_SET (~(size_t)0) |
| |
| static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section, |
| bool suppress_errors) { |
| char* line_prefix; |
| size_t pfx_len; |
| |
| if (!suppress_errors) { |
| gpr_asprintf(&line_prefix, "bad uri.%s: '", section); |
| pfx_len = strlen(line_prefix) + pos; |
| gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text); |
| gpr_free(line_prefix); |
| |
| line_prefix = static_cast<char*>(gpr_malloc(pfx_len + 1)); |
| memset(line_prefix, ' ', pfx_len); |
| line_prefix[pfx_len] = 0; |
| gpr_log(GPR_ERROR, "%s^ here", line_prefix); |
| gpr_free(line_prefix); |
| } |
| |
| return nullptr; |
| } |
| |
| /** Returns a copy of percent decoded \a src[begin, end) */ |
| static char* decode_and_copy_component(const char* src, size_t begin, |
| size_t end) { |
| grpc_slice component = |
| (begin == NOT_SET || end == NOT_SET) |
| ? grpc_empty_slice() |
| : grpc_slice_from_copied_buffer(src + begin, end - begin); |
| grpc_slice decoded_component = |
| grpc_permissive_percent_decode_slice(component); |
| char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII); |
| grpc_slice_unref_internal(component); |
| grpc_slice_unref_internal(decoded_component); |
| return out; |
| } |
| |
/** Reports whether \a c is an ASCII hexadecimal digit (0-9, a-f or A-F). */
static bool valid_hex(char c) {
  if (c >= '0' && c <= '9') {
    return true;
  }
  if (c >= 'a' && c <= 'f') {
    return true;
  }
  return c >= 'A' && c <= 'F';
}
| |
| /** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar |
| * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent |
| * sign not followed by two hex digits), NOT_SET is returned. */ |
| static size_t parse_pchar(const char* uri_text, size_t i) { |
| /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
| * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
| * pct-encoded = "%" HEXDIG HEXDIG |
| * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
| / "*" / "+" / "," / ";" / "=" */ |
| char c = uri_text[i]; |
| switch (c) { |
| default: |
| if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || |
| ((c >= '0') && (c <= '9'))) { |
| return 1; |
| } |
| break; |
| case ':': |
| case '@': |
| case '-': |
| case '.': |
| case '_': |
| case '~': |
| case '!': |
| case '$': |
| case '&': |
| case '\'': |
| case '(': |
| case ')': |
| case '*': |
| case '+': |
| case ',': |
| case ';': |
| case '=': |
| return 1; |
| case '%': /* pct-encoded */ |
| if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) { |
| return 2; |
| } |
| return NOT_SET; |
| } |
| return 0; |
| } |
| |
| /* *( pchar / "?" / "/" ) */ |
| static int parse_fragment_or_query(const char* uri_text, size_t* i) { |
| char c; |
| while ((c = uri_text[*i]) != 0) { |
| const size_t advance = parse_pchar(uri_text, *i); /* pchar */ |
| switch (advance) { |
| case 0: /* uri_text[i] isn't in pchar */ |
| /* maybe it's ? or / */ |
| if (uri_text[*i] == '?' || uri_text[*i] == '/') { |
| (*i)++; |
| break; |
| } else { |
| return 1; |
| } |
| GPR_UNREACHABLE_CODE(return 0); |
| default: |
| (*i) += advance; |
| break; |
| case NOT_SET: /* uri_text[i] introduces an invalid URI */ |
| return 0; |
| } |
| } |
| /* *i is the first uri_text position past the \a query production, maybe \0 */ |
| return 1; |
| } |
| |
| static void parse_query_parts(grpc_uri* uri) { |
| static const char* QUERY_PARTS_SEPARATOR = "&"; |
| static const char* QUERY_PARTS_VALUE_SEPARATOR = "="; |
| GPR_ASSERT(uri->query != nullptr); |
| if (uri->query[0] == '\0') { |
| uri->query_parts = nullptr; |
| uri->query_parts_values = nullptr; |
| uri->num_query_parts = 0; |
| return; |
| } |
| |
| gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts, |
| &uri->num_query_parts); |
| uri->query_parts_values = |
| static_cast<char**>(gpr_malloc(uri->num_query_parts * sizeof(char**))); |
| for (size_t i = 0; i < uri->num_query_parts; i++) { |
| char** query_param_parts; |
| size_t num_query_param_parts; |
| char* full = uri->query_parts[i]; |
| gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts, |
| &num_query_param_parts); |
| GPR_ASSERT(num_query_param_parts > 0); |
| uri->query_parts[i] = query_param_parts[0]; |
| if (num_query_param_parts > 1) { |
| /* TODO(dgq): only the first value after the separator is considered. |
| * Perhaps all chars after the first separator for the query part should |
| * be included, even if they include the separator. */ |
| uri->query_parts_values[i] = query_param_parts[1]; |
| } else { |
| uri->query_parts_values[i] = nullptr; |
| } |
| for (size_t j = 2; j < num_query_param_parts; j++) { |
| gpr_free(query_param_parts[j]); |
| } |
| gpr_free(query_param_parts); |
| gpr_free(full); |
| } |
| } |
| |
| grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors) { |
| grpc_uri* uri; |
| size_t scheme_begin = 0; |
| size_t scheme_end = NOT_SET; |
| size_t authority_begin = NOT_SET; |
| size_t authority_end = NOT_SET; |
| size_t path_begin = NOT_SET; |
| size_t path_end = NOT_SET; |
| size_t query_begin = NOT_SET; |
| size_t query_end = NOT_SET; |
| size_t fragment_begin = NOT_SET; |
| size_t fragment_end = NOT_SET; |
| size_t i; |
| |
| for (i = scheme_begin; uri_text[i] != 0; i++) { |
| if (uri_text[i] == ':') { |
| scheme_end = i; |
| break; |
| } |
| if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue; |
| if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue; |
| if (i != scheme_begin) { |
| if (uri_text[i] >= '0' && uri_text[i] <= '9') continue; |
| if (uri_text[i] == '+') continue; |
| if (uri_text[i] == '-') continue; |
| if (uri_text[i] == '.') continue; |
| } |
| break; |
| } |
| if (scheme_end == NOT_SET) { |
| return bad_uri(uri_text, i, "scheme", suppress_errors); |
| } |
| |
| if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') { |
| authority_begin = scheme_end + 3; |
| for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET; |
| i++) { |
| if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') { |
| authority_end = i; |
| } |
| } |
| if (authority_end == NOT_SET && uri_text[i] == 0) { |
| authority_end = i; |
| } |
| if (authority_end == NOT_SET) { |
| return bad_uri(uri_text, i, "authority", suppress_errors); |
| } |
| /* TODO(ctiller): parse the authority correctly */ |
| path_begin = authority_end; |
| } else { |
| path_begin = scheme_end + 1; |
| } |
| |
| for (i = path_begin; uri_text[i] != 0; i++) { |
| if (uri_text[i] == '?' || uri_text[i] == '#') { |
| path_end = i; |
| break; |
| } |
| } |
| if (path_end == NOT_SET && uri_text[i] == 0) { |
| path_end = i; |
| } |
| if (path_end == NOT_SET) { |
| return bad_uri(uri_text, i, "path", suppress_errors); |
| } |
| |
| if (uri_text[i] == '?') { |
| query_begin = ++i; |
| if (!parse_fragment_or_query(uri_text, &i)) { |
| return bad_uri(uri_text, i, "query", suppress_errors); |
| } else if (uri_text[i] != 0 && uri_text[i] != '#') { |
| /* We must be at the end or at the beginning of a fragment */ |
| return bad_uri(uri_text, i, "query", suppress_errors); |
| } |
| query_end = i; |
| } |
| if (uri_text[i] == '#') { |
| fragment_begin = ++i; |
| if (!parse_fragment_or_query(uri_text, &i)) { |
| return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors); |
| } else if (uri_text[i] != 0) { |
| /* We must be at the end */ |
| return bad_uri(uri_text, i, "fragment", suppress_errors); |
| } |
| fragment_end = i; |
| } |
| |
| uri = static_cast<grpc_uri*>(gpr_zalloc(sizeof(*uri))); |
| uri->scheme = decode_and_copy_component(uri_text, scheme_begin, scheme_end); |
| uri->authority = |
| decode_and_copy_component(uri_text, authority_begin, authority_end); |
| uri->path = decode_and_copy_component(uri_text, path_begin, path_end); |
| uri->query = decode_and_copy_component(uri_text, query_begin, query_end); |
| uri->fragment = |
| decode_and_copy_component(uri_text, fragment_begin, fragment_end); |
| parse_query_parts(uri); |
| |
| return uri; |
| } |
| |
| const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) { |
| GPR_ASSERT(key != nullptr); |
| if (key[0] == '\0') return nullptr; |
| |
| for (size_t i = 0; i < uri->num_query_parts; ++i) { |
| if (0 == strcmp(key, uri->query_parts[i])) { |
| return uri->query_parts_values[i]; |
| } |
| } |
| return nullptr; |
| } |
| |
| void grpc_uri_destroy(grpc_uri* uri) { |
| if (!uri) return; |
| gpr_free(uri->scheme); |
| gpr_free(uri->authority); |
| gpr_free(uri->path); |
| gpr_free(uri->query); |
| for (size_t i = 0; i < uri->num_query_parts; ++i) { |
| gpr_free(uri->query_parts[i]); |
| gpr_free(uri->query_parts_values[i]); |
| } |
| gpr_free(uri->query_parts); |
| gpr_free(uri->query_parts_values); |
| gpr_free(uri->fragment); |
| gpr_free(uri); |
| } |