Make URI parser able to tolerate nonconformant URIs.

If the EVHTTP_URI_NONCONFORMANT flag is passed in (which it is when
parsing URIs we get over the wire), then we relax our checks a lot.
Specifically, we do nothing to check for correct characters in the
path, query, and fragment parts of such a URI.

We could do much more here: we could relax our hostname requirements,
deal with spaces differently/better, trap some errors but not others,
etc.  But this should solve the worst user-agent compatibility issues
for now; the other issues can wait for a later release.
This commit is contained in:
Nick Mathewson 2011-02-13 00:41:22 -05:00
parent f736198086
commit 95060b54fe
3 changed files with 140 additions and 46 deletions

69
http.c
View File

@ -1472,7 +1472,8 @@ evhttp_parse_request_line(struct evhttp_request *req, char *line)
return (-1);
}
if ((req->uri_elems = evhttp_uri_parse(req->uri)) == NULL) {
if ((req->uri_elems = evhttp_uri_parse_with_flags(req->uri,
EVHTTP_URI_NONCONFORMANT)) == NULL) {
return -1;
}
@ -3777,6 +3778,7 @@ bind_socket(const char *address, ev_uint16_t port, int reuse)
}
struct evhttp_uri {
unsigned flags;
char *scheme; /* scheme; e.g http, ftp etc */
char *userinfo; /* userinfo (typically username:pass), or NULL */
char *host; /* hostname, IP address, or NULL */
@ -3795,7 +3797,13 @@ evhttp_uri_new(void)
return uri;
}
/* Return true of the string starting at s and ending immediately before eos
/* Overwrite the parser flags stored on 'uri' with 'flags' (a bitmask of
 * EVHTTP_URI_* values).  The previous value is replaced, not OR-ed in.
 * 'uri' is dereferenced unconditionally, so it must not be NULL.
 * The stored flags are what evhttp_uri_set_path(), evhttp_uri_set_query()
 * and evhttp_uri_set_fragment() later pass to the component validator. */
void
evhttp_uri_set_flags(struct evhttp_uri *uri, unsigned flags)
{
uri->flags = flags;
}
/* Return true if the string starting at s and ending immediately before eos
* is a valid URI scheme according to RFC3986
*/
static int
@ -3987,13 +3995,41 @@ end_of_authority(char *cp)
return cp;
}
/* Selects which trailing URI component end_of_path() is scanning, since
 * each component ends at (and accepts) a different set of characters. */
enum uri_part {
	/* Path: a '?' is never consumed as part of it. */
	PART_PATH = 0,
	/* Query: may itself contain '?'. */
	PART_QUERY = 1,
	/* Fragment: may itself contain '?'. */
	PART_FRAGMENT = 2
};
/* Return the character after the longest prefix of 'cp' that matches...
* *pchar / "/" if allow_qchars is false, or
* *(pchar / "/" / "?") if allow_chars is true.
* *(pchar / "/" / "?") if allow_qchars is true.
*/
static char *
end_of_path(char *cp, int allow_qchars)
end_of_path(char *cp, enum uri_part part, unsigned flags)
{
if (flags & EVHTTP_URI_NONCONFORMANT) {
/* If NONCONFORMANT:
* Path is everything up to a # or ? or nul.
* Query is everything up to a # or nul
* Fragment is everything up to a nul.
*/
switch (part) {
case PART_PATH:
while (*cp && *cp != '#' && *cp != '?')
++cp;
break;
case PART_QUERY:
while (*cp && *cp != '#')
++cp;
break;
case PART_FRAGMENT:
cp += strlen(cp);
break;
};
return cp;
}
while (*cp) {
if (CHAR_IS_UNRESERVED(*cp) ||
strchr(SUBDELIMS, *cp) ||
@ -4002,7 +4038,7 @@ end_of_path(char *cp, int allow_qchars)
else if (*cp == '%' && EVUTIL_ISXDIGIT(cp[1]) &&
EVUTIL_ISXDIGIT(cp[2]))
cp += 3;
else if (*cp == '?' && allow_qchars)
else if (*cp == '?' && part != PART_PATH)
++cp;
else
return cp;
@ -4025,6 +4061,12 @@ path_matches_noscheme(const char *cp)
/* Parse 'source_uri' with no EVHTTP_URI_* flags set.  This is a thin
 * wrapper: it forwards to evhttp_uri_parse_with_flags(source_uri, 0) and
 * returns whatever that call returns. */
struct evhttp_uri *
evhttp_uri_parse(const char *source_uri)
{
return evhttp_uri_parse_with_flags(source_uri, 0);
}
struct evhttp_uri *
evhttp_uri_parse_with_flags(const char *source_uri, unsigned flags)
{
char *readbuf = NULL, *readp = NULL, *token = NULL, *query = NULL;
char *path = NULL, *fragment = NULL;
@ -4036,6 +4078,7 @@ evhttp_uri_parse(const char *source_uri)
goto err;
}
uri->port = -1;
uri->flags = flags;
readbuf = mm_strdup(source_uri);
if (readbuf == NULL) {
@ -4052,7 +4095,6 @@ evhttp_uri_parse(const char *source_uri)
URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
relative-ref = relative-part [ "?" query ] [ "#" fragment ]
*/
/* 1. scheme: */
@ -4082,21 +4124,21 @@ evhttp_uri_parse(const char *source_uri)
/* 3. Path: path-abempty, path-absolute, path-rootless, or path-empty
*/
path = readp;
readp = end_of_path(path, 0);
readp = end_of_path(path, PART_PATH, flags);
/* Query */
if (*readp == '?') {
*readp = '\0';
++readp;
query = readp;
readp = end_of_path(readp, 1);
readp = end_of_path(readp, PART_QUERY, flags);
}
/* fragment */
if (*readp == '#') {
*readp = '\0';
++readp;
fragment = readp;
readp = end_of_path(readp, 1);
readp = end_of_path(readp, PART_FRAGMENT, flags);
}
if (*readp != '\0') {
goto err;
@ -4324,12 +4366,13 @@ evhttp_uri_set_port(struct evhttp_uri *uri, int port)
uri->port = port;
return 0;
}
#define end_of_cpath(cp,aq) ((const char*)(end_of_path(((char*)(cp)), (aq))))
#define end_of_cpath(cp,p,f) \
((const char*)(end_of_path(((char*)(cp)), (p), (f))))
int
evhttp_uri_set_path(struct evhttp_uri *uri, const char *path)
{
if (path && end_of_cpath(path, 0) != path+strlen(path))
if (path && end_of_cpath(path, PART_PATH, uri->flags) != path+strlen(path))
return -1;
_URI_SET_STR(path);
@ -4338,7 +4381,7 @@ evhttp_uri_set_path(struct evhttp_uri *uri, const char *path)
int
evhttp_uri_set_query(struct evhttp_uri *uri, const char *query)
{
if (query && end_of_cpath(query, 1) != query+strlen(query))
if (query && end_of_cpath(query, PART_QUERY, uri->flags) != query+strlen(query))
return -1;
_URI_SET_STR(query);
return 0;
@ -4346,7 +4389,7 @@ evhttp_uri_set_query(struct evhttp_uri *uri, const char *query)
int
evhttp_uri_set_fragment(struct evhttp_uri *uri, const char *fragment)
{
if (fragment && end_of_cpath(fragment, 1) != fragment+strlen(fragment))
if (fragment && end_of_cpath(fragment, PART_FRAGMENT, uri->flags) != fragment+strlen(fragment))
return -1;
_URI_SET_STR(fragment);
return 0;

View File

@ -707,6 +707,12 @@ char *evhttp_htmlescape(const char *html);
*/
struct evhttp_uri *evhttp_uri_new(void);
/**
* Changes the flags set on a given URI. See EVHTTP_URI_* for
* a list of flags.
**/
void evhttp_uri_set_flags(struct evhttp_uri *uri, unsigned flags);
/** Return the scheme of an evhttp_uri, or NULL if no scheme has been set
* and the evhttp_uri contains a Relative-Ref. */
const char *evhttp_uri_get_scheme(const struct evhttp_uri *uri);
@ -792,9 +798,29 @@ int evhttp_uri_set_fragment(struct evhttp_uri *uri, const char *fragment);
* accepts all of them as valid.
*
* @param source_uri the request URI
* @param flags Zero or more EVHTTP_URI_* flags to affect the behavior
* of the parser.
* @return uri container to hold parsed data, or NULL if there is error
* @see evhttp_uri_free()
*/
struct evhttp_uri *evhttp_uri_parse_with_flags(const char *source_uri,
unsigned flags);
/** Tolerate URIs that do not conform to RFC3986.
*
* Unfortunately, some HTTP clients generate URIs that, according to RFC3986,
* are not conformant URIs. If you need to support these URIs, you can
* do so by passing this flag to evhttp_uri_parse_with_flags.
*
* Currently, these changes are:
* <ul>
* <li> Nonconformant URIs are allowed to contain otherwise unreasonable
* characters in their path, query, and fragment components.
* </ul>
*/
#define EVHTTP_URI_NONCONFORMANT 0x01
/** Alias for evhttp_uri_parse_with_flags(source_uri, 0) */
struct evhttp_uri *evhttp_uri_parse(const char *source_uri);
/**
@ -817,7 +843,7 @@ void evhttp_uri_free(struct evhttp_uri *uri);
* @param buf destination buffer
* @param limit destination buffer size
* @return a joined URI as a string, or NULL on error
@see evhttp_uri_parse()
* @see evhttp_uri_parse()
*/
char *evhttp_uri_join(struct evhttp_uri *uri, char *buf, size_t limit);

View File

@ -1990,8 +1990,13 @@ end:
static void
http_parse_uri_test(void *ptr)
{
const int nonconform = (ptr != NULL);
const unsigned parse_flags =
nonconform ? EVHTTP_URI_NONCONFORMANT : 0;
struct evhttp_uri *uri = NULL;
char url_tmp[4096];
#define URI_PARSE(uri) \
evhttp_uri_parse_with_flags((uri), parse_flags)
#define TT_URI(want) do { \
char *ret = evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)); \
@ -2007,21 +2012,37 @@ http_parse_uri_test(void *ptr)
/* bad URIs: parsing */
#define BAD(s) do { \
if (evhttp_uri_parse(s) != NULL) \
if (URI_PARSE(s) != NULL) \
TT_FAIL(("Expected error parsing \"%s\"",s)); \
} while(0)
BAD("http://www.test.com/ why hello");
BAD("http://www.test.com/why-hello\x01");
BAD("http://www.test.com/why-hello?\x01");
BAD("http://www.test.com/why-hello#\x01");
/* Nonconformant URIs we can parse: parsing */
#define NCF(s) do { \
uri = URI_PARSE(s); \
if (uri != NULL && !nonconform) { \
TT_FAIL(("Expected error parsing \"%s\"",s)); \
} else if (uri == NULL && nonconform) { \
TT_FAIL(("Couldn't parse nonconformant URI \"%s\"", \
s)); \
} \
if (uri) { \
tt_want(evhttp_uri_join(uri, url_tmp, \
sizeof(url_tmp))); \
evhttp_uri_free(uri); \
} \
} while(0)
NCF("http://www.test.com/ why hello");
NCF("http://www.test.com/why-hello\x01");
NCF("http://www.test.com/why-hello?\x01");
NCF("http://www.test.com/why-hello#\x01");
BAD("http://www.\x01.test.com/why-hello");
BAD("http://www.%7test.com/why-hello");
BAD("http://www.test.com/why-hell%7o");
NCF("http://www.test.com/why-hell%7o");
BAD("h%3ttp://www.test.com/why-hello");
BAD("http://www.test.com/why-hello%7");
BAD("http://www.test.com/why-hell%7o");
BAD("http://www.test.com/foo?ba%r");
BAD("http://www.test.com/foo#ba%r");
NCF("http://www.test.com/why-hello%7");
NCF("http://www.test.com/why-hell%7o");
NCF("http://www.test.com/foo?ba%r");
NCF("http://www.test.com/foo#ba%r");
BAD("99:99/foo");
BAD("http://www.test.com:999x/");
BAD("http://www.test.com:x/");
@ -2057,7 +2078,7 @@ http_parse_uri_test(void *ptr)
tt_want(evhttp_uri_join(uri, NULL, sizeof(url_tmp))==NULL);
tt_want(evhttp_uri_join(uri, url_tmp, 0)==NULL);
evhttp_uri_free(uri);
uri = evhttp_uri_parse("mailto:foo@bar");
uri = URI_PARSE("mailto:foo@bar");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_host(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2109,7 +2130,7 @@ http_parse_uri_test(void *ptr)
evhttp_uri_free(uri);
/* Valid parsing */
uri = evhttp_uri_parse("http://www.test.com/?q=t%33est");
uri = URI_PARSE("http://www.test.com/?q=t%33est");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2120,7 +2141,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://www.test.com/?q=t%33est");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://%77ww.test.com");
uri = URI_PARSE("http://%77ww.test.com");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "%77ww.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0);
@ -2131,7 +2152,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://%77ww.test.com");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://www.test.com?q=test");
uri = URI_PARSE("http://www.test.com?q=test");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0);
@ -2142,7 +2163,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://www.test.com?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://www.test.com#fragment");
uri = URI_PARSE("http://www.test.com#fragment");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0);
@ -2153,7 +2174,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://www.test.com#fragment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://8000/");
uri = URI_PARSE("http://8000/");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "8000") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2164,7 +2185,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://8000/");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://:8000/");
uri = URI_PARSE("http://:8000/");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2175,7 +2196,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://:8000/");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://www.test.com:/"); /* empty port */
uri = URI_PARSE("http://www.test.com:/"); /* empty port */
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want_str_op(evhttp_uri_get_path(uri), ==, "/");
@ -2186,7 +2207,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://www.test.com/");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("http://www.test.com:"); /* empty port 2 */
uri = URI_PARSE("http://www.test.com:"); /* empty port 2 */
tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0);
@ -2197,7 +2218,7 @@ http_parse_uri_test(void *ptr)
TT_URI("http://www.test.com");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://www.test.com/?q=test");
uri = URI_PARSE("ftp://www.test.com/?q=test");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2208,7 +2229,7 @@ http_parse_uri_test(void *ptr)
TT_URI("ftp://www.test.com/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[::1]:999/?q=test");
uri = URI_PARSE("ftp://[::1]:999/?q=test");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "[::1]") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2219,7 +2240,7 @@ http_parse_uri_test(void *ptr)
TT_URI("ftp://[::1]:999/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[ff00::127.0.0.1]/?q=test");
uri = URI_PARSE("ftp://[ff00::127.0.0.1]/?q=test");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "[ff00::127.0.0.1]") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2230,7 +2251,7 @@ http_parse_uri_test(void *ptr)
TT_URI("ftp://[ff00::127.0.0.1]/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("ftp://[v99.not_(any:time)_soon]/?q=test");
uri = URI_PARSE("ftp://[v99.not_(any:time)_soon]/?q=test");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "[v99.not_(any:time)_soon]") == 0);
tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0);
@ -2241,7 +2262,7 @@ http_parse_uri_test(void *ptr)
TT_URI("ftp://[v99.not_(any:time)_soon]/?q=test");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment");
uri = URI_PARSE("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0);
tt_want(strcmp(evhttp_uri_get_userinfo(uri), "user:pass") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0);
@ -2252,7 +2273,7 @@ http_parse_uri_test(void *ptr)
TT_URI("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("scheme://user@foo.com/#fragment");
uri = URI_PARSE("scheme://user@foo.com/#fragment");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0);
tt_want(strcmp(evhttp_uri_get_userinfo(uri), "user") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0);
@ -2263,7 +2284,7 @@ http_parse_uri_test(void *ptr)
TT_URI("scheme://user@foo.com/#fragment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("scheme://%75ser@foo.com/#frag@ment");
uri = URI_PARSE("scheme://%75ser@foo.com/#frag@ment");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0);
tt_want(strcmp(evhttp_uri_get_userinfo(uri), "%75ser") == 0);
tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0);
@ -2274,7 +2295,7 @@ http_parse_uri_test(void *ptr)
TT_URI("scheme://%75ser@foo.com/#frag@ment");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("file:///some/path/to/the/file");
uri = URI_PARSE("file:///some/path/to/the/file");
tt_want(strcmp(evhttp_uri_get_scheme(uri), "file") == 0);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
tt_want(strcmp(evhttp_uri_get_host(uri), "") == 0);
@ -2285,7 +2306,7 @@ http_parse_uri_test(void *ptr)
TT_URI("file:///some/path/to/the/file");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("///some/path/to/the-file");
uri = URI_PARSE("///some/path/to/the-file");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_scheme(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2297,7 +2318,7 @@ http_parse_uri_test(void *ptr)
TT_URI("///some/path/to/the-file");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("/s:ome/path/to/the-file?q=99#fred");
uri = URI_PARSE("/s:ome/path/to/the-file?q=99#fred");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_scheme(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2309,7 +2330,7 @@ http_parse_uri_test(void *ptr)
TT_URI("/s:ome/path/to/the-file?q=99#fred");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("relative/path/with/co:lon");
uri = URI_PARSE("relative/path/with/co:lon");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_scheme(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2321,7 +2342,7 @@ http_parse_uri_test(void *ptr)
TT_URI("relative/path/with/co:lon");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("bob?q=99&q2=q?33#fr?ed");
uri = URI_PARSE("bob?q=99&q2=q?33#fr?ed");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_scheme(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2333,7 +2354,7 @@ http_parse_uri_test(void *ptr)
TT_URI("bob?q=99&q2=q?33#fr?ed");
evhttp_uri_free(uri);
uri = evhttp_uri_parse("#fr?ed");
uri = URI_PARSE("#fr?ed");
tt_want(uri != NULL);
tt_want(evhttp_uri_get_scheme(uri) == NULL);
tt_want(evhttp_uri_get_userinfo(uri) == NULL);
@ -2344,6 +2365,9 @@ http_parse_uri_test(void *ptr)
tt_want(strcmp(evhttp_uri_get_fragment(uri), "fr?ed") == 0);
TT_URI("#fr?ed");
evhttp_uri_free(uri);
#undef URI_PARSE
#undef TT_URI
#undef BAD
}
static void
@ -3489,6 +3513,7 @@ struct testcase_t http_testcases[] = {
{ "bad_headers", http_bad_header_test, 0, NULL, NULL },
{ "parse_query", http_parse_query_test, 0, NULL, NULL },
{ "parse_uri", http_parse_uri_test, 0, NULL, NULL },
{ "parse_uri_nc", http_parse_uri_test, 0, &basic_setup, (void*)"nc" },
{ "uriencode", http_uriencode_test, 0, NULL, NULL },
HTTP(basic),
HTTP(cancel),