From 3367f003e37cdde8bd9d4c0888b7aa49cc9d63b6 Mon Sep 17 00:00:00 2001 From: Filipe David Manana Date: Tue, 12 Apr 2011 19:30:51 +0100 Subject: [PATCH] ibrowse_lib:parse_url/1 now accepts IPv6 literals as hostnames As specified in the following RFC: http://www.ietf.org/rfc/rfc2732.txt --- Makefile | 1 + rebar.config | 1 + src/ibrowse.hrl | 11 ++- src/ibrowse_lib.erl | 51 +++++++++++++- test/ibrowse_lib_tests.erl | 135 +++++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 test/ibrowse_lib_tests.erl diff --git a/Makefile b/Makefile index c6eb971..4021c5b 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,7 @@ install: all cp -r ebin $(DESTDIR)/lib/ibrowse-$(IBROWSE_VSN)/ test: all + ./rebar eunit (cd test; make) erl -noshell -pa ebin -pa test -s ibrowse -s ibrowse_test unit_tests \ -s ibrowse_test verify_chunked_streaming \ diff --git a/rebar.config b/rebar.config index b919c94..a23b6e1 100644 --- a/rebar.config +++ b/rebar.config @@ -1 +1,2 @@ {erl_opts, [debug_info, warn_unused_vars, nowarn_shadow_vars, warn_unused_import]}. +{eunit_opts, [verbose]}. \ No newline at end of file diff --git a/src/ibrowse.hrl b/src/ibrowse.hrl index ebf3bb3..18dde82 100644 --- a/src/ibrowse.hrl +++ b/src/ibrowse.hrl @@ -1,7 +1,16 @@ -ifndef(IBROWSE_HRL). -define(IBROWSE_HRL, "ibrowse.hrl"). --record(url, {abspath, host, port, username, password, path, protocol}). +-record(url, { + abspath, + host, + port, + username, + password, + path, + protocol, + host_type % 'hostname', 'ipv4_address' or 'ipv6_address' +}). -record(lb_pid, {host_port, pid}). diff --git a/src/ibrowse_lib.erl b/src/ibrowse_lib.erl index 696d0f6..3cbe3ac 100644 --- a/src/ibrowse_lib.erl +++ b/src/ibrowse_lib.erl @@ -180,7 +180,19 @@ get_value(Tag, TVL) -> V. parse_url(Url) -> - parse_url(Url, get_protocol, #url{abspath=Url}, []). 
+ case parse_url(Url, get_protocol, #url{abspath=Url}, []) of + #url{host_type = undefined, host = Host} = UrlRec -> + case inet_parse:address(Host) of + {ok, {_, _, _, _, _, _, _, _}} -> + UrlRec#url{host_type = ipv6_address}; + {ok, {_, _, _, _}} -> + UrlRec#url{host_type = ipv4_address}; + _ -> + UrlRec#url{host_type = hostname} + end; + Else -> + Else + end. parse_url([$:, $/, $/ | _], get_protocol, Url, []) -> {invalid_uri_1, Url}; @@ -215,6 +227,21 @@ parse_url([$@ | T], get_username, Url, TmpAcc) -> Url#url{username = lists:reverse(TmpAcc), password = ""}, []); +parse_url([$[ | T], get_username, Url, []) -> + % IPv6 address literals are enclosed by square brackets: + % http://www.ietf.org/rfc/rfc2732.txt + parse_url(T, get_ipv6_address, Url#url{host_type = ipv6_address}, []); +parse_url([$[ | T], get_username, _Url, TmpAcc) -> + {error, {invalid_username_or_host, lists:reverse(TmpAcc) ++ "[" ++ T}}; +parse_url([$[ | _], get_password, _Url, []) -> + {error, missing_password}; +parse_url([$[ | T], get_password, Url, TmpAcc) -> + % IPv6 address literals are enclosed by square brackets: + % http://www.ietf.org/rfc/rfc2732.txt + parse_url(T, get_ipv6_address, + Url#url{host_type = ipv6_address, + password = lists:reverse(TmpAcc)}, + []); parse_url([$@ | T], get_password, Url, TmpAcc) -> parse_url(T, get_host, Url#url{password = lists:reverse(TmpAcc)}, @@ -236,6 +263,28 @@ parse_url([H | T], get_password, Url, TmpAcc) when H == $/; username = undefined, password = undefined, path = Path}; +parse_url([$] | T], get_ipv6_address, #url{protocol = Prot} = Url, TmpAcc) -> + Addr = lists:reverse(TmpAcc), + case inet_parse:address(Addr) of + {ok, {_, _, _, _, _, _, _, _}} -> + Url2 = Url#url{host = Addr, port = default_port(Prot)}, + case T of + [$: | T2] -> + parse_url(T2, get_port, Url2, []); + [$/ | T2] -> + Url2#url{path = [$/ | T2]}; + [$? | T2] -> + Url2#url{path = [$/, $? 
| T2]}; + [] -> + Url2#url{path = "/"}; + _ -> + {error, {invalid_host, "[" ++ Addr ++ "]" ++ T}} + end; + _ -> + {error, {invalid_ipv6_address, Addr}} + end; +parse_url([$[ | T], get_host, #url{} = Url, []) -> + parse_url(T, get_ipv6_address, Url#url{host_type = ipv6_address}, []); parse_url([$: | T], get_host, #url{} = Url, TmpAcc) -> parse_url(T, get_port, Url#url{host = lists:reverse(TmpAcc)}, diff --git a/test/ibrowse_lib_tests.erl b/test/ibrowse_lib_tests.erl new file mode 100644 index 0000000..ef3cb05 --- /dev/null +++ b/test/ibrowse_lib_tests.erl @@ -0,0 +1,135 @@ +%%% File : ibrowse_lib_tests.erl +%%% Authors : Chandrashekhar Mullaparthi , +%%% Filipe David Manana +%%% Description : Tests for the module ibrowse_lib.erl +%%% Created : 12 April 2011 by Filipe David Manana + +-module(ibrowse_lib_tests). +-include_lib("eunit/include/eunit.hrl"). +-include("src/ibrowse.hrl"). + + +parse_urls_test_() -> + {timeout, 60, [fun parse_urls/0]}. + + +parse_urls() -> + ?assertMatch(#url{ + abspath = "http://localhost", + host = "localhost", + host_type = hostname, + port = 80, + path = "/", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://localhost")), + ?assertMatch(#url{ + abspath = "http://localhost:80/", + host = "localhost", + host_type = hostname, + port = 80, + path = "/", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://localhost:80/")), + ?assertMatch(#url{ + abspath = "http://127.0.0.1:8000/", + host = "127.0.0.1", + host_type = ipv4_address, + port = 8000, + path = "/", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://127.0.0.1:8000/")), + ?assertMatch(#url{ + abspath = "https://foo:bar@127.0.0.1:8000/test", + host = "127.0.0.1", + host_type = ipv4_address, + port = 8000, + path = "/test", + username = "foo", + password = "bar", + protocol = https + }, + 
ibrowse_lib:parse_url("https://foo:bar@127.0.0.1:8000/test")), + ?assertMatch(#url{ + abspath = "https://[::1]", + host = "::1", + host_type = ipv6_address, + port = 443, + path = "/", + username = undefined, + password = undefined, + protocol = https + }, + ibrowse_lib:parse_url("https://[::1]")), + ?assertMatch(#url{ + abspath = "http://[::1]:8080", + host = "::1", + host_type = ipv6_address, + port = 8080, + path = "/", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://[::1]:8080")), + ?assertMatch(#url{ + abspath = "http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8081/index.html", + host = "FEDC:BA98:7654:3210:FEDC:BA98:7654:3210", + host_type = ipv6_address, + port = 8081, + path = "/index.html", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://[FEDC:BA98:7654:3210:FEDC:BA98:7654:3210]:8081/index.html")), + ?assertMatch(#url{ + abspath = "http://[1080:0:0:0:8:800:200C:417A]/foo/bar", + host = "1080:0:0:0:8:800:200C:417A", + host_type = ipv6_address, + port = 80, + path = "/foo/bar", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://[1080:0:0:0:8:800:200C:417A]/foo/bar")), + ?assertMatch(#url{ + abspath = "http://[1080:0:0:0:8:800:200C:417A]:8080/foo/bar", + host = "1080:0:0:0:8:800:200C:417A", + host_type = ipv6_address, + port = 8080, + path = "/foo/bar", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://[1080:0:0:0:8:800:200C:417A]:8080/foo/bar")), + ?assertMatch(#url{ + abspath = "http://[::192.9.5.5]:6000/foo?q=bar", + host = "::192.9.5.5", + host_type = ipv6_address, + port = 6000, + path = "/foo?q=bar", + username = undefined, + password = undefined, + protocol = http + }, + ibrowse_lib:parse_url("http://[::192.9.5.5]:6000/foo?q=bar")), + ?assertMatch({error, {invalid_ipv6_address, ":1080:0:0:0:8:800:200C:417A:"}}, + 
ibrowse_lib:parse_url("http://[:1080:0:0:0:8:800:200C:417A:]:6000/foo?q=bar")), + ?assertMatch({error, {invalid_ipv6_address, "12::z"}}, + ibrowse_lib:parse_url("http://[12::z]")), + ?assertMatch({error, {invalid_username_or_host, _}}, + ibrowse_lib:parse_url("http://foo[1080:0:0:0:8:800:200C:417A]:6000")), + ?assertMatch({error, missing_password}, + ibrowse_lib:parse_url("http://foo:[1080:0:0:0:8:800:200C:417A]:6000")), + ok.