From b5b26d5de3bbbc3297549c19188730618895c865 Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Sun, 13 Jul 2014 18:45:34 +0100 Subject: [PATCH] Add SOCKS5 Support Supports HTTP and HTTPS destinations and performs all hostname resolution via the SOCKS proxy, preventing privacy leakage if used with Tor. If hostname is a valid IP address then it is used directly, the hostname is not resolved again via the SOCKS5 proxy (Patch suggested by Klaus Trainer but implementation here slightly different). --- README.md | 15 +++++ src/ibrowse_http_client.erl | 25 ++++++--- src/ibrowse_lib.erl | 1 + src/ibrowse_socks5.erl | 108 ++++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 src/ibrowse_socks5.erl diff --git a/README.md b/README.md index 8cf10a7..b68f197 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ ibrowse is a HTTP client written in erlang. * Asynchronous requests. Responses are streamed to a process * Basic authentication * Supports proxy authentication +* Supports socks5 * Can talk to secure webservers using SSL * *Any other features in the code not listed here :)* @@ -279,3 +280,17 @@ support this. Nor did www.google.com. But good old BBC supports this: {"Via","1.1 hatproxy01 (NetCache NetApp/5.6.2)"}], "TRACE / HTTP/1.1\r\nHost: www.bbc.co.uk\r\nConnection: keep-alive\r\nX-Forwarded-For: 172.24.28.29\r\nVia: 1.1 hatproxy01 (NetCache NetApp/5.6.2)\r\nCookie: BBC-UID=7452e...\r\n\r\n"} ``` + +A `GET` through a SOCKS5 proxy: + +```erlang +ibrowse:send_req("http://google.com", [], get, [], + [{socks5_host, "127.0.0.1"}, + {socks5_port, 5335}]). + +ibrowse:send_req("http://google.com", [], get, [], + [{socks5_host, "127.0.0.1"}, + {socks5_port, 5335}, + {socks5_user, "user4321"}, + {socks5_password, "pass7654"}]).
+```
diff --git a/src/ibrowse_http_client.erl b/src/ibrowse_http_client.erl
index 822deb8..ad24351 100644
--- a/src/ibrowse_http_client.erl
+++ b/src/ibrowse_http_client.erl
@@ -504,13 +504,38 @@ handle_sock_closed(#state{reply_buffer = Buf, reqs = Reqs, http_status_code = SC
             State
     end.
 
-do_connect(Host, Port, Options, #state{is_ssl = true,
-                                       use_proxy = false,
-                                       ssl_options = SSLOptions},
-           Timeout) ->
-    ssl:connect(Host, Port, get_sock_options(Host, Options, SSLOptions), Timeout);
-do_connect(Host, Port, Options, _State, Timeout) ->
-    gen_tcp:connect(Host, Port, get_sock_options(Host, Options, []), Timeout).
+%% Opens the transport connection to Host:Port.
+%%
+%% TLS is negotiated here only for a direct HTTPS connection, i.e. when
+%% is_ssl is true and use_proxy is false. In every other case the
+%% connection is opened as plain TCP, and SSL options are never handed
+%% to gen_tcp. When the socks5_host option is set, the TCP connection
+%% is made through that SOCKS5 proxy and upgraded to TLS afterwards if
+%% a direct HTTPS connection was requested.
+do_connect(Host, Port, Options, #state{is_ssl = IsSsl,
+                                       use_proxy = UseProxy,
+                                       ssl_options = SSLOptions},
+           Timeout) ->
+    DirectSsl = (IsSsl =:= true) andalso (UseProxy =:= false),
+    TcpOptions = get_sock_options(Host, Options, []),
+    case get_value(socks5_host, Options, undefined) of
+        undefined when DirectSsl ->
+            ssl:connect(Host, Port,
+                        get_sock_options(Host, Options, SSLOptions),
+                        Timeout);
+        undefined ->
+            gen_tcp:connect(Host, Port, TcpOptions, Timeout);
+        _Socks5Host ->
+            case ibrowse_socks5:connect(Host, Port, Options, TcpOptions,
+                                        Timeout) of
+                {ok, Socket} when DirectSsl ->
+                    ssl:connect(Socket,
+                                get_sock_options(Host, Options, SSLOptions),
+                                Timeout);
+                Result ->
+                    Result
+            end
+    end.
 
 get_sock_options(Host, Options, SSLOptions) ->
     Caller_socket_options = get_value(socket_options, Options, []),
diff --git a/src/ibrowse_lib.erl b/src/ibrowse_lib.erl
index 1ce6bd4..3362b39 100644
--- a/src/ibrowse_lib.erl
+++ b/src/ibrowse_lib.erl
@@ -362,6 +362,7 @@ parse_url([], get_password, Url, TmpAcc) ->
 parse_url([], State, Url, TmpAcc) ->
     {invalid_uri_2, State, Url, TmpAcc}.
 
+default_port(socks5) -> 1080;
 default_port(http) -> 80;
 default_port(https) -> 443;
 default_port(ftp) -> 21.
diff --git a/src/ibrowse_socks5.erl b/src/ibrowse_socks5.erl
new file mode 100644
index 0000000..417f595
--- /dev/null
+++ b/src/ibrowse_socks5.erl
@@ -0,0 +1,199 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+%% SOCKS5 client (RFC 1928) with optional username/password
+%% authentication (RFC 1929). Host names are passed to the proxy
+%% unresolved; literal IP addresses are passed as addresses.
+
+-module(ibrowse_socks5).
+
+-define(VERSION, 5).
+-define(CONNECT, 1).
+
+-define(NO_AUTH, 0).
+-define(USERPASS, 2).
+-define(UNACCEPTABLE, 16#FF).
+-define(RESERVED, 0).
+
+-define(ATYP_IPV4, 1).
+-define(ATYP_DOMAINNAME, 3).
+-define(ATYP_IPV6, 4).
+
+-define(SUCCEEDED, 0).
+
+%% Version byte of the RFC 1929 username/password sub-negotiation.
+-define(AUTH_VERSION, 1).
+
+%% Conventional SOCKS port, used when socks5_port is not given.
+-define(DEFAULT_PORT, 1080).
+
+-export([connect/5]).
+
+-import(ibrowse_lib, [get_value/2, get_value/3]).
+
+%% Connects to Host:Port through the SOCKS5 proxy named by the
+%% socks5_host and socks5_port options (the port defaults to 1080).
+%%
+%% SockOptions are passed to gen_tcp:connect/4 for the proxy connection.
+%% They must leave the socket passive, binary and unframed, because the
+%% negotiation reads exact byte counts with gen_tcp:recv/3. Timeout
+%% bounds the TCP connect and every negotiation read.
+%%
+%% Returns {ok, Socket} once the tunnel is established, otherwise
+%% {error, Reason}; the proxy socket is closed on every failure.
+connect(Host, Port, Options, SockOptions, Timeout) ->
+    Socks5Host = get_value(socks5_host, Options),
+    Socks5Port = get_value(socks5_port, Options, ?DEFAULT_PORT),
+    case gen_tcp:connect(Socks5Host, Socks5Port, SockOptions, Timeout) of
+        {ok, Socket} ->
+            case negotiate(Host, Port, Socket, Options, Timeout) of
+                ok ->
+                    {ok, Socket};
+                Error ->
+                    gen_tcp:close(Socket),
+                    Error
+            end;
+        Error ->
+            Error
+    end.
+
+%% Runs method negotiation (and authentication when selected) followed
+%% by the CONNECT request on an already connected proxy socket.
+negotiate(Host, Port, Socket, Options, Timeout) ->
+    case handshake(Socket, Options, Timeout) of
+        ok    -> connect(Host, Port, Socket, Timeout);
+        Error -> Error
+    end.
+
+%% Offers a single authentication method: username/password when the
+%% socks5_user option is non-empty, otherwise "no authentication".
+handshake(Socket, Options, Timeout) ->
+    User = iolist_to_binary(get_value(socks5_user, Options, <<>>)),
+    Method = case User of
+                 <<>> -> ?NO_AUTH;
+                 _    -> ?USERPASS
+             end,
+    case exchange(Socket, <<?VERSION, 1, Method>>, 2, Timeout) of
+        {ok, <<?VERSION, ?NO_AUTH>>} when Method =:= ?NO_AUTH ->
+            ok;
+        {ok, <<?VERSION, ?USERPASS>>} when Method =:= ?USERPASS ->
+            %% Credentials are sent only after the proxy selects the method.
+            authenticate(Socket, User, Options, Timeout);
+        {ok, <<?VERSION, ?UNACCEPTABLE>>} ->
+            {error, unacceptable};
+        {ok, Other} ->
+            {error, {unexpected_reply, Other}};
+        {error, Reason} ->
+            {error, Reason}
+    end.
+
+%% RFC 1929 sub-negotiation. The password is read from socks5_password;
+%% socks5_pass is accepted as an alias for the shorter spelling.
+authenticate(Socket, User, Options, Timeout) ->
+    Password = iolist_to_binary(
+                 get_value(socks5_password, Options,
+                           get_value(socks5_pass, Options, <<>>))),
+    case byte_size(User) =< 255 andalso byte_size(Password) =< 255 of
+        true ->
+            Request = <<?AUTH_VERSION,
+                        (byte_size(User)), User/binary,
+                        (byte_size(Password)), Password/binary>>,
+            case exchange(Socket, Request, 2, Timeout) of
+                {ok, <<?AUTH_VERSION, ?SUCCEEDED>>} -> ok;
+                {ok, _Rejected}                     -> {error, authentication_failed};
+                {error, Reason}                     -> {error, Reason}
+            end;
+        false ->
+            %% ULEN and PLEN are single bytes.
+            {error, credentials_too_long}
+    end.
+
+%% Sends Request and reads exactly ReplySize bytes of reply.
+exchange(Socket, Request, ReplySize, Timeout) ->
+    case gen_tcp:send(Socket, Request) of
+        ok    -> gen_tcp:recv(Socket, ReplySize, Timeout);
+        Error -> Error
+    end.
+
+%% Sends the CONNECT request for Host:Port and consumes the complete
+%% reply, so no proxy bytes remain in front of the tunnelled stream.
+connect(Host, Port, Via, Timeout) when is_list(Host) ->
+    connect(list_to_binary(Host), Port, Via, Timeout);
+connect(Host, Port, Via, Timeout) when is_binary(Host), is_integer(Port) ->
+    case encode_address(Host) of
+        {ok, Address} ->
+            Request = <<?VERSION, ?CONNECT, ?RESERVED,
+                        Address/binary, Port:16>>,
+            case exchange(Via, Request, 4, Timeout) of
+                {ok, <<?VERSION, ?SUCCEEDED, ?RESERVED, AddressType>>} ->
+                    skip_bound_address(Via, AddressType, Timeout);
+                {ok, <<?VERSION, Rep, ?RESERVED, _AddressType>>} ->
+                    {error, rep(Rep)};
+                {ok, Other} ->
+                    {error, {unexpected_reply, Other}};
+                {error, Reason} ->
+                    {error, Reason}
+            end;
+        Error ->
+            Error
+    end.
+
+%% Encodes the destination as ATYP followed by DST.ADDR. Literal IP
+%% addresses are sent as addresses; anything else is sent as a domain
+%% name for the proxy to resolve.
+encode_address(Host) ->
+    case inet:parse_address(binary_to_list(Host)) of
+        {ok, {A, B, C, D}} ->
+            {ok, <<?ATYP_IPV4, A, B, C, D>>};
+        {ok, {A, B, C, D, E, F, G, H}} ->
+            %% Each IPv6 group is a 16-bit word.
+            {ok, <<?ATYP_IPV6, A:16, B:16, C:16, D:16,
+                   E:16, F:16, G:16, H:16>>};
+        _ when byte_size(Host) =< 255 ->
+            {ok, <<?ATYP_DOMAINNAME, (byte_size(Host)), Host/binary>>};
+        _ ->
+            %% The domain name length is a single byte.
+            {error, hostname_too_long}
+    end.
+
+%% Reads and discards BND.ADDR and BND.PORT of a successful reply.
+skip_bound_address(Via, ?ATYP_IPV4, Timeout) ->
+    discard(Via, 4 + 2, Timeout);
+skip_bound_address(Via, ?ATYP_IPV6, Timeout) ->
+    discard(Via, 16 + 2, Timeout);
+skip_bound_address(Via, ?ATYP_DOMAINNAME, Timeout) ->
+    case gen_tcp:recv(Via, 1, Timeout) of
+        {ok, <<Length>>} -> discard(Via, Length + 2, Timeout);
+        {ok, Other}      -> {error, {unexpected_reply, Other}};
+        {error, Reason}  -> {error, Reason}
+    end;
+skip_bound_address(_Via, AddressType, _Timeout) ->
+    {error, {unexpected_address_type, AddressType}}.
+
+%% Reads Size bytes from the proxy and drops them.
+discard(Via, Size, Timeout) ->
+    case gen_tcp:recv(Via, Size, Timeout) of
+        {ok, _Skipped}  -> ok;
+        {error, Reason} -> {error, Reason}
+    end.
+
+%% Maps a SOCKS5 REP code to an error reason.
+rep(0) -> succeeded;
+rep(1) -> server_fail;
+rep(2) -> disallowed_by_ruleset;
+rep(3) -> network_unreachable;
+rep(4) -> host_unreachable;
+rep(5) -> connection_refused;
+rep(6) -> ttl_expired;
+rep(7) -> command_not_supported;
+rep(8) -> address_type_not_supported;
+rep(Code) -> {unknown_reply_code, Code}.