commit cb7826c054e0d743fc1f3a741a8e6ba78d6994a4 Author: chandrusf Date: Thu May 5 22:28:27 2005 +0000 First version of ibrowse diff --git a/README b/README new file mode 100644 index 0000000..8d1e2a1 --- /dev/null +++ b/README @@ -0,0 +1,216 @@ +$Id: README,v 1.1 2005/05/05 22:28:27 chandrusf Exp $ + +ibrowse is a HTTP client. The following are a list of features. + - RFC2616 compliant (AFAIK) + - supports GET, POST, OPTIONS and HEAD only + - Understands HTTP/0.9, HTTP/1.0 and HTTP/1.1 + - Understands chunked encoding + - Named pools of connections to each webserver + - Pipelining support + - Download to file + - Asynchronous requests. Responses are streamed to a process + - Basic authentication + - Supports proxy authentication + - Can talk to Secure webservers using SSL + - any other features in the code not listed here :) + +Comments to : Chandrashekhar.Mullaparthi@t-mobile.co.uk + +Here are some usage examples. Enjoy! + +5> ibrowse:start(). +{ok,<0.94.0>} + +%% A simple GET +6> ibrowse:send_req("http://intranet/messenger/", [], get). +{ok,"200", + [{"Server","Microsoft-IIS/5.0"}, + {"Content-Location","http://intranet/messenger/index.html"}, + {"Date","Fri, 17 Dec 2004 15:16:19 GMT"}, + {"Content-Type","text/html"}, + {"Accept-Ranges","bytes"}, + {"Last-Modified","Fri, 17 Dec 2004 08:38:21 GMT"}, + {"Etag","\"aa7c9dc313e4c41:d77\""}, + {"Content-Length","953"}], + "\r\n\r\n\r\nMessenger\r\n\r\n\r\n\r\n\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n <body>\r\n\r\n <p><i>This site requires a browser that can view frames.</i></p>\r\n\r\n </body>\r\n \r\n\r\n\r\n"} + +%% ============================================================================= +%% A GET using a proxy +7> ibrowse:send_req("http://www.google.com/", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}], 1000). +{ok,"302", + [{"Date","Fri, 17 Dec 2004 15:22:56 GMT"}, + {"Content-Length","217"}, + {"Content-Type","text/html"}, + {"Set-Cookie", + "PREF=ID=f58155c797f96096:CR=1:TM=1103296999:LM=1103296999:S=FiWdtAqQvhQ0TvHq; expires=Sun, 17-Jan-2038 19:14:07 GMT; path=/; domain=.google.com"}, + {"Server","GWS/2.1"}, + {"Location", + "http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1103296999:S%3Do8bEY2FIHwdyGenS&prev=/"}, + {"Via","1.1 netapp01 (NetCache NetApp/5.5R2)"}], + "302 Moved\n

302 Moved

\nThe document has moved\nhere.\r\n\r\n"} + +%% ============================================================================= +%% A GET response saved to file. A temporary file is created and the +%% filename returned. The response will only be saved to file is the +%% status code is in the 200 range. The directory to download to can +%% be set using the application env var 'download_dir' - the default +%% is the current working directory. +8> ibrowse:send_req("http://www.erlang.se/", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {save_response_to_file, true}], 1000). +{error,req_timedout} + +%% ============================================================================= +9> ibrowse:send_req("http://www.erlang.se/", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {save_response_to_file, true}], 5000). +{ok,"200", + [{"Transfer-Encoding","chunked"}, + {"Date","Fri, 17 Dec 2004 15:24:36 GMT"}, + {"Content-Type","text/html"}, + {"Server","Apache/1.3.9 (Unix)"}, + {"Via","1.1 netapp01 (NetCache NetApp/5.5R2)"}], + {file,"/Users/chandru/code/ibrowse/src/ibrowse_tmp_file_1103297041125854"}} + +%% ============================================================================= +%% Setting size of connection pool and pipeline size. This sets the +%% number of maximum connections to this server to 10 and the pipeline +%% size to 1. Connections are setup a required. +11> ibrowse:set_dest("www.hotmail.com", 80, [{max_sessions, 10}, + {max_pipeline_size, 1}]). +ok + +%% ============================================================================= +%% Example using the HEAD method +56> ibrowse:send_req("http://www.erlang.org", [], head). +{ok,"200", + [{"Date","Mon, 28 Feb 2005 04:40:53 GMT"}, + {"Server","Apache/1.3.9 (Unix)"}, + {"Last-Modified","Thu, 10 Feb 2005 09:31:23 GMT"}, + {"Etag","\"8d71d-1efa-420b29eb\""}, + {"Accept-ranges","bytes"}, + {"Content-Length","7930"}, + {"Content-Type","text/html"}], + []} + +%% ============================================================================= +%% Example using the OPTIONS method +62> ibrowse:send_req("http://www.sun.com", [], options). +{ok,"200", + [{"Server","Sun Java System Web Server 6.1"}, + {"Date","Mon, 28 Feb 2005 04:44:39 GMT"}, + {"Content-Length","0"}, + {"P3p", + "policyref=\"http://www.sun.com/p3p/Sun_P3P_Policy.xml\", CP=\"CAO DSP COR CUR ADMa DEVa TAIa PSAa PSDa CONi TELi OUR SAMi PUBi IND PHY ONL PUR COM NAV INT DEM CNT STA POL PRE GOV\""}, + {"Set-Cookie", + "SUN_ID=X.X.X.X:169191109565879; EXPIRES=Wednesday, 31-Dec-2025 23:59:59 GMT; DOMAIN=.sun.com; PATH=/"}, + {"Allow", + "HEAD, GET, PUT, POST, DELETE, TRACE, OPTIONS, MOVE, INDEX, MKDIR, RMDIR"}], + []} + +%% ============================================================================= +%% Example of using Asynchronous requests +18> ibrowse:send_req("http://www.google.com", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {stream_to, self()}]). +{ibrowse_req_id,{1115,327256,389608}} +19> flush(). +Shell got {ibrowse_async_headers,{1115,327256,389608}, + "302", + [{"Date","Thu, 05 May 2005 21:06:41 GMT"}, + {"Content-Length","217"}, + {"Content-Type","text/html"}, + {"Set-Cookie", + "PREF=ID=b601f16bfa32f071:CR=1:TM=1115327201:LM=1115327201:S=OX5hSB525AMjUUu7; expires=Sun, 17-Jan-2038 19:14:07 GMT; path=/; domain=.google.com"}, + {"Server","GWS/2.1"}, + {"Location", + "http://www.google.co.uk/cxfer?c=PREF%3D:TM%3D1115327201:S%3DDS9pDJ4IHcAuZ_AS&prev=/"}, + {"Via", + "1.1 hatproxy01 (NetCache NetApp/5.6.2)"}]} +Shell got {ibrowse_async_response,{1115,327256,389608}, + "302 Moved\n

302 Moved

\nThe document has moved\nhere.\r\n\r\n"} +Shell got {ibrowse_async_response_end,{1115,327256,389608}} +ok + +%% ============================================================================= +%% Another example of using async requests +24> ibrowse:send_req("http://yaws.hyber.org/simple_ex2.yaws", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {stream_to, self()}]). +{ibrowse_req_id,{1115,327430,512314}} +25> flush(). +Shell got {ibrowse_async_headers,{1115,327430,512314}, + "200", + [{"Date","Thu, 05 May 2005 20:58:08 GMT"}, + {"Content-Length","64"}, + {"Content-Type","text/html;charset="}, + {"Server", + "Yaws/1.54 Yet Another Web Server"}, + {"Via", + "1.1 hatproxy01 (NetCache NetApp/5.6.2)"}]} +Shell got {ibrowse_async_response,{1115,327430,512314}, + "\n\n\n

Yesssssss

\n\n

Hello again

\n\n\n\n"} +Shell got {ibrowse_async_response_end,{1115,327430,512314}} + +%% ============================================================================= +%% Example of request which fails when using the async option. Here +%% the {ibrowse_req_id, ReqId} is not returned. Instead the error code is +%% returned. +68> ibrowse:send_req("http://www.earlyriser.org", [], get, [], [{stream_to, self()}]). +{error,conn_failed} + +%% Example of request using both Proxy-Authorization and authorization by the final webserver. +17> ibrowse:send_req("http://www.erlang.se/lic_area/protected/patches/erl_756_otp_beam.README", + [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {basic_auth, {"XXXXX", "XXXXXX"}}]). +{ok,"200", + [{"Accept-Ranges","bytes"}, + {"Date","Thu, 05 May 2005 21:02:09 GMT"}, + {"Content-Length","2088"}, + {"Content-Type","text/plain"}, + {"Server","Apache/1.3.9 (Unix)"}, + {"Last-Modified","Tue, 03 May 2005 15:08:18 GMT"}, + {"ETag","\"1384c8-828-427793e2\""}, + {"Via","1.1 hatproxy01 (NetCache NetApp/5.6.2)"}], + "Patch Id:\t\terl_756_otp_beam\nLabel:\t\t\tinets patch\nDate:\t\t\t2005-05-03\nTrouble Report Id:\tOTP-5513, OTP-5514, OTP-5516, OTP-5517, OTP-5521, OTP-5537\nSeq num:\t\tseq9806\nSystem:\t\t\totp\nRelease:\t\tR10B\nOperating System:\tall\nArchitecture:\t\tall\nErlang machine:\t\tBEAM\nApplication:\t\tinets-4.4\nFiles:\t\t\tall\n\nDescription:\n\n OTP-5513 The server did not handle HTTP-0.9 messages with an implicit\n\t version.\n\n OTP-5514 An internal server timeout killed the request handling\n\t process without sending a message back to the client. As this\n\t timeout only affects a single request it has been set to\n\t infinity (if the main server process dies the request\n\t handling process will also die and the client will receive an\n\t error). This might make a client that does not use a timeout\n\t hang for a longer period of time, but that is an expected\n\t behavior!\n\n OTP-5516 That a third party closes the http servers accept socket is\n\t recoverable for inets, hence intes will only produce an info\n\t report as there was no error in inets but measures where\n\t taken to avoid failure due to errors elsewhere.\n\n OTP-5517 The HTTP client proxy settings where ignored. Bug introduced\n\t in inets-4.3.\n\n OTP-5521 Inets only sent the \"WWW-Authenticate\" header at the first\n\t attempt to get a page, if the user supplied the wrong\n\t user/password combination the header was not sent again. This\n\t forces the user to kill the browser entirely after a failed\n\t login attempt, before the user may try to login again. Inets\n\t now always send the authentication header.\n\n OTP-5537 A major rewrite of big parts of the HTTP server code was\n\t performed. There where many things that did not work\n\t satisfactory. Cgi script handling can never have worked\n\t properly and the cases when it did sort of work, a big\n\t unnecessary delay was enforced. Headers where not always\n\t treated as expected and HTTP version handling did not work,\n\t all responses where sent as version HTTP/1.1 no matter what.\n\n\n"} + +%% ============================================================================= +%% Example of a TRACE request. Very interesting! yaws.hyber.org didn't +%% support this. Nor did www.google.com. But good old BBC supports +%% this. +35> 37> ibrowse:send_req("http://www.bbc.co.uk/", [], trace, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}]). +{ok,"200", + [{"Transfer-Encoding","chunked"}, + {"Date","Thu, 05 May 2005 21:40:27 GMT"}, + {"Content-Type","message/http"}, + {"Server","Apache/2.0.51 (Unix)"}, + {"Set-Cookie", + "BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300; expires=Mon, 04-May-09 21:40:27 GMT; path=/; domain=bbc.co.uk;"}, + {"Set-Cookie", + "BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300; expires=Mon, 04-May-09 21:40:27 GMT; path=/; domain=bbc.co.uk;"}, + {"Via","1.1 hatproxy01 (NetCache NetApp/5.6.2)"}], + "TRACE / HTTP/1.1\r\nHost: www.bbc.co.uk\r\nConnection: keep-alive\r\nX-Forwarded-For: 172.24.28.29\r\nVia: 1.1 hatproxy01 (NetCache NetApp/5.6.2)\r\nCookie: BBC-UID=7452e72a29424c5b0b232c7131c7d9395d209b7170e8604072e0fcb3630467300\r\n\r\n"} diff --git a/c_src/build_darwin b/c_src/build_darwin new file mode 100755 index 0000000..fbd041a --- /dev/null +++ b/c_src/build_darwin @@ -0,0 +1 @@ +cc -o ../priv/ibrowse_drv.so -I ~/R9C-0/usr/include/ -bundle -flat_namespace -undefined suppress -fno-common ibrowse_drv.c diff --git a/c_src/ibrowse_drv.c b/c_src/ibrowse_drv.c new file mode 100644 index 0000000..8f912a9 --- /dev/null +++ b/c_src/ibrowse_drv.c @@ -0,0 +1,162 @@ +/* Created 07/March/2004 Chandrashekhar Mullaparthi + + $Id: ibrowse_drv.c,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ + + Erlang Linked in driver to URL encode a set of data +*/ +#include +#include +#include +#include "erl_driver.h" + +static ErlDrvData ibrowse_drv_start(ErlDrvPort port, char* buff); +static void ibrowse_drv_stop(ErlDrvData handle); +static void ibrowse_drv_command(ErlDrvData handle, char *buff, int bufflen); +static void ibrowse_drv_finish(void); +static int ibrowse_drv_control(ErlDrvData handle, unsigned int command, + char* buf, int count, char** res, int res_size); + +/* The driver entry */ +static ErlDrvEntry ibrowse_driver_entry = { + NULL, /* init, N/A */ + ibrowse_drv_start, /* start, called when port is opened */ + ibrowse_drv_stop, /* stop, called when port is closed */ + NULL, /* output, called when erlang has sent */ + NULL, /* ready_input, called when input descriptor + ready */ + NULL, /* ready_output, called when output + descriptor ready */ + "ibrowse_drv", /* char *driver_name, the argument + to open_port */ + NULL, /* finish, called when unloaded */ + NULL, /* void * that is not used (BC) */ + ibrowse_drv_control, /* control, port_control callback */ + NULL, /* timeout, called on timeouts */ + NULL, /* outputv, vector output interface */ + NULL, + NULL, + NULL, /* call, synchronous call to driver */ + NULL +}; + +typedef struct ibrowse_drv_data { + unsigned int count; + void *alloc_ptr; +} State; + +static State *ibrowse_drv_data; + +DRIVER_INIT(ibrowse_drv) +{ + ibrowse_drv_data = NULL; + return &ibrowse_driver_entry; +} + +static ErlDrvData ibrowse_drv_start(ErlDrvPort port, char *buff) +{ + State *state; + + state = driver_alloc(sizeof(State)); + state->count = 0; + state->alloc_ptr = NULL; + + ibrowse_drv_data = state; + return ((ErlDrvData) state); +} + +void ibrowse_drv_stop(ErlDrvData desc) +{ + return; +} + +static int ibrowse_drv_control(ErlDrvData handle, unsigned int command, + char *buf, int bufflen, char **rbuf, int rlen) +{ + State* state = (State *) handle; + unsigned int j = 0, i = 0; + unsigned int temp = 0, rlen_1 = 0; + char* replybuf; + + fprintf(stderr, "alloc_ptr -> %d\n", state->alloc_ptr); +/* if(state->alloc_ptr != NULL) */ +/* { */ +/* driver_free(state->alloc_ptr); */ +/* } */ + + /* Calculate encoded length. If same as bufflen, it means there is + no encoding to do. Do return an empty list */ + rlen_1 = calc_encoded_length(buf, bufflen); + if(rlen_1 == bufflen) + { + *rbuf = NULL; + state->alloc_ptr = NULL; + return 0; + } + *rbuf = driver_alloc(rlen_1); + state->alloc_ptr = *rbuf; + fprintf(stderr, "*rbuf -> %d\n", *rbuf); + replybuf = *rbuf; + + for(i=0;i %d\n", j); */ + } + else + { + replybuf[j++] = 37; + /* printf("temp -> %d\n", temp); + printf("d2h(temp >> 4) -> %d\n", d2h(temp >> 4)); + printf("d2h(temp & 15) -> %d\n", d2h(temp & 15)); */ + replybuf[j++] = d2h(temp >> 4); + replybuf[j++] = d2h(temp & 15); + /* printf("j -> %d\n", j); */ + } + } + return rlen_1; +} + +/* Calculates the length of the resulting buffer if a string is URL encoded */ +int calc_encoded_length(char* buf, int bufflen) +{ + unsigned int count=0, i=0, temp=0; + + for(i=0;i 49 (ascii value of 1) + 10 -> 97 (ascii value of a) +*/ +int d2h(unsigned int i) +{ + if( i < 10 ) + { + return i + 48; + } + else + { + return i + 97 - 10; + } +} diff --git a/doc/ibrowse.html b/doc/ibrowse.html new file mode 100644 index 0000000..a7fd963 --- /dev/null +++ b/doc/ibrowse.html @@ -0,0 +1,225 @@ + + + +Module ibrowse + + + +

Module ibrowse

+ + +

Description

+The ibrowse application implements an HTTP 1.1 client. This + module implements the API of the HTTP client. There is one named + process called 'ibrowse' which acts as a load balancer. There is + one process to handle one TCP connection to a webserver + (implemented in the module ibrowse_http_client). Multiple connections to a + webserver are setup based on the settings for each webserver. The + ibrowse process also determines which connection to pipeline a + certain request on. The functions to call are send_req/3, + send_req/4, send_req/5, send_req/6. + +

Here are a few sample invocations.

+ + + ibrowse:send_req("http://intranet/messenger/", [], get). +

+ + ibrowse:send_req("http://www.google.com/", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}], 1000). +

+ + ibrowse:send_req("http://www.erlang.org/download/otp_src_R10B-3.tar.gz", [], get, [], + [{proxy_user, "XXXXX"}, + {proxy_password, "XXXXX"}, + {proxy_host, "proxy"}, + {proxy_port, 8080}, + {save_response_to_file, true}], 1000). +

+ + ibrowse:set_dest("www.hotmail.com", 80, [{max_sessions, 10}, + {max_pipeline_size, 1}]). +

+ + ibrowse:send_req("http://www.erlang.org", [], head). + +

+ ibrowse:send_req("http://www.sun.com", [], options). + +

+ ibrowse:send_req("http://www.bbc.co.uk", [], trace). + +

+ ibrowse:send_req("http://www.google.com", [], get, [], + [{stream_to, self()}]). +
+ +

A driver exists which implements URL encoding in C, but the + speed achieved using only erlang has been good enough, so the + driver isn't actually used.

+ +

Function Index

+ + + + + + + + + + + + + + + + + + + + + + + +
Exported Functions
code_change/3
finished_async_request/0Internal export.
handle_call/3
handle_cast/2
handle_info/2
init/1
reply/2Internal export.
send_req/3This is the basic function to send a HTTP request.
send_req/4Same as send_req/3.
send_req/5Same as send_req/4.
send_req/6Same as send_req/5.
set_dest/3Sets options for a destination.
shutting_down/0Internal export.
start/0
start_link/0
stop/0
terminate/2
trace_off/0Turn tracing off for the ibrowse process.
trace_off/2Turn tracing OFF for all connections to the specified HTTP + server.
trace_on/0Turn tracing on for the ibrowse process.
trace_on/2Turn tracing on for all connections to the specified HTTP + server.
+ +

Exported Functions

+ +

code_change/3

+ +

code_change(Arg1, Arg2, Arg3) -> term()

+

+ +

finished_async_request/0

+ +

finished_async_request() -> term()

+

Internal export. Called by a HTTP connection process to + indicate to the load balancing process (ibrowse) that an + asynchronous request has finished processing.

+ +

handle_call/3

+ +

handle_call(Arg1, Arg2, Arg3) -> term()

+

+ +

handle_cast/2

+ +

handle_cast(Arg1, Arg2) -> term()

+

+ +

handle_info/2

+ +

handle_info(Arg1, Arg2) -> term()

+

+ +

init/1

+ +

init(Arg1) -> term()

+

+ +

reply/2

+ +

reply(Arg1, Arg2) -> term()

+

Internal export. Called by a HTTP connection process to + indicate to the load balancing process (ibrowse) that a synchronous + request has finished processing.

+ +

send_req/3

+ +

send_req(Url::string(), Headers::headerList(), Method::method()) -> response()

+

This is the basic function to send a HTTP request. + The Status return value indicates the HTTP status code returned by the webserver

+ +

send_req/4

+ +

send_req(Url, Headers, Method::method(), Body::body()) -> response()

  • body() = [] | string() | binary()

+

Same as send_req/3. + If a list is specified for the body it has to be a flat list.

+ +

send_req/5

+ +

send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response()

  • optionList() = [option()]
  • option() = {max_sessions, integer()} | {max_pipeline_size, integer()} | {trace, boolean()} | {is_ssl, boolean()} | {ssl_options, [SSLOpt]} | {pool_name, atom()} | {proxy_host, string()} | {proxy_port, integer()} | {proxy_user, string()} | {proxy_password, string()} | {use_absolute_uri, boolean()} | {basic_auth, {username(), password()}} | {cookie, string()} | {content_length, integer()} | {content_type, string()} | {save_response_to_file, boolean()} | {stream_to, process()} | {http_vsn, {MajorVsn, MinorVsn}}
  • process() = pid() | atom()
  • username() = string()
  • password() = string()
  • SSLOpt = term()

+

Same as send_req/4. + For a description of SSL Options, look in the ssl manpage. If the + HTTP Version to use is not specified, the default is 1.1

+ +

send_req/6

+ +

send_req(Url, Headers::headerList(), Method::method(), Body::body(), Options::optionList(), Timeout) -> response()

  • Timeout = integer() | infinity

+

Same as send_req/5. + All timeout values are in milliseconds.

+ +

set_dest/3

+ +

set_dest(Host::string(), Port::integer(), Opts::opt_list()) -> ok

+

Sets options for a destination. If the options have not been + set in the ibrowse.conf file, it can be set using this function + before sending the first request to the destination. If not, + defaults will be used. Entries in ibrowse.conf look like this. +
+ {dest, Host, Port, MaxSess, MaxPipe, Options}.
+ where
+ Host = string(). "www.erlang.org" | "193.180.168.23"
+ Port = integer()
+ MaxSess = integer()
+ MaxPipe = integer()
+ Options = optionList() -- see options in send_req/5
+

+ +

shutting_down/0

+ +

shutting_down() -> term()

+

Internal export. Called by a HTTP connection process to + indicate to ibrowse that it is shutting down and further requests + should not be sent it's way.

+ +

start/0

+ +

start() -> term()

+

+ +

start_link/0

+ +

start_link() -> term()

+

+ +

stop/0

+ +

stop() -> term()

+

+ +

terminate/2

+ +

terminate(Arg1, Arg2) -> term()

+

+ +

trace_off/0

+ +

trace_off() -> term()

+

Turn tracing off for the ibrowse process

+ +

trace_off/2

+ +

trace_off(Host, Port) -> term()

+

Turn tracing OFF for all connections to the specified HTTP + server.

+ +

trace_on/0

+ +

trace_on() -> term()

+

Turn tracing on for the ibrowse process

+ +

trace_on/2

+ +

trace_on(Host, Port) -> term()

  • Host = string()
  • Port = integer()

+

Turn tracing on for all connections to the specified HTTP + server. Host is whatever is specified as the domain name in the URL

+ \ No newline at end of file diff --git a/doc/ibrowse_lib.html b/doc/ibrowse_lib.html new file mode 100644 index 0000000..ecc42ca --- /dev/null +++ b/doc/ibrowse_lib.html @@ -0,0 +1,52 @@ + + + +Module ibrowse_lib + + + +

Module ibrowse_lib

+ + +

Description

+Module with a few useful functions + +

Function Index

+ + + + + + + +
Exported Functions
decode_rfc822_date/1
drv_ue/1
drv_ue/2
status_code/1Given a status code, returns an atom describing the status code.
url_encode/1URL-encodes a string based on RFC 1738.
+ +

Exported Functions

+ +

decode_rfc822_date/1

+ +

decode_rfc822_date(Arg1) -> term()

+

+ +

drv_ue/1

+ +

drv_ue(Arg1) -> term()

+

+ +

drv_ue/2

+ +

drv_ue(Arg1, Arg2) -> term()

+

+ +

status_code/1

+ +

status_code(StatusCode) -> StatusDescription

  • StatusCode = string() | integer()
  • StatusDescription = atom()

+

Given a status code, returns an atom describing the status code.

+ +

url_encode/1

+ +

url_encode(Str) -> UrlEncodedStr

  • Str = string()
  • UrlEncodedStr = string()

+

URL-encodes a string based on RFC 1738. Returns a flat list.

+ \ No newline at end of file diff --git a/priv/ibrowse.conf b/priv/ibrowse.conf new file mode 100644 index 0000000..c142601 --- /dev/null +++ b/priv/ibrowse.conf @@ -0,0 +1,17 @@ +%% Configuration file for specifying settings for HTTP servers which this +%% client will connect to. +%% The format of each entry is (one per line) +%% {dest, Hostname, Portnumber, MaxSessions, MaxPipelineSize, Options}. +%% +%% where Hostname = string() +%% Portnumber = integer() +%% MaxSessions = integer() +%% MaxPipelineSize = integer() +%% Options = [{Tag, Val} | ...] +%% Tag = term() +%% Value = term() +%% e.g. +%% {dest, "covig02", 8000, 10, 10, [{is_ssl, true}, {ssl_options, [option()]}]}. +%% If SSL is to be used, both the options, is_ssl and ssl_options MUST be specified +%% where option() is all options supported in the ssl module + diff --git a/src/Emakefile.src b/src/Emakefile.src new file mode 100644 index 0000000..d3198bc --- /dev/null +++ b/src/Emakefile.src @@ -0,0 +1,6 @@ +'../src/ibrowse'. +'../src/ibrowse_http_client'. +'../src/ibrowse_app'. +'../src/ibrowse_sup'. +'../src/ibrowse_lib'. +'../src/ibrowse_test'. diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..7dbf4a3 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,24 @@ +ERL_FILES = ibrowse.erl \ + ibrowse_http_client.erl \ + ibrowse_app.erl \ + ibrowse_sup.erl \ + ibrowse_lib.erl \ + ibrowse_test.erl + + +INCLUDE_DIRS = -I./ + +ERLC ?= erlc +ERLC_EMULATOR ?= erl -boot start_clean +COMPILER_OPTIONS = -W +warn_unused_vars +nowarn_shadow_vars +warn_unused_import + +.SUFFIXES: .erl .beam $(SUFFIXES) +EBIN = ../ebin + +all: $(ERL_FILES:%.erl=$(EBIN)/%.beam) + +$(EBIN)/%.beam: %.erl + ${ERLC} $(COMPILER_OPTIONS) $(INCLUDE_DIRS) -o ../ebin $< + +clean: + rm -f $(EBIN)/*.beam diff --git a/src/ibrowse.app.src b/src/ibrowse.app.src new file mode 100644 index 0000000..c39e51b --- /dev/null +++ b/src/ibrowse.app.src @@ -0,0 +1,12 @@ +{application, ibrowse, + [{description, "HTTP client application"}, + {vsn, "%IBROWSE_VSN%"}, + {modules, [ ibrowse, + ibrowse_http_client, + ibrowse_app, + ibrowse_sup, + ibrowse_lib ]}, + {registered, []}, + {applications, [kernel,stdlib,sasl]}, + {env, []}, + {mod, {ibrowse_app, []}}]}. diff --git a/src/ibrowse.erl b/src/ibrowse.erl new file mode 100644 index 0000000..73908ef --- /dev/null +++ b/src/ibrowse.erl @@ -0,0 +1,629 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : Load balancer process for HTTP client connections. +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi +%%%------------------------------------------------------------------- +%% @doc The ibrowse application implements an HTTP 1.1 client. This +%% module implements the API of the HTTP client. There is one named +%% process called 'ibrowse' which acts as a load balancer. There is +%% one process to handle one TCP connection to a webserver +%% (implemented in the module ibrowse_http_client). Multiple connections to a +%% webserver are setup based on the settings for each webserver. The +%% ibrowse process also determines which connection to pipeline a +%% certain request on. The functions to call are send_req/3, +%% send_req/4, send_req/5, send_req/6. +%% +%%

Here are a few sample invocations.

+%% +%% +%% ibrowse:send_req("http://intranet/messenger/", [], get). +%%

+%% +%% ibrowse:send_req("http://www.google.com/", [], get, [], +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}], 1000). +%%

+%% +%%ibrowse:send_req("http://www.erlang.org/download/otp_src_R10B-3.tar.gz", [], get, [], +%% [{proxy_user, "XXXXX"}, +%% {proxy_password, "XXXXX"}, +%% {proxy_host, "proxy"}, +%% {proxy_port, 8080}, +%% {save_response_to_file, true}], 1000). +%%

+%% +%% ibrowse:set_dest("www.hotmail.com", 80, [{max_sessions, 10}, +%% {max_pipeline_size, 1}]). +%%

+%% +%% ibrowse:send_req("http://www.erlang.org", [], head). +%% +%%

+%% ibrowse:send_req("http://www.sun.com", [], options). +%% +%%

+%% ibrowse:send_req("http://www.bbc.co.uk", [], trace). +%% +%%

+%% ibrowse:send_req("http://www.google.com", [], get, [], +%% [{stream_to, self()}]). +%%
+%% +%%

A driver exists which implements URL encoding in C, but the +%% speed achieved using only erlang has been good enough, so the +%% driver isn't actually used.

+ +-module(ibrowse). +-vsn('$Id: ibrowse.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([start_link/0, start/0, stop/0]). + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +%% API interface +-export([send_req/3, + send_req/4, + send_req/5, + send_req/6, + trace_on/0, + trace_off/0, + trace_on/2, + trace_off/2, + set_dest/3]). + +%% Internal exports +-export([reply/2, + finished_async_request/0, + shutting_down/0]). + +-ifdef(debug). +-compile(export_all). +-endif. + +-import(ibrowse_http_client, [parse_url/1, + printable_date/0]). + +-record(state, {dests=[], trace=false, port}). + +-include("ibrowse.hrl"). + +-define(DEF_MAX_SESSIONS,10). +-define(DEF_MAX_PIPELINE_SIZE,10). + +%% key = {Host, Port} where Host is a string, or {Name, Host, Port} +%% where Name is an atom. +%% conns = queue() +-record(dest, {key, + conns=queue:new(), + num_sessions=0, + max_sessions=?DEF_MAX_SESSIONS, + max_pipeline_size=?DEF_MAX_PIPELINE_SIZE, + options=[], + trace=false}). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +start() -> + gen_server:start({local, ?MODULE}, ?MODULE, [], [{debug, []}]). + +stop() -> + catch gen_server:call(ibrowse, stop). + +%% @doc Sets options for a destination. If the options have not been +%% set in the ibrowse.conf file, it can be set using this function +%% before sending the first request to the destination. If not, +%% defaults will be used. Entries in ibrowse.conf look like this. +%%
+%% {dest, Host, Port, MaxSess, MaxPipe, Options}.
+%% where
+%% Host = string(). "www.erlang.org" | "193.180.168.23"
+%% Port = integer()
+%% MaxSess = integer()
+%% MaxPipe = integer()
+%% Options = optionList() -- see options in send_req/5
+%%
+%% @spec set_dest(Host::string(),Port::integer(),Opts::opt_list()) -> ok +%% opt_list() = [opt] +%% opt() = {max_sessions, integer()} | +%% {max_pipeline_size, integer()} | +%% {trace, boolean()} +set_dest(Host,Port,Opts) -> + gen_server:call(?MODULE,{set_dest,Host,Port,Opts}). + +%% @doc This is the basic function to send a HTTP request. +%% The Status return value indicates the HTTP status code returned by the webserver +%% @spec send_req(Url::string(), Headers::headerList(), Method::method()) -> response() +%% headerList() = [{header(), value()}] +%% header() = atom() | string() +%% value() = term() +%% method() = get | post | head | options | put | delete | trace +%% Status = string() +%% ResponseHeaders = [respHeader()] +%% respHeader() = {headerName(), headerValue()} +%% headerName() = string() +%% headerValue() = string() +%% response() = {ok, Status, ResponseHeaders, ResponseBody} | {error, Reason} +%% ResponseBody = string() +%% Reason = term() +send_req(Url, Headers, Method) -> + send_req(Url, Headers, Method, [], []). + +%% @doc Same as send_req/3. +%% If a list is specified for the body it has to be a flat list. +%% @spec send_req(Url, Headers, Method::method(), Body::body()) -> response() +%% body() = [] | string() | binary() +send_req(Url, Headers, Method, Body) -> + send_req(Url, Headers, Method, Body, []). + +%% @doc Same as send_req/4. +%% For a description of SSL Options, look in the ssl manpage. If the +%% HTTP Version to use is not specified, the default is 1.1 +%% @spec send_req(Url::string(), Headers::headerList(), Method::method(), Body::body(), Options::optionList()) -> response() +%% optionList() = [option()] +%% option() = {max_sessions, integer()} | +%% {max_pipeline_size, integer()} | +%% {trace, boolean()} | +%% {is_ssl, boolean()} | +%% {ssl_options, [SSLOpt]} | +%% {pool_name, atom()} | +%% {proxy_host, string()} | +%% {proxy_port, integer()} | +%% {proxy_user, string()} | +%% {proxy_password, string()} | +%% {use_absolute_uri, boolean()} | +%% {basic_auth, {username(), password()}} | +%% {cookie, string()} | +%% {content_length, integer()} | +%% {content_type, string()} | +%% {save_response_to_file, boolean()} | +%% {stream_to, process()} | +%% {http_vsn, {MajorVsn, MinorVsn}} +%% process() = pid() | atom() +%% username() = string() +%% password() = string() +%% SSLOpt = term() +send_req(Url, Headers, Method, Body, Options) -> + send_req(Url, Headers, Method, Body, Options, 30000). + +%% @doc Same as send_req/5. +%% All timeout values are in milliseconds. +%% @spec send_req(Url, Headers::headerList(), Method::method(), Body::body(), Options::optionList(), Timeout) -> response() +%% Timeout = integer() | infinity +send_req(Url, Headers, Method, Body, Options, Timeout) -> + Timeout_1 = case Timeout of + infinity -> + infinity; + _ -> + Timeout + 1000 + end, + case catch gen_server:call(ibrowse, + {send_req, [Url, Headers, Method, + Body, Options, Timeout]}, + Timeout_1) of + {'EXIT', {timeout, _}} -> + {error, genserver_timedout}; + Res -> + Res + end. + +%% @doc Turn tracing on for the ibrowse process +trace_on() -> + ibrowse ! {trace, true}. +%% @doc Turn tracing off for the ibrowse process +trace_off() -> + ibrowse ! {trace, false}. + +%% @doc Turn tracing on for all connections to the specified HTTP +%% server. Host is whatever is specified as the domain name in the URL +%% @spec trace_on(Host, Port) -> term() +%% Host = string() +%% Port = integer() +trace_on(Host, Port) -> + ibrowse ! {trace, true, Host, Port}. + +%% @doc Turn tracing OFF for all connections to the specified HTTP +%% server. +%% @spec trace_off(Host, Port) -> term() +trace_off(Host, Port) -> + ibrowse ! {trace, false, Host, Port}. + +%% @doc Internal export. Called by a HTTP connection process to +%% indicate to the load balancing process (ibrowse) that a synchronous +%% request has finished processing. +reply(OrigCaller, Reply) -> + gen_server:call(ibrowse, {reply, OrigCaller, Reply, self()}). + +%% @doc Internal export. Called by a HTTP connection process to +%% indicate to the load balancing process (ibrowse) that an +%% asynchronous request has finished processing. +finished_async_request() -> + gen_server:call(ibrowse, {finished_async_request, self()}). + +%% @doc Internal export. Called by a HTTP connection process to +%% indicate to ibrowse that it is shutting down and further requests +%% should not be sent it's way. +shutting_down() -> + gen_server:call(ibrowse, {shutting_down, self()}). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%-------------------------------------------------------------------- +init(_) -> + process_flag(trap_exit, true), + State = #state{}, + put(my_trace_flag, State#state.trace), + case code:priv_dir(ibrowse) of + {error, _} -> + {ok, #state{}}; + PrivDir -> + Filename = filename:join(PrivDir, "ibrowse.conf"), + case file:consult(Filename) of + {ok, Terms} -> + Fun = fun({dest, Host, Port, MaxSess, MaxPipe, Options}, Acc) + when list(Host), integer(Port), + integer(MaxSess), MaxSess > 0, + integer(MaxPipe), MaxPipe > 0, list(Options) -> + Key = maybe_named_key(Host, Port, Options), + NewDest = #dest{key=Key, + options=Options, + max_sessions=MaxSess, + max_pipeline_size=MaxPipe}, + [NewDest | Acc]; + (_, Acc) -> + Acc + end, + {ok, #state{dests=lists:foldl(Fun, [], Terms)}}; + _Else -> + {ok, #state{}} + end + end. + +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_call({send_req, _}=Req, From, State) -> + State_1 = handle_send_req(Req, From, State), + {noreply, State_1}; + +handle_call({reply, OrigCaller, Reply, HttpClientPid}, From, State) -> + gen_server:reply(From, ok), + gen_server:reply(OrigCaller, Reply), + Key = {HttpClientPid, pending_reqs}, + case get(Key) of + NumPend when integer(NumPend) -> + put(Key, NumPend - 1); + _ -> + ok + end, + {noreply, State}; + +handle_call({finished_async_request, HttpClientPid}, From, State) -> + gen_server:reply(From, ok), + Key = {HttpClientPid, pending_reqs}, + case get(Key) of + NumPend when integer(NumPend) -> + put(Key, NumPend - 1); + _ -> + ok + end, + {noreply, State}; + +handle_call({shutting_down, Pid}, _From, State) -> + State_1 = handle_conn_closing(Pid, State), + {reply, ok, State_1}; + +handle_call({set_dest,Host,Port,Opts}, _From, State) -> + State2 = set_destI(State,Host,Port,Opts), + {reply, ok, State2}; + +handle_call(stop, _From, State) -> + {stop, shutting_down, ok, State}; + +handle_call(_Request, _From, State) -> + Reply = ok, + {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- + +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +%% A bit of a bodge here...ideally, would be good to store connection state +%% in the queue itself against each Pid. +handle_info({done_req, Pid}, State) -> + Key = {Pid, pending_reqs}, + case get(Key) of + NumPend when integer(NumPend) -> + put(Key, NumPend - 1); + _ -> + ok + end, + do_trace("~p has finished a request~n", [Pid]), + {noreply, State}; + +handle_info({'EXIT', _, normal}, State) -> + {noreply, State}; + +handle_info({'EXIT', Pid, _Reason}, State) -> + %% TODO: We have to reply to all the pending requests + State_1 = handle_conn_closing(Pid, State), + do_trace("~p has exited~n", [Pid]), + {noreply, State_1}; + +handle_info({shutting_down, Pid}, State) -> + State_1 = handle_conn_closing(Pid, State), + {noreply, State_1}; + +handle_info({conn_closing, Pid, OriReq, From}, State) -> + State_1 = handle_conn_closing(Pid, State), + State_2 = handle_send_req(OriReq, From, State_1), + {noreply, State_2}; + +handle_info({trace, Bool}, State) -> + put(my_trace_flag, Bool), + {noreply, State#state{trace=Bool}}; + +handle_info({trace, Bool, Host, Port}, #state{dests=Dests}=State) -> + case lists:keysearch({Host, Port}, #dest.key, Dests) of + {value, Dest} -> + lists:foreach(fun(ConnPid) -> + ConnPid ! {trace, Bool} + end, queue:to_list(Dest#dest.conns)), + {noreply, State#state{dests=lists:keyreplace({Host,Port}, #dest.key, Dests, Dest#dest{trace=Bool})}}; + false -> + do_trace("Not found any state information for specified Host, Port.~n", []), + {noreply, State} + end; + +handle_info(_Info, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, _State) -> + ok. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +handle_send_req({send_req, [Url, _Headers, _Method, _Body, Options, _Timeout]}=Req, + From, State) -> + case get_host_port(Url, Options) of + {Host, Port, _RelPath} -> + Key = maybe_named_key(Host, Port, Options), + case lists:keysearch(Key, #dest.key, State#state.dests) of + false -> + {ok, Pid} = spawn_new_connection(Key, false, Options), + Pid ! {Req, From}, + Q = queue:new(), + Q_1 = queue:in(Pid, Q), + NewDest = #dest{key=Key,conns=Q_1,num_sessions=1}, %% MISSING is_ssl + State#state{dests=[NewDest|State#state.dests]}; + {value, #dest{conns=Conns, + num_sessions=NumS, + max_pipeline_size=MaxPSz, + max_sessions=MaxS}=Dest} -> + case get_free_worker(Conns, NumS, MaxS, MaxPSz) of + spawn_new_connection -> + do_trace("Spawning new connection~n", []), + {ok, Pid} = spawn_new_connection(Key, Dest#dest.trace, Dest#dest.options), + Pid ! {Req, From}, + Q_1 = queue:in(Pid, Conns), + Dest_1 = Dest#dest{conns=Q_1, num_sessions=NumS+1}, + State#state{dests=lists:keyreplace(Key, #dest.key, State#state.dests, Dest_1)}; + not_found -> + do_trace("State -> ~p~nPDict -> ~p~n", [State, get()]), + gen_server:reply(From, {error, retry_later}), + State; + {ok, Pid, _, ConnPids} -> + do_trace("Reusing existing pid: ~p~n", [Pid]), + Pid_key = {Pid, pending_reqs}, + put(Pid_key, get(Pid_key) + 1), + Pid ! {Req, From}, + State#state{dests=lists:keyreplace(Key, #dest.key, State#state.dests,Dest#dest{conns=ConnPids})} + end + end; + invalid_uri -> + gen_server:reply(From, {error, invalid_uri}), + State + end. + +get_host_port(Url, Options) -> + case get_value(proxy_host, Options, false) of + false -> + case parse_url(Url) of + #url{host=H, port=P, path=Path} -> + {H, P, Path}; + _Err -> + invalid_uri + end; + PxyHost -> + PxyPort = get_value(proxy_port, Options, 80), + {PxyHost, PxyPort, Url} + end. + +handle_conn_closing(Pid, #state{dests=Dests}=State) -> + erase({Pid, pending_reqs}), + HostKey = get({Pid, hostport}), + erase({Pid, hostport}), + do_trace("~p is shutting down~n", [Pid]), + case lists:keysearch(HostKey, #dest.key, Dests) of + {value, #dest{num_sessions=Num, conns=Q}=Dest} -> + State#state{dests=lists:keyreplace(HostKey, #dest.key, Dests, + Dest#dest{conns=del_from_q(Q, Num, Pid), num_sessions=Num-1})}; + false -> + State + end. + +%% Replaces destination information if found, otherwise appends it. +%% Copies over Connection Queue and Number of sessions. +set_destI(State,Host,Port,Opts) -> + #state{dests=DestList} = State, + Key = maybe_named_key(Host, Port, Opts), + NewDests = case lists:keysearch(Key, #dest.key, DestList) of + false -> + Dest = insert_opts(Opts,#dest{key=Key}), + [Dest | DestList]; + {value, OldDest} -> + OldDest_1 = insert_opts(Opts, OldDest), + [OldDest_1 | (DestList -- [OldDest])] + end, + State#state{dests=NewDests}. + +insert_opts(Opts, Dest) -> + insert_opts_1(Opts, Dest#dest{options=Opts}). + +insert_opts_1([],Dest) -> Dest; +insert_opts_1([{max_sessions,Msess}|T],Dest) -> + insert_opts_1(T,Dest#dest{max_sessions=Msess}); +insert_opts_1([{max_pipeline_size,Mpls}|T],Dest) -> + insert_opts_1(T,Dest#dest{max_pipeline_size=Mpls}); +insert_opts_1([{trace,Bool}|T],Dest) when Bool==true; Bool==false -> + insert_opts_1(T,Dest#dest{trace=Bool}); +insert_opts_1([_|T],Dest) -> %% ignores other + insert_opts_1(T,Dest). + +% Picks out the worker with the minimum pipeline size +% If a worker is found with a non-zero pipeline size, but the number of sessins +% is less than the max allowed sessions, a new connection is spawned. +get_free_worker(Q, NumSessions, MaxSessions, MaxPSz) -> + case get_free_worker_1(Q, NumSessions, MaxPSz, {undefined, undefined}) of + not_found when NumSessions < MaxSessions -> + spawn_new_connection; + not_found -> + not_found; + {ok, Pid, PSz, _Q1} when NumSessions < MaxSessions, PSz > 0 -> + do_trace("Found Pid -> ~p. PSz -> ~p~n", [Pid, PSz]), + spawn_new_connection; + Ret -> + do_trace("get_free_worker: Ret -> ~p~n", [Ret]), + Ret + end. + +get_free_worker_1(_, 0, _, {undefined, undefined}) -> + not_found; +get_free_worker_1({{value, WorkerPid}, Q}, 0, _, {MinPSzPid, PSz}) -> + {ok, MinPSzPid, PSz, queue:in(WorkerPid, Q)}; +get_free_worker_1({{value, Pid}, Q1}, NumSessions, MaxPSz, {_MinPSzPid, MinPSz}=V) -> + do_trace("Pid -> ~p. MaxPSz -> ~p MinPSz -> ~p~n", [Pid, MaxPSz, MinPSz]), + case get({Pid, pending_reqs}) of + NumP when NumP < MaxPSz, NumP < MinPSz -> + get_free_worker_1(queue:out(queue:in(Pid, Q1)), NumSessions-1, MaxPSz, {Pid, NumP}); + _ -> + get_free_worker_1(queue:out(queue:in(Pid, Q1)), NumSessions-1, MaxPSz, V) + end; +get_free_worker_1({empty, _Q}, _, _, _) -> + do_trace("Queue empty -> not_found~n", []), + not_found; +get_free_worker_1(Q, NumSessions, MaxPSz, MinPSz) -> + get_free_worker_1(queue:out(Q), NumSessions, MaxPSz, MinPSz). + +spawn_new_connection({_Pool_name, Host, Port}, Trace, Options) -> + spawn_new_connection({Host, Port}, Trace, Options); +spawn_new_connection({Host, Port}, Trace, Options) -> + {ok, Pid} = ibrowse_http_client:start_link([Host, Port, Trace, Options]), + Key = maybe_named_key(Host, Port, Options), + put({Pid, pending_reqs}, 1), + put({Pid, hostport}, Key), + {ok, Pid}. + +del_from_q({empty, Q}, _, _) -> + Q; +del_from_q({{value, V}, Q}, 0, _Elem) -> + queue:in(V, Q); +del_from_q({{value, Elem}, Q1}, QSize, Elem) -> + del_from_q(queue:out(Q1), QSize-1, Elem); +del_from_q({{value, V}, Q}, QSize, Elem) -> + del_from_q(queue:out(queue:in(V, Q)), QSize-1, Elem); +del_from_q(Q, QSize, Elem) -> + del_from_q(queue:out(Q), QSize, Elem). + +maybe_named_key(Host, Port, Opts) -> + case lists:keysearch(name, 1, Opts) of + {value, {name, Pool_name}} when is_atom(Pool_name) -> + {Pool_name, Host, Port}; + _ -> + {Host, Port} + end. + +% get_value(Tag, TVL) -> +% {value, {_, V}} = lists:keysearch(Tag,1,TVL), +% V. + +get_value(Tag, TVL, DefVal) -> + case lists:keysearch(Tag, 1, TVL) of + {value, {_, V}} -> + V; + false -> + DefVal + end. + +do_trace(Fmt, Args) -> + do_trace(get(my_trace_flag), Fmt, Args). +% do_trace(true, Fmt, Args) -> +% io:format("~s -- IBROWSE - "++Fmt, [printable_date() | Args]); +do_trace(true, Fmt, Args) -> + io:format("~s -- IBROWSE - "++Fmt, [printable_date() | Args]); +do_trace(_, _, _) -> ok. diff --git a/src/ibrowse.hrl b/src/ibrowse.hrl new file mode 100644 index 0000000..b4fa114 --- /dev/null +++ b/src/ibrowse.hrl @@ -0,0 +1,6 @@ +-ifndef(IBROWSE_HRL). +-define(IBROWSE_HRL, "ibrowse.hrl"). + +-record(url, {abspath, host, port, username, password, path, protocol}). + +-endif. diff --git a/src/ibrowse_app.erl b/src/ibrowse_app.erl new file mode 100644 index 0000000..f5e523c --- /dev/null +++ b/src/ibrowse_app.erl @@ -0,0 +1,64 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_app.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi +%%%------------------------------------------------------------------- +-module(ibrowse_app). +-vsn('$Id: ibrowse_app.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(application). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ + start/2, + stop/1 + ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ + ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: start/2 +%% Returns: {ok, Pid} | +%% {ok, Pid, State} | +%% {error, Reason} +%%-------------------------------------------------------------------- +start(_Type, _StartArgs) -> + case ibrowse_sup:start_link() of + {ok, Pid} -> + {ok, Pid}; + Error -> + Error + end. + +%%-------------------------------------------------------------------- +%% Func: stop/1 +%% Returns: any +%%-------------------------------------------------------------------- +stop(_State) -> + ok. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/ibrowse_http_client.erl b/src/ibrowse_http_client.erl new file mode 100644 index 0000000..ceb678d --- /dev/null +++ b/src/ibrowse_http_client.erl @@ -0,0 +1,1249 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_http_client.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : The name says it all +%%% +%%% Created : 11 Oct 2003 by Chandrashekhar Mullaparthi +%%%------------------------------------------------------------------- +-module(ibrowse_http_client). +-vsn('$Id: ibrowse_http_client.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(gen_server). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +-export([start_link/1]). + +-ifdef(debug). +-compile(export_all). +-endif. + +%% gen_server callbacks +-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). + +-export([parse_url/1, + printable_date/0]). + +-include("ibrowse.hrl"). + +-record(state, {host, port, use_proxy = false, proxy_auth_digest, + ssl_options=[], is_ssl, socket, + reqs=queue:new(), cur_req, status=idle, http_status_code, + reply_buffer=[], rep_buf_size=0, recvd_headers=[], + is_closing, send_timer, content_length, + deleted_crlf = false, transfer_encoding, chunk_size, + chunks=[], save_response_to_file = false, + tmp_file_name, tmp_file_fd}). + +-record(request, {url, method, options, from, + stream_to, req_id}). + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the server +%%-------------------------------------------------------------------- +start_link(Args) -> + gen_server:start_link(?MODULE, Args, []). + +%%==================================================================== +%% Server functions +%%==================================================================== + +%%-------------------------------------------------------------------- +%% Function: init/1 +%% Description: Initiates the server +%% Returns: {ok, State} | +%% {ok, State, Timeout} | +%% ignore | +%% {stop, Reason} +%%-------------------------------------------------------------------- +init([Host, Port, Trace, Options]) -> + {SSLOptions, IsSSL} = case get_value(is_ssl, Options, false) of + false -> {[], false}; + true -> {get_value(ssl_options, Options), true} + end, + State = #state{host=Host, port=Port, is_ssl=IsSSL, ssl_options=SSLOptions}, + put(ibrowse_http_client_host, Host), + put(ibrowse_http_client_port, Port), + put(my_trace_flag, Trace), + {ok, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_call/3 +%% Description: Handling call messages +%% Returns: {reply, Reply, State} | +%% {reply, Reply, State, Timeout} | +%% {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, Reply, State} | (terminate/2 is called) +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_call(_Request, _From, State) -> + Reply = ok, + {reply, Reply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_cast/2 +%% Description: Handling cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +handle_cast(_Msg, State) -> + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: handle_info/2 +%% Description: Handling all non call/cast messages +%% Returns: {noreply, State} | +%% {noreply, State, Timeout} | +%% {stop, Reason, State} (terminate/2 is called) +%%-------------------------------------------------------------------- +%% Received a request when the remote server has already sent us a +%% Connection: Close header +handle_info({{send_req, Req}, From}, #state{is_closing=true}=State) -> + ibrowse ! {conn_closing, self(), {send_req, Req}, From}, + {noreply, State}; + +%% First request when no connection exists. +handle_info({{send_req, [Url, Headers, Method, + Body, Options, Timeout]}, From}, + #state{socket=undefined, + host=Host, port=Port}=State) -> + State_1 = case get_value(proxy_host, Options, false) of + false -> + State; + _PHost -> + ProxyUser = get_value(proxy_user, Options), + ProxyPassword = get_value(proxy_password, Options), + SaveResponseToFile = get_value(save_response_to_file, Options, false), + Digest = http_auth_digest(ProxyUser, ProxyPassword), + State#state{use_proxy = true, + save_response_to_file = SaveResponseToFile, + proxy_auth_digest = Digest} + end, + StreamTo = get_value(stream_to, Options, undefined), + ReqId = make_req_id(), + NewReq = #request{url=Url, + method=Method, + stream_to=StreamTo, + options=Options, + req_id=ReqId, + from=From}, + Reqs = queue:in(NewReq, State#state.reqs), + State_2 = check_ssl_options(Options, State_1#state{reqs = Reqs}), + do_trace("Connecting...~n", []), + Timeout_1 = case Timeout of + infinity -> + infinity; + _ -> + round(Timeout*0.9) + end, + case do_connect(Host, Port, Options, State_2, Timeout_1) of + {ok, Sock} -> + Ref = case Timeout of + infinity -> + undefined; + _ -> + erlang:send_after(Timeout, self(), {req_timedout, From}) + end, + do_trace("Connected!~n", []), + case send_req_1(Url, Headers, Method, Body, Options, Sock, State_2) of + ok -> + case StreamTo of + undefined -> + ok; + _ -> + gen_server:reply(From, {ibrowse_req_id, ReqId}) + end, + {noreply, State_2#state{socket=Sock, + send_timer = Ref, + cur_req = NewReq, + status=get_header}}; + Err -> + do_trace("Send failed... Reason: ~p~n", [Err]), + ibrowse:shutting_down(), + ibrowse:reply(From, {error, send_failed}), + {stop, normal, State_2} + end; + Err -> + do_trace("Error connecting. Reason: ~1000.p~n", [Err]), + ibrowse:shutting_down(), + ibrowse:reply(From, {error, conn_failed}), + {stop, normal, State_2} + end; + +%% Request which is to be pipelined +handle_info({{send_req, [Url, Headers, Method, + Body, Options, Timeout]}, From}, + #state{socket=Sock, status=Status, reqs=Reqs}=State) -> + do_trace("Recvd request in connected state. Status -> ~p NumPending: ~p~n", [Status, length(queue:to_list(Reqs))]), + StreamTo = get_value(stream_to, Options, undefined), + ReqId = make_req_id(), + NewReq = #request{url=Url, + stream_to=StreamTo, + method=Method, + options=Options, + req_id=ReqId, + from=From}, + State_1 = State#state{reqs=queue:in(NewReq, State#state.reqs)}, + case send_req_1(Url, Headers, Method, Body, Options, Sock, State_1) of + ok -> + do_setopts(Sock, [{active, true}], State#state.is_ssl), + case Timeout of + infinity -> + ok; + _ -> + erlang:send_after(Timeout, self(), {req_timedout, From}) + end, + State_2 = case Status of + idle -> + State_1#state{status=get_header, + cur_req=NewReq}; + _ -> + State_1 + end, + case StreamTo of + undefined -> + ok; + _ -> + %% We don't use ibrowse:reply here because we are + %% just sending back the request ID. Not the + %% response + gen_server:reply(From, {ibrowse_req_id, ReqId}) + end, + {noreply, State_2}; + Err -> + do_trace("Send request failed: Reason: ~p~n", [Err]), + ibrowse:reply(From, {error, send_failed}), + do_error_reply(State, send_failed), + ibrowse:shutting_down(), + {stop, normal, State_1} + end; + +handle_info({tcp, _Sock, Data}, State) -> + handle_sock_data(Data, State); +handle_info({ssl, _Sock, Data}, State) -> + handle_sock_data(Data, State); + +handle_info({tcp_closed, _Sock}, State) -> + do_trace("TCP connection closed by peer!~n", []), + handle_sock_closed(State), + {stop, normal, State}; +handle_info({ssl_closed, _Sock}, State) -> + do_trace("SSL connection closed by peer!~n", []), + handle_sock_closed(State), + {stop, normal, State}; + +handle_info({req_timedout, From}, State) -> + case lists:keysearch(From, #request.from, queue:to_list(State#state.reqs)) of + false -> + {noreply, State}; + {value, _} -> + ibrowse:shutting_down(), + do_error_reply(State, req_timedout), + {stop, normal, State} + end; + +handle_info({trace, Bool}, State) -> + do_trace("Turning trace on: Host: ~p Port: ~p~n", [State#state.host, State#state.port]), + put(my_trace_flag, Bool), + {noreply, State}; + +handle_info(Info, State) -> + io:format("Recvd unknown message ~p when in state: ~p~n", [Info, State]), + {noreply, State}. + +%%-------------------------------------------------------------------- +%% Function: terminate/2 +%% Description: Shutdown the server +%% Returns: any (ignored by gen_server) +%%-------------------------------------------------------------------- +terminate(_Reason, State) -> + case State#state.socket of + undefined -> + ok; + Sock -> + do_close(Sock, State#state.is_ssl) + end. + +%%-------------------------------------------------------------------- +%% Func: code_change/3 +%% Purpose: Convert process state when code is changed +%% Returns: {ok, NewState} +%%-------------------------------------------------------------------- +code_change(_OldVsn, State, _Extra) -> + {ok, State}. + +%%-------------------------------------------------------------------- +%%% Internal functions +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% Handles data recvd on the socket +%%-------------------------------------------------------------------- +handle_sock_data(Data, #state{status=idle}=State) -> + do_trace("Data recvd on socket in state idle!. ~1000.p~n", [Data]), + ibrowse:shutting_down(), + do_error_reply(State, data_in_status_idle), + do_close(State#state.socket, State#state.is_ssl), + {stop, normal, State}; + +handle_sock_data(Data, #state{status=get_header, socket=Sock}=State) -> + case parse_response(Data, State) of + {error, _Reason} -> + ibrowse:shutting_down(), + {stop, normal, State}; + stop -> + ibrowse:shutting_down(), + {stop, normal, State}; + State_1 -> + do_setopts(Sock, [{active, true}], State#state.is_ssl), + {noreply, State_1} + end; + +handle_sock_data(Data, #state{status=get_body, content_length=CL, + recvd_headers=Headers, cur_req=CurReq, + chunk_size=CSz, reqs=Reqs, socket=Sock}=State) -> + case (CL == undefined) and (CSz == undefined) of + true -> + case accumulate_response(Data, State) of + {error, Reason} -> + ibrowse:shutting_down(), + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + #request{from=From, stream_to=StreamTo, req_id=ReqId} = CurReq, + do_reply(From, StreamTo, ReqId, + {error, {file_open_error, Reason, Headers}}), + do_error_reply(State#state{reqs=Reqs_1}, previous_request_failed), + {stop, normal, State}; + State_1 -> + do_setopts(Sock, [{active, true}], State#state.is_ssl), + {noreply, State_1} + end; + _ -> + case parse_11_response(Data, State) of + {error, _Reason} -> + ibrowse:shutting_down(), + {stop, normal, State}; + stop -> + ibrowse:shutting_down(), + {stop, normal, State}; + State_1 -> + do_setopts(Sock, [{active, true}], State#state.is_ssl), + {noreply, State_1} + end + end. + +accumulate_response(Data, #state{save_response_to_file = true, + tmp_file_fd = undefined, + http_status_code=[$2 | _]}=State) -> + TmpFilename = make_tmp_filename(), + case file:open(TmpFilename, [write, delayed_write, raw]) of + {ok, Fd} -> + accumulate_response(Data, State#state{tmp_file_fd=Fd, + tmp_file_name=TmpFilename}); + {error, Reason} -> + {error, {file_open_error, Reason}} + end; +accumulate_response(Data, #state{save_response_to_file=true, + transfer_encoding=chunked, + chunks = Chunks, + http_status_code=[$2 | _], + tmp_file_fd=Fd}=State) -> + case file:write(Fd, [Chunks | Data]) of + ok -> + State#state{chunks = []}; + {error, Reason} -> + {error, {file_write_error, Reason}} + end; +accumulate_response(Data, #state{save_response_to_file=true, + reply_buffer = RepBuf, + http_status_code=[$2 | _], + tmp_file_fd=Fd}=State) -> + case file:write(Fd, [RepBuf | Data]) of + ok -> + State#state{reply_buffer = []}; + {error, Reason} -> + {error, {file_write_error, Reason}} + end; +accumulate_response([], State) -> + State; +accumulate_response(Data, #state{reply_buffer=RepBuf, + cur_req=CurReq}=State) -> + #request{stream_to=StreamTo, req_id=ReqId} = CurReq, + case StreamTo of + undefined -> + State#state{reply_buffer = [Data | RepBuf]}; + _ -> + do_interim_reply(StreamTo, ReqId, Data), + State + end. + +make_tmp_filename() -> + DownloadDir = safe_get_env(ibrowse, download_dir, filename:absname("./")), + {A,B,C} = now(), + filename:join([DownloadDir, + "ibrowse_tmp_file_"++ + integer_to_list(A) ++ + integer_to_list(B) ++ + integer_to_list(C)]). + + +%%-------------------------------------------------------------------- +%% Handles the case when the server closes the socket +%%-------------------------------------------------------------------- +handle_sock_closed(#state{status=get_header}=State) -> + ibrowse:shutting_down(), + do_error_reply(State, connection_closed); + +handle_sock_closed(#state{cur_req=undefined}) -> + ibrowse:shutting_down(); + +%% We check for IsClosing because this the server could have sent a +%% Connection-Close header and has closed the socket to indicate end +%% of response. There maybe requests pipelined which need a response. +handle_sock_closed(#state{reply_buffer=Buf, reqs=Reqs, http_status_code=SC, + is_closing=IsClosing, cur_req=CurReq, + status=get_body, recvd_headers=Headers}=State) -> + #request{from=From, stream_to=StreamTo, req_id=ReqId} = CurReq, + case IsClosing of + true -> + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + do_reply(From, StreamTo, ReqId, {ok, SC, Headers, lists:flatten(lists:reverse(Buf))}), + do_error_reply(State#state{reqs = Reqs_1}, connection_closed); + _ -> + do_error_reply(State, connection_closed) + end. + +do_connect(Host, Port, _Options, #state{is_ssl=true, ssl_options=SSLOptions}, Timeout) -> + ssl:connect(Host, Port, [{active, false} | SSLOptions], Timeout); +do_connect(Host, Port, _Options, _State, Timeout) -> + gen_tcp:connect(Host, Port, [{active, false}], Timeout). + +do_send(Sock, Req, true) -> ssl:send(Sock, Req); +do_send(Sock, Req, false) -> gen_tcp:send(Sock, Req). + +do_close(Sock, true) -> ssl:close(Sock); +do_close(Sock, false) -> gen_tcp:close(Sock). + +do_setopts(Sock, Opts, true) -> ssl:setopts(Sock, Opts); +do_setopts(Sock, Opts, false) -> inet:setopts(Sock, Opts). + +check_ssl_options(Options, State) -> + case get_value(is_ssl, Options, false) of + false -> + State; + true -> + State#state{is_ssl=true, ssl_options=get_value(ssl_options, Options)} + end. + +send_req_1(Url, Headers, Method, Body, Options, Sock, State) -> + #url{abspath = AbsPath, + host = Host, + port = Port, + path = RelPath} = Url_1 = parse_url(Url), + Headers_1 = add_auth_headers(Url_1, Options, Headers, State), + Req = make_request(Method, + [{"Host", [Host, ":", integer_to_list(Port)]} | Headers_1], + AbsPath, RelPath, Body, Options, State#state.use_proxy), + case get(my_trace_flag) of %%Avoid the binary operations if trace is not on... + true -> + NReq = binary_to_list(list_to_binary(Req)), + do_trace("Sending request: ~n" + "--- Request Begin ---~n~s~n" + "--- Request End ---~n", [NReq]); + _ -> ok + end, + SndRes = do_send(Sock, Req, State#state.is_ssl), + do_setopts(Sock, [{active, true}], State#state.is_ssl), + SndRes. + +add_auth_headers(#url{username = User, + password = UPw}, + Options, + Headers, + #state{use_proxy = UseProxy, + proxy_auth_digest = ProxyAuthDigest}) -> + Headers_1 = case User of + undefined -> + case get_value(basic_auth, Options, undefined) of + undefined -> + Headers; + {U,P} -> + [{"Authorization", ["Basic ", http_auth_digest(U, P)]} | Headers] + end; + _ -> + [{"Authorization", ["Basic ", http_auth_digest(User, UPw)]} | Headers] + end, + case UseProxy of + false -> + Headers_1; + true -> + [{"Proxy-Authorization", ["Basic ", ProxyAuthDigest]} | Headers_1] + end. + +http_auth_digest(Username, Password) -> + encode_base64(Username ++ [$: | Password]). + +encode_base64([]) -> + []; +encode_base64([A]) -> + [e(A bsr 2), e((A band 3) bsl 4), $=, $=]; +encode_base64([A,B]) -> + [e(A bsr 2), e(((A band 3) bsl 4) bor (B bsr 4)), e((B band 15) bsl 2), $=]; +encode_base64([A,B,C|Ls]) -> + encode_base64_do(A,B,C, Ls). +encode_base64_do(A,B,C, Rest) -> + BB = (A bsl 16) bor (B bsl 8) bor C, + [e(BB bsr 18), e((BB bsr 12) band 63), + e((BB bsr 6) band 63), e(BB band 63)|encode_base64(Rest)]. + +e(X) when X >= 0, X < 26 -> X+65; +e(X) when X>25, X<52 -> X+71; +e(X) when X>51, X<62 -> X-4; +e(62) -> $+; +e(63) -> $/; +e(X) -> exit({bad_encode_base64_token, X}). + +make_request(Method, Headers, AbsPath, RelPath, Body, Options, UseProxy) -> + HttpVsn = http_vsn_string(get_value(http_vsn, Options, {1,1})), + Headers_1 = case get_value(content_length, Headers, false) of + false when (Body == []) or (Body == <<>>) -> + Headers; + false when is_binary(Body) -> + [{"content-length", integer_to_list(size(Body))} | Headers]; + false -> + [{"content-length", integer_to_list(length(Body))} | Headers]; + true -> + Headers + end, + Headers_2 = cons_headers(Headers_1), + Uri = case get_value(use_absolute_uri, Options, false) or UseProxy of + true -> + AbsPath; + false -> + RelPath + end, + [method(Method), " ", Uri, " ", HttpVsn, crnl(), Headers_2, crnl(), Body]. + +http_vsn_string({0,9}) -> "HTTP/0.9"; +http_vsn_string({1,0}) -> "HTTP/1.0"; +http_vsn_string({1,1}) -> "HTTP/1.1". + +cons_headers(Headers) -> + cons_headers(Headers, []). +cons_headers([], Acc) -> + encode_headers(Acc); +cons_headers([{basic_auth, {U,P}} | T], Acc) -> + cons_headers(T, [{"Authorization", ["Basic ", httpd_util:encode_base64(U++":"++P)]} | Acc]); +cons_headers([{cookie, Cookie} | T], Acc) -> + cons_headers(T, [{"Cookie", Cookie} | Acc]); +cons_headers([{content_length, L} | T], Acc) -> + cons_headers(T, [{"Content-Length", L} | Acc]); +cons_headers([{content_type, L} | T], Acc) -> + cons_headers(T, [{"Content-Type", L} | Acc]); +cons_headers([H | T], Acc) -> + cons_headers(T, [H | Acc]). + +encode_headers(L) -> + encode_headers(L, []). +encode_headers([{http_vsn, _Val} | T], Acc) -> + encode_headers(T, Acc); +encode_headers([{Name,Val} | T], Acc) when list(Name) -> + encode_headers(T, [[Name, ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([{Name,Val} | T], Acc) when atom(Name) -> + encode_headers(T, [[atom_to_list(Name), ": ", fmt_val(Val), crnl()] | Acc]); +encode_headers([], Acc) -> + lists:reverse(Acc). + +parse_response(_Data, #state{cur_req = undefined}=State) -> + State#state{status = idle}; +parse_response(Data, #state{reply_buffer=Acc, reqs=Reqs, + cur_req=CurReq}=State) -> + #request{from=From, stream_to=StreamTo, req_id=ReqId} = CurReq, + MaxHeaderSize = safe_get_env(ibrowse, max_headers_size, infinity), + case scan_header(Data, Acc) of + {yes, Headers, Data_1} -> + do_trace("Recvd Header Data -> ~s~n----~n", [Headers]), + do_trace("Recvd headers~n--- Headers Begin ---~n~s~n--- Headers End ---~n~n", [Headers]), + {HttpVsn, StatCode, Headers_1} = parse_headers(Headers), + do_trace("HttpVsn: ~p StatusCode: ~p Headers_1 -> ~1000.p~n", [HttpVsn, StatCode, Headers_1]), + LCHeaders = [{to_lower(X), Y} || {X,Y} <- Headers_1], + ConnClose = to_lower(get_value("connection", LCHeaders, "false")), + IsClosing = is_connection_closing(HttpVsn, ConnClose), + case IsClosing of + true -> + ibrowse:shutting_down(); + false -> + ok + end, + [#request{options = CurReqOptions, + method=Method} | _] = queue:to_list(Reqs), + SaveResponseToFile = get_value(save_response_to_file, CurReqOptions, false), + State_1 = State#state{recvd_headers=Headers_1, status=get_body, + save_response_to_file = SaveResponseToFile, + tmp_file_fd = undefined, tmp_file_name = undefined, + http_status_code=StatCode, is_closing=IsClosing}, + put(conn_close, ConnClose), + TransferEncoding = to_lower(get_value("transfer-encoding", LCHeaders, "false")), + case get_value("content-length", LCHeaders, undefined) of + _ when Method == head -> + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_reply(From, StreamTo, ReqId, {ok, StatCode, Headers_1, []}), +% ibrowse:reply(From, {ok, StatCode, Headers_1, []}), + cancel_timer(State#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1), + parse_response(Data_1, State_2#state{reqs=Reqs_1}); + _ when hd(StatCode) == $1 -> + %% No message body is expected. Server may send + %% one or more 1XX responses before a proper + %% response. + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_trace("Recvd a status code of ~p. Ignoring and waiting for a proper response~n", [StatCode]), + parse_response(Data_1, State_1); + _ when StatCode == "204"; + StatCode == "304" -> + %% No message body is expected for these Status Codes. + %% RFC2616 - Sec 4.4 + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_reply(From, StreamTo, ReqId, {ok, StatCode, Headers_1, []}), +% ibrowse:reply(From, {ok, StatCode, Headers_1, []}), + cancel_timer(State#state.send_timer, {eat_message, {req_timedout, From}}), + State_2 = reset_state(State_1), + parse_response(Data_1, State_2#state{reqs=Reqs_1}); + _ when TransferEncoding == "chunked" -> + do_trace("Chunked encoding detected...~n",[]), + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + parse_11_response(Data_1, State_1#state{transfer_encoding=chunked, + chunk_size=chunk_start, + reply_buffer=[], chunks=[]}); + undefined when HttpVsn == "HTTP/1.0"; + ConnClose == "close" -> + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + State_1#state{reply_buffer=[Data_1]}; + undefined -> + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + do_reply(From, StreamTo, ReqId, + {error, {content_length_undefined, Headers}}), + do_error_reply(State_1#state{reqs=Reqs_1}, previous_request_failed), + {error, content_length_undefined}; + V -> + case catch list_to_integer(V) of + V_1 when integer(V_1), V_1 >= 0 -> + send_async_headers(ReqId, StreamTo, StatCode, Headers_1), + do_trace("Recvd Content-Length of ~p~n", [V_1]), + parse_11_response(Data_1, + State_1#state{rep_buf_size=0, + reply_buffer=[], + content_length=V_1}); + _ -> + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + do_reply(From, StreamTo, ReqId, + {error, {content_length_undefined, Headers}}), + do_error_reply(State_1#state{reqs=Reqs_1}, previous_request_failed), + {error, content_length_undefined} + end + end; + {no, Acc_1} when MaxHeaderSize == infinity -> + State#state{reply_buffer=Acc_1}; + {no, Acc_1} when length(Acc_1) < MaxHeaderSize -> + State#state{reply_buffer=Acc_1}; + {no, _Acc_1} -> + do_reply(From, StreamTo, ReqId, {error, max_headers_size_exceeded}), + {_, Reqs_1} = queue:out(Reqs), + do_error_reply(State#state{reqs=Reqs_1}, previous_request_failed), + {error, max_headers_size_exceeded} + end. + +is_connection_closing("HTTP/0.9", _) -> true; +is_connection_closing(_, "close") -> true; +is_connection_closing("HTTP/1.0", "false") -> true; +is_connection_closing(_, _) -> false. + +%% This clause determines the chunk size when given data from the beginning of the chunk +parse_11_response(DataRecvd, + #state{transfer_encoding=chunked, + chunk_size=chunk_start, + cur_req=CurReq, + reply_buffer=Buf}=State) -> + case scan_crlf(DataRecvd, Buf) of + {yes, ChunkHeader, Data_1} -> + case parse_chunk_header(ChunkHeader) of + {error, Reason} -> + {error, Reason}; + ChunkSize -> + #request{stream_to=StreamTo, req_id=ReqId} = CurReq, + %% + %% Do we have to preserve the chunk encoding when streaming? + %% +% do_interim_reply(From, StreamTo, ReqId, ChunkHeader++[$\r, $\n]), + do_interim_reply(StreamTo, ReqId, {chunk_start, ChunkSize}), + RemLen = length(Data_1), + do_trace("Determined chunk size: ~p. Already recvd: ~p~n", [ChunkSize, RemLen]), + parse_11_response(Data_1, State#state{rep_buf_size=0, + reply_buffer=[], + deleted_crlf=true, + chunk_size=ChunkSize}) + end; + {no, Data_1} -> + State#state{reply_buffer=Data_1} + end; + +%% This clause is there to remove the CRLF between two chunks +%% +parse_11_response(DataRecvd, + #state{transfer_encoding=chunked, + chunk_size=tbd, + chunks = Chunks, + cur_req=CurReq, + reply_buffer=Buf}=State) -> + case scan_crlf(DataRecvd, Buf) of + {yes, _, NextChunk} -> + #request{stream_to=StreamTo, req_id=ReqId} = CurReq, + %% + %% Do we have to preserve the chunk encoding when streaming? + %% +% do_interim_reply(From, StreamTo, ReqId, [$\r, $\n]), + State_1 = State#state{chunk_size=chunk_start, deleted_crlf=true}, + State_2 = case StreamTo of + undefined -> + State_1#state{chunks = [Buf | Chunks]}; + _ -> + do_interim_reply(StreamTo, ReqId, chunk_end), + State_1 + end, + parse_11_response(NextChunk, State_2); + {no, Data_1} -> + State#state{reply_buffer=Data_1} + end; + +%% This clause deals with the end of a chunked transfer +parse_11_response(DataRecvd, + #state{transfer_encoding=chunked, chunk_size=0, + cur_req=CurReq, + deleted_crlf = DelCrlf, + reply_buffer=Trailer, reqs=Reqs}=State) -> + do_trace("Detected end of chunked transfer...~n", []), + DataRecvd_1 = case DelCrlf of + false -> + DataRecvd; + true -> + [$\r, $\n | DataRecvd] + end, + #request{stream_to=StreamTo, req_id=ReqId} = CurReq, + case scan_header(DataRecvd_1, Trailer) of + {yes, _TEHeaders, Rem} -> + {_, Reqs_1} = queue:out(Reqs), +% {{value, Req}, Reqs_1} = queue:out(Reqs), + %% + %% Do we have to preserve the chunk encoding when streaming? + %% +% do_interim_reply(StreamTo, ReqId, [$\r, $\n]++TEHeaders++[$\r, $\n]), + do_interim_reply(StreamTo, ReqId, chunk_end), + State_1 = handle_response(CurReq, State#state{reqs=Reqs_1}), + parse_response(Rem, reset_state(State_1)); + {no, Rem} -> + State#state{reply_buffer=Rem, rep_buf_size=length(Rem), chunk_size=tbd} + end; + +%% This clause extracts a chunk, given the size. +parse_11_response(DataRecvd, + #state{transfer_encoding=chunked, chunk_size=CSz, + rep_buf_size=RepBufSz}=State) -> + NeedBytes = CSz - RepBufSz, + DataLen = length(DataRecvd), + do_trace("Recvd more data: size: ~p. NeedBytes: ~p~n", [DataLen, NeedBytes]), + case DataLen >= NeedBytes of + true -> + {RemChunk, RemData} = split_list_at(DataRecvd, NeedBytes), + do_trace("Recvd another chunk...~n", []), + do_trace("RemData -> ~p~n", [RemData]), + case accumulate_response(RemChunk, State) of + {error, Reason} -> + {error, Reason}; + #state{reply_buffer = NewRepBuf, + chunks = NewChunks} = State_1 -> + State_2 = State_1#state{reply_buffer=[], + chunks = [lists:reverse(NewRepBuf) | NewChunks], + rep_buf_size=0, + chunk_size=tbd}, + parse_11_response(RemData, State_2) + end; + false -> + accumulate_response(DataRecvd, State#state{rep_buf_size=RepBufSz + DataLen}) + end; + +%% This clause to extract the body when Content-Length is specified +parse_11_response(DataRecvd, + #state{content_length=CL, rep_buf_size=RepBufSz, + cur_req = CurReq, + reqs=Reqs}=State) -> + NeedBytes = CL - RepBufSz, + DataLen = length(DataRecvd), + case DataLen >= NeedBytes of + true -> + {RemBody, Rem} = split_list_at(DataRecvd, NeedBytes), + {_, Reqs_1} = queue:out(Reqs), + State_1 = accumulate_response(RemBody, State), + State_2 = handle_response(CurReq, State_1#state{reqs=Reqs_1}), + State_3 = reset_state(State_2), + parse_response(Rem, State_3); + false -> + accumulate_response(DataRecvd, State#state{rep_buf_size=RepBufSz+DataLen}) + end. + +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId}, + #state{save_response_to_file = true, + reqs = Reqs, + http_status_code=SCode, + tmp_file_name=TmpFilename, + tmp_file_fd=Fd, + send_timer=ReqTimer, + recvd_headers = RespHeaders}=State) -> + State_1 = case queue:to_list(Reqs) of + [] -> + State#state{cur_req = undefined}; + [NextReq | _] -> + State#state{cur_req = NextReq} + end, + do_reply(From, StreamTo, ReqId, {ok, SCode, RespHeaders, {file, TmpFilename}}), + cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), + file:close(Fd), + State_1#state{tmp_file_name=undefined, tmp_file_fd=undefined}; +handle_response(#request{from=From, stream_to=StreamTo, req_id=ReqId}, + #state{http_status_code=SCode, recvd_headers=RespHeaders, + reply_buffer=RepBuf, transfer_encoding=TEnc, + reqs = Reqs, + chunks=Chunks, send_timer=ReqTimer}=State) -> + Body = case TEnc of + chunked -> + lists:flatten(lists:reverse(Chunks)); + _ -> + lists:flatten(lists:reverse(RepBuf)) + end, + State_1 = case queue:to_list(Reqs) of + [] -> + State#state{cur_req = undefined}; + [NextReq | _] -> + State#state{cur_req = NextReq} + end, + case get(conn_close) of + "close" -> + do_reply(From, StreamTo, ReqId, {ok, SCode, RespHeaders, Body}), + exit(normal); + _ -> + do_reply(From, StreamTo, ReqId, {ok, SCode, RespHeaders, Body}), + cancel_timer(ReqTimer, {eat_message, {req_timedout, From}}), + State_1 + end. + +reset_state(State) -> + State#state{status=get_header, rep_buf_size=0,content_length=undefined, + reply_buffer=[], chunks=[], recvd_headers=[], deleted_crlf=false, + http_status_code=undefined, chunk_size=0, transfer_encoding=undefined}. + +parse_headers(Headers) -> + case scan_crlf(Headers, []) of + {yes, StatusLine, T} -> + Headers_1 = parse_headers_1(T), + case parse_status_line(StatusLine) of + {ok, HttpVsn, StatCode, _Msg} -> + put(http_prot_vsn, HttpVsn), + {HttpVsn, StatCode, Headers_1}; + _ -> %% A HTTP 0.9 response? + put(http_prot_vsn, "HTTP/0.9"), + {"HTTP/0.9", undefined, Headers} + end; + _ -> + {error, no_status_line} + end. + +% From RFC 2616 +% +% HTTP/1.1 header field values can be folded onto multiple lines if +% the continuation line begins with a space or horizontal tab. All +% linear white space, including folding, has the same semantics as +% SP. A recipient MAY replace any linear white space with a single +% SP before interpreting the field value or forwarding the message +% downstream. +parse_headers_1(String) -> + parse_headers_1(String, [], []). + +parse_headers_1([$\n, H |T], [$\r | L], Acc) when H == 32; + H == $\t -> + parse_headers_1(lists:dropwhile(fun(X) -> + is_whitespace(X) + end, T), [32 | L], Acc); +parse_headers_1([$\n|T], [$\r | L], Acc) -> + case parse_header(lists:reverse(L)) of + invalid -> + parse_headers_1(T, [], Acc); + NewHeader -> + parse_headers_1(T, [], [NewHeader | Acc]) + end; +parse_headers_1([H|T], L, Acc) -> + parse_headers_1(T, [H|L], Acc); +parse_headers_1([], [], Acc) -> + lists:reverse(Acc); +parse_headers_1([], L, Acc) -> + Acc_1 = case parse_header(lists:reverse(L)) of + invalid -> + Acc; + NewHeader -> + [NewHeader | Acc] + end, + lists:reverse(Acc_1). + +parse_status_line(Line) -> + parse_status_line(Line, get_prot_vsn, [], []). +parse_status_line([32 | T], get_prot_vsn, ProtVsn, StatCode) -> + parse_status_line(T, get_status_code, ProtVsn, StatCode); +parse_status_line([32 | T], get_status_code, ProtVsn, StatCode) -> + {ok, lists:reverse(ProtVsn), lists:reverse(StatCode), T}; +parse_status_line([H | T], get_prot_vsn, ProtVsn, StatCode) -> + parse_status_line(T, get_prot_vsn, [H|ProtVsn], StatCode); +parse_status_line([H | T], get_status_code, ProtVsn, StatCode) -> + parse_status_line(T, get_status_code, ProtVsn, [H | StatCode]); +parse_status_line([], _, _, _) -> + http_09. + +parse_header(L) -> + parse_header(L, []). +parse_header([$: | V], Acc) -> + {lists:reverse(Acc), string:strip(V)}; +parse_header([H | T], Acc) -> + parse_header(T, [H | Acc]); +parse_header([], _) -> + invalid. + +scan_header([$\n|T], [$\r,$\n,$\r|L]) -> {yes, lists:reverse(L), T}; +scan_header([H|T], L) -> scan_header(T, [H|L]); +scan_header([], L) -> {no, L}. + +scan_crlf([$\n|T], [$\r | L]) -> {yes, lists:reverse(L), T}; +scan_crlf([H|T], L) -> scan_crlf(T, [H|L]); +scan_crlf([], L) -> {no, L}. + +fmt_val(L) when list(L) -> L; +fmt_val(I) when integer(I) -> integer_to_list(I); +fmt_val(A) when atom(A) -> atom_to_list(A); +fmt_val(Term) -> io_lib:format("~p", [Term]). + +crnl() -> "\r\n". + +method(get) -> "GET"; +method(post) -> "POST"; +method(head) -> "HEAD"; +method(options) -> "OPTIONS"; +method(put) -> "PUT"; +method(delete) -> "DELETE"; +method(trace) -> "TRACE". + +%% From RFC 2616 +%% +% The chunked encoding modifies the body of a message in order to +% transfer it as a series of chunks, each with its own size indicator, +% followed by an OPTIONAL trailer containing entity-header +% fields. This allows dynamically produced content to be transferred +% along with the information necessary for the recipient to verify +% that it has received the full message. +% Chunked-Body = *chunk +% last-chunk +% trailer +% CRLF +% chunk = chunk-size [ chunk-extension ] CRLF +% chunk-data CRLF +% chunk-size = 1*HEX +% last-chunk = 1*("0") [ chunk-extension ] CRLF +% chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) +% chunk-ext-name = token +% chunk-ext-val = token | quoted-string +% chunk-data = chunk-size(OCTET) +% trailer = *(entity-header CRLF) +% The chunk-size field is a string of hex digits indicating the size +% of the chunk. The chunked encoding is ended by any chunk whose size +% is zero, followed by the trailer, which is terminated by an empty +% line. +%% +%% The parsing implemented here discards all chunk extensions. It also +%% strips trailing spaces from the chunk size fields as Apache 1.3.27 was +%% sending them. +parse_chunk_header([]) -> + throw({error, invalid_chunk_size}); +parse_chunk_header(ChunkHeader) -> + parse_chunk_header(ChunkHeader, []). + +parse_chunk_header([$; | _], Acc) -> + hexlist_to_integer(lists:reverse(Acc)); +parse_chunk_header([H | T], Acc) -> + case is_whitespace(H) of + true -> + parse_chunk_header(T, Acc); + false -> + parse_chunk_header(T, [H | Acc]) + end; +parse_chunk_header([], Acc) -> + hexlist_to_integer(lists:reverse(Acc)). + +is_whitespace(32) -> true; +is_whitespace($\r) -> true; +is_whitespace($\n) -> true; +is_whitespace($\t) -> true; +is_whitespace(_) -> false. + + +parse_url(Url) -> + parse_url(Url, get_protocol, #url{abspath=Url}, []). + +parse_url([$:, $/, $/ | _], get_protocol, Url, []) -> + {invalid_uri_1, Url}; +parse_url([$:, $/, $/ | T], get_protocol, Url, TmpAcc) -> + Prot = list_to_atom(lists:reverse(TmpAcc)), + parse_url(T, get_username, + Url#url{protocol = Prot}, + []); +parse_url([$/ | T], get_username, Url, TmpAcc) -> + %% No username/password. No port number + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Url#url.protocol), + path = [$/ | T]}; +parse_url([$: | T], get_username, Url, TmpAcc) -> + %% It is possible that no username/password has been + %% specified. But we'll continue with the assumption that there is + %% a username/password. If we encounter a '@' later on, there is a + %% username/password indeed. If we encounter a '/', it was + %% actually the hostname + parse_url(T, get_password, + Url#url{username = lists:reverse(TmpAcc)}, + []); +parse_url([$@ | T], get_username, Url, TmpAcc) -> + parse_url(T, get_host, + Url#url{username = lists:reverse(TmpAcc), + password = ""}, + []); +parse_url([$@ | T], get_password, Url, TmpAcc) -> + parse_url(T, get_host, + Url#url{password = lists:reverse(TmpAcc)}, + []); +parse_url([$/ | T], get_password, Url, TmpAcc) -> + %% Ok, what we thought was the username/password was the hostname + %% and portnumber + #url{username=User} = Url, + Port = list_to_integer(lists:reverse(TmpAcc)), + Url#url{host = User, + port = Port, + username = undefined, + password = undefined, + path = [$/ | T]}; +parse_url([$: | T], get_host, #url{} = Url, TmpAcc) -> + parse_url(T, get_port, + Url#url{host = lists:reverse(TmpAcc)}, + []); +parse_url([$/ | T], get_host, #url{protocol=Prot} = Url, TmpAcc) -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Prot), + path = [$/ | T]}; +parse_url([$/ | T], get_port, #url{protocol=Prot} = Url, TmpAcc) -> + Port = case TmpAcc of + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, path = [$/ | T]}; +parse_url([H | T], State, Url, TmpAcc) -> + parse_url(T, State, Url, [H | TmpAcc]); +parse_url([], get_host, Url, TmpAcc) when TmpAcc /= [] -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Url#url.protocol), + path = "/"}; +parse_url([], get_username, Url, TmpAcc) when TmpAcc /= [] -> + Url#url{host = lists:reverse(TmpAcc), + port = default_port(Url#url.protocol), + path = "/"}; +parse_url([], get_port, #url{protocol=Prot} = Url, TmpAcc) -> + Port = case TmpAcc of + [] -> + default_port(Prot); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{port = Port, + path = "/"}; +parse_url([], get_password, Url, TmpAcc) -> + %% Ok, what we thought was the username/password was the hostname + %% and portnumber + #url{username=User} = Url, + Port = case TmpAcc of + [] -> + default_port(Url#url.protocol); + _ -> + list_to_integer(lists:reverse(TmpAcc)) + end, + Url#url{host = User, + port = Port, + username = undefined, + password = undefined, + path = "/"}; +parse_url([], State, Url, TmpAcc) -> + {invalid_uri_2, State, Url, TmpAcc}. + +default_port(http) -> 80; +default_port(https) -> 443; +default_port(ftp) -> 21. + +send_async_headers(_ReqId, undefined, _StatCode, _Headers) -> + ok; +send_async_headers(ReqId, StreamTo, StatCode, Headers) -> + catch StreamTo ! {ibrowse_async_headers, ReqId, StatCode, Headers}. + +do_reply(From, undefined, _, Msg) -> + ibrowse:reply(From, Msg); +do_reply(_From, StreamTo, ReqId, {ok, _, _, _}) -> + ibrowse:finished_async_request(), + catch StreamTo ! {ibrowse_async_response_end, ReqId}; +% do_reply(_From, StreamTo, ReqId, {ok, _, _, Data}) -> +% ibrowse:finished_async_request(), +% catch StreamTo ! {ibrowse_async_response_end, ReqId, Data}; +do_reply(_From, StreamTo, ReqId, Msg) -> + catch StreamTo ! {ibrowse_async_response, ReqId, Msg}. + +do_interim_reply(undefined, _ReqId, _Msg) -> + ok; +do_interim_reply(StreamTo, ReqId, Msg) -> + catch StreamTo ! {ibrowse_async_response, ReqId, Msg}. + +do_error_reply(#state{reqs = Reqs}, Err) -> + ReqList = queue:to_list(Reqs), + lists:foreach(fun(#request{from=From, stream_to=StreamTo, req_id=ReqId}) -> + do_reply(From, StreamTo, ReqId, {error, Err}) + end, ReqList). + +split_list_at(List, N) -> + split_list_at(List, N, []). +split_list_at([], _, Acc) -> + {lists:reverse(Acc), []}; +split_list_at(List2, 0, List1) -> + {lists:reverse(List1), List2}; +split_list_at([H | List2], N, List1) -> + split_list_at(List2, N-1, [H | List1]). + +get_value(Tag, TVL) -> + {value, {_, V}} = lists:keysearch(Tag,1,TVL), + V. + +get_value(Tag, TVL, DefVal) -> + case lists:keysearch(Tag, 1, TVL) of + {value, {_, V}} -> + V; + false -> + DefVal + end. + +hexlist_to_integer(List) -> + hexlist_to_integer(lists:reverse(List), 1, 0). +hexlist_to_integer([H | T], Multiplier, Acc) -> + hexlist_to_integer(T, Multiplier*16, Multiplier*to_ascii(H) + Acc); +hexlist_to_integer([], _, Acc) -> + Acc. + +to_ascii($A) -> 10; +to_ascii($a) -> 10; +to_ascii($B) -> 11; +to_ascii($b) -> 11; +to_ascii($C) -> 12; +to_ascii($c) -> 12; +to_ascii($D) -> 13; +to_ascii($d) -> 13; +to_ascii($E) -> 14; +to_ascii($e) -> 14; +to_ascii($F) -> 15; +to_ascii($f) -> 15; +to_ascii($1) -> 1; +to_ascii($2) -> 2; +to_ascii($3) -> 3; +to_ascii($4) -> 4; +to_ascii($5) -> 5; +to_ascii($6) -> 6; +to_ascii($7) -> 7; +to_ascii($8) -> 8; +to_ascii($9) -> 9; +to_ascii($0) -> 0. + +safe_get_env(App, EnvVar, DefaultValue) -> + case application:get_env(App,EnvVar) of + undefined -> + DefaultValue; + {ok, V} -> + V + end. + +cancel_timer(undefined) -> ok; +cancel_timer(Ref) -> erlang:cancel_timer(Ref). + +cancel_timer(Ref, {eat_message, Msg}) -> + cancel_timer(Ref), + receive + Msg -> + ok + after 0 -> + ok + end. + +make_req_id() -> + now(). + +do_trace(Fmt, Args) -> + do_trace(get(my_trace_flag), Fmt, Args). + +% Useful for debugging +% do_trace(_, Fmt, Args) -> +% io:format("~s -- CLI(~p,~p) - "++Fmt, [printable_date(), +% get(ibrowse_http_client_host), +% get(ibrowse_http_client_port) | Args]); +do_trace(true, Fmt, Args) -> + io:format("~s -- CLI(~p,~p) - "++Fmt, + [printable_date(), + get(ibrowse_http_client_host), + get(ibrowse_http_client_port) | Args]); +do_trace(_, _, _) -> ok. + +printable_date() -> + {{Y,Mo,D},{H, M, S}} = calendar:local_time(), + {_,_,MicroSecs} = now(), + [integer_to_list(Y), + $-, + integer_to_list(Mo), + $-, + integer_to_list(D), + $_, + integer_to_list(H), + $:, + integer_to_list(M), + $:, + integer_to_list(S), + $:, + integer_to_list(MicroSecs div 1000)]. + +to_lower(Str) -> + to_lower(Str, []). +to_lower([H|T], Acc) when H >= $A, H =< $Z -> + to_lower(T, [H+32|Acc]); +to_lower([H|T], Acc) -> + to_lower(T, [H|Acc]); +to_lower([], Acc) -> + lists:reverse(Acc). diff --git a/src/ibrowse_lib.erl b/src/ibrowse_lib.erl new file mode 100644 index 0000000..f98c69d --- /dev/null +++ b/src/ibrowse_lib.erl @@ -0,0 +1,141 @@ +%%% File : ibrowse_lib.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : +%%% Created : 27 Feb 2004 by Chandrashekhar Mullaparthi +%% @doc Module with a few useful functions + +-module(ibrowse_lib). +-vsn('$Id: ibrowse_lib.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). +-author('chandru'). +-ifdef(debug). +-compile(export_all). +-endif. + +-export([url_encode/1, + decode_rfc822_date/1, + status_code/1, + drv_ue/1, + drv_ue/2]). + +drv_ue(Str) -> + [{port, Port}| _] = ets:lookup(ibrowse_table, port), + drv_ue(Str, Port). +drv_ue(Str, Port) -> + case erlang:port_control(Port, 1, Str) of + [] -> + Str; + Res -> + Res + end. + +%% @doc URL-encodes a string based on RFC 1738. Returns a flat list. +%% @spec url_encode(Str) -> UrlEncodedStr +%% Str = string() +%% UrlEncodedStr = string() +url_encode(Str) when list(Str) -> + url_encode_char(lists:reverse(Str), []). + +url_encode_char([X | T], Acc) when X >= $a, X =< $z -> + url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X >= $A, X =< $Z -> + url_encode_char(T, [X | Acc]); +url_encode_char([X | T], Acc) when X == $-; X == $_; X == $. -> + url_encode_char(T, [X | Acc]); +url_encode_char([32 | T], Acc) -> + url_encode_char(T, [$+ | Acc]); +url_encode_char([X | T], Acc) -> + url_encode_char(T, [$%, d2h(X bsr 4), d2h(X band 16#0f) | Acc]); +url_encode_char([], Acc) -> + Acc. + +d2h(N) when N<10 -> N+$0; +d2h(N) -> N+$a-10. + +decode_rfc822_date(String) when list(String) -> + case catch decode_rfc822_date_1(string:tokens(String, ", \t\r\n")) of + {'EXIT', _} -> + {error, invalid_date}; + Res -> + Res + end. + +% TODO: Have to handle the Zone +decode_rfc822_date_1([_,DayInt,Month,Year, Time,Zone]) -> + decode_rfc822_date_1([DayInt,Month,Year, Time,Zone]); +decode_rfc822_date_1([Day,Month,Year, Time,_Zone]) -> + DayI = list_to_integer(Day), + MonthI = month_int(Month), + YearI = list_to_integer(Year), + TimeTup = case string:tokens(Time, ":") of + [H,M] -> + {list_to_integer(H), + list_to_integer(M), + 0}; + [H,M,S] -> + {list_to_integer(H), + list_to_integer(M), + list_to_integer(S)} + end, + {{YearI,MonthI,DayI}, TimeTup}. + +month_int("Jan") -> 1; +month_int("Feb") -> 2; +month_int("Mar") -> 3; +month_int("Apr") -> 4; +month_int("May") -> 5; +month_int("Jun") -> 6; +month_int("Jul") -> 7; +month_int("Aug") -> 8; +month_int("Sep") -> 9; +month_int("Oct") -> 10; +month_int("Nov") -> 11; +month_int("Dec") -> 12. + +%% @doc Given a status code, returns an atom describing the status code. +%% @spec status_code(StatusCode) -> StatusDescription +%% StatusCode = string() | integer() +%% StatusDescription = atom() +status_code(100) -> continue; +status_code(101) -> switching_protocols; +status_code(200) -> ok; +status_code(201) -> created; +status_code(202) -> accepted; +status_code(203) -> non_authoritative_information; +status_code(204) -> no_content; +status_code(205) -> reset_content; +status_code(206) -> partial_content; +status_code(300) -> multiple_choices; +status_code(301) -> moved_permanently; +status_code(302) -> found; +status_code(303) -> see_other; +status_code(304) -> not_modified; +status_code(305) -> use_proxy; +status_code(306) -> unused; +status_code(307) -> temporary_redirect; +status_code(400) -> bad_request; +status_code(401) -> unauthorized; +status_code(402) -> payment_required; +status_code(403) -> forbidden; +status_code(404) -> not_found; +status_code(405) -> method_not_allowed; +status_code(406) -> not_acceptable; +status_code(407) -> proxy_authentication_required; +status_code(408) -> request_tunnel; +status_code(408) -> request_timeout; +status_code(409) -> conflict; +status_code(410) -> gone; +status_code(411) -> length_required; +status_code(412) -> precondition_failed; +status_code(413) -> request_entity_too_large; +status_code(414) -> request_uri_too_long; +status_code(415) -> unsupported_media_type; +status_code(416) -> requested_range_not_satisfiable; +status_code(417) -> expectation_failed; +status_code(500) -> internal_server_error; +status_code(501) -> not_implemented; +status_code(502) -> bad_gateway; +status_code(503) -> service_unavailable; +status_code(504) -> gateway_timeout; +status_code(505) -> http_version_not_supported; +status_code(X) when is_list(X) -> status_code(list_to_integer(X)); +status_code(_) -> unknown_status_code. diff --git a/src/ibrowse_sup.erl b/src/ibrowse_sup.erl new file mode 100644 index 0000000..300435d --- /dev/null +++ b/src/ibrowse_sup.erl @@ -0,0 +1,65 @@ +%%%------------------------------------------------------------------- +%%% File : ibrowse_sup.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : +%%% +%%% Created : 15 Oct 2003 by Chandrashekhar Mullaparthi +%%%------------------------------------------------------------------- +-module(ibrowse_sup). +-vsn('$Id: ibrowse_sup.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-behaviour(supervisor). +%%-------------------------------------------------------------------- +%% Include files +%%-------------------------------------------------------------------- + +%%-------------------------------------------------------------------- +%% External exports +%%-------------------------------------------------------------------- +-export([ + start_link/0 + ]). + +%%-------------------------------------------------------------------- +%% Internal exports +%%-------------------------------------------------------------------- +-export([ + init/1 + ]). + +%%-------------------------------------------------------------------- +%% Macros +%%-------------------------------------------------------------------- +-define(SERVER, ?MODULE). + +%%-------------------------------------------------------------------- +%% Records +%%-------------------------------------------------------------------- + +%%==================================================================== +%% External functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Function: start_link/0 +%% Description: Starts the supervisor +%%-------------------------------------------------------------------- +start_link() -> + supervisor:start_link({local, ?SERVER}, ?MODULE, []). + +%%==================================================================== +%% Server functions +%%==================================================================== +%%-------------------------------------------------------------------- +%% Func: init/1 +%% Returns: {ok, {SupFlags, [ChildSpec]}} | +%% ignore | +%% {error, Reason} +%%-------------------------------------------------------------------- +init([]) -> + AChild = {ibrowse,{ibrowse,start_link,[]}, + permanent,2000,worker,[ibrowse, ibrowse_http_client]}, + {ok,{{one_for_all,10,1}, [AChild]}}. + +%%==================================================================== +%% Internal functions +%%==================================================================== diff --git a/src/ibrowse_test.erl b/src/ibrowse_test.erl new file mode 100644 index 0000000..0984177 --- /dev/null +++ b/src/ibrowse_test.erl @@ -0,0 +1,74 @@ +%%% File : ibrowse_test.erl +%%% Author : Chandrashekhar Mullaparthi +%%% Description : Test ibrowse +%%% Created : 14 Oct 2003 by Chandrashekhar Mullaparthi + +-module(ibrowse_test). +-vsn('$Id: ibrowse_test.erl,v 1.1 2005/05/05 22:28:28 chandrusf Exp $ '). + +-compile(export_all). +-import(ibrowse_http_client, [printable_date/0]). + +send_reqs(Url, NumWorkers, NumReqsPerWorker) -> + proc_lib:spawn(?MODULE, send_reqs_1, [Url, NumWorkers, NumReqsPerWorker]). + +send_reqs_1(Url, NumWorkers, NumReqsPerWorker) -> + process_flag(trap_exit, true), + Pids = lists:map(fun(_X) -> + proc_lib:spawn_link(?MODULE, do_send_req, [Url, NumReqsPerWorker, self()]) + end, lists:seq(1,NumWorkers)), + put(num_reqs_per_worker, NumReqsPerWorker), + do_wait(Pids, now(), printable_date(), 0, 0). + +do_wait([], _StartNow, StartTime, NumSucc, NumErrs) -> + io:format("~n~nDone...~nStartTime -> ~s~n", [StartTime]), + io:format("EndTime -> ~s~n", [printable_date()]), + io:format("NumSucc -> ~p~n", [NumSucc]), + io:format("NumErrs -> ~p~n", [NumErrs]); +do_wait(Pids, StartNow, StartTime, NumSucc, NumErrs) -> + receive + {done, From, _Time, {ChildNumSucc, ChildNumFail}} -> + do_wait(Pids--[From], StartNow, StartTime, NumSucc+ChildNumSucc, NumErrs+ChildNumFail); + {'EXIT',_, normal} -> + do_wait(Pids, StartNow, StartTime, NumSucc, NumErrs); + {'EXIT', From, _Reason} -> + do_wait(Pids--[From], StartNow, StartTime, NumSucc, NumErrs + get(num_reqs_per_worker)) + end. + +do_send_req(Url, NumReqs, Parent) -> + StartTime = now(), + Res = do_send_req_1(Url, NumReqs, {0, 0}), + Parent ! {done, self(), StartTime, Res}. + +do_send_req_1(_Url, 0, {NumSucc, NumFail}) -> + {NumSucc, NumFail}; +do_send_req_1(Url, NumReqs, {NumSucc, NumFail}) -> + case ibrowse:send_req(Url, [], get, [], [], 10000) of + {ok, _Status, _Headers, _Body} -> + do_send_req_1(Url, NumReqs-1, {NumSucc+1, NumFail}); + _Err -> + do_send_req_1(Url, NumReqs-1, {NumSucc, NumFail+1}) + end. + +drv_ue_test() -> + drv_ue_test(lists:duplicate(1024, 127)). +drv_ue_test(Data) -> + [{port, Port}| _] = ets:lookup(ibrowse_table, port), +% erl_ddll:unload_driver("ibrowse_drv"), +% timer:sleep(1000), +% erl_ddll:load_driver("../priv", "ibrowse_drv"), +% Port = open_port({spawn, "ibrowse_drv"}, []), + {Time, Res} = timer:tc(ibrowse_lib, drv_ue, [Data, Port]), + io:format("Time -> ~p~n", [Time]), + io:format("Data Length -> ~p~n", [length(Data)]), + io:format("Res Length -> ~p~n", [length(Res)]). +% io:format("Result -> ~s~n", [Res]). + +ue_test() -> + ue_test(lists:duplicate(1024, $?)). +ue_test(Data) -> + {Time, Res} = timer:tc(ibrowse_lib, url_encode, [Data]), + io:format("Time -> ~p~n", [Time]), + io:format("Data Length -> ~p~n", [length(Data)]), + io:format("Res Length -> ~p~n", [length(Res)]). +% io:format("Result -> ~s~n", [Res]). diff --git a/vsn.mk b/vsn.mk new file mode 100644 index 0000000..742db41 --- /dev/null +++ b/vsn.mk @@ -0,0 +1,2 @@ +IBROWSE_VSN = 1.2 +