/*
	Copyright (c) 2003, WebThing Ltd
	Author: Nick Kew <nick@webthing.com>
 
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
*/
//#define apr_socket_send apr_send
//#define apr_socket_connect apr_connect
//#define apr_socket_recv apr_recv

#ifndef HTTPCLIENT
#define HTTPCLIENT

#define ApacheHTTP HTTPClient

#include <http_protocol.h>
#include <apr_strings.h>
#if 1
#include <apr_network_io.h>
#include <apr_tables.h>
#include <apr_base64.h>
#else
#include <libxml/nanohttp.h>
#endif
#include <ctype.h>

//#define BUFLEN 4096
#define BUFLEN 8192

#define TIMEOUT 3000000
/*
 * Small HTTP client on top of APR sockets: it sends a single GET request,
 * parses the status line and response headers out of the first chunk of
 * data received, follows redirects via open(), and hands the response
 * body to the caller through read().
 */
class HTTPClient {
  apr_pool_t* pool ;	// pool taken from the request; used for every allocation here
  const char* args ;	// request argument string, looked up with getArg()
  apr_socket_t* sock ;	// connection to the remote server (created in open1)
  char* ctype ;		// Content-Type value up to any ';' (set by interpret_headers)
  char* enc ;		// charset parameter of Content-Type, if one was found

  size_t bytes ;	// number of valid bytes currently held in buf
  int code ;		// numeric HTTP status parsed from the status line
  bool is_open ;	// starts out true; cleared by close()

  char buf[BUFLEN] ;	// receive buffer shared by the parsers and read()

  apr_table_t* resp_headers ;	// response headers collected by parse_header
  size_t offs ;		// offset in buf of the first byte not yet consumed
  size_t count ;	// running total of bytes returned by read()
  size_t clen ;		// value of Content-Length (0 when absent)
  size_t redirects ;	// incremented on every open() call to bound redirect chains

  // Outcome of interpret_headers(): usable 2xx content, a redirect with a
  // Location, a non-2xx response that still has a body type, or a response
  // lacking a Content-Type.
  typedef enum { CONTENT_OK, REDIRECT_OK, ERROR_OK, BAD_HTTP } resp_type ;

  /*
   * Classify the response whose status line and headers were just parsed.
   *
   * Returns REDIRECT_OK for a 3xx status carrying a non-empty Location,
   * BAD_HTTP when there is no Content-Type header, CONTENT_OK for a 2xx
   * status and ERROR_OK for anything else.  As side effects it records
   * Content-Length in clen, the media type in ctype and any charset
   * parameter in enc.
   */
  resp_type interpret_headers() {
    if ( ( code >= 300 ) && ( code < 400 ) ) {
      const char* loc = header("Location") ;
      if ( loc && *loc )
        return REDIRECT_OK ;
    }

    // Start from zero so a repeated call cannot append digits to an old value.
    clen = 0 ;
    const char* len = header("Content-Length") ;
    if ( len )
      // ctype functions need an unsigned char value to be well-defined
      for ( const char* p = len; isdigit((unsigned char) *p); ++p )
        clen = 10 * clen + (*p - '0') ;

    const char* tp = header("Content-Type") ;
    if ( ! tp )
      return BAD_HTTP ;

    // Media type is everything before the first ';'.  Without parameters
    // there is no ';', so copy the whole value instead of computing a length
    // from a null pointer.
    const char* sep = strchr(tp, ';') ;
    ctype = sep ? apr_pstrndup(pool, tp, (sep - tp)) : apr_pstrdup(pool, tp) ;

    regex_t* encrx = ap_pregcomp(pool,
        "charset[ \t\r\n]*=[ \t\r\n]*[\"']?([A-Za-z0-9_-]+)",
        REG_ICASE|REG_EXTENDED) ;
    if ( encrx ) {	// compilation can fail; never hand a null regex to ap_regexec
      regmatch_t match[2] ;
      if ( ap_regexec(encrx, tp, 2, match, 0) == 0 )
        enc = apr_pstrndup(pool, tp+match[1].rm_so,
                match[1].rm_eo - match[1].rm_so) ;
      ap_pregfree(pool, encrx) ;
    }

    if ( ( code >= 200 ) && ( code < 300 ) )
      return CONTENT_OK ;
    return ERROR_OK ;
  }

  /*
   * Parse the HTTP status line at the start of buf.
   *
   * Sets code to the numeric status and offs to the index of the first
   * byte after the status line.  A line terminated by a bare LF is
   * accepted, with a warning written to out.
   *
   * Returns true only when a line ending was found and data remains
   * after it (0 < offs < bytes).
   */
  bool parse_status(BasicWriter& out) {
    enum { BEFORE, HTTP_VERSION, SP1, STATUS, SP2, REASON } state = BEFORE ;
    code = 0 ;
    offs = 0 ;
    for ( size_t i = 0 ; ( i < bytes ) && ( offs == 0 ) ; ++i ) {
      // ctype functions need an unsigned char value to be well-defined
      const unsigned char c = (unsigned char) buf[i] ;
      bool check_eol = false ;
      switch ( state ) {
        case BEFORE:		// leading whitespace before the version token
          if ( ! isspace(c) )
            state = HTTP_VERSION ;
          break ;
        case HTTP_VERSION:	// "HTTP/x.y", ended by whitespace
          if ( isspace(c) )
            state = SP1 ;
          break ;
        case SP1:		// skip until the first digit of the status code
          if ( isdigit(c) ) {
            code = (c - '0') ;
            state = STATUS ;
          }
          break ;
        case STATUS:
          if ( isdigit(c) ) {
            code = 10*code + (c - '0') ;
          } else {
            // The character ending the code may itself start the line
            // ending (no reason phrase), so it must be examined below.
            state = SP2 ;
            check_eol = true ;
          }
          break ;
        case SP2:
          if ( ! isspace(c) )
            state = REASON ;
          check_eol = true ;
          break ;
        case REASON:
          check_eol = true ;
          break ;
      }
      if ( ! check_eol )
        continue ;
      // Only look at the following byte when it is part of the received data.
      if ( ( c == '\r' ) && ( i + 1 < bytes ) && ( buf[i+1] == '\n' ) ) {
        offs = (i + 2) ;
      } else if ( c == '\n' ) {
        out.puts("<val:message>Malformed HTTP response (your server's line-endings are not compatible with HTTP).  I'll try error-correction.</val:message>") ;
        offs = (i + 1) ;
      }
    }

    return ( offs > 0 ) && ( offs < bytes ) ;
  }
  /*
   * Parse response header lines from buf, starting at offs, into
   * resp_headers (created on first use, cleared on later calls so a
   * redirect starts from an empty table).
   *
   * A line containing ':' sets a header; a line without one is merged
   * into the most recent header as a continuation.  Parsing stops at the
   * blank line ending the headers, leaving offs at the first body byte.
   * If the received data ends in the middle of a line, that partial line
   * is dropped and offs is left at the end of the data.
   *
   * All scanning is bounded by the number of bytes received; buf is not
   * assumed to be NUL-terminated.
   *
   * Returns false only if offs ends up beyond the received data.
   */
  bool parse_header() {
    char* key = 0 ;
    if ( resp_headers )
      apr_table_clear(resp_headers) ;	// explicit clear to deal with redirects
    else
      resp_headers = apr_table_make(pool, 12) ;

    char* const end = buf + bytes ;
    char* p = buf + offs ;
    while ( p < end ) {
      // Locate the end of this line without reading past the data.
      char* eol = p ;
      while ( ( eol < end ) && ( *eol != '\r' ) && ( *eol != '\n' ) )
        ++eol ;
      if ( eol == end ) {	// data ran out mid-line
        p = end ;
        break ;
      }

      // A line ends in CRLF or a single CR/LF; only count the LF of a
      // CRLF pair when it was actually received.
      const bool crlf = ( *eol == '\r' ) && ( eol + 1 < end ) && ( eol[1] == '\n' ) ;
      char* const next = eol + ( crlf ? 2 : 1 ) ;

      if ( eol == p ) {		// blank line: end of headers
        p = next ;
        break ;
      }

      // Search for the separator inside this line only.
      char* colon = p ;
      while ( ( colon < eol ) && ( *colon != ':' ) )
        ++colon ;

      if ( colon < eol ) {
        key = apr_pstrndup(pool, p, (colon - p) ) ;
        char* val = colon + 1 ;
        // Skip leading whitespace, but never beyond the end of the line
        // (an empty value must not swallow the next header).
        while ( ( val < eol ) && isspace((unsigned char) *val) )
          ++val ;
        apr_table_set(resp_headers, key, apr_pstrndup(pool, val, (eol - val) ) ) ;
      } else if ( key ) {
        apr_table_merge(resp_headers, key, apr_pstrndup(pool, p, (eol - p) ) ) ;
      }
      p = next ;
    }
    if ( p >= buf + offs )	// p only ever moves forward from the start position
      offs = ( p - buf ) ;

    return offs <= bytes ;
  }

  /*
   * Write a NUL-terminated string to the socket.
   *
   * apr_socket_send may accept fewer bytes than requested, so keep
   * sending until everything has gone out or the socket reports an error
   * (or makes no progress).  A null string is ignored.  Returns *this so
   * calls can be chained; failures are not reported to the caller.
   */
  const HTTPClient& send(const char* buf) const {
    if ( ! buf )
      return *this ;
    apr_size_t remaining = strlen(buf) ;
    while ( remaining > 0 ) {
      apr_size_t sent = remaining ;	// in: bytes to send, out: bytes sent
      if ( ( apr_socket_send(sock, buf, &sent) != APR_SUCCESS ) || ( sent == 0 ) )
        break ;
      buf += sent ;
      remaining -= sent ;
    }
    return *this ;
  }
  /*
   * Forward one request header whose value is supplied in the request
   * arguments under the same name.
   *
   * The value is URL-decoded first.  Nothing is sent when the argument is
   * missing or empty, or when the decoded value contains CR or LF: those
   * would let the supplied value terminate this header and inject
   * further header lines into the outgoing request.
   */
  void send_hdr(const char* hdr) const {
    char* val = getArg(pool, args, hdr) ;
    if ( ! val || ! *val )
      return ;
    // The status is deliberately ignored: values such as media types contain
    // an encoded '/', which ap_unescape_url decodes but reports as an error.
    ap_unescape_url(val) ;
    if ( ! *val || strpbrk(val, "\r\n") )
      return ;
    send(hdr).send(": ").send(val).send("\r\n") ;
  }
  /*
   * Send a complete HTTP/1.0 GET request for the parsed URL.
   *
   * uri  parsed target; path, query, hostname and port_str are used.
   * out  receives a message when the supplied credentials are too long.
   *
   * Optional Accept, Accept-Charset, Accept-Language and Cookie headers
   * are taken from the request arguments, as are the "username" and
   * "password" used for Basic authentication.
   */
  void send_request(apr_uri_t& uri, BasicWriter& out) const {
    // An absent or empty path must still produce a valid request target.
    const char* path = ( uri.path && *uri.path ) ? uri.path : "/" ;
    send("GET ").send(path) ;
    if ( uri.query )
      send("?").send(uri.query) ;
    send(" HTTP/1.0\r\n"
        "Connection: Close\r\n"
        "User-Agent: Page Valet/4.1pre5\r\n"
        "Host: ").send(uri.hostname ? uri.hostname : "") ;
    // The Host value has to repeat a port given explicitly in the URL,
    // otherwise servers on non-default ports may pick the wrong virtual host.
    if ( uri.port_str && *uri.port_str )
      send(":").send(uri.port_str) ;
    send("\r\n") ;

    send_hdr("Accept") ;
    send_hdr("Accept-Charset") ;
    send_hdr("Accept-Language") ;
    send_hdr("Cookie") ;

    const char* name = getArg(pool, args, "username") ;
    const char* pass = getArg(pool, args, "password") ;
    if ( name && pass ) {
      char* cred = apr_pstrcat(pool, name, ":", pass, NULL) ;
      const size_t cred_len = strlen(cred) ;
      // Under 64 input bytes the base64 text plus terminator fits in dst.
      if ( cred_len < 64 ) {
        char dst[128] ;
        apr_base64_encode(dst, cred, cred_len) ;
        send("Authorization: Basic ").send(dst).send("\r\n") ;
      } else {
        out.puts("<val:message>Username or password too long - ignored.</val:message>") ;
      }
    }

    send("\r\n") ;	// blank line ends the request headers
  }

  bool open1(const char* url, BasicWriter& out) {
    regex_t* http_url = ap_pregcomp(pool, "http://[A-Za-z\\..-_]+", REG_ICASE|REG_NOMATCH) ;
    if ( ap_regexec(http_url, url, 0, 0, 0) != 0 ) {
      out.puts("<val:message>Bad URL: ").escape(url).puts(" (only HTTP supported)</val:message>\n") ;
      ap_pregfree(pool, http_url) ;
      return false ;
    }
    ap_pregfree(pool, http_url) ;

    apr_uri_t uri ;
    apr_sockaddr_t* sa = 0 ;
    if ( apr_uri_parse(pool, url, &uri) != APR_SUCCESS ) {
      out.puts("<val:message>Bad URL: ").escape(url).puts("</val:message>\n") ;
      return 0 ;
    }
    if ( ! uri.port )
	uri.port = 80 ;

    //const char* req = req_headers(uri) ;

    if ( apr_socket_create(&sock, PF_INET, SOCK_STREAM, pool) != APR_SUCCESS )
      return false ;

    if ( apr_socket_timeout_set(sock, TIMEOUT) != APR_SUCCESS )
      return false ;

    if ( apr_sockaddr_info_get(&sa, uri.hostname, APR_UNSPEC,
	uri.port, APR_IPV4_ADDR_OK, pool) != APR_SUCCESS )
	    return false ;

    int err ;
    if ( err = apr_socket_connect(sock, sa), err != APR_SUCCESS  ) {
	out.puts("<val:message>Error ").puti(err).puts(" in connect</val:message>\n") ;
	return false ;
    }

    send_request(uri, out) ;
    /*
    apr_size_t len = strlen(req) ;
    if ( apr_socket_send(sock, req, &len ) != APR_SUCCESS )
	return false ;
	*/

#ifdef SIMPLE_READ
    bytes = BUFLEN ;
    if ( apr_socket_recv( sock, buf, &bytes ) != APR_SUCCESS ) {
      out.puts("<val:message>Can't talk to ").escape(url).puts("</val:message>\n") ;
      return false ;
    }
#else
    bytes = 0 ;
    apr_status_t recv_status ;
    do {
      size_t to_read = BUFLEN - bytes ;
      recv_status = apr_socket_recv( sock, buf + bytes, &to_read ) ;
      bytes += to_read ;
    } while ( ( recv_status == APR_SUCCESS ) && ( bytes < BUFLEN ) ) ;
    if ( APR_STATUS_IS_TIMEUP(recv_status) ) {
      out.puts("<val:message>Timeout reading response from ").escape(url)
	.puts("</val:message>") ;
    }
#endif

    if ( ! parse_status(out) || ! parse_header() ) {
      out.puts("<val:message>Bad response from ").escape(url).puts("</val:message>\n") ;
      return false ;
    }
    return true ;
  }

#if 0
  // Disabled (inside #if 0) variant that reads the status through libxml's
  // nanohttp API.  Splits the Content-Type value tp into a media type
  // (ctype) and, when a charset parameter follows the ';', passes it to
  // set_encoding().  Returns true when a charset was found.
  // NOTE(review): ctype is computed from (sep-tp) before sep is tested for
  // null - confirm and fix if this code is ever re-enabled.
  bool parseHead(void* http, char* tp) {
    int noenc = 1 ;
    char* sep = strchr(tp, ';') ;
    ctype = apr_pstrndup(pool, tp, (sep-tp)) ;
    if ( sep ) {
      regex_t* encrx = ap_pregcomp(pool,
	"charset[ \t\r\n]*=[ \t\r\n]*[\"']?([A-Za-z0-9_-]+)",
	REG_ICASE|REG_EXTENDED) ;
      regmatch_t match[2] ;
      // noenc receives the regexec status: 0 means a charset was matched
      if ( noenc = ap_regexec(encrx, sep, 2, match, 0) , !noenc )
        set_encoding(apr_pstrndup(pool, sep+match[1].rm_so,
		match[1].rm_eo - match[1].rm_so) ) ;
      ap_pregfree(pool, encrx) ;
    }
    code = xmlNanoHTTPReturnCode(http) ;
    return !noenc ;
  }
#endif
public:
  /*
   * Build a client bound to request r: allocations come from r->pool and
   * optional request headers / credentials are looked up in r->args.
   * No connection is made until open() is called.  Every scalar member
   * is given a defined value, including code, so status() is meaningful
   * (0) before any response has been parsed.
   */
  HTTPClient(request_rec* r) : pool(r->pool), args(r->args) ,
        sock(0), ctype(0), enc(0), bytes(0), code(0), is_open(true) ,
        resp_headers(0), offs(0) , count(0), clen(0), redirects(0) {
  }
  // Release the connection, if any, when the client goes away.
  ~HTTPClient() {
    close() ;
  }
  /*
   * Close the connection and mark the client as no longer open.
   * Calling it again afterwards does nothing.
   */
  void close() {
    if ( ! is_open )
      return ;
    if ( sock )
      apr_socket_close(sock) ;
    is_open = false ;
  }
  /*
   * Fetch url, following redirects, and report progress to w.
   *
   * Returns true when a response with a usable body (2xx, or an error
   * document) is ready to be consumed with read().  On every failure
   * path - too many hops, connection or parse failure, or a response
   * without a Content-Type - the connection is closed and false is
   * returned, so isopen() agrees with the result.
   */
  bool open(const char* url, BasicWriter& w) {
    // redirects counts every attempt, including the first one.
    if ( ++redirects >= 4 ) {
      w.puts("<val:message>Too many redirects - bailing out</val:message>") ;
      close() ;
      return false ;
    }
    if ( ! open1(url, w) ) {
      w.puts("<val:message>Error accessing ").escape(url)
        .puts(" - aborting.</val:message>\n") ;
      close() ;
      return false ;
    }
    switch ( interpret_headers() ) {
      case ERROR_OK:
        w.puts("<val:message>HTTP Error ").puti(code)
                .puts(" - validating error document</val:message>") ;
        // fallthrough
      case CONTENT_OK:
        w.puts("<val:subject>").escape(url).puts("</val:subject>") ;
        return true ;
      case REDIRECT_OK: {
        // Copy the target first: the header table is cleared when the next
        // response is parsed.
        char* newurl = apr_pstrdup(pool, header("Location") ) ;
        w.puts("<val:message>").escape(url).puts(" redirected us to ")
                .escape(newurl).puts("</val:message>") ;
        return open(newurl, w) ;
      }
      default:
        break ;
    }
    // Response without a Content-Type: nothing usable to read.
    close() ;
    return false ;
  }
  size_t read(char** b) {
    if ( ( offs <= 0 ) || ( offs >= bytes ) ) {
      bytes = BUFLEN ;
      if ( ( clen > 0 ) && ( clen - count < BUFLEN ) )
	bytes = clen - count ;
      apr_status_t s = apr_socket_recv( sock, buf, &bytes) ;
      if ( APR_STATUS_IS_EOF( s ) || (bytes == 0) )
	close() ;
      *b = buf ;
    } else {
      bytes -= offs ;
      *b = buf + offs ;
      //memmove(buf, buf+offs, bytes) ;
    }
    count += bytes ;
    offs = 0 ;
    /*
    if ( ! bytes )
      close() ;
      */
    return bytes ;
#if 0
    if (!is_open)
      return 0 ;
    //bytes = xmlNanoHTTPRead(http, buf, BUFLEN) ;
    *b = buf ;
    if ( ! bytes )
	close() ;
    return bytes ;
#endif
  }
  // False once close() has run.
  const bool isopen() const {
    return is_open ;
  }
  // Charset taken from the Content-Type header, or null if none was found.
  const char* encoding() const {
    return enc ;
  }
  // Media type from the Content-Type header, without parameters.
  const char* content_type() const {
    return ctype ;
  }
  // Value of the Content-Length header (0 when absent).
  const size_t content_length() const {
    return clen ;
  }
  // Number of bytes handed out by read() so far.
  const size_t length() const {
    return count ;
  }
  /*
   * Look up a response header by name.  Returns null when the header is
   * absent, and also when no response has been parsed yet (the header
   * table does not exist until then).
   */
  const char* header(const char* key) const {
    if ( ! resp_headers )
      return 0 ;
    return apr_table_get(resp_headers, key) ;
  }
  // Numeric HTTP status parsed from the most recent response.
  const int status() const {
    return code ;
  }
  /*
   * Write the response status and headers to out, wrapped in a
   * <val:http> element.  fn is invoked once per header with &out as its
   * first argument.  If no response has been parsed yet only the empty
   * element is written.
   */
  void headers(apr_table_do_callback_fn_t fn, BasicWriter& out) const {
    out.puts("<val:http code=\"").puti(code).puts("\">") ;
    if ( resp_headers )
      // The key list is variadic and must end with a real null pointer;
      // a plain 0 is passed as an int and need not be pointer-sized.
      apr_table_do ( fn, (void*) &out, resp_headers, (const char*) NULL ) ;
    out.puts("</val:http>") ;
  }

/* should move this to OpenSP? */
#if 0
  // Disabled (inside #if 0).  Records x as the document encoding and
  // exports it through the SP_CHARSET_FIXED and SP_ENCODING environment
  // variables.
  // NOTE(review): presumably these are read by OpenSP - confirm.
  void set_encoding(char* x) {
    enc = x ;
    putenv("SP_CHARSET_FIXED=1") ;
    putenv(apr_pstrcat(pool, "SP_ENCODING=", enc, NULL)) ;
  } 
#define SUPPORTED_ENCODINGS "ascii,us-ascii,utf-8,utf-16,ucs-2,iso-10646-ucs-2,ucs-4,iso-10646-ucs-4,utf-32,unicode,euc-jp,euc-kr,euc-cn,cn-gb,gb2312,sjis,shift_jis,big5,cn-big5,iso-8859-1,iso-8859-2,iso-8859-3,iso-8859-4,iso-8859-5,iso-8859-6,iso-8859-7,iso-8859-8,iso-8859-9,iso-8859-15,koi8-r,koi8,xml"
  // Disabled (inside #if 0).  Lower-cases enc in place, then reports
  // whether it occurs in the SUPPORTED_ENCODINGS list.  Names shorter
  // than three characters are rejected outright.
  // NOTE(review): the lookup is a plain substring search, so a fragment of
  // a listed name also matches - confirm whether that is intended.
  const bool supported_encoding() const {
    if ( ! enc || strlen(enc) < 3 )
	return false ;
    for ( char* x = enc; *x; ++x )
      if ( isupper(*x) )
	*x = tolower(*x) ;
    if ( ! strstr(SUPPORTED_ENCODINGS, enc) )
	return false ;
    else
	return true ;
  }
#endif
} ;
#endif
