#define HAVE_UPLOAD
/*
	Copyright (c) 2003, WebThing Ltd
	Author: Nick Kew <nick@webthing.com>
 
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
 
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
 
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <httpd.h>
#include <util_cfgtree.h>
#include <http_config.h>
#include <apr_strings.h>
#include <apr_hash.h>
#include "ValidatorModes.h"

#ifdef HAVE_UPLOAD
extern "C" apr_table_t* mod_upload_form(request_rec*) ;
#endif

#include "Transcoder.h"

extern module AP_MODULE_DECLARE_DATA validator_module ;
  /* Identifies one parser back-end.  The non-zero values are distinct bits
   * (0x100, 0x200) so that several of them can be OR-ed into a `parsers` set.
   */
  typedef enum { NULL_PARSER = 0 , OpenSP_ = 0x100 , Xerces_ = 0x200 } parser ;
  /* A set of `parser` bits OR-ed together. */
  typedef int parsers ;
  /* One per-content-type parser rule; rules form a singly linked list
   * headed by validator_conf::plist.
   */
  typedef struct parser_rec {
    const char* ctype ;		/* content type this rule applies to */
    parser preferred ;		/* parser to try first */
    parsers allowed ;		/* every parser permitted for this type */
    struct parser_rec* next ;	/* next rule, or NULL at end of list */
  } parser_rec ;
  /* Per-directory configuration, filled in by the Validator* directives. */
  typedef struct {
    parser defaultparser ;	/* preferred parser when no rule matches */
    parsers defaultallowed ;	/* permitted parsers when no rule matches */
    parser_rec* plist ;		/* per-content-type rules (ValidatorParser) */
    apr_hash_t* cache ;		/* public id -> system id, from the XML catalogue */
    const char* mlbase ;	/* directory part of the XML catalogue path */
    const char* xslbase ;	/* prefix prepended to "<mode>.xsl" */
    const char* xmlCatalogue ;	/* path given to ValidatorXMLCatalogue */
    const char* htmlCatalogue ;	/* path given to ValidatorHTMLCatalogue */
  } validator_conf ;
  /* Extract the value of `name` from a raw "a=b&c=d" query string.
   *
   *   pool  - pool the returned copy is allocated from
   *   args  - raw (still URL-escaped) query string; may be NULL
   *   name  - argument name to look for
   *
   * Returns a pool-allocated copy of the text between "name=" and the next
   * '&' (or end of string), or 0 when `args` is NULL, the name is absent,
   * or the name appears bare at the very end of the string.  A name only
   * counts when it starts the string or directly follows '&', so "url"
   * does not match inside "xurl=".
   */
  static char* getArg(apr_pool_t* pool, const char* args, const char* name) {
    if ( ! args || ! name || ! *name )
	return 0 ;
    const size_t namelen = strlen(name) ;
    for ( const char* key = strstr(args, name) ; key ;
		key = strstr(key + 1, name) ) {
      // Token boundary is always judged against the real start of the
      // query string, never against a shifted search position.
      if ( ( key != args ) && ( key[-1] != '&' ) )
	continue ;				// match inside another token
      const char after = key[namelen] ;
      if ( after == '\0' )
	return 0 ;				// bare name, nothing follows
      if ( after != '=' )
	continue ;				// name is only a prefix of a longer key
      const char* val = key + namelen + 1 ;
      const char* end = strchr(val, '&') ;
      if ( end )
	return apr_pstrndup(pool, val, (end-val) ) ;
      return apr_pstrdup(pool, val) ;
    }
    return 0 ;
  }
  /* Report whether the query string carries `name` with a non-empty value.
   *
   *   args  - raw "a=b&c=d" query string; may be NULL
   *   name  - argument name to look for
   *
   * Returns true only for "name=<something>" where the name starts the
   * string or directly follows '&'.  Returns false when `args` is NULL,
   * the name is absent, the name is bare at end of string, or the first
   * properly delimited "name=" has an empty value.
   */
  bool getFeature(const char* args, const char* name) {
    if ( ! args || ! name || ! *name )
	return false ;
    const size_t namelen = strlen(name) ;
    for ( const char* key = strstr(args, name) ; key ;
		key = strstr(key + 1, name) ) {
      // Token boundary is always judged against the real start of the
      // query string, never against a shifted search position.
      if ( ( key != args ) && ( key[-1] != '&' ) )
	continue ;				// false match
      const char after = key[namelen] ;
      if ( after == '\0' )
	return false ;				// malformed: bare name at end
      if ( after == '=' ) {
	const char first = key[namelen + 1] ;
	return ( first != '\0' ) && ( first != '&' ) ;	// empty val => false
      }
      // name is only a prefix of a longer key: keep searching
    }
    return false ;				// no match
  }

/* Document family codes; UNSET is the "not determined" value that the
 * GET handler compares the result of sniff_doc() against.
 */
typedef enum { UNSET, HTML, XHTML, XML, SGML } parsetype ;

#include "ApacheWriter.h"
#include "HTTPClient.h"

#include <OpenSP/config.h>
#include <OpenSP/Boolean.h>
#include "OpenSPValidator.h"

#include <util/PlatformUtils.hpp>
#include "XercesValidator.h"

#include "ParserFactory.h"

#include <modxml/GnomeXSLT.h>

extern "C" {

  /* Translate a "resultsMode" name into its CTYPE_* / RESULTS_* flag set.
   *
   *   cmode - mode name as supplied by the client; may be NULL
   *
   * Returns the OR-ed flags for a recognised name, or 0 when `cmode` is
   * NULL or not in the table (callers treat 0 as an error).
   */
  static int getResultsMode(const char* cmode) {
    // Names point at string literals, hence const char*; the table never
    // changes, so it is built once rather than on every call.
    static const struct {
	const char* name ;
	int value ;
    } modes[] = {
	{ "normsrc", CTYPE_HTML | RESULTS_NORM | RESULTS_ERRORS } ,
	{ "noerrors", CTYPE_HTML | RESULTS_NORM } ,
	{ "traditional", CTYPE_HTML | RESULTS_SOURCE | RESULTS_ERRORS } ,
	{ "errors",	CTYPE_HTML | RESULTS_ERRORS } ,
	{ "earl",	CTYPE_RDF | RESULTS_ERRORS } ,
	{ "bare",	CTYPE_RDF } ,
	{ NULL , 0 }
    } ;
    if ( ! cmode )
      return 0 ;	// error
    for ( int i = 0 ; modes[i].name != NULL ; ++i)
      if ( ! strcmp(cmode, modes[i].name) )
	return modes[i].value ;
    return 0 ;	// error
  }

/* Both parser bits together: OpenSP_ (0x100) | Xerces_ (0x200). */
#define PARSER_MASK 0x300

  /* Per-directory config constructor.
   *
   * Allocates a zeroed validator_conf from `pool`, gives it an empty
   * catalogue cache, and defaults to OpenSP preferred with both parsers
   * allowed.  The second argument is unused.
   */
  static void* cr_dir_conf(apr_pool_t* pool, char* x) {
    validator_conf* dirconf =
	(validator_conf*) apr_pcalloc(pool, sizeof(validator_conf)) ;

    dirconf->cache = apr_hash_make(pool) ;
    dirconf->defaultallowed = OpenSP_ | Xerces_ ;
    dirconf->defaultparser = OpenSP_ ;

    return dirconf ;
  }
  /* Directive handler: ValidatorParser <ctype> <preferred> <others>.
   *
   * Builds a parser_rec for `ctype` and pushes it on the front of the
   * directory's rule list.  `pref` must be exactly "OpenSP" or "Xerces";
   * `others` is searched for those same names as substrings and each one
   * found is added to the allowed set.  Returns 0 on success or an error
   * string for an unknown preferred parser.
   */
  static const char* ValidatorParser (cmd_parms* cmd, void* cfg,
	const char* ctype, const char* pref, const char* others) {
    validator_conf* dirconf = (validator_conf*) cfg ;
    parser_rec* rule =
	(parser_rec*) apr_pcalloc(cmd->pool, sizeof(parser_rec) ) ;
    rule->ctype = apr_pstrdup(cmd->pool, ctype) ;

    if ( strcmp(pref, "OpenSP") == 0 )
      rule->preferred = OpenSP_ ;
    else if ( strcmp(pref, "Xerces") == 0 )
      rule->preferred = Xerces_ ;
    else
      return "Available Validator parsers are OpenSP and Xerces" ;

    // The preferred parser is always allowed; add whatever else is named.
    parsers permitted = rule->preferred ;
    if ( strstr(others, "OpenSP") != NULL )
      permitted |= OpenSP_ ;
    if ( strstr(others, "Xerces") != NULL )
      permitted |= Xerces_ ;
    rule->allowed = permitted ;

    rule->next = dirconf->plist ;
    dirconf->plist = rule ;
    return 0 ;
  }
  /* Directive handler: ValidatorDefault <preferred> <others>.
   *
   * Sets the directory's fallback parser and allowed set.  `pref` must be
   * exactly "OpenSP" or "Xerces"; `others` is searched for those names as
   * substrings.  Returns 0 on success or an error string for an unknown
   * preferred parser (in which case the config is left untouched).
   */
  static const char* ValidatorDefault (cmd_parms* cmd, void* cfg,
	const char* pref, const char* others) {
    validator_conf* dirconf = (validator_conf*) cfg ;

    parser chosen ;
    if ( strcmp(pref, "OpenSP") == 0 )
      chosen = OpenSP_ ;
    else if ( strcmp(pref, "Xerces") == 0 )
      chosen = Xerces_ ;
    else
      return "Available Validator parsers are OpenSP and Xerces" ;

    // The preferred parser is always allowed; add whatever else is named.
    parsers permitted = chosen ;
    if ( strstr(others, "OpenSP") != NULL )
      permitted |= OpenSP_ ;
    if ( strstr(others, "Xerces") != NULL )
      permitted |= Xerces_ ;

    dirconf->defaultparser = chosen ;
    dirconf->defaultallowed = permitted ;
    return 0 ;
  }
  /* Directive handler: ValidatorHTMLCatalogue <filename>.
   * Stores a pool copy of the path in the directory config; always
   * succeeds (returns 0).  The file itself is not opened here.
   */
  static const char* ValidatorHTMLCatalogue (cmd_parms* cmd, void* cfg,
	const char* filename) {
    validator_conf* dirconf = (validator_conf*) cfg ;
    char* saved = apr_pstrdup(cmd->pool, filename) ;
    dirconf->htmlCatalogue = saved ;
    return 0 ;
  }
  /* Directive handler: ValidatorXSLTBase <base>.
   * Stores a pool copy of the stylesheet prefix in the directory config;
   * always succeeds (returns 0).
   */
  static const char* ValidatorXSLTBase (cmd_parms* cmd, void* cfg,
	const char* filename) {
    validator_conf* dirconf = (validator_conf*) cfg ;
    char* saved = apr_pstrdup(cmd->pool, filename) ;
    dirconf->xslbase = saved ;
    return 0 ;
  }
  /* Directive handler: ValidatorXMLCatalogue <filename>.
   *
   * Records the catalogue path, derives `mlbase` from its directory part,
   * and loads every usable PUBLIC entry into conf->cache (public id ->
   * system id).  A line is used only when it
   *   - starts with "PUBLIC",
   *   - has a double-quoted public id whose first character is '-' or '+',
   *   - is followed by whitespace and then a system id that is either
   *     quoted (single or double) or a bare whitespace-delimited word.
   * Anything else is silently skipped.
   *
   * Returns 0 on success, or an error string if the file cannot be opened.
   */
  static const char* ValidatorXMLCatalogue (cmd_parms* cmd, void* cfg,
	const char* filename) {
    validator_conf* conf = (validator_conf*) cfg ;
    FILE* f = fopen(filename, "r") ;
    if ( ! f )
      return "Failed to open XML Catalogue" ;

    conf->xmlCatalogue = apr_pstrdup(cmd->pool, filename) ;
    // A path with no '/' has no directory part; subtracting from a NULL
    // strrchr() result would be undefined, so fall back to ".".
    // NOTE(review): "." assumes mlbase is used as a directory prefix -
    // confirm against the code that consumes it.
    const char* dirend = strrchr(filename, '/') ;
    if ( dirend )
      conf->mlbase = apr_pstrndup(cmd->pool, filename, dirend-filename) ;
    else
      conf->mlbase = "." ;

    char* lineptr = 0 ;
    size_t n = 0 ;
    while ( getline(&lineptr, &n, f) != -1 ) {
      if ( strncmp(lineptr, "PUBLIC", 6) != 0 )
	continue ;
      char* pid = strchr(lineptr, '"') ;
      if ( ! pid )			// no opening quote
	continue ;
      ++pid ;				// step past the quote
      if ( (*pid != '-' ) && (*pid != '+' ) )
	continue ;
      char* pend = strchr(pid, '"') ;
      if ( ! pend )			// no closing quote
	continue ;
      char* sp = pend + 1 ;
      // <ctype.h> classifiers need an unsigned char value
      if ( ! isspace((unsigned char)*sp) )
	continue ;
      while ( isspace((unsigned char)*sp) )
	++sp ;
      char delim = *sp ;
      if ( ! delim )			// nothing after the public id
	continue ;
      char* spend ;
      if ( delim == '"' || delim == '\'' ) {
	spend = strchr(++sp, delim) ;
	if ( ! spend )			// unterminated quoted system id
	  continue ;
      } else {
	for ( spend = sp; *spend && !isspace((unsigned char)*spend); ++spend) ;
      }
      if ( spend > sp ) {		// Line parses OK
	apr_hash_set(conf->cache,
		apr_pstrndup(cmd->pool, pid, pend-pid), pend - pid,
		apr_pstrndup(cmd->pool, sp, spend-sp) ) ;
      }
    }
    free(lineptr) ;
    fclose(f) ;
    return 0 ;
  }

  static int show_header(void* ctx, const char* key, const char* val) {
    BasicWriter* out = (BasicWriter*) ctx ;
    out->puts("<val:httphdr name=\"").escape(key).puts("\" value=\"")
		.escape(val).puts("\"/>") ;
    return 1 ;
  }

  /* GET handler: validate the document named by the "url" query argument.
   *
   * Query arguments read here:
   *   url          - address of the document to fetch (URL-unescaped below)
   *   resultsMode  - report style, looked up through getResultsMode()
   *   http         - when present with a value, the fetched response
   *                  headers are included in the report
   *
   * Flow: fetch the document through HTTPClient, copy it through the
   * Transcoder into a temporary file, pick a validator via ParserFactory,
   * run it on the temporary file, then append optional headers / source
   * listing to the XML report written through ApacheWriter.
   *
   * Returns DECLINED when the handler name is not "validator", 400 when
   * the query string is missing or lacks a usable url / resultsMode, and
   * OK otherwise (failures after output has started are reported inside
   * the document).
   */
  static apr_status_t validator_get(request_rec* r) {
    if ( strcmp(r->handler, "validator") )
	return DECLINED ;

    // A request without any query string has r->args == NULL.  Reject it
    // here so nothing below has to search a NULL string.
    if ( ! r->args )
	return 400 ;

    char* url = getArg(r->pool, r->args, "url") ;
    char* cmode = getArg(r->pool, r->args, "resultsMode") ;
    int resultsMode = getResultsMode(cmode) ;

    // Validate BEFORE folding in RESULTS_HTTP: otherwise "http=1" alone
    // makes resultsMode non-zero and an unknown or missing mode (with
    // cmode possibly NULL) would slip through.
    if ( ! url || ! resultsMode )
      return 400 ;

    if ( getFeature(r->args, "http") )
      resultsMode |= RESULTS_HTTP ;

    validator_conf* conf = (validator_conf*)
	ap_get_module_config(r->per_dir_config, &validator_module) ;

    if ( resultsMode & CTYPE_HTML ) {
      ap_set_content_type(r, "text/html;charset=utf-8") ;
    } else if ( resultsMode & CTYPE_RDF ) {
      ap_set_content_type(r, "application/rdf+xml;charset=utf-8") ;
    }

    /* Tell mod_xml what stylesheet to use so it can take it
     * precompiled from cache; chop this if not using mod_xml
     */
    modxmlGnomeSetXSLT(r, apr_pstrcat(r->pool,
		conf->xslbase, cmode, ".xsl", NULL) ) ;
    ap_unescape_url(url) ;
    ApacheWriter out(r) ;
    out.puts("<?xml version=\"1.0\"?>\n")
	.puts("<?xml-stylesheet type=\"text/xsl\" href=\"")
	.puts(conf->xslbase).putstr(cmode).puts(".xsl\"?>\n").startdoc()
	;

    char datestring[APR_RFC822_DATE_LEN] ;
    apr_time_t now = apr_time_now() ;
    apr_rfc822_date(datestring, now) ;
    out
	.puts("<val:date>").puts(datestring).puts("</val:date>\n")
    ;

    // Temporary file that receives the transcoded document.
    apr_file_t* fd = 0 ;
    char tmpname[28] ;
    strcpy(tmpname, "/tmp/mod-validator.XXXXXX") ;
    if ( apr_file_mktemp(&fd, tmpname, 0, r->pool) != APR_SUCCESS ) {
      out.puts("<val:message>Internal Error - bailing out</val:message>\n") ;
      out.puts("</val:validation>\n") ;
      return OK ;
    }
    apr_pool_cleanup_register(r->pool, fd, (apr_status_t(*)(void*))apr_file_close, apr_pool_cleanup_null) ;

    HTTPClient http(r) ;
    if ( ! http.open(url, out) ) {
      out.puts("</val:validation>\n") ;
      return OK ;
    }
    Transcoder Iconv(fd) ;
    Iconv.setEncoding(http.encoding()) ;
    ParserFactory parserFactory(r, Iconv, out, http) ;

    // Copy the response body into the temporary file.  Only the first
    // chunk is sniffed; if the factory reports a non-zero xml_bytes()
    // count, that many leading bytes are replaced by xmlDecl().
    char* buf ;
    int num = 0 ;
    do {
      apr_size_t bytes = http.read(&buf) ;
      if ( ! num++ ) {
	if ( parserFactory.sniff_doc(buf, bytes) == UNSET ) {
	  http.close() ;
	  out.puts("</val:validation>\n") ;
	  return OK ;
	}
	size_t skip = parserFactory.xml_bytes() ;
	if ( skip ) {
	  /* do transcoding on input */
	  apr_file_puts(parserFactory.xmlDecl(), fd) ;
	  Iconv.write(buf + skip, bytes - skip) ;
	} else
	  Iconv.write(buf , bytes) ;
      } else
	Iconv.write(buf , bytes) ;

    } while ( http.isopen() ) ;
    if ( Iconv.errors() )
      out.puts("<val:message>Error(s) transcoding input document (is your charset declaration correct?)  Document may be corrupted/incomplete.</val:message>") ;
    Iconv.close() ;
    out.puts("<val:metadata ctype=\"").escape(http.content_type())
	.puts("\" encoding=\"").escape(http.encoding()).puts("\"/>\n") ;
    http.close() ;

    ApacheValidator* validator = parserFactory.selectParser(resultsMode, 0) ;

    if ( ! validator ) {
      out.puts("</val:validation>\n") ;
      return OK ;
    }

    validator->version(out) ;

    // Make sure everything is on disk and rewind before the validator
    // reads the file by name.
    apr_file_flush(fd) ;
    apr_off_t offs = 0 ;
    apr_file_seek(fd, APR_SET, &offs) ;
    validator->validate(tmpname) ;

    if ( resultsMode & RESULTS_HTTP ) {
      http.headers(show_header, out) ;
    }
    out.puts("<val:httpcode>").puti(http.status()).puts("</val:httpcode>") ;
    if ( resultsMode & RESULTS_SOURCE ) {
      FILE* f = fopen(tmpname, "r") ;
      if ( f ) {
	// getline() owns lineptr/cap; its return value is kept separately
	// so the recorded buffer capacity is never clobbered.
	char* lineptr = 0 ;
	size_t cap = 0 ;
	unsigned int lineno = 0 ;
	out.puts("<val:source>\n") ;
	while ( getline(&lineptr, &cap, f) != -1 ) {
	  out.puts("<val:line n=\"").puti(++lineno).puts("\">")
		.escape(lineptr).puts("</val:line>\n") ;
	}
	free(lineptr) ;
	out.puts("</val:source>\n") ;
	fclose(f) ;
      }
    }
    out.puts("</val:validation>\n") ;
    delete validator ;

    return OK ;
  }
  /* POST handler: validate a document uploaded in the request body.
   *
   * Only active when HAVE_UPLOAD is defined; otherwise returns DECLINED.
   *
   * Flow: read the request body in BUFLEN chunks, copy it through the
   * Transcoder into a temporary file (sniffing the first chunk), then
   * obtain the form fields from mod_upload_form() and use the
   * "resultsMode" field to choose the report style and validator.
   *
   * Returns 500 if the client body cannot be set up, 400 if resultsMode
   * is missing or unknown, and OK otherwise (later failures are reported
   * inside the document).
   */
  static apr_status_t validator_upload(request_rec* r) {
#ifdef HAVE_UPLOAD
    validator_conf* conf = (validator_conf*)
	ap_get_module_config(r->per_dir_config, &validator_module) ;

    apr_table_set ( r->headers_out , "Connection", "Close" ) ;

    ApacheWriter out(r) ;
    out.puts("<?xml version=\"1.0\"?>\n")
	.startdoc()
	.puts("<val:subject>Uploaded File</val:subject>") ;

    char datestring[APR_RFC822_DATE_LEN] ;
    apr_time_t now = apr_time_now() ;
    apr_rfc822_date(datestring, now) ;
    out.puts("<val:date>").puts(datestring).puts("</val:date>\n") ;

    // Temporary file that receives the transcoded upload.
    apr_file_t* fd = 0 ;
    char tmpname[28] ;
    strcpy(tmpname, "/tmp/mod-validator.XXXXXX") ;
    if ( apr_file_mktemp(&fd, tmpname, 0, r->pool) != APR_SUCCESS ) {
	out.puts("<val:message>Internal Error - bailing out</val:message>\n") ;
	out.puts("</val:validation>\n") ;
	return OK ;
    }
    apr_pool_cleanup_register(r->pool, fd, (apr_status_t(*)(void*))apr_file_close, apr_pool_cleanup_null) ;
    Transcoder Iconv(fd) ;
    ParserFactory parserFactory(r, Iconv, out) ;
    char buf[BUFLEN] ;
    int num = 0 ;
    if ( ap_setup_client_block(r, REQUEST_CHUNKED_DECHUNK) != OK )
      return 500 ;
    if ( ! ap_should_client_block(r) )
      return 500 ;

    // ap_get_client_block() returns a signed count and uses a negative
    // value to signal failure.  Keep it signed: stored in an unsigned
    // size it would become an enormous length for Iconv.write().
    long nread ;
    do {
      nread = ap_get_client_block(r, buf, BUFLEN) ;
      if ( nread < 0 ) {
	out.puts("<val:message>Error reading uploaded file - bailing out</val:message>\n") ;
	out.puts("</val:validation>\n") ;
	return OK ;
      }
      apr_size_t bytes = (apr_size_t) nread ;
      // first time through we sniff it
      // and every time we save to tmpfile
      size_t skip = 0 ;
      if ( ! num++ ) {
	parserFactory.sniff_quiet(buf, bytes) ;
	skip = parserFactory.xml_bytes() ;
      }
      if ( skip ) {
	apr_file_puts(parserFactory.xmlDecl(), fd) ;
	Iconv.write(buf + skip, bytes - skip) ;
      } else
	Iconv.write(buf, bytes) ;
    } while ( nread > 0 ) ;

    if ( Iconv.errors() )
      out.puts("<val:message>Error(s) transcoding input document (is your charset declaration correct?)  Document may be corrupted/incomplete.</val:message>") ;
    Iconv.close() ;

    apr_file_flush(fd) ;
    apr_off_t offs = 0 ;
    apr_file_seek(fd, APR_SET, &offs) ;

    // NOTE(review): whether mod_upload_form() can return NULL is not
    // visible from here; guard so a missing table reads as "no mode"
    // and falls into the 400 path below.
    apr_table_t* formdata = mod_upload_form(r) ;
    const char* cmode = formdata ? apr_table_get(formdata, "resultsMode") : 0 ;
    int resultsMode = getResultsMode(cmode) ;

    if ( resultsMode & CTYPE_HTML ) {
      ap_set_content_type(r, "text/html;charset=utf-8") ;
    } else if ( resultsMode & CTYPE_RDF ) {
      ap_set_content_type(r, "application/rdf+xml;charset=utf-8") ;
    } else
      return 400 ;


  /* Tell mod_xml what stylesheet to use so it can take it
   * precompiled from cache; chop this if not using mod_xml
   */
    modxmlGnomeSetXSLT(r, apr_pstrcat(r->pool,
	conf->xslbase, cmode, ".xsl", NULL) ) ;

    ApacheValidator* validator
	    = parserFactory.selectParser(resultsMode, formdata) ;

    if ( ! validator ) {
      out.puts("</val:validation>\n") ;
      return OK ;
    }

    validator->version(out) ;
    validator->validate(tmpname) ;
    out.puts("<val:httpcode>N/A</val:httpcode>") ;

    if ( resultsMode & RESULTS_SOURCE ) {
      FILE* f = fopen(tmpname, "r") ;
      if ( f ) {
	// getline() owns lineptr/cap; its return value is kept separately
	// so the recorded buffer capacity is never clobbered.
	char* lineptr = 0 ;
	size_t cap = 0 ;
	unsigned int lineno = 0 ;
	out.puts("<val:source>\n") ;
	while ( getline(&lineptr, &cap, f) != -1 ) {
	  out.puts("<val:line n=\"").puti(++lineno).puts("\">")
		.escape(lineptr).puts("</val:line>\n") ;
	}
	free(lineptr) ;
	out.puts("</val:source>\n") ;
	fclose(f) ;
      }
    }

    out.puts("</val:validation>\n") ;
    delete validator ;

    return OK ;

#else
	return DECLINED ;
#endif
  }
  /* Content handler entry point registered with Apache.
   *
   * Declines anything whose handler name is not "validator"; otherwise
   * dispatches GET to validator_get() and POST to validator_upload(),
   * and declines every other method.
   */
  static apr_status_t validator_main(request_rec* r) {
    if ( strcmp(r->handler, "validator") != 0 )
	return DECLINED ;

    if ( r->method_number == M_GET )
	return validator_get(r) ;
    if ( r->method_number == M_POST )
	return validator_upload(r) ;

    return DECLINED ;
  }

  /* Hook registration: installs validator_main as a content handler at
   * middle priority, with no ordering constraints.  The pool argument is
   * not used.
   */
  static void validator_hooks(apr_pool_t* p) {
    ap_hook_handler(
	validator_main,
	NULL,
	NULL,
	APR_HOOK_MIDDLE
    ) ;
  }
  /* Configuration directives understood by this module.  All are allowed
   * in any context (OR_ALL); the table ends with a {NULL} sentinel entry.
   */
  static const command_rec validator_cmds[] = {
    /* ValidatorDefault <preferred> <others> */
    AP_INIT_TAKE2("ValidatorDefault", (const char*(*)())ValidatorDefault,
	NULL, OR_ALL, "Default parser; default allowed parsers" ) ,
    /* ValidatorParser <content-type> <preferred> <others> */
    AP_INIT_TAKE3("ValidatorParser", (const char*(*)())ValidatorParser,
	NULL, OR_ALL, "Preferred and allowed parsers by content-type" ) ,
    /* ValidatorXMLCatalogue <file> - also preloads the public-id cache */
    AP_INIT_TAKE1("ValidatorXMLCatalogue",
	(const char*(*)())ValidatorXMLCatalogue,
	NULL, OR_ALL, "XML Entity Catalogue" ) ,
    /* ValidatorHTMLCatalogue <file> */
    AP_INIT_TAKE1("ValidatorHTMLCatalogue",
	(const char*(*)())ValidatorHTMLCatalogue,
	NULL, OR_ALL, "HTML Entity Catalogue" ) ,
    /* ValidatorXSLTBase <prefix> */
    AP_INIT_TAKE1("ValidatorXSLTBase",
	(const char*(*)())ValidatorXSLTBase,
	NULL, OR_ALL, "Base for XSLT files" ) ,
    {NULL}
  } ;
  /* Module record exported to Apache.  Slot names in the comments follow
   * the Apache 2.x `module` struct layout; only the per-directory
   * constructor, the directive table and the hook registrar are supplied.
   */
  module AP_MODULE_DECLARE_DATA validator_module = {
	STANDARD20_MODULE_STUFF,
	cr_dir_conf,		/* create per-directory config */
	NULL,			/* merge per-directory config (none) */
	NULL,			/* create per-server config (none) */
	NULL,			/* merge per-server config (none) */
	validator_cmds,		/* directive table */
	validator_hooks		/* register hooks */
  } ;
}
/* Per-process Xerces setup and teardown.
 *
 * The single file-scope instance below calls
 * XMLPlatformUtils::Initialize() when it is constructed and
 * XMLPlatformUtils::Terminate() when it is destroyed, so the two calls
 * bracket the lifetime of this object file's static data.
 */
class XercesProcess {
public:
  XercesProcess() {
    XMLPlatformUtils::Initialize() ;
  }

  ~XercesProcess() {
    XMLPlatformUtils::Terminate() ;
  }
} ;

static XercesProcess thisProcess ;
