/* -*- mode: c -*- * * $Id: mod_encoding.c,v 1.6 2001/12/11 12:55:38 tai Exp $ * */ /* * mod_encoding core module test implementation for Apache2. * by Kunio Miyamoto (wakatono@todo.gr.jp) * Original security fix port 2002/06/06 * by Kunio Miyamoto (wakatono@todo.gr.jp) * Port new function of 20020611a * by Kunio Miyamoto (wakatono@todo.gr.jp) * Port new function of 20020611a * by Kunio Miyamoto (wakatono@todo.gr.jp) * Add COPYING statement for redistribute only this code. * by Kunio Miyamoto (wakatono@todo.gr.jp) */ /* Copyright (c) 2000-2004 Internet Initiative Japan Inc. and Kunio Miyamoto All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. All advertising materials mentioning features or use of this software must display the following acknowledgment: This product includes software developed by Internet Initiative Japan Inc. and Kunio Miyamoto for use in the mod_encoding module for Apache2. 4. Products derived from this software may not be called "mod_encoding" nor may "mod_encoding" appear in their names without prior written permission of Internet Initiative Japan Inc. For written permission, please contact tai@iij.ad.jp (Taisuke Yamada). 5. Redistributions of any form whatsoever must retain the following acknowledgment: This product includes software developed by Internet Initiative Japan Inc. and Kunio Miyamoto for use in the mod_encoding module for Apache2 (http://www.apache.org/). THIS SOFTWARE IS PROVIDED BY INTERNET INITIATIVE JAPAN INC. 
AND KUNIO MIYAMOTO ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTERNET INITIATIVE JAPAN INC., KUNIO MIYAMOTO OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include /* #include */ #include /** * Core part of the module. * Here, the module hooks into filename translation stage, * and converts all non-ascii-or-utf8 expressions into * appropriate encoding speficied by the configuration. * * Note UTF-8 is the implicit default and will be tried first, * regardless of user configuration. 
*/ #ifndef MOD_ENCODING_DEBUG #ifdef DEBUG #define MOD_ENCODING_DEBUG 1 #else #define MOD_ENCODING_DEBUG 0 #endif #endif #define DBG(expr) if (MOD_ENCODING_DEBUG) { expr; } /* FIXME: remove gcc-dependency */ #define ENABLE_FLAG_UNSET 0 #define ENABLE_FLAG_OFF 1 #define ENABLE_FLAG_ON 2 #define STRIP_FLAG_UNSET 0 #define STRIP_FLAG_OFF 1 #define STRIP_FLAG_ON 2 /** * module-local information storage structure */ typedef struct { int enable_function; /* flag to enable this module */ char *server_encoding; /* server-side filesystem encoding */ apr_array_header_t *client_encoding; /* useragent-to-encoding-list sets */ apr_array_header_t *default_encoding; /* useragent-to-encoding-list sets */ int strip_msaccount; /* normalize wierd WinXP username */ } encoding_config; module AP_MODULE_DECLARE_DATA encoding_module; /*************************************************************************** * utility methods ***************************************************************************/ /** * Converts encoding of the input string. * Returns NULL on error, else, appropriate string on success. * * @param p Memory pool of apache * @param cd Conversion descriptor, made by iconv_open(3). * @param srcbuf Input string * @param srclen Length of the input string. Usually strlen(srcbuf). */ static char * iconv_string(request_rec *r, iconv_t cd, char *srcbuf, size_t srclen) { char *outbuf, *marker; size_t outlen; if (srclen == 0) { return srcbuf; } /* Allocate space for conversion. Note max bloat factor is 4 of UCS-4 */ marker = outbuf = (char *)apr_palloc(r->pool, outlen = srclen * 4 + 1); if (outbuf == NULL) { return NULL; } /* Convert every character within input string. */ while (srclen > 0) { if (iconv(cd, &srcbuf, &srclen, &outbuf, &outlen) == (size_t)(-1)) { return NULL; } } #if 0 /* Commented out for now as some iconv seems to break with NULL */ /* Everything done. 
Flush buffer/state and return result */ iconv(cd, NULL, NULL, &outbuf, &outlen); iconv(cd, NULL, NULL, NULL, NULL); #endif *outbuf = '\0'; return marker; } /** * Nomalize charset in HTTP request line and HTTP header(s). * Returns 0 on success, -1 (non-zero) on error. * * FIXME: Should handle/consider partial success? * * @param r Apache request object structure * @param cd Conversion descriptor, made by iconv_open(3). */ static int iconv_header(request_rec *r, iconv_t cd) { char *buff; char *keys[] = { "Destination", NULL }; int i; /* Normalize encoding in HTTP request line */ ap_unescape_url(r->unparsed_uri); if ((buff = iconv_string(r, cd, r->unparsed_uri, strlen(r->unparsed_uri))) == NULL) return -1; ap_parse_uri(r, buff); ap_getparents(r->uri); /* normalize given path for security */ /* Normalize encoding in HTTP request header(s) */ for (i = 0 ; keys[i] ; i++) { if ((buff = (char *)apr_table_get(r->headers_in, keys[i])) != NULL) { ap_unescape_url(buff); if ((buff = iconv_string(r, cd, buff, strlen(buff))) == NULL) return -1; apr_table_set(r->headers_in, keys[i], buff); } } return 0; } /** * Return the list of encoding(s) (defaults to (list "UTF-8")) * which named client is expected to send. * * @param r Apache request object structure * @param encmap Table of UA-to-encoding(s) * @param lookup Name of the useragent to look for */ static apr_array_header_t * get_client_encoding(request_rec *r, apr_array_header_t *encmap, const char *lookup) { void **list = (void **)encmap->elts; apr_array_header_t *encs = apr_array_make(r->pool, 1, sizeof(char *)); int i; /* push UTF-8 as the first candidate of expected encoding */ *((char **)apr_array_push(encs)) = apr_pstrdup(r->pool, "UTF-8"); if (! lookup) return encs; for (i = 0 ; i < encmap->nelts ; i += 2) { if (ap_regexec((regex_t *)list[i], lookup, 0, NULL, 0) == 0) { apr_array_cat(encs, (apr_array_header_t *)list[i + 1]); return encs; } } return encs; } /** * Handler for "EncodingEngine" directive. 
*/ static const char * set_encoding_engine(cmd_parms *cmd, encoding_config *conf, int flag) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->enable_function = flag ? ENABLE_FLAG_ON : ENABLE_FLAG_OFF; return NULL; } /** * Handler for "SetServerEncoding" directive. */ static const char * set_server_encoding(cmd_parms *cmd, encoding_config *conf, char *arg) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->server_encoding = apr_pstrdup(cmd->pool, arg); return NULL; } /** * Handler for "AddClientEncoding" directive. * * This registers regex pattern of UserAgent: header and expected * encoding(s) from that useragent. */ static const char * add_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) { apr_array_header_t *encs; char *arg; if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } encs = apr_array_make(cmd->pool, 1, sizeof(void *)); /* register useragent with UserAgent: pattern */ if (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(conf->client_encoding) = ap_pregcomp(cmd->pool, arg, REG_EXTENDED|REG_ICASE|REG_NOSUB); } /* register list of possible encodings from above useragent */ while (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(encs) = apr_pstrdup(cmd->pool, arg); } *(void **)apr_array_push(conf->client_encoding) = encs; return NULL; } /** * Handler for "DefaultClientEncoding" directive. * * This registers encodings that work as "fallback" for most clients. */ static const char * default_client_encoding(cmd_parms *cmd, encoding_config *conf, char *args) { char *arg; if (! 
cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->default_encoding = apr_array_make(cmd->pool, 1, sizeof(char *)); /* register list of possible encodings as a default */ while (*args && (arg = ap_getword_conf_nc(cmd->pool, &args))) { *(void **)apr_array_push(conf->default_encoding) = apr_pstrdup(cmd->pool, arg); } return NULL; } /** * Handler for "NormalizeUsername" directive. * * NOTE: This has nothing to do with encoding conversion. * So where should this go? */ static const char * set_normalize_username(cmd_parms *cmd, encoding_config *conf, int flag) { if (! cmd->path) { conf = ap_get_module_config(cmd->server->module_config, &encoding_module); } conf->strip_msaccount = flag ? STRIP_FLAG_ON : STRIP_FLAG_OFF; return NULL; } /*************************************************************************** * module-unique command table * * FIXME: decide which scope to apply for following directives ***************************************************************************/ static const command_rec mod_enc_commands[] = { {"EncodingEngine", set_encoding_engine, NULL, OR_ALL, FLAG, "Usage: EncodingEngine (on|off)"}, {"SetServerEncoding", set_server_encoding, NULL, OR_ALL, TAKE1, "Usage: SetServerEncoding "}, {"AddClientEncoding", add_client_encoding, NULL, OR_ALL, RAW_ARGS, "Usage: AddClientEncoding [ ...]"}, {"DefaultClientEncoding", default_client_encoding, NULL, OR_ALL, RAW_ARGS, "Usage: DefaultClientEncoding "}, {"NormalizeUsername", set_normalize_username, NULL, OR_ALL, FLAG, "Usage: NormalizeUsername (on|off)"}, {NULL} }; /*************************************************************************** * module methods ***************************************************************************/ /** * Setup server-level module internal data strcuture. 
*/
static void *
server_setup(apr_pool_t *p, server_rec *s)
{
  encoding_config *conf;

  DBG(fprintf(stderr, "server_setup: entered\n"));

  /* every setting starts out "unset" so config_merge() can inherit it */
  conf = (encoding_config *)apr_pcalloc(p, sizeof(encoding_config));
  conf->enable_function  = ENABLE_FLAG_UNSET;
  conf->server_encoding  = NULL;
  conf->client_encoding  = apr_array_make(p, 2, sizeof(void *));
  conf->default_encoding = NULL;
  conf->strip_msaccount  = STRIP_FLAG_UNSET;

  return conf;
}

/**
 * Setup folder-level module internal data structure.
 *
 * The per-directory record has the same layout and defaults as the
 * per-server one, so this simply delegates to server_setup().
 */
static void *
folder_setup(apr_pool_t *p, char *dir)
{
  DBG(fprintf(stderr, "folder_setup: entered\n"));
  return server_setup(p, NULL);
}

/**
 * Merge configuration.
 *
 * For each scalar setting the child (override) value wins when it is set,
 * otherwise the parent (base) value is inherited. The client encoding
 * maps are concatenated with the child's entries first.
 *
 * @param p        Pool the merged record is allocated from
 * @param base     Parent encoding_config
 * @param override Child encoding_config
 * @return newly allocated merged encoding_config
 */
static void *
config_merge(apr_pool_t *p, void *base, void *override)
{
  encoding_config *parent = base;
  encoding_config *child  = override;
  encoding_config *merge;

  DBG(fprintf(stderr, "config_merge: entered\n"));

  merge = (encoding_config *)apr_pcalloc(p, sizeof(encoding_config));

  if (child->enable_function != ENABLE_FLAG_UNSET)
    merge->enable_function = child->enable_function;
  else
    merge->enable_function = parent->enable_function;

  DBG(fprintf(stderr, "merged: enable_function == %d\n",
              merge->enable_function));

  if (child->strip_msaccount != STRIP_FLAG_UNSET)
    merge->strip_msaccount = child->strip_msaccount;
  else
    merge->strip_msaccount = parent->strip_msaccount;

  DBG(fprintf(stderr, "merged: strip_msaccount == %d\n",
              merge->strip_msaccount));

  if (child->server_encoding)
    merge->server_encoding = child->server_encoding;
  else
    merge->server_encoding = parent->server_encoding;

  /* server_encoding may still be NULL here; never hand NULL to "%s" */
  DBG(fprintf(stderr, "merged: server_encoding == %s\n",
              merge->server_encoding ? merge->server_encoding : "(null)"));

  if (child->default_encoding)
    merge->default_encoding = child->default_encoding;
  else
    merge->default_encoding = parent->default_encoding;

  merge->client_encoding =
    apr_array_append(p, child->client_encoding, parent->client_encoding);

  return merge;
}

/**
 * Handler for encoding conversion.
* * Here, expected encoding by client/server is determined, and * whenever needed, client input will be converted to that of * server-side expected encoding. */ static int mod_enc_convert(request_rec *r) { encoding_config *conf, *dconf, *sconf; const char *oenc; /* server-side encoding */ apr_array_header_t *ienc; /* list of possible encodings */ void **list; /* same as above (for iteration) */ iconv_t cd; /* conversion descriptor */ int i; sconf = ap_get_module_config(r->server->module_config, &encoding_module); dconf = ap_get_module_config(r->per_dir_config, &encoding_module); conf = config_merge(r->pool, sconf, dconf); if (conf->enable_function != ENABLE_FLAG_ON) { return DECLINED; } oenc = conf->server_encoding ? conf->server_encoding : "UTF-8"; ienc = get_client_encoding(r, conf->client_encoding, apr_table_get(r->headers_in, "User-Agent")); if (conf->default_encoding) apr_array_cat(ienc, conf->default_encoding); list = (void **)ienc->elts; /* try specified encodings in order */ for (i = 0 ; i < ienc->nelts ; i++) { /* pick appropriate converter module */ if ((cd = iconv_open(oenc, list[i])) == (iconv_t)(-1)) continue; /* conversion tryout */ if (iconv_header(r, cd) == 0) { iconv_close(cd); return DECLINED; } /* don't fail, but just continue on until list ends */ iconv_close(cd); } return DECLINED; } /** * Handler for HTTP header parsing. * * Here, username is normalized and wierd "hostname\" prepended * to username is removed (if configured to do so). 
*/ static int mod_enc_parse(request_rec *r) { encoding_config *conf, *dconf, *sconf; const char *pass; char *user; char *buff; sconf = ap_get_module_config(r->server->module_config, &encoding_module); dconf = ap_get_module_config(r->per_dir_config, &encoding_module); conf = config_merge(r->pool, sconf, dconf); if (conf->enable_function != ENABLE_FLAG_ON) { return DECLINED; } /* Parse header and strip "hostname\" from username - for WinXP */ if (conf->strip_msaccount == STRIP_FLAG_ON) { /* Only basic auth is supported for now */ if (ap_get_basic_auth_pw(r, &pass) != OK) return DECLINED; /* Is this username broken? */ if ((user = index(r->user, '\\')) == NULL) return DECLINED; /* Re-generate authorization header */ if (*(user + 1)) { buff = ap_pbase64encode(r->pool, apr_psprintf(r->pool, "%s:%s", user + 1, pass)); apr_table_set(r->headers_in, "Authorization", apr_pstrcat(r->pool, "Basic ", buff, NULL)); ap_get_basic_auth_pw(r, &pass); /* update */ } } return DECLINED; } static void register_hooks(apr_pool_t *p) { /* filename-to-URI translation */ /* ap_hook_translate_name(mod_enc_convert,NULL,NULL,APR_HOOK_FIRST); */ ap_hook_post_read_request(mod_enc_convert,NULL,NULL,APR_HOOK_FIRST); ap_hook_header_parser(mod_enc_parse,NULL,NULL,APR_HOOK_FIRST); } /*************************************************************************** * exported module structure ***************************************************************************/ module AP_MODULE_DECLARE_DATA encoding_module = { STANDARD20_MODULE_STUFF, folder_setup, /* create per-directory config structure */ config_merge, /* merge per-directory(?) config str */ server_setup, /* create per-server config structure */ config_merge, /* merge per-server config ...*/ mod_enc_commands, /* command handlers */ register_hooks };