记录Nginx proxy_set_header的坑

1. 背景

一个项目,使用nginx进行代理回源,新版本提交给测试同学后反馈说,回源Host被修改成固定的origin_upstream了。

第一反应:不应该啊,这个版本我没改回源Host相关的内容啊,是不是你环境问题?哈哈。但又一想,origin_upstream这个值莫名其妙,恰好是proxy_pass配置中url出现的,明显是有问题。

首先看了下代码提交记录,lua代码没有修改回源Host,排除;在回源的location中,增加了两行配置。然后就没有其他改动了。难道是location中的配置导致的?

配置文件结构如下:

http {
	...
	upstream origin_upstream {
		...
	}
	server {
		proxy_set_header Host $host;
		proxy_set_header Connection "";
		proxy_http_version 1.1;
		...
		location x {
			...
		}
		location y {
			...
		}
		
		location @origin{
			...
			# 如下两行是新版新增的,此处不是重复,是根据配置设置源站长连接
			proxy_set_header Connection $switch;
			proxy_http_version 1.1;
			
			proxy_pass http://origin_upstream$request_uri;
			...
		}
	}
}

去掉location中上述新增的两行配置后,就正常了!最终确认,是proxy_set_header Connection $switch;的问题。

解决办法:
在location中再加上如下配置,即可解决:

	proxy_set_header Host $host;

2. 原理

为什么location中的Host不会继承server块中配置的呢?看下nginx文档,如果在当前级别中没有配置,则继承上级配置。默认情况,Host和Connection 会被重定义。

Syntax: proxy_set_header field value;
Default:
proxy_set_header Host $proxy_host;
proxy_set_header Connection close;
Context: http, server, location
Allows redefining or appending fields to the request header passed to the proxied server. The value can contain text, variables, and their combinations. These directives are inherited from the previous configuration level if and only if there are no proxy_set_header directives defined on the current level. By default, only two fields are redefined:
proxy_set_header Host $proxy_host;
proxy_set_header Connection close;

到底什么情况下,Host和Connection 才会被覆盖呢?

怀着疑问,去看proxy_set_header这个指令的实现。

    { ngx_string("proxy_set_header"),
      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_TAKE2,
      ngx_conf_set_keyval_slot,
      NGX_HTTP_LOC_CONF_OFFSET,
      offsetof(ngx_http_proxy_loc_conf_t, headers_source),
      NULL },

通过ngx_conf_set_keyval_slot这函数设置了ngx_http_proxy_loc_conf_t结构体重的headers_source字段。从下面实现可以看出,这个字段是一个kv键值对的数组。

char *
ngx_conf_set_keyval_slot(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{
    char  *p = conf;

    ngx_str_t         *value;
    ngx_array_t      **a;
    ngx_keyval_t      *kv;
    ngx_conf_post_t   *post;

    a = (ngx_array_t **) (p + cmd->offset);

    if (*a == NULL) {
        *a = ngx_array_create(cf->pool, 4, sizeof(ngx_keyval_t));
        if (*a == NULL) {
            return NGX_CONF_ERROR;
        }
    }

    kv = ngx_array_push(*a);
    if (kv == NULL) {
        return NGX_CONF_ERROR;
    }

    value = cf->args->elts;

    kv->key = value[1];
    kv->value = value[2];

    if (cmd->post) {
        post = cmd->post;
        return post->post_handler(cf, post, kv);
    }

    return NGX_CONF_OK;
}

headers_source字段在什么地方使用呢?

一处是在在ngx_http_proxy_merge_loc_conf合并配置时,一处是在ngx_http_proxy_init_headers这个函数中。

继续看,发现,ngx_http_proxy_init_headers这个函数在ngx_http_proxy_merge_loc_conf有调用。

static char *
ngx_http_proxy_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child)
{
    ngx_http_proxy_loc_conf_t *prev = parent;
    ngx_http_proxy_loc_conf_t *conf = child;
    ...
    
    if (conf->headers_source == NULL) {
        conf->headers = prev->headers;
#if (NGX_HTTP_CACHE)
        conf->headers_cache = prev->headers_cache;
#endif
		// 如果location中没有proxy_set_header配置,就集成上级配置
        conf->headers_source = prev->headers_source;
    }
    //紧接着就是调用ngx_http_proxy_init_headers
    rc = ngx_http_proxy_init_headers(cf, conf, &conf->headers,
                                     ngx_http_proxy_headers);
    if (rc != NGX_OK) {
        return NGX_CONF_ERROR;
    }
    ...
   }

我们继续看下ngx_http_proxy_init_headers的逻辑:

static ngx_int_t
ngx_http_proxy_init_headers(ngx_conf_t *cf, ngx_http_proxy_loc_conf_t *conf,
    ngx_http_proxy_headers_t *headers, ngx_keyval_t *default_headers)
{
    u_char                       *p;
    size_t                        size;
    uintptr_t                    *code;
    ngx_uint_t                    i;
    ngx_array_t                   headers_names, headers_merged;
    ngx_keyval_t                 *src, *s, *h;
    ngx_hash_key_t               *hk;
    ngx_hash_init_t               hash;
    ngx_http_script_compile_t     sc;
    ngx_http_script_copy_code_t  *copy;

    if (headers->hash.buckets) {
        return NGX_OK;
    }

    if (ngx_array_init(&headers_names, cf->temp_pool, 4, sizeof(ngx_hash_key_t))
        != NGX_OK)
    {
        return NGX_ERROR;
    }

    if (ngx_array_init(&headers_merged, cf->temp_pool, 4, sizeof(ngx_keyval_t))
        != NGX_OK)
    {
        return NGX_ERROR;
    }

    headers->lengths = ngx_array_create(cf->pool, 64, 1);
    if (headers->lengths == NULL) {
        return NGX_ERROR;
    }

    headers->values = ngx_array_create(cf->pool, 512, 1);
    if (headers->values == NULL) {
        return NGX_ERROR;
    }

    // location中有配置proxy_set_header,则将其放入到headers_merged中。
    if (conf->headers_source) {

        src = conf->headers_source->elts;
        for (i = 0; i < conf->headers_source->nelts; i++) {

            s = ngx_array_push(&headers_merged);
            if (s == NULL) {
                return NGX_ERROR;
            }

            *s = src[i];
        }
    }

    h = default_headers;
	
	// while语句对default_headers对了一次遍历,如果headers_merged没有相同的key值,则使用default_headers的覆盖
	// default_headers是什么?我们下面来看
    while (h->key.len) {

        src = headers_merged.elts;
        for (i = 0; i < headers_merged.nelts; i++) {
            if (ngx_strcasecmp(h->key.data, src[i].key.data) == 0) {
                goto next;
            }
        }

        s = ngx_array_push(&headers_merged);
        if (s == NULL) {
            return NGX_ERROR;
        }

        *s = *h;

    next:

        h++;
    }

    // 下面流程是将headers_merged合并如到入参headers->hash中,即请求的信息中。
    src = headers_merged.elts;
    for (i = 0; i < headers_merged.nelts; i++) {

        hk = ngx_array_push(&headers_names);
        if (hk == NULL) {
            return NGX_ERROR;
        }

        hk->key = src[i].key;
        hk->key_hash = ngx_hash_key_lc(src[i].key.data, src[i].key.len);
        hk->value = (void *) 1;

        if (src[i].value.len == 0) {
            continue;
        }

        copy = ngx_array_push_n(headers->lengths,
                                sizeof(ngx_http_script_copy_code_t));
        if (copy == NULL) {
            return NGX_ERROR;
        }

        copy->code = (ngx_http_script_code_pt) (void *)
                                                 ngx_http_script_copy_len_code;
        copy->len = src[i].key.len;

        size = (sizeof(ngx_http_script_copy_code_t)
                + src[i].key.len + sizeof(uintptr_t) - 1)
               & ~(sizeof(uintptr_t) - 1);

        copy = ngx_array_push_n(headers->values, size);
        if (copy == NULL) {
            return NGX_ERROR;
        }

        copy->code = ngx_http_script_copy_code;
        copy->len = src[i].key.len;

        p = (u_char *) copy + sizeof(ngx_http_script_copy_code_t);
        ngx_memcpy(p, src[i].key.data, src[i].key.len);

        ngx_memzero(&sc, sizeof(ngx_http_script_compile_t));

        sc.cf = cf;
        sc.source = &src[i].value;
        sc.flushes = &headers->flushes;
        sc.lengths = &headers->lengths;
        sc.values = &headers->values;

        if (ngx_http_script_compile(&sc) != NGX_OK) {
            return NGX_ERROR;
        }

        code = ngx_array_push_n(headers->lengths, sizeof(uintptr_t));
        if (code == NULL) {
            return NGX_ERROR;
        }

        *code = (uintptr_t) NULL;

        code = ngx_array_push_n(headers->values, sizeof(uintptr_t));
        if (code == NULL) {
            return NGX_ERROR;
        }

        *code = (uintptr_t) NULL;
    }

    code = ngx_array_push_n(headers->lengths, sizeof(uintptr_t));
    if (code == NULL) {
        return NGX_ERROR;
    }

    *code = (uintptr_t) NULL;


    hash.hash = &headers->hash;
    hash.key = ngx_hash_key_lc;
    hash.max_size = conf->headers_hash_max_size;
    hash.bucket_size = conf->headers_hash_bucket_size;
    hash.name = "proxy_headers_hash";
    hash.pool = cf->pool;
    hash.temp_pool = NULL;

    return ngx_hash_init(&hash, headers_names.elts, headers_names.nelts);
}

从上面可以看到,location中如果headers_source为空,会使用default_headers进行覆盖。

入参default_headers其实是ngx_http_proxy_headers,定义如下:

static ngx_keyval_t  ngx_http_proxy_headers[] = {
    { ngx_string("Host"), ngx_string("$proxy_host") },
    { ngx_string("Connection"), ngx_string("close") },
    { ngx_string("Content-Length"), ngx_string("$proxy_internal_body_length") },
    { ngx_string("Transfer-Encoding"), ngx_string("$proxy_internal_chunked") },
    { ngx_string("TE"), ngx_string("") },
    { ngx_string("Keep-Alive"), ngx_string("") },
    { ngx_string("Expect"), ngx_string("") },
    { ngx_string("Upgrade"), ngx_string("") },
    { ngx_null_string, ngx_null_string }
};

我们看到Host对应的默认值为$proxy_host。
$proxy_host怎么获取的呢?查看代码可知,是通过对proxy_pass配置的url进行解析得出的。
而我们的配置文件中,$proxy_host会被解析为origin_upstream。

proxy_pass http://origin_upstream$request_uri;

$proxy_host解析的相关代码:

static ngx_http_variable_t  ngx_http_proxy_vars[] = {

    { ngx_string("proxy_host"), NULL, ngx_http_proxy_host_variable, 0,
      NGX_HTTP_VAR_CHANGEABLE|NGX_HTTP_VAR_NOCACHEABLE, 0 },
     ...
 }

//proxy_host就是ctx->vars.host_header
static ngx_int_t
ngx_http_proxy_host_variable(ngx_http_request_t *r,
    ngx_http_variable_value_t *v, uintptr_t data)
{
    ngx_http_proxy_ctx_t  *ctx;

    ctx = ngx_http_get_module_ctx(r, ngx_http_proxy_module);

    if (ctx == NULL) {
        v->not_found = 1;
        return NGX_OK;
    }

    v->len = ctx->vars.host_header.len;
    v->valid = 1;
    v->no_cacheable = 0;
    v->not_found = 0;
    v->data = ctx->vars.host_header.data;

    return NGX_OK;
}

 //在proxy_pass指令解析函数中,解析url,得到host,然后通过ngx_http_proxy_set_vars将host设置给host_header
static char *
ngx_http_proxy_pass(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
{ 
    ngx_str_t                  *value, *url;
    ngx_url_t                   u;
    ...
    
    u.url.len = url->len - add;
    u.url.data = url->data + add;
    u.default_port = port;
    u.uri_part = 1;
    u.no_resolve = 1;
	
	// ngx_http_upstream_add中,会调用ngx_parse_url(cf->pool, u)对url进行解析。
    plcf->upstream.upstream = ngx_http_upstream_add(cf, &u, 0);
    if (plcf->upstream.upstream == NULL) {
        return NGX_CONF_ERROR;
    }

    plcf->vars.schema.len = add;
    plcf->vars.schema.data = url->data;
    plcf->vars.key_start = plcf->vars.schema;
	
	// 完成对host_header的设置
    ngx_http_proxy_set_vars(&u, &plcf->vars);
	...
}

static void
ngx_http_proxy_set_vars(ngx_url_t *u, ngx_http_proxy_vars_t *v)
{
    if (u->family != AF_UNIX) {

        if (u->no_port || u->port == u->default_port) {

            v->host_header = u->host;

            if (u->default_port == 80) {
                ngx_str_set(&v->port, "80");

            } else {
                ngx_str_set(&v->port, "443");
            }

        } else {
            v->host_header.len = u->host.len + 1 + u->port_text.len;
            v->host_header.data = u->host.data;
            v->port = u->port_text;
        }

        v->key_start.len += v->host_header.len;

    } else {
        ngx_str_set(&v->host_header, "localhost");
        ngx_str_null(&v->port);
        v->key_start.len += sizeof("unix:") - 1 + u->host.len + 1;
    }

    v->uri = u->uri;
}

到这里,其实已经很清楚了:

  1. 旧版本中,location中没有proxy_set_header这个指令,合并配置时headers_source会继承server中设置的Host、Connecion头,从而不会被default_headers给覆盖。所以,回源Host正确。
  2. 新版本中,location中新增了Connecion头的配置,但是没有Host头,headers_source不会继承server中的配置,从而导致Host头被默认头覆盖。

你可能感兴趣的:(nginx)