flume http source windows 中文乱码

Flume 的 HTTPSource 默认使用 UTF-8 编码。Flume 服务部署在 Linux 机器上时没有问题,但部署在 Windows 机器上时,会出现中文乱码。下面从源码检查乱码出现在哪里:

1.在刚接收到请求的时候,打印requestBody里的内容是正常的,数据正常。

2.数据存到event里之后,打印event里的内容,出现乱码。

源码里有个 JSONHandler 类负责处理 request 请求,并把 request 的内容放到 event 里。代码里统一使用“UTF-8”进行编码,而 Windows 机器默认的编码是“GBK”,猜测问题出在这里。将编码统一换成“GBK”之后,中文不再乱码。源码改动了下面两个地方:一是给 event 设置字符集的地方(setCharset),二是生成 event 的地方(getSimpleEvents)。

 

package org.apache.flume.source.http;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.event.JSONEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.lang.reflect.Type;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;

public class JSONHandlerTest implements HTTPSourceHandler {

  private static final Logger LOG = LoggerFactory.getLogger(JSONHandlerTest.class);
  private final Type listType = new TypeToken>() {}.getType();
  private final Gson gson;

  public JSONHandlerTest() {
    gson = new GsonBuilder().disableHtmlEscaping().create();
  }

  /**
   * {@inheritDoc}
   */
  @Override
  public List getEvents(HttpServletRequest request) throws Exception {
    BufferedReader reader = request.getReader();
    String charset = request.getCharacterEncoding();
    //UTF-8 is default for JSON. If no charset is specified, UTF-8 is to
    //be assumed.
    if (charset == null) {
      LOG.debug("Charset is null, default charset of UTF-8 will be used.");
      charset = "UTF-8";
    } else if (!(charset.equalsIgnoreCase("utf-8")
            || charset.equalsIgnoreCase("utf-16")
            || charset.equalsIgnoreCase("utf-32"))) {
      LOG.error("Unsupported character set in request {}. "
              + "JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.", charset);
      throw new UnsupportedCharsetException("JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.");
    }

    /*
     * Gson throws Exception if the data is not parseable to JSON.
     * Need not catch it since the source will catch it and return error.
     */
    List eventList = new ArrayList(0);
    try {
      eventList = gson.fromJson(reader, listType);
    } catch (JsonSyntaxException ex) {
      throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);
    }

    for (Event e : eventList) {
      ((JSONEvent) e).setCharset("GBK");
    }
    return getSimpleEvents(eventList);
  }

  @Override
  public void configure(Context context) {
  }

  private List getSimpleEvents(List events) {
    Charset ch=Charset.forName("GBK");
    List newEvents = new ArrayList(events.size());
    for (Event e:events) {
//生成event的时候,用“GBK”编码。
      newEvents.add(EventBuilder.withBody(new String (e.getBody()),ch,e.getHeaders()));
    }
    return newEvents;
  }
}

个人观点,欢迎指正。

你可能感兴趣的:(flume)