由类图看:
1、Deserializer:定义反序列化接口;
2、Serializer:定义序列化接口;
3、Serialization:定义了一系列和序列化相关并相互依赖对象的接口。
依据这三个接口,分别实现了2个类,分别是支持Writable机制的WritableSerialization和支持Java序列化的JavaSerialization,这样一共是6个实现类。
SerializationFactory:维护一个Serialization的ArrayList。它具有参数为Configuration的构造函数,把配置参数io.serializations中以逗号隔开的各个serialization类都添加进来。
源码:
package org.apache.hadoop.io.serializer;

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Contract for reading objects of type {@code T} from an {@link InputStream}.
 *
 * <p>
 * A deserializer is stateful. It must not buffer the input, because other
 * producers may read from the same input between successive calls to
 * {@link #deserialize(Object)}.
 * </p>
 *
 * @param <T> the type of object this deserializer produces
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public interface Deserializer<T> {

  /**
   * Prepares the deserializer for reading.
   *
   * @param in the stream that subsequent reads will consume
   * @throws IOException if the implementation reports an I/O failure
   */
  void open(InputStream in) throws IOException;

  /**
   * Deserializes the next object from the underlying input stream.
   *
   * <p>
   * When {@code t} is non-null the deserializer <i>may</i> reuse it, setting
   * its internal state to the next object read from the stream. When
   * {@code t} is null a new deserialized object is created.
   * </p>
   *
   * @param t an existing instance that may be reused, or {@code null}
   * @return the deserialized object
   * @throws IOException if the implementation reports an I/O failure
   */
  T deserialize(T t) throws IOException;

  /**
   * Closes the underlying input stream and clears up any resources.
   *
   * @throws IOException if the implementation reports an I/O failure
   */
  void close() throws IOException;
}
源码:
package org.apache.hadoop.io.serializer;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Contract for writing objects of type {@code T} to an {@link OutputStream}.
 *
 * <p>
 * A serializer is stateful. It must not buffer the output, because other
 * producers may write to the same output between successive calls to
 * {@link #serialize(Object)}.
 * </p>
 *
 * @param <T> the type of object this serializer consumes
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public interface Serializer<T> {

  /**
   * Prepares the serializer for writing.
   *
   * @param out the stream that subsequent writes will go to
   * @throws IOException if the implementation reports an I/O failure
   */
  void open(OutputStream out) throws IOException;

  /**
   * Serializes {@code t} to the underlying output stream.
   *
   * @param t the object to write
   * @throws IOException if the implementation reports an I/O failure
   */
  void serialize(T t) throws IOException;

  /**
   * Closes the underlying output stream and clears up any resources.
   *
   * @throws IOException if the implementation reports an I/O failure
   */
  void close() throws IOException;
}
源码:
package org.apache.hadoop.io.serializer;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * Bundles a matching {@link Serializer}/{@link Deserializer} pair.
 *
 * @param <T> the type handled by the serializer and deserializer
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public interface Serialization<T> {

  /**
   * Lets clients test whether this {@link Serialization} supports the
   * given class.
   *
   * @param c the class to test
   * @return {@code true} if this serialization supports {@code c}
   */
  boolean accept(Class<?> c);

  /**
   * @param c the class to obtain a serializer for
   * @return a {@link Serializer} for the given class
   */
  Serializer<T> getSerializer(Class<T> c);

  /**
   * @param c the class to obtain a deserializer for
   * @return a {@link Deserializer} for the given class
   */
  Deserializer<T> getDeserializer(Class<T> c);
}
源码:
package org.apache.hadoop.io.serializer; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.io.serializer.avro.AvroReflectSerialization; import org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization; import org.apache.hadoop.util.ReflectionUtils; /** * <p> * A factory for {@link Serialization}s. * </p> */ @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public class SerializationFactory extends Configured { private static final Log LOG = LogFactory.getLog(SerializationFactory.class.getName()); private List<Serialization<?>> serializations = new ArrayList<Serialization<?>>(); /** * <p> * Serializations are found by reading the <code>io.serializations</code> * property from <code>conf</code>, which is a comma-delimited list of * classnames. * </p> */ public SerializationFactory(Configuration conf) { super(conf); for (String serializerName : conf.getTrimmedStrings( CommonConfigurationKeys.IO_SERIALIZATIONS_KEY, new String[]{WritableSerialization.class.getName(), AvroSpecificSerialization.class.getName(), AvroReflectSerialization.class.getName()})) { add(conf, serializerName); } } @SuppressWarnings("unchecked") private void add(Configuration conf, String serializationName) { try { Class<? extends Serialization> serializionClass = (Class<? 
extends Serialization>) conf.getClassByName(serializationName); serializations.add((Serialization) ReflectionUtils.newInstance(serializionClass, getConf())); } catch (ClassNotFoundException e) { LOG.warn("Serialization class not found: ", e); } } public <T> Serializer<T> getSerializer(Class<T> c) { Serialization<T> serializer = getSerialization(c); if (serializer != null) { return serializer.getSerializer(c); } return null; } public <T> Deserializer<T> getDeserializer(Class<T> c) { Serialization<T> serializer = getSerialization(c); if (serializer != null) { return serializer.getDeserializer(c); } return null; } @SuppressWarnings("unchecked") public <T> Serialization<T> getSerialization(Class<T> c) { for (Serialization serialization : serializations) { if (serialization.accept(c)) { return (Serialization<T>) serialization; } } return null; } }
首先来看其构造函数里的一个全局参数:CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,它的值定义如下:
/**
 * Name of the configuration property that holds the comma-delimited list of
 * serialization class names.
 * See <a href="{@docRoot}/../core-default.html">core-default.xml</a>
 */
public static final String IO_SERIALIZATIONS_KEY = "io.serializations";
<property> <name>io.serializations</name> <value>org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization</value> <description>A list of serialization classes that can be used for obtaining serializers and deserializers.</description> </property>
public <T> Serializer<T> getSerializer(Class<T> c) { Serialization<T> serializer = getSerialization(c); if (serializer != null) { return serializer.getSerializer(c); } return null; } <div> @SuppressWarnings("unchecked") public <T> Serialization<T> getSerialization(Class<T> c) { for (Serialization serialization : serializations) { if (serialization.accept(c))<strong> </strong>{ //注1 return (Serialization<T>) serialization; } } return null; }</div>
/**
 * Reports whether this serialization handles the given class.
 *
 * @param c the class to test
 * @return {@code true} when {@code c} is {@code Writable} or one of its
 *         subtypes
 */
@InterfaceAudience.Private
@Override
public boolean accept(Class<?> c) {
  final boolean isWritableType = Writable.class.isAssignableFrom(c);
  return isWritableType;
}
@InterfaceAudience.Private @Override public boolean accept(Class<?> c) { return SpecificRecord.class.isAssignableFrom(c); //注2 }
注2:
public boolean isAssignableFrom(Class<?> cls)
判定此 Class 对象所表示的类或接口与指定的 Class 参数所表示的类或接口是否相同,或是否是其超类或超接口。如果是则返回 true;否则返回 false。如果该 Class 表示一个基本类型,且指定的 Class 参数正是该 Class 对象,则该方法返回 true;否则返回 false。
特别地,通过身份转换或扩展引用转换,此方法能测试指定 Class 参数所表示的类型能否转换为此 Class 对象所表示的类型。有关详细信息,请参阅 Java Language Specification 的第 5.1.1 和 5.1.4 节。
参数:cls - 要检查的 Class 对象
返回:表明 cls 类型的对象能否赋予此类对象的 boolean 值