chiweitree

【Flume】flume ng中HDFS sink设置按天滚动，0点滚动文件，修改源码实现

HDFS sink里有个属性hdfs.rollInterval，如果把它设置为86400（即24小时滚动一次），它的确要等到文件打开满24小时才滚动。但是我们的需求往往是到了0点就滚动文件，因为离线的job一般都放在夜里执行。

如果flume是早上9点启动的，那么要到明天早上9点，hdfs的文件才会关闭，难道job要等到9点后才执行，这显然不合适，所以通过修改源码使其能够在0点滚动文件。

首先添加一个属性hdfs.timeroller.flag，可配置为day、hour、min，分别表示按天、按小时、按分钟滚动

只需修改一个文件：具体修改内容，请自行比较

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flume.sink.hdfs;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

import com.google.common.annotations.VisibleForTesting;

import org.apache.flume.Channel;
import org.apache.flume.Clock;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.SystemClock;
import org.apache.flume.Transaction;
import org.apache.flume.conf.Configurable;
import org.apache.flume.formatter.output.BucketPath;
import org.apache.flume.instrumentation.SinkCounter;
import org.apache.flume.sink.AbstractSink;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * Flume sink that writes events to HDFS through cached {@link BucketWriter}s.
 *
 * In addition to the roll settings passed to each bucket writer
 * (hdfs.rollInterval, hdfs.rollSize, hdfs.rollCount), this variant closes all
 * open files when the wall clock crosses a day, hour or minute boundary,
 * selected with the <code>hdfs.timeroller.flag</code> property.
 */
public class HDFSEventSink extends AbstractSink implements Configurable {
	/**
	 * Callback handed to every {@link BucketWriter}. The implementation
	 * created in {@link #process()} removes the writer registered under the
	 * given path from the sink's open-writer cache.
	 */
	public interface WriterCallback {
		public void run(String filePath);
	}

	private static final Logger LOG = LoggerFactory.getLogger(HDFSEventSink.class);

	private static final String DIRECTORY_DELIMITER = System.getProperty("file.separator");

	private static final long defaultRollInterval = 30;
	private static final long defaultRollSize = 1024;
	private static final long defaultRollCount = 10;
	private static final String defaultFileName = "FlumeData";
	private static final String defaultSuffix = "";
	private static final String defaultInUsePrefix = "";
	private static final String defaultInUseSuffix = ".tmp";
	private static final long defaultBatchSize = 100;
	private static final String defaultFileType = HDFSWriterFactory.SequenceFileType;
	private static final int defaultMaxOpenFiles = 5000;
	// Time between close retries, in seconds
	private static final long defaultRetryInterval = 180;
	// Retry forever.
	private static final int defaultTryCount = Integer.MAX_VALUE;

	/**
	 * Default length of time we wait for blocking BucketWriter calls before
	 * timing out the operation. Intended to prevent server hangs.
	 */
	private static final long defaultCallTimeout = 10000;
	/**
	 * Default number of threads available for tasks such as
	 * append/open/close/flush with hdfs. These tasks are done in a separate
	 * thread in the case that they take too long. In which case we create a new
	 * file and move on.
	 */
	private static final int defaultThreadPoolSize = 10;
	private static final int defaultRollTimerPoolSize = 1;

	/**
	 * Singleton credential manager that manages static credentials for the
	 * entire JVM
	 */
	private static final AtomicReference<KerberosUser> staticLogin =
			new AtomicReference<KerberosUser>();

	private final HDFSWriterFactory writerFactory;
	// Open writers keyed by lookup path; guarded by sfWritersLock.
	private WriterLinkedHashMap sfWriters;

	private long rollInterval;
	private long rollSize;
	private long rollCount;
	private long batchSize;
	private int threadsPoolSize;
	private int rollTimerPoolSize;
	private CompressionCodec codeC;
	private CompressionType compType;
	private String fileType;
	private String filePath;
	private String fileName;
	private String suffix;
	private String inUsePrefix;
	private String inUseSuffix;
	private TimeZone timeZone;
	private int maxOpenFiles;
	private ExecutorService callTimeoutPool;
	private ScheduledExecutorService timedRollerPool;

	private String kerbConfPrincipal;
	private String kerbKeytab;
	private String proxyUserName;
	private UserGroupInformation proxyTicket;

	private boolean needRounding = false;
	private int roundUnit = Calendar.SECOND;
	private int roundValue = 1;
	private boolean useLocalTime = false;

	private long callTimeout;
	private Context context;
	private SinkCounter sinkCounter;

	private volatile int idleTimeout;
	private Clock clock;
	private FileSystem mockFs;
	private HDFSWriter mockWriter;
	private final Object sfWritersLock = new Object();
	private long retryInterval;
	private int tryCount;

	// Which wall-clock boundary (day / hour / min) forces a roll.
	private String timeRollerFlag;

	/**
	 * Time of the previous boundary check made by {@link #process()}. Kept
	 * per sink instance so that several HDFS sinks in one agent do not
	 * interfere with each other's roll decision.
	 */
	private long lastRollCheckMillis = System.currentTimeMillis();

	/*
	 * Extended Java LinkedHashMap for open file handle LRU queue. We want to
	 * clear the oldest file handle if there are too many open ones.
	 */
	private static class WriterLinkedHashMap extends LinkedHashMap<String, BucketWriter> {

		private final int maxOpenFiles;

		public WriterLinkedHashMap(int maxOpenFiles) {
			super(16, 0.75f, true); // stock initial capacity/load, access
									// ordering
			this.maxOpenFiles = maxOpenFiles;
		}

		@Override
		protected boolean removeEldestEntry(Entry<String, BucketWriter> eldest) {
			if (size() > maxOpenFiles) {
				// If we have more that max open files, then close the last one
				// and return true
				try {
					eldest.getValue().close();
				} catch (IOException e) {
					LOG.warn(eldest.getKey().toString(), e);
				} catch (InterruptedException e) {
					LOG.warn(eldest.getKey().toString(), e);
					Thread.currentThread().interrupt();
				}
				return true;
			} else {
				return false;
			}
		}
	}

	public HDFSEventSink() {
		this(new HDFSWriterFactory());
	}

	public HDFSEventSink(HDFSWriterFactory writerFactory) {
		this.writerFactory = writerFactory;
	}

	@VisibleForTesting
	Map<String, BucketWriter> getSfWriters() {
		return sfWriters;
	}

	/**
	 * Reads the sink configuration and sets up thresholds.
	 *
	 * @param context
	 *            sink configuration; <code>hdfs.path</code> is mandatory
	 * @throws NullPointerException
	 *             if <code>hdfs.path</code> is missing, or the file type
	 *             requires a codec and none is set
	 * @throws IllegalArgumentException
	 *             if the batch size, codec, file type or rounding value is
	 *             invalid
	 */
	@Override
	public void configure(Context context) {
		this.context = context;

		filePath = Preconditions.checkNotNull(context.getString("hdfs.path"),
				"hdfs.path is required");
		fileName = context.getString("hdfs.filePrefix", defaultFileName);
		this.suffix = context.getString("hdfs.fileSuffix", defaultSuffix);
		inUsePrefix = context.getString("hdfs.inUsePrefix", defaultInUsePrefix);
		inUseSuffix = context.getString("hdfs.inUseSuffix", defaultInUseSuffix);
		String tzName = context.getString("hdfs.timeZone");
		timeZone = tzName == null ? null : TimeZone.getTimeZone(tzName);
		rollInterval = context.getLong("hdfs.rollInterval", defaultRollInterval);
		rollSize = context.getLong("hdfs.rollSize", defaultRollSize);
		rollCount = context.getLong("hdfs.rollCount", defaultRollCount);
		batchSize = context.getLong("hdfs.batchSize", defaultBatchSize);
		idleTimeout = context.getInteger("hdfs.idleTimeout", 0);
		String codecName = context.getString("hdfs.codeC");
		fileType = context.getString("hdfs.fileType", defaultFileType);
		maxOpenFiles = context.getInteger("hdfs.maxOpenFiles", defaultMaxOpenFiles);
		callTimeout = context.getLong("hdfs.callTimeout", defaultCallTimeout);
		threadsPoolSize = context.getInteger("hdfs.threadsPoolSize", defaultThreadPoolSize);
		rollTimerPoolSize = context.getInteger("hdfs.rollTimerPoolSize", defaultRollTimerPoolSize);
		kerbConfPrincipal = context.getString("hdfs.kerberosPrincipal", "");
		kerbKeytab = context.getString("hdfs.kerberosKeytab", "");
		proxyUserName = context.getString("hdfs.proxyUser", "");
		tryCount = context.getInteger("hdfs.closeTries", defaultTryCount);
		timeRollerFlag = context.getString("hdfs.timeroller.flag",
				Constants.defaultTimeRollerFlagDay);
		// An unrecognised flag never matches in crossedRollBoundary(), so
		// tell the operator instead of silently never rolling on time.
		if (!timeRollerFlag.equals(Constants.defaultTimeRollerFlagDay)
				&& !timeRollerFlag.equals(Constants.timeRollerFlagHour)
				&& !timeRollerFlag.equals(Constants.timeRollerFlagMin)) {
			LOG.warn("hdfs.timeroller.flag value : " + timeRollerFlag + " is not "
					+ "recognised. Files will not be rolled on a time boundary.");
		}
		if (tryCount <= 0) {
			LOG.warn("Retry count value : " + tryCount + " is not "
					+ "valid. The sink will try to close the file until the file "
					+ "is eventually closed.");
			tryCount = defaultTryCount;
		}
		retryInterval = context.getLong("hdfs.retryInterval", defaultRetryInterval);
		if (retryInterval <= 0) {
			LOG.warn("Retry Interval value: " + retryInterval + " is not "
					+ "valid. If the first close of a file fails, "
					+ "it may remain open and will not be renamed.");
			tryCount = 1;
		}

		Preconditions.checkArgument(batchSize > 0, "batchSize must be greater than 0");
		if (codecName == null) {
			codeC = null;
			compType = CompressionType.NONE;
		} else {
			codeC = getCodec(codecName);
			// TODO : set proper compression type
			compType = CompressionType.BLOCK;
		}

		// Do not allow user to set fileType DataStream with codeC together
		// To prevent output file with compress extension (like .snappy)
		if (fileType.equalsIgnoreCase(HDFSWriterFactory.DataStreamType) && codecName != null) {
			throw new IllegalArgumentException("fileType: " + fileType
					+ " which does NOT support compressed output. Please don't set codeC"
					+ " or change the fileType if compressed output is desired.");
		}

		if (fileType.equalsIgnoreCase(HDFSWriterFactory.CompStreamType)) {
			Preconditions.checkNotNull(codeC, "It's essential to set compress codec"
					+ " when fileType is: " + fileType);
		}

		if (!authenticate()) {
			LOG.error("Failed to authenticate!");
		}
		needRounding = context.getBoolean("hdfs.round", false);

		if (needRounding) {
			String unit = context.getString("hdfs.roundUnit", "second");
			if (unit.equalsIgnoreCase("hour")) {
				this.roundUnit = Calendar.HOUR_OF_DAY;
			} else if (unit.equalsIgnoreCase("minute")) {
				this.roundUnit = Calendar.MINUTE;
			} else if (unit.equalsIgnoreCase("second")) {
				this.roundUnit = Calendar.SECOND;
			} else {
				LOG.warn("Rounding unit is not valid, please set one of "
						+ "minute, hour, or second. Rounding will be disabled");
				needRounding = false;
			}
			this.roundValue = context.getInteger("hdfs.roundValue", 1);
			if (roundUnit == Calendar.SECOND || roundUnit == Calendar.MINUTE) {
				Preconditions.checkArgument(roundValue > 0 && roundValue <= 60, "Round value "
						+ "must be > 0 and <= 60");
			} else if (roundUnit == Calendar.HOUR_OF_DAY) {
				Preconditions.checkArgument(roundValue > 0 && roundValue <= 24, "Round value "
						+ "must be > 0 and <= 24");
			}
		}

		this.useLocalTime = context.getBoolean("hdfs.useLocalTimeStamp", false);
		if (useLocalTime) {
			clock = new SystemClock();
		}

		if (sinkCounter == null) {
			sinkCounter = new SinkCounter(getName());
		}
	}

	/**
	 * Tells whether a codec class is selected by the configured name. A
	 * match is the fully-qualified class name, the simple name (ignoring
	 * case), or the simple name without its "Codec" suffix (ignoring case).
	 */
	private static boolean codecMatches(Class<? extends CompressionCodec> cls, String codecName) {
		String simpleName = cls.getSimpleName();
		if (cls.getName().equals(codecName) || simpleName.equalsIgnoreCase(codecName)) {
			return true;
		}
		if (simpleName.endsWith("Codec")) {
			String prefix = simpleName.substring(0, simpleName.length() - "Codec".length());
			if (prefix.equalsIgnoreCase(codecName)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Instantiates the compression codec matching the given name.
	 *
	 * @param codecName
	 *            codec name as accepted by {@link #codecMatches}, or "None"
	 * @return the codec, or null when the name is "None" (ignoring case)
	 * @throws IllegalArgumentException
	 *             if no codec could be created for any other name
	 */
	@VisibleForTesting
	static CompressionCodec getCodec(String codecName) {
		Configuration conf = new Configuration();
		List<Class<? extends CompressionCodec>> codecs = CompressionCodecFactory
				.getCodecClasses(conf);
		// Wish we could base this on DefaultCodec but appears not all codec's
		// extend DefaultCodec(Lzo)
		CompressionCodec codec = null;
		// Names offered to the user in the error message below.
		ArrayList<String> codecStrs = new ArrayList<String>();
		codecStrs.add("None");
		for (Class<? extends CompressionCodec> cls : codecs) {
			codecStrs.add(cls.getSimpleName());
			if (codecMatches(cls, codecName)) {
				try {
					codec = cls.newInstance();
				} catch (InstantiationException e) {
					LOG.error("Unable to instantiate " + cls + " class");
				} catch (IllegalAccessException e) {
					LOG.error("Unable to access " + cls + " class");
				}
			}
		}

		if (codec == null) {
			if (!codecName.equalsIgnoreCase("None")) {
				throw new IllegalArgumentException("Unsupported compression codec " + codecName
						+ ".  Please choose from: " + codecStrs);
			}
		} else if (codec instanceof org.apache.hadoop.conf.Configurable) {
			// Must check instanceof codec as BZip2Codec doesn't inherit
			// Configurable
			// Must set the configuration for Configurable objects that may or
			// do use native libs
			((org.apache.hadoop.conf.Configurable) codec).setConf(conf);
		}
		return codec;
	}

	/**
	 * Returns a copy of the open-writer cache taken under the lock, so that
	 * callers can iterate and close writers without holding the lock and
	 * without racing against removals made by the close callback.
	 */
	private Map<String, BucketWriter> snapshotWriters() {
		synchronized (sfWritersLock) {
			return new LinkedHashMap<String, BucketWriter>(sfWriters);
		}
	}

	/**
	 * Closes every currently open writer and drops each successfully closed
	 * one from the cache. A writer whose close fails stays in the cache.
	 */
	private void closeNowFile() {
		// Iterate a snapshot: removing from sfWriters while iterating its own
		// entry set throws ConcurrentModificationException.
		for (Entry<String, BucketWriter> entry : snapshotWriters().entrySet()) {
			String file = entry.getKey();
			LOG.info("Closing {}", file);

			try {
				entry.getValue().close();
				synchronized (sfWritersLock) {
					sfWriters.remove(file);
				}
			} catch (Exception ex) {
				LOG.warn("Exception while closing " + file + ". " + "Exception follows.",
						ex);
				if (ex instanceof InterruptedException) {
					Thread.currentThread().interrupt();
				}
			}
		}
	}

	/**
	 * Tells whether the configured roll boundary lies between two instants.
	 * The comparison is on calendar fields, so any change of day, hour or
	 * minute counts, not only an advance by exactly one unit.
	 *
	 * NOTE(review): fields are evaluated in the JVM default time zone, not
	 * in hdfs.timeZone - confirm this is the intended boundary.
	 *
	 * @return false for an unrecognised roll flag
	 */
	private boolean crossedRollBoundary(long earlierMillis, long laterMillis) {
		Calendar earlier = Calendar.getInstance();
		earlier.setTimeInMillis(earlierMillis);
		Calendar later = Calendar.getInstance();
		later.setTimeInMillis(laterMillis);

		boolean dayChanged = earlier.get(Calendar.YEAR) != later.get(Calendar.YEAR)
				|| earlier.get(Calendar.DAY_OF_YEAR) != later.get(Calendar.DAY_OF_YEAR);
		if (timeRollerFlag.equals(Constants.defaultTimeRollerFlagDay)) {
			return dayChanged;
		}

		boolean hourChanged = dayChanged
				|| earlier.get(Calendar.HOUR_OF_DAY) != later.get(Calendar.HOUR_OF_DAY);
		if (timeRollerFlag.equals(Constants.timeRollerFlagHour)) {
			return hourChanged;
		}

		if (timeRollerFlag.equals(Constants.timeRollerFlagMin)) {
			return hourChanged || earlier.get(Calendar.MINUTE) != later.get(Calendar.MINUTE);
		}
		return false;
	}

	/**
	 * Closes all open files if the configured boundary was crossed since the
	 * previous call. Does nothing while no file is open.
	 */
	private void rollOnTimeBoundary() {
		long previousCheck = lastRollCheckMillis;
		long now = System.currentTimeMillis();
		// Always advance, so files opened after a boundary are not closed
		// for a boundary that passed before they existed.
		lastRollCheckMillis = now;

		int openWriters;
		synchronized (sfWritersLock) {
			openWriters = sfWriters.size();
		}
		if (openWriters == 0) {
			return;
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("##############HDFS sink process() method");
			LOG.debug("The sfWriters Map size is {},need to judge whether to roll or not",
					openWriters);
			LOG.debug("lastRollCheck = {},nowTime = {}", JodaTimeUtil.parseToString(
					new Date(previousCheck), JodaTimeUtil.FORMAT_FULL_DATE_TIME_WITH_SYMBOL),
					JodaTimeUtil.parseToString(new Date(now),
							JodaTimeUtil.FORMAT_FULL_DATE_TIME_WITH_SYMBOL));
		}

		if (crossedRollBoundary(previousCheck, now)) {
			LOG.info("rollflag = {},rolling", timeRollerFlag);
			closeNowFile();
		}
	}

	/**
	 * Pull events out of channel and send it to HDFS. Take at most batchSize
	 * events per Transaction. Find the corresponding bucket for the event.
	 * Ensure the file is open. Serialize the data and write it to the file on
	 * HDFS. Before taking events, open files are closed if the configured
	 * day/hour/minute boundary has been crossed.
	 *
	 * This method is not thread safe.
	 *
	 * @return BACKOFF when no event was taken or an HDFS IO error occurred,
	 *         READY otherwise
	 * @throws EventDeliveryException
	 *             wrapping any non-IO, non-Error failure; the transaction is
	 *             rolled back first
	 */
	public Status process() throws EventDeliveryException {
		rollOnTimeBoundary();

		Channel channel = getChannel();
		Transaction transaction = channel.getTransaction();
		List<BucketWriter> writers = Lists.newArrayList();
		// Callback to remove the reference to the bucket writer from the
		// sfWriters map so that all buffers used by the HDFS file handles are
		// garbage collected. It keeps no per-event state, so one instance
		// serves the whole batch.
		WriterCallback closeCallback = new WriterCallback() {
			@Override
			public void run(String bucketPath) {
				LOG.info("Writer callback called.");
				synchronized (sfWritersLock) {
					sfWriters.remove(bucketPath);
				}
			}
		};
		transaction.begin();
		try {
			int txnEventCount = 0;
			for (txnEventCount = 0; txnEventCount < batchSize; txnEventCount++) {
				Event event = channel.take();
				if (event == null) {
					break;
				}

				// reconstruct the path name by substituting place holders
				String realPath = BucketPath.escapeString(filePath, event.getHeaders(), timeZone,
						needRounding, roundUnit, roundValue, useLocalTime);
				String realName = BucketPath.escapeString(fileName, event.getHeaders(), timeZone,
						needRounding, roundUnit, roundValue, useLocalTime);

				String lookupPath = realPath + DIRECTORY_DELIMITER + realName;
				BucketWriter bucketWriter;
				HDFSWriter hdfsWriter = null;
				synchronized (sfWritersLock) {
					bucketWriter = sfWriters.get(lookupPath);
					// we haven't seen this file yet, so open it and cache the
					// handle
					if (bucketWriter == null) {
						hdfsWriter = writerFactory.getWriter(fileType);
						bucketWriter = initializeBucketWriter(realPath, realName, lookupPath,
								hdfsWriter, closeCallback);
						sfWriters.put(lookupPath, bucketWriter);
					}
				}

				// track the buckets getting written in this transaction
				if (!writers.contains(bucketWriter)) {
					writers.add(bucketWriter);
				}

				// Write the data to HDFS
				try {
					bucketWriter.append(event);
				} catch (BucketClosedException ex) {
					LOG.info("Bucket was closed while trying to append, "
							+ "reinitializing bucket and writing event.");
					hdfsWriter = writerFactory.getWriter(fileType);
					bucketWriter = initializeBucketWriter(realPath, realName, lookupPath,
							hdfsWriter, closeCallback);
					synchronized (sfWritersLock) {
						sfWriters.put(lookupPath, bucketWriter);
					}
					bucketWriter.append(event);
				}
			}

			if (txnEventCount == 0) {
				sinkCounter.incrementBatchEmptyCount();
			} else if (txnEventCount == batchSize) {
				sinkCounter.incrementBatchCompleteCount();
			} else {
				sinkCounter.incrementBatchUnderflowCount();
			}

			// flush all pending buckets before committing the transaction
			for (BucketWriter bucketWriter : writers) {
				bucketWriter.flush();
			}

			transaction.commit();

			if (txnEventCount < 1) {
				return Status.BACKOFF;
			} else {
				sinkCounter.addToEventDrainSuccessCount(txnEventCount);
				return Status.READY;
			}
		} catch (IOException eIO) {
			transaction.rollback();
			LOG.warn("HDFS IO error", eIO);
			return Status.BACKOFF;
		} catch (Throwable th) {
			transaction.rollback();
			LOG.error("process failed", th);
			if (th instanceof Error) {
				throw (Error) th;
			} else {
				throw new EventDeliveryException(th);
			}
		} finally {
			transaction.close();
		}
	}

	/**
	 * Builds a bucket writer from the sink's current settings and, when a
	 * mock file system has been injected, wires the mocks into it.
	 */
	private BucketWriter initializeBucketWriter(String realPath, String realName,
			String lookupPath, HDFSWriter hdfsWriter, WriterCallback closeCallback) {
		BucketWriter bucketWriter = new BucketWriter(rollInterval, rollSize, rollCount, batchSize,
				context, realPath, realName, inUsePrefix, inUseSuffix, suffix, codeC, compType,
				hdfsWriter, timedRollerPool, proxyTicket, sinkCounter, idleTimeout, closeCallback,
				lookupPath, callTimeout, callTimeoutPool, retryInterval, tryCount);
		if (mockFs != null) {
			bucketWriter.setFileSystem(mockFs);
			bucketWriter.setMockStream(mockWriter);
		}
		return bucketWriter;
	}

	/**
	 * Closes every open writer, shuts both thread pools down and waits for
	 * them to terminate, then releases the writer cache and the counter.
	 */
	@Override
	public void stop() {
		// do not constrain close() calls with a timeout
		// Iterate a snapshot so a close callback removing an entry cannot
		// invalidate the iteration.
		for (Entry<String, BucketWriter> entry : snapshotWriters().entrySet()) {
			LOG.info("Closing {}", entry.getKey());

			try {
				entry.getValue().close();
			} catch (Exception ex) {
				LOG.warn("Exception while closing " + entry.getKey() + ". " + "Exception follows.",
						ex);
				if (ex instanceof InterruptedException) {
					Thread.currentThread().interrupt();
				}
			}
		}

		// shut down all our thread pools
		ExecutorService toShutdown[] = { callTimeoutPool, timedRollerPool };
		for (ExecutorService execService : toShutdown) {
			execService.shutdown();
			try {
				while (execService.isTerminated() == false) {
					execService.awaitTermination(Math.max(defaultCallTimeout, callTimeout),
							TimeUnit.MILLISECONDS);
				}
			} catch (InterruptedException ex) {
				LOG.warn("shutdown interrupted on " + execService, ex);
			}
		}

		callTimeoutPool = null;
		timedRollerPool = null;

		sfWriters.clear();
		sfWriters = null;
		sinkCounter.stop();
		super.stop();
	}

	/**
	 * Creates the call-timeout and roll-timer thread pools and the writer
	 * cache, and restarts the boundary check from the current time.
	 */
	@Override
	public void start() {
		String timeoutName = "hdfs-" + getName() + "-call-runner-%d";
		callTimeoutPool = Executors.newFixedThreadPool(threadsPoolSize, new ThreadFactoryBuilder()
				.setNameFormat(timeoutName).build());

		String rollerName = "hdfs-" + getName() + "-roll-timer-%d";
		timedRollerPool = Executors.newScheduledThreadPool(rollTimerPoolSize,
				new ThreadFactoryBuilder().setNameFormat(rollerName).build());

		this.sfWriters = new WriterLinkedHashMap(maxOpenFiles);
		lastRollCheckMillis = System.currentTimeMillis();
		sinkCounter.start();
		super.start();
	}

	/**
	 * Performs the Kerberos login (when Hadoop security is enabled) and
	 * sets up the proxy user (when one is configured).
	 *
	 * @return false if a required setting is missing or a login step fails,
	 *         true otherwise
	 * @throws IllegalArgumentException
	 *             if the configured keytab is not a readable file
	 * @throws IllegalStateException
	 *             if a different principal or keytab was already used in
	 *             this JVM
	 */
	private boolean authenticate() {

		// logic for kerberos login
		boolean useSecurity = UserGroupInformation.isSecurityEnabled();

		LOG.info("Hadoop Security enabled: " + useSecurity);

		if (useSecurity) {

			// sanity checking
			if (kerbConfPrincipal.isEmpty()) {
				LOG.error("Hadoop running in secure mode, but Flume config doesn't "
						+ "specify a principal to use for Kerberos auth.");
				return false;
			}
			if (kerbKeytab.isEmpty()) {
				LOG.error("Hadoop running in secure mode, but Flume config doesn't "
						+ "specify a keytab to use for Kerberos auth.");
				return false;
			} else {
				// If keytab is specified, user should want it take effect.
				// HDFSEventSink will halt when keytab file is non-exist or
				// unreadable
				File kfile = new File(kerbKeytab);
				if (!(kfile.isFile() && kfile.canRead())) {
					throw new IllegalArgumentException("The keyTab file: " + kerbKeytab
							+ " is nonexistent or can't read. "
							+ "Please specify a readable keytab file for Kerberos auth.");
				}
			}

			String principal;
			try {
				// resolves _HOST pattern using standard Hadoop search/replace
				// via DNS lookup when 2nd argument is empty
				principal = SecurityUtil.getServerPrincipal(kerbConfPrincipal, "");
			} catch (IOException e) {
				LOG.error("Host lookup error resolving kerberos principal (" + kerbConfPrincipal
						+ "). Exception follows.", e);
				return false;
			}

			Preconditions.checkNotNull(principal, "Principal must not be null");
			KerberosUser prevUser = staticLogin.get();
			KerberosUser newUser = new KerberosUser(principal, kerbKeytab);

			// be cruel and unusual when user tries to login as multiple
			// principals
			// this isn't really valid with a reconfigure but this should be
			// rare enough to warrant a restart of the agent JVM
			// TODO: find a way to interrogate the entire current config state,
			// since we don't have to be unnecessarily protective if they switch
			// all HDFS sinks to use a different principal all at once.
			Preconditions.checkState(prevUser == null || prevUser.equals(newUser),
					"Cannot use multiple kerberos principals in the same agent. "
							+ " Must restart agent to use new principal or keytab. "
							+ "Previous = %s, New = %s", prevUser, newUser);

			// attempt to use cached credential if the user is the same
			// this is polite and should avoid flooding the KDC with auth
			// requests
			UserGroupInformation curUser = null;
			if (prevUser != null && prevUser.equals(newUser)) {
				try {
					curUser = UserGroupInformation.getLoginUser();
				} catch (IOException e) {
					LOG.warn("User unexpectedly had no active login. Continuing with "
							+ "authentication", e);
				}
			}

			if (curUser == null || !curUser.getUserName().equals(principal)) {
				try {
					// static login
					kerberosLogin(this, principal, kerbKeytab);
				} catch (IOException e) {
					LOG.error("Authentication or file read error while attempting to "
							+ "login as kerberos principal (" + principal + ") using " + "keytab ("
							+ kerbKeytab + "). Exception follows.", e);
					return false;
				}
			} else {
				LOG.debug("{}: Using existing principal login: {}", this, curUser);
			}

			// we supposedly got through this unscathed... so store the static
			// user
			staticLogin.set(newUser);
		}

		// hadoop impersonation works with or without kerberos security
		proxyTicket = null;
		if (!proxyUserName.isEmpty()) {
			try {
				proxyTicket = UserGroupInformation.createProxyUser(proxyUserName,
						UserGroupInformation.getLoginUser());
			} catch (IOException e) {
				LOG.error("Unable to login as proxy user. Exception follows.", e);
				return false;
			}
		}

		UserGroupInformation ugi = null;
		if (proxyTicket != null) {
			ugi = proxyTicket;
		} else if (useSecurity) {
			try {
				ugi = UserGroupInformation.getLoginUser();
			} catch (IOException e) {
				LOG.error("Unexpected error: Unable to get authenticated user after "
						+ "apparent successful login! Exception follows.", e);
				return false;
			}
		}

		if (ugi != null) {
			// dump login information
			AuthenticationMethod authMethod = ugi.getAuthenticationMethod();
			LOG.info("Auth method: {}", authMethod);
			LOG.info(" User name: {}", ugi.getUserName());
			LOG.info(" Using keytab: {}", ugi.isFromKeytab());
			if (authMethod == AuthenticationMethod.PROXY) {
				UserGroupInformation superUser;
				try {
					superUser = UserGroupInformation.getLoginUser();
					LOG.info(" Superuser auth: {}", superUser.getAuthenticationMethod());
					LOG.info(" Superuser name: {}", superUser.getUserName());
					LOG.info(" Superuser using keytab: {}", superUser.isFromKeytab());
				} catch (IOException e) {
					LOG.error("Unexpected error: unknown superuser impersonating proxy.", e);
					return false;
				}
			}

			LOG.info("Logged in as user {}", ugi.getUserName());

			return true;
		}

		return true;
	}

	/**
	 * Static synchronized method for static Kerberos login.
	 *
	 * Static synchronized due to a thundering herd problem when multiple Sinks
	 * attempt to log in using the same principal at the same time with the
	 * intention of impersonating different users (or even the same user). If
	 * this is not controlled, MIT Kerberos v5 believes it is seeing a replay
	 * attack and it returns: Request is a replay (34) - PROCESS_TGS. In
	 * addition, since the underlying Hadoop APIs we are using for
	 * impersonation are static, we define this method as static as well.
	 *
	 * @param sink
	 *            Sink requesting the login; used in log messages only.
	 * @param principal
	 *            Fully-qualified principal to use for authentication.
	 * @param keytab
	 *            Location of keytab file containing credentials for principal.
	 * @return Logged-in user
	 * @throws IOException
	 *             if login fails.
	 */
	private static synchronized UserGroupInformation kerberosLogin(HDFSEventSink sink,
			String principal, String keytab) throws IOException {

		// if we are the 2nd user thru the lock, the login should already be
		// available statically if login was successful
		UserGroupInformation curUser = null;
		try {
			curUser = UserGroupInformation.getLoginUser();
		} catch (IOException e) {
			// not a big deal but this shouldn't typically happen because it
			// will generally fall back to the UNIX user
			LOG.debug("Unable to get login user before Kerberos auth attempt.", e);
		}

		// we already have logged in successfully
		if (curUser != null && curUser.getUserName().equals(principal)) {
			LOG.debug("{}: Using existing principal ({}): {}", new Object[] { sink, principal,
					curUser });

			// no principal found
		} else {

			LOG.info("{}: Attempting kerberos login as principal ({}) from keytab " + "file ({})",
					new Object[] { sink, principal, keytab });

			// attempt static kerberos login
			UserGroupInformation.loginUserFromKeytab(principal, keytab);
			curUser = UserGroupInformation.getLoginUser();
		}

		return curUser;
	}

	@Override
	public String toString() {
		return "{ Sink type:" + getClass().getSimpleName() + ", name:" + getName() + " }";
	}

	@VisibleForTesting
	void setBucketClock(Clock clock) {
		BucketPath.setClock(clock);
	}

	@VisibleForTesting
	void setMockFs(FileSystem mockFs) {
		this.mockFs = mockFs;
	}

	@VisibleForTesting
	void setMockWriter(HDFSWriter writer) {
		this.mockWriter = writer;
	}

	@VisibleForTesting
	int getTryCount() {
		return tryCount;
	}
}

望各位不吝指教！！

你可能感兴趣的:(Flume)

大数据集成方案对比：Kafka vs Flume vs Sqoop AI天才研究院计算 AI大模型应用入门实战与进阶 Agentic AI 实战大数据 kafka flume ai
大数据集成方案对比：KafkavsFlumevsSqoop关键词：大数据集成、Kafka、Flume、Sqoop、流处理、批量迁移、日志收集摘要：在大数据生态中，数据集成是连接数据源与数据处理平台的关键环节。本文深度对比Kafka、Flume、Sqoop三大主流集成工具，从核心架构、技术原理、适用场景到实战案例展开系统性分析。通过数学模型量化性能差异，结合实际项目经验总结选型策略，帮助开发者根据业
Flume到Kafka且均分到多个partition 小学僧来啦 Flume Kafka partition Flume
@Author:Spinach|GHB@Link:http://blog.csdn.net/bocai8058文章目录说明情况解决方法说明情况Flume向kafka发布数据时，发现kafka接收到的数据总是在一个partition中，而我们希望发布来的数据在所有的partition平均分布。应该怎么做呢？解决方法Flume的官方文档是这么说的：KafkaSinkusesthetopicandkey
大数据ETL工具比较：Sqoop vs Flume vs Kafka AI天才研究院 AI人工智能与大数据大数据 etl sqoop ai
大数据ETL工具比较：SqoopvsFlumevsKafka关键词：大数据ETL、Sqoop、Flume、Kafka、数据迁移、日志采集、消息队列摘要：在大数据生态中，ETL（抽取-转换-加载）是数据价值挖掘的关键环节。不同业务场景对数据传输的实时性、可靠性、数据类型有差异化需求，催生了Sqoop、Flume、Kafka等特色鲜明的ETL工具。本文从核心架构、工作原理、性能指标、实战案例四个维度，
在大数据求职面试中如何回答分布式协调与数据挖掘问题
在大数据求职面试中如何回答分布式协调与数据挖掘问题场景：小白的大数据求职面试小白是一名初出茅庐的程序员，今天他来到一家知名互联网公司的面试现场，面试官是经验丰富的老黑。以下是他们之间的对话：第一轮提问：分布式与数据采集老黑：小白，你对Zookeeper有了解吗？小白：当然，Zookeeper是一个分布式协调服务，主要用于分布式应用程序中的同步服务、命名服务和配置管理。老黑：不错，你能说说Flume
手把手教你玩转 Sqoop：从数据库到大数据的「数据搬运工」 AAA建材批发王师傅数据库 sqoop 大数据 hive hdfs
一、Sqoop是什么？——数据界的「超级搬运工」兄弟们，今天咱们聊个大数据圈的「搬运小能手」——Sqoop！可能有人会问：这玩意儿跟Flume啥区别？简单来说：Flume是专门搬日志数据的「快递员」而Sqoop是搬数据库数据的「搬家公司」它的名字咋来的？SQL+Hadoop，直接告诉你核心技能：在关系型数据库（比如MySQL）和Hadoop家族（HDFS、Hive、HBase）之间疯狂倒腾数据！核
Flum的组件和原理。以及配置和基础命令
ApacheFlume架构的原理和组成ApacheFlume是一个高可靠、高性能的服务，用于收集、聚合和移动大量日志数据。它的架构设计灵活且可扩展，能够适应各种不同的数据源和目的地。一、Flume的核心组件及其任务1.Agent定义：Flume的基本运行单元，是一个独立的进程。功能：负责执行数据采集任务，包含Source、Channel和Sink三个主要部分。2.Source（源）定义：数据进入F
Flume入门指南：大数据日志采集的秘密武器 £菜鸟也有梦大数据基础大数据 flume kafka hadoop hive
目录一、Flume是什么？为何如此重要？二、Flume核心概念大揭秘2.1Agent：Flume的核心引擎2.2Source：数据的入口大门2.3Channel：数据的临时港湾2.4Sink：数据的最终归宿2.5Event：数据的最小单元三、Flume工作原理深度剖析3.1数据如何流动3.2可靠性保障机制四、Flume安装与配置实战4.1安装前的准备工作4.2下载与解压4.3配置文件详解4.4启动
Flume进阶之路：从基础到高阶的飞跃 £菜鸟也有梦大数据基础 flume 大数据 hadoop hive
目录一、Flume高阶特性揭秘二、拦截器：数据的精细雕琢师2.1拦截器的概念与作用2.2常见拦截器类型及案例分析2.2.1时间添加戳拦截器2.2.2Host添加拦截器2.2.3正则表达式过滤拦截器三、选择器：数据流向的掌控者3.1选择器的概念与分类3.2不同选择器的工作原理与案例3.2.1复制选择器3.2.2多路复用选择器3.2.3自定义选择器四、Sink组逻辑处理器：数据传输的保障者4.1Sin
记一次·Spark读Hbase
记一次·Spark读Hbase一、背景过年回来，数仓发现hive的一个表丢数据了，需要想办法补数据。这个表是flume消费kafka写hive。但是kafka里只保存最近7天数据，有部分数据kafka里已经没有了。不过这份数据会同时被消费到HBase内存储一份，并且HBase内的数据是正常的。所以这次任务是读HBase数据写Hive表。HBase表内，只有一个列族info，列族内只有一个列valu
Kafka整合Flume 小顽童王 kafka flume
Kafka与flume1）准备jar包1、将Kafka主目录lib下的如下jar拷贝至Flume的lib目录下kafka_2.10-0.8.2.1.jar、kafka-clients-0.8.2.1.jar、jopt-simple-3.2.jar、metrics-core-2.2.0.jar、scala-library-2.10.4.jar、zkclient-0.3.jar等2、将如下jar拷贝至
电商数仓项目(八) Flume(3) 生产者和消费者配置涛2021 数据仓库:Hadoop+Hive flume kafka
目录一、生产数据写到kafka二、消费kafka数据写到hdfs本节讲解Flume生产者和消费者配置。源码下载一、生产数据写到kafka将上节生成的flume-interceptor-1.0.0.jar文件上传到$FLUME_HOME/lib目录下在$FLUME_HOME/conf目录中创建file-flume-kafka.conf文件，文件目录：/u01/gmall/data/in/log-da
运维-ES集群介绍 ww22652098814 运维 elasticsearch
什么是ElasticStackElasticStack早期名称为elk。elk分别代表了3个组件:-ElasticSearch负责数据存储和检索。-Logstash:负责数据的采集，将源数据采集到ElasticSearch进行存储。-Kibana:负责数据的展示。由于Logstash是一个重量级产品，安装包超过300MB+，很多同学只是用于采集日志，于是使用其他采集工具代替，比如flume，flu
《云计算》第三版总结冰菓Neko 书籍云计算
《云计算》第三版总结云计算体系结构云计算成本优势开源云计算架构Hadoop2.0Hadoop体系架构Hadoop访问接口Hadoop编程接口Hadoop大家族分布式组件概述ZooKeeperHbasePigHiveOozieFlumeMahout虚拟化技术服务器虚拟化存储虚拟化网络虚拟化桌面虚拟化OpenStack开源虚拟化平台NovaSwiftGlance云计算核心算法PaxosDHTGossi
数据采集与接入：Kafka、Flume、Flink CDC、Debezium（实时/离线数据获取方式）晴天彩虹雨 kafka flume flink 大数据
数据采集是大数据平台中的关键步骤，它负责将数据从多个数据源传输到数据处理系统。对于大数据处理平台来说，数据的实时与离线获取方式至关重要，能够确保系统的响应性与可扩展性。在本篇文章中，我们将深入探讨四种常见的数据采集与接入技术：Kafka、Flume、FlinkCDC、Debezium，并分析它们的适用场景。1.Kafka-分布式流处理平台概述：Kafka是一个分布式流平台，用于高吞吐量、低延迟的数
Flume启动报错，guava.java包冲突 Lion-ha 大数据
Flume启动时报错如下：(SinkRunner-PollingRunner-DefaultSinkProcessor)[ERROR-org.apache.flume.sink.hdfs.HDFSEventSink.process(HDFSEventSink.java:459)]processfailedjava.lang.NoSuchMethodError:com.google.common.b
Flume(二十一)Memory Channel 薛定谔的猫1982 #flume flume 大数据
MemoryChannel是将收集来的数据临时存储到内存队列中，如果不指定，那么该队列默认大小是100，即最多允许在队列中存储100条数据。如果队列被占满，那么后来的数据就会被阻塞(即Source收集到的数据就无法放入队列中,产生rollback回滚)，直到队列中有位置被空出。实际过程中，这个值一般会调大，一般会调节为10W~30W，如果数据量较大，那么也可以考虑调节为50W。需要注意的是，Mem
【课程笔记】华为 HCIA-Big Data 大数据总结淵_ken 华为 HCIA-Big Data 大数据大数据
目录HDFS分布式文件系统ZooKeeper分布式应用程序协调服务HBase非关系型分布式数据库Hive分布式数据仓库ClickHouse列式数据库管理系统MapReduce分布式计算框架Yarn资源管理调度器Spark分布式计算框架Flink分布式计算框架Flume日志采集工具Kafka分布式消息队列本课程主要围绕以下几个服务展开：HDFS(Hadoop分布式文件系统)ZooKeeper(分布式
Windows PC上创建大数据职业技能竞赛实验环境之三--Spark、Hive、Flume、Kafka和Flink环境的搭建 liu9ang 大数据平台 hadoop spark kafka flink
在前述hadoop-base基础容器环境的基础上，实现Spark、Hive、Flume、kafka和Flink实验环境的搭建。我们已将前述的hadoop-base基础容器进行可阶段的保存：sudodockercommit"hadoopbasev3"hadoop-basecentos/hadoop-base:v3现在，如果已经将前述作业的hadoop-base容器停用并删除，用保存的centos/h
[大数据技术与应用省赛学习记录一]——软件准备 Ench77 大数据技术与应用比赛筹备大数据
@JIAQI第一章大数据平台环境搭建在指定主机上完成Hadoop完全分布式、Spark、Flink、kafka、flume的安装配置赛前准备主办方要求使用以下相关版本软件环境，仅供参考：设备类型软件类别软件名称、版本号竞赛服务器竞赛环境大数据集群操作系统Centos7大数据平台组件ubuntu18.04Hadoop2.7.7Hive2.3.4Spark2.1.1Kafka2.0.0Redis4.
Flume+kafka+SparkStreaming整合逆水行舟如何大数据架构 kafka常用命令 flume进行数据收集的编写实时架构
一、需求模拟一个流式处理场景：我在说话，我编写好的一个sparkstreaming做词频统计1.模拟说话：nc-lk3399flumesource:avro(qyl01:3399)channel:memorysink:kafkasink模拟实时的日志生成：echoaabbcc>>/home/qyl/logs/flume.logflumesource：exec(tail-f)channel:memo
Flume Source原理与代码实例讲解 AI天才研究院计算计算科学神经计算深度学习神经网络大数据人工智能大型语言模型 AI AGI LLM Java Python 架构设计 Agent RPA
FlumeSource原理与代码实例讲解1.背景介绍ApacheFlume是一个分布式、可靠且高可用的海量日志采集、聚合和传输的系统,它是Apache软件基金会的一个顶级项目。在大数据时代,日志数据作为企业的重要资产,如何高效地收集和传输海量日志数据成为了一个迫切需要解决的问题。Flume应运而生,它可以从不同的数据源采集数据,经过聚合后再将数据传输到下一个节点,最终存储到HDFS、HBase或S
SparkStreaming概述淋一遍下雨天 spark 大数据学习
SparkStreaming主要用于流式计算，处理实时数据。DStream是SparkStreaming中的数据抽象模型，表示随着时间推移收到的数据序列。SparkStreaming支持多种数据输入源（如Kafka、Flume、Twitter、TCP套接字等）和数据输出位置（如HDFS、数据库等）。SparkStreaming特点易用性：支持Java、Python、Scala等编程语言，编写实时计
kafka spark java_Kafka与Spark整合 weixin_39630247 kafka spark java
本篇文章帮大家学习Kafka与Spark整合，包含了Kafka与Spark整合使用方法、操作技巧、实例演示和注意事项，有一定的学习价值，大家可以用来参考。在本章中，将讨论如何将apacheKafka与SparkStreamingAPI集成。Spark是什么？SparkStreamingAPI支持实时数据流的可扩展，高吞吐量，容错流处理。数据可以从Kafka，Flume，Twitter等许多来源获取
Spark-Streaming 美味的大香蕉笔记
探索Spark-Streaming：实时数据处理的得力助手在大数据处理领域，实时处理越来越重要。今天就来聊一聊Spark生态中处理流式数据的利器——Spark-Streaming。Spark-Streaming主要用于处理流式数据，像从Kafka、Flume等数据源来的数据，它都能轻松应对。它使用离散化流（DStream）作为核心抽象。简单来说，DStream就是把随时间收到的数据，按照时间区间封
数据中台架构与技术体系 Aurora_NeAr 架构大数据
数据中台整体架构设计数据中台分层架构数据采集层数据源类型：业务系统（ERP、CRM）、日志、IoT设备、第三方API等。采集方式：实时采集：Kafka、FlinkCDC（变更数据捕获）。离线采集：Sqoop、DataX（批量同步数据库）。日志采集：Flume、Filebeat。数据缓冲与预处理：使用消息队列（如Kafka）作为缓冲区，应对数据流量峰值。数据存储层数据湖（DataLake）：存储原始
大数据面试题目_综合面试_hadoop面试题_hive面试题_sqoop面试题_spark面试题_flume面试题_kafka面试题---大数据面试题007 添柴程序猿大数据 hadoop hive 大数据面试题 flume
大数据面试:1.说一下hadoop的集群部署模式有哪几种,完全分布式如何部署以及配置?2.hadoop的守护进程有哪些?2.之前的公司,为什么要离职?3.之前公司的待遇工资多少?4.用Flink处理过什么场景的业务,是如何实现的,说一下流程?5.有没有用过NIFI?6.做的时候后端是如何做的,用的什么框架?有没有了解过springcloudTencent?7.hadoop中的代理用户功能的作用,和
探索Hadoop生态圈：核心组件介绍放。756 hadoop 大数据分布式
Hadoop生态圈包括多个组件，如HDFS提供分布式存储，MapReduce处理大数据计算，YARN管理资源调度，HBase支持非结构化数据存储，Hive实现数据仓库功能，Pig提供高级数据流处理，Sqoop实现数据迁移，Flume处理日志收集等。这些组件共同构建起强大的大数据处理框架。
flume 负载均衡详解 goTsHgo flume 大数据分布式 flume 负载均衡大数据
ApacheFlume是一个分布式、可靠且可用的系统，旨在有效地从多个数据源收集、聚合和移动大量日志数据到集中存储系统（如HDFS、HBase等）。在数据传输过程中，负载均衡是Flume的一个重要功能，它有助于确保多个节点间的负载均匀分布，从而提高系统的稳定性和吞吐量。从Flume的架构角度来看，它的负载均衡涉及多个组件，包括Source、Channel和Sink，下面我们逐层从底层原理和部分源代
数据仓库：如何解决ODS数据零点漂移问题夜希辰数据仓库大数据
本篇文章讲解的是从业务库同步数据至数仓导致的零点漂移，查看flume+kafka同步数据导致的零点漂移参考该文章：业务数据采集_零点漂移处理方法(Flume+Kafka+HDFS)一、数据零点漂移概念1、什么是零点漂移：数据零点漂移指的是数据同步过程中，ODS表按时间字段分区时，同一个业务日期(分区)包含前一天的数据或丢失了当天的数据、或者包含后一天凌晨附近的数据。由于ODS需要承接面向历史的细节
Windows PC上创建大数据职业技能竞赛实验环境之六--Flume、Kafka和Flink编程 liu9ang 大数据平台 hadoop kafka flink redis
1Flume参看日志采集工具Flume的安装与使用方法_厦大数据库实验室博客(xmu.edu.cn)。查看Flume安装root@client1:~#flume-ngversionFlume1.7.0Sourcecoderepository:https://git-wip-us.apache.org/repos/asf/flume.gitRevision:511d868555dd4d16e6ce4
Java 并发包之线程池和原子计数 lijingyao8206 Java计数 ThreadPool 并发包 java线程池
对于大数据量关联的业务处理逻辑，比较直接的想法就是用JDK提供的并发包去解决多线程情况下的业务数据处理。线程池可以提供很好的管理线程的方式，并且可以提高线程利用率，并发包中的原子计数在多线程的情况下可以让我们避免去写一些同步代码。这里就先把jdk并发包中的线程池处理器ThreadPoolExecutor 以原子计数类AtomicInteger 和倒数计时锁C
java编程思想抽象类和接口百合不是茶 java 抽象类接口
接口c++对接口和内部类只有间接的支持,但在java中有对这些类的直接支持 1 ,抽象类 : 如果一个类包含一个或多个抽象方法,该类必须限定为抽象类(否则编译器报错) 抽象方法 : 在方法中仅有声明而没有方法体 package com.wj.Interface;
[房地产与大数据]房地产数据挖掘系统 comsci 数据挖掘
随着一个关键核心技术的突破,我们已经是独立自主的开发某些先进模块,但是要完全实现,还需要一定的时间... 所以,除了代码工作以外,我们还需要关心一下非技术领域的事件..比如说房地产 &nb
数组队列总结沐刃青蛟数组队列
数组队列是一种大小可以改变，类型没有定死的类似数组的工具。不过与数组相比，它更具有灵活性。因为它不但不用担心越界问题，而且因为泛型（类似c++中模板的东西）的存在而支持各种类型。以下是数组队列的功能实现代码： import List.Student; public class
Oracle存储过程无法编译的解决方法 IT独行者 oracle 存储过程　
今天同事修改Oracle存储过程又导致2个过程无法被编译，流程规范上的东西，Dave 这里不多说，看看怎么解决问题。 1. 查看无效对象 XEZF@xezf(qs-xezf-db1)> select object_name,object_type,status from all_objects where status='IN
重装系统之后oracle恢复文强chu oracle
前几天正在使用电脑，没有暂停oracle的各种服务。突然win8.1系统崩溃，无法修复，开机时系统提示正在搜集错误信息，然后再开机，再提示的无限循环中。无奈我拿出系统u盘准备重装系统，没想到竟然无法从u盘引导成功。晚上到外面找了一家修电脑店，让人家给装了个系统，并且那哥们在我没反应过来的时候，直接把我的c盘给格式化了并且清理了注册表，再装系统。然后的结果就是我的oracl
python学习二（一些基础语法）小桔子 pthon 基础语法
紧接着把！昨天没看继续看django 官方教程，学了下python的基本语法与c类语言还是有些小差别： 1.python的源文件以UTF-8编码格式 2. / 除结果浮点型 // 除结果整形 % 除取余数 * 乘 ** 乘方 eg 5**2 结果是5的2次方25 _&
svn 常用命令 aichenglong SVN 版本回退
1 svn回退版本 1)在window中选择log,根据想要回退的内容,选择revert this version或revert changes from this version 两者的区别: revert this version:表示回退到当前版本(该版本后的版本全部作废) revert changes from this versio
某小公司面试归来 alafqq 面试
先填单子，还要写笔试题，我以时间为急，拒绝了它。。时间宝贵。老拿这些对付毕业生的东东来吓唬我。。面试官很刁难，问了几个问题，记录下； 1，包的范围。。。public,private,protect. --悲剧了 2，hashcode方法和equals方法的区别。谁覆盖谁.结果，他说我说反了。 3，最恶心的一道题，抽象类继承抽象类吗？（察，一般它都是被继承的啊） 4，stru
动态数组的存储速度比较集合框架百合不是茶集合框架
集合框架：自定义数据结构(增删改查等) package 数组; /** * 创建动态数组 * @author 百合 * */ public class ArrayDemo{ //定义一个数组来存放数据 String[] src = new String[0]; /** * 增加元素加入容器 * @param s要加入容器
用JS实现一个JS对象，对象里有两个属性一个方法 bijian1013 js对象
<html> <head> </head> <body> 用js代码实现一个js对象，对象里有两个属性，一个方法 </body> <script> var obj={a:'1234567',b:'bbbbbbbbbb',c:function(x){
探索JUnit4扩展：使用Rule bijian1013 java 单元测试 JUnit Rule
在上一篇文章中，讨论了使用Runner扩展JUnit4的方式，即直接修改Test Runner的实现(BlockJUnit4ClassRunner)。但这种方法显然不便于灵活地添加或删除扩展功能。下面将使用JUnit4.7才开始引入的扩展方式——Rule来实现相同的扩展功能。 1. Rule &n
[Gson一]非泛型POJO对象的反序列化 bit1129 POJO
当要将JSON数据串反序列化自身为非泛型的POJO时，使用Gson.fromJson(String, Class)方法。自身为非泛型的POJO的包括两种： 1. POJO对象不包含任何泛型的字段 2. POJO对象包含泛型字段，例如泛型集合或者泛型类 Data类 a.不是泛型类， b.Data中的集合List和Map都是泛型的 c.Data中不包含其它的POJO
【Kafka五】Kafka Producer和Consumer基本使用 bit1129 kafka
0.Kafka服务器的配置一个Broker，一个Topic Topic中只有一个Partition（） 1. Producer： package kafka.examples.producers; import kafka.producer.KeyedMessage; import kafka.javaapi.producer.Producer; impor
lsyncd实时同步搭建指南——取代rsync+inotify ronin47
1. 几大实时同步工具比较 1.1 inotify + rsync 最近一直在寻求生产服务服务器上的同步替代方案，原先使用的是 inotify + rsync，但随着文件数量的增大到100W+，目录下的文件列表就达20M，在网络状况不佳或者限速的情况下，变更的文件可能10来个才几M，却因此要发送的文件列表就达20M，严重降低了带宽的使用效率以及同步效率；更为要紧的是，加入inotify
java-9. 判断整数序列是不是二元查找树的后序遍历结果 bylijinnan java
public class IsBinTreePostTraverse{ static boolean isBSTPostOrder(int[] a){ if(a==null){ return false; } /*1.只有一个结点时，肯定是查找树 *2.只有两个结点时，肯定是查找树。例如{5,6}对应的BST是 6 {6,5}对应的BST是
MySQL的sum函数返回的类型 bylijinnan java spring sql mysql jdbc
今天项目切换数据库时，出错访问数据库的代码大概是这样： String sql = "select sum(number) as sumNumberOfOneDay from tableName"; List<Map> rows = getJdbcTemplate().queryForList(sql); for (Map row : rows
java设计模式之单例模式 chicony java设计模式
在阎宏博士的《JAVA与模式》一书中开头是这样描述单例模式的：　　作为对象的创建模式，单例模式确保某一个类只有一个实例，而且自行实例化并向整个系统提供这个实例。这个类称为单例类。单例模式的结构　　单例模式的特点：单例类只能有一个实例。单例类必须自己创建自己的唯一实例。单例类必须给所有其他对象提供这一实例。　　饿汉式单例类 publ
javascript取当月最后一天 ctrain JavaScript
 <script language=javascript> var current = new Date(); var year = current.getYear(); var month = current.getMonth(); showMonthLastDay(year, mont
linux tune2fs命令详解 daizj linux tune2fs 查看系统文件块信息
一.简介： tune2fs是调整和查看ext2/ext3文件系统的文件系统参数，Windows下面如果出现意外断电死机情况，下次开机一般都会出现系统自检。Linux系统下面也有文件系统自检，而且是可以通过tune2fs命令，自行定义自检周期及方式。二.用法： Usage: tune2fs [-c max_mounts_count] [-e errors_behavior] [-g grou
做有中国特色的程序员 dcj3sjt126com 程序员
从出版业说起网络作品排到靠前的，都不会太难看，一般人不爱看某部作品也是因为不喜欢这个类型，而此人也不会全不喜欢这些网络作品。究其原因，是因为网络作品都是让人先白看的，看的好了才出了头。而纸质作品就不一定了，排行榜靠前的，有好作品，也有垃圾。许多大牛都是写了博客，后来出了书。这些书也都不次，可能有人认为不好，是因为技术书不像小说，小说在读故事，技术书是在学知识或温习知识，有
Android：TextView属性大全 dcj3sjt126com textview
android:autoLink 设置是否当文本为URL链接/email/电话号码/map时，文本显示为可点击的链接。可选值(none/web/email/phone/map/all) android:autoText 如果设置，将自动执行输入值的拼写纠正。此处无效果，在显示输入法并输
tomcat虚拟目录安装及其配置 eksliang tomcat配置说明 tomca部署web应用 tomcat虚拟目录安装
转载请出自出处：http://eksliang.iteye.com/blog/2097184 1.-------------------------------------------tomcat 目录结构 config：存放tomcat的配置文件 temp ：存放tomcat跑起来后存放临时文件用的 work ：当第一次访问应用中的jsp
浅谈：APP有哪些常被黑客利用的安全漏洞 gg163 APP
首先，说到APP的安全漏洞，身为程序猿的大家应该不陌生；如果抛开安卓自身开源的问题的话，其主要产生的原因就是开发过程中疏忽或者代码不严谨引起的。但这些责任也不能怪在程序猿头上，有时会因为BOSS时间催得紧等很多客观原因。由国内移动应用安全检测团队爱内测（ineice.com）的CTO给我们浅谈关于Android 系统的开源设计以及生态环境。 1. 应用反编译漏洞：APK 包非常容易被反编译成可读
C#根据网址生成静态页面 hvt Web .net C#asp.net hovertree
HoverTree开源项目中HoverTreeWeb.HVTPanel的Index.aspx文件是后台管理的首页。包含生成留言板首页，以及显示用户名，退出等功能。根据网址生成页面的方法： bool CreateHtmlFile(string url, string path) { //http://keleyi.com/a/bjae/3d10wfax.htm stri
SVG 教程（一）天梯梦 svg
SVG 简介 SVG 是使用 XML 来描述二维图形和绘图程序的语言。学习之前应具备的基础知识：继续学习之前，你应该对以下内容有基本的了解： HTML XML 基础如果希望首先学习这些内容，请在本站的首页选择相应的教程。什么是SVG？ SVG 指可伸缩矢量图形 (Scalable Vector Graphics) SVG 用来定义用于网络的基于矢量
一个简单的java栈 luyulong java 数据结构栈
public class MyStack { private long[] arr; private int top; public MyStack() { arr = new long[10]; top = -1; } public MyStack(int maxsize) { arr = new long[maxsize]; top
基础数据结构和算法八：Binary search sunwinner Algorithm Binary search
Binary search needs an ordered array so that it can use array indexing to dramatically reduce the number of compares required for each search, using the classic and venerable binary search algori
12个C语言面试题，涉及指针、进程、运算、结构体、函数、内存，看看你能做出几个！刘星宇 c 面试
12个C语言面试题，涉及指针、进程、运算、结构体、函数、内存，看看你能做出几个！ 1.gets()函数问：请找出下面代码里的问题： #include<stdio.h> int main(void) { char buff[10]; memset(buff,0,sizeof(buff));
ITeye 7月技术图书有奖试读获奖名单公布 ITeye管理员活动 ITeye 试读
ITeye携手人民邮电出版社图灵教育共同举办的7月技术图书有奖试读活动已圆满结束，非常感谢广大用户对本次活动的关注与参与。 7月试读活动回顾： http://webmaster.iteye.com/blog/2092746 本次技术图书试读活动的优秀奖获奖名单及相应作品如下（优秀文章有很多，但名额有限，没获奖并不代表不优秀）：《Java性能优化权威指南》