流程
- 解析配置,包括配置文件和命令行参数
- 检查外部环境和内部环境,例如,JVM版本、操作系统内核参数等
- 初始化内部资源,创建内部模块,初始化探测器
- 启动各个子模块和keepalive线程
源码阅读
Elasticsearch.java
//启动elasticsearch
class Elasticsearch extends EnvironmentAwareCommand
//一个cli命令,需要org.elasticsearch.env.Environment来使用当前路径和设置
public abstract class EnvironmentAwareCommand extends Command
//要在cli中执行的操作
public abstract class Command implements Closeable
Elasticsearch.main()
// Entry point: prepares JVM-wide state, runs the Elasticsearch command, and exits with its status on failure.
public static void main(final String[] args) throws Exception {
// Override the DNS cache policy properties.
// Per the original note: the values are converted to integers and back to strings so that they parse correctly.
overrideDnsCachePolicyProperties();
// Install a security manager so that internal policies that depend on whether one is present
// (e.g. the DNS cache policy) take effect.
// checkPermission is overridden as a no-op, so every permission is granted for now;
// per the original note, the security manager is set to the desired permissions later.
System.setSecurityManager(new SecurityManager() {
@Override
public void checkPermission(Permission perm){}
});
// Register a listener on the StatusLogger for error events.
LogConfigurator.registerErrorListener();
final Elasticsearch elasticsearch = new Elasticsearch();
// Continue in Command.main().
int status = main(args, elasticsearch, Terminal.DEFAULT);
// On failure, suggest that the user check the log file (only printed when es.logs.base_path is set).
if (status != ExitCodes.OK) {
final String basePath = System.getProperty("es.logs.base_path");
if (basePath != null) {
Terminal.DEFAULT.errorPrintln(
"ERROR: Elasticsearch did not exit normally - check the logs at "
+ basePath
+ System.getProperty("file.separator")
+ System.getProperty("es.logs.cluster_name") + ".log"
);
}
exit(status);
}
}
SecurityManager
安全管理器是一个允许应用程序实现安全策略的类
它允许应用程序在执行一个可能不安全或敏感的操作前确定该操作是什么,以及是否是在允许执行该操作的安全上下文中执行它。应用程序可以允许或不允许该操作。
类包含了很多名称以单词 check 开头的方法,Java 库中的各种方法在执行某些潜在的敏感操作前可以调用这些方法
安全管理器通过抛出异常来提供阻止操作完成的机会。如果允许执行该操作,则安全管理器例程只是简单地返回。但如果不允许执行该操作,则抛出一个 SecurityException。该约定的唯一例外是 checkTopLevelWindow,它返回 boolean 值。
特殊方法 checkPermission(java.security.Permission)确定是应该允许还是拒绝由指定权限所指示的访问请求
从 Java 2 SDK v1.2 开始,SecurityManager 中其他所有 check 方法的默认实现都是调用SecurityManager checkPermission 方法来确定调用线程是否具有执行所请求操作的权限。
StatusLogger
记录日志系统中发生的事件。默认情况下,只有错误消息被记录到System.err。
Command.main()
// Parses this command's options from args and executes it.
// Returns ExitCodes.OK on success, ExitCodes.USAGE on an OptionException,
// or the exit code carried by a UserException; other exceptions propagate.
public final int main(String[] args, Terminal terminal) throws Exception {
// Decide whether to register a shutdown hook that cleans up resources on exit.
// Per the original note, addShutdownHook() returns true here.
if (addShutdownHook()) {
shutdownHookThread = new Thread(() -> {
try {
this.close();
} catch (final IOException e) {
// close() failed: render the stack trace into a string and print it on the terminal's error output.
try (
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw)) {
e.printStackTrace(pw);
terminal.errorPrintln(sw.toString());
} catch (final IOException impossible) {
throw new AssertionError(impossible);
}
}
});
// Runtime.getRuntime().addShutdownHook registers a thread that is run before the JVM shuts down.
Runtime.getRuntime().addShutdownHook(shutdownHookThread);
}
beforeMain.run();
try {
// Parse the arguments, set the verbosity and execute the command; every exception is thrown to this caller.
mainWithoutErrorHandling(args, terminal);
} catch (OptionException e) {
// Invalid options: print the help text and the error, then report a usage failure.
printHelp(terminal, true);
terminal.errorPrintln(Terminal.Verbosity.SILENT, "ERROR: " + e.getMessage());
return ExitCodes.USAGE;
} catch (UserException e) {
// Help is only printed when the user error is itself a usage error.
if (e.exitCode == ExitCodes.USAGE) {
printHelp(terminal, true);
}
if (e.getMessage() != null) {
terminal.errorPrintln(Terminal.Verbosity.SILENT, "ERROR: " + e.getMessage());
}
return e.exitCode;
}
return ExitCodes.OK;
}
Command.mainWithoutErrorHandling()
// Parses the arguments, handles the help option, sets the terminal verbosity and executes the command.
// No error handling happens here: every exception is thrown to the caller.
void mainWithoutErrorHandling(String[] args, Terminal terminal) throws Exception {
final OptionSet options = parser.parse(args);
// Help requested: print it and do not execute the command.
if (options.has(helpOption)) {
printHelp(terminal, false);
return;
}
// The silent option takes precedence over the verbose option; NORMAL is used when neither is given.
if (options.has(silentOption)) {
terminal.setVerbosity(Terminal.Verbosity.SILENT);
} else if (options.has(verboseOption)) {
terminal.setVerbosity(Terminal.Verbosity.VERBOSE);
} else {
terminal.setVerbosity(Terminal.Verbosity.NORMAL);
}
// Continue in EnvironmentAwareCommand.execute().
execute(terminal, options);
}
EnvironmentAwareCommand.execute()
// Collects the key/value settings supplied through settingOption, fills in missing path settings
// from system properties, and then runs the command with the resulting Environment.
// Throws UserException (ExitCodes.USAGE) when a setting has an empty value or a key is given twice.
protected void execute(Terminal terminal, OptionSet options) throws Exception {
    // Parameterized instead of raw, so the keys and values handled below are type-checked as strings.
    final Map<String, String> settings = new HashMap<>();
    for (final KeyValuePair kvp : settingOption.values(options)) {
        if (kvp.value.isEmpty()) {
            throw new UserException(ExitCodes.USAGE, "setting [" + kvp.key + "] must not be empty");
        }
        // Reject duplicates rather than silently letting the later value win.
        if (settings.containsKey(kvp.key)) {
            final String message = String.format(
                Locale.ROOT,
                "setting [%s] already set, saw [%s] and [%s]",
                kvp.key,
                settings.get(kvp.key),
                kvp.value);
            throw new UserException(ExitCodes.USAGE, message);
        }
        settings.put(kvp.key, kvp.value);
    }
    // Make sure these settings are present: when one has not been set, it is read from the system property.
    putSystemPropertyIfSettingIsMissing(settings, "path.data", "es.path.data");
    putSystemPropertyIfSettingIsMissing(settings, "path.home", "es.path.home");
    putSystemPropertyIfSettingIsMissing(settings, "path.logs", "es.path.logs");
    // Continue in Elasticsearch.execute().
    execute(terminal, options, createEnv(settings));
}
EnvironmentAwareCommand.createEnv()
// Creates the Environment this command should use, starting from empty base settings.
// The settings map is parameterized (String keys and values) instead of raw.
protected Environment createEnv(final Map<String, String> settings) throws UserException {
    return createEnv(Settings.EMPTY, settings);
}
// Builds the Environment from the base settings plus the given settings map.
// Throws UserException (ExitCodes.CONFIG) when the es.path.conf system property is not set.
// The settings map is parameterized (String keys and values) instead of raw.
protected final Environment createEnv(final Settings baseSettings, final Map<String, String> settings) throws UserException {
    final String esPathConf = System.getProperty("es.path.conf");
    if (esPathConf == null) {
        throw new UserException(ExitCodes.CONFIG, "the system property [es.path.conf] must be set");
    }
    // Per the original note: reads elasticsearch.yml under the config directory and collects
    // file information for the plugins, bin, lib, modules, etc. directories.
    return InternalSettingsPreparer.prepareEnvironment(baseSettings, settings,
        getConfigPath(esPathConf),
        () -> System.getenv("HOSTNAME"));
}
Elasticsearch.execute()
// Validates the command line, handles the version option, reads the daemonize/pidfile/quiet options,
// validates the temp directory and then starts initialization.
// Throws UserException for positional arguments (USAGE) and for temp-directory or node-validation failures (CONFIG).
protected void execute(Terminal terminal, OptionSet options, Environment env) throws UserException {
if (options.nonOptionArguments().isEmpty() == false) {
throw new UserException(ExitCodes.USAGE, "Positional arguments not allowed, found " + options.nonOptionArguments());
}
// Version requested: print version/build/JVM information and return without starting anything.
if (options.has(versionOption)) {
final String versionOutput = String.format(
Locale.ROOT,
"Version: %s, Build: %s/%s/%s/%s, JVM: %s",
Build.CURRENT.getQualifiedVersion(),
Build.CURRENT.flavor().displayName(),
Build.CURRENT.type().displayName(),
Build.CURRENT.hash(),
Build.CURRENT.date(),
JvmInfo.jvmInfo().version()
);
terminal.println(versionOutput);
return;
}
// Read the daemonize, pidFile and quiet values, then make sure the configured temp directory is valid.
final boolean daemonize = options.has(daemonizeOption);
final Path pidFile = pidfileOption.value(options);
final boolean quiet = options.has(quietOption);
// A misconfigured tmpdir can cause problems that are hard to diagnose later, so reject it immediately.
try {
env.validateTmpFile();
} catch (IOException e) {
throw new UserException(ExitCodes.CONFIG, e.getMessage());
}
try {
// Per the original note, this leads into Bootstrap.init().
// NOTE(review): Bootstrap.init names its first parameter `foreground`, while `daemonize` is passed here —
// presumably an intermediate init method negates it; confirm against the upstream source.
init(daemonize, pidFile, quiet, env);
} catch (NodeValidationException e) {
throw new UserException(ExitCodes.CONFIG, e.getMessage());
}
}
Bootstrap.init()(1)
// Holds the Bootstrap instance created by init().
private static volatile Bootstrap INSTANCE;
// Bootstrap.init(), first excerpt: the method body continues in the later excerpts of these notes.
static void init(
final boolean foreground,
final Path pidFile,
final boolean quiet,
final Environment initialEnv) throws BootstrapException, NodeValidationException, UserException {
// Force Bootstrap-related class initialization to happen before the security manager is installed.
// Per the original note, the call itself does nothing.
BootstrapInfo.init();
// Create the Bootstrap instance; its constructor builds keepAliveThread, which waits on a
// CountDownLatch and so stays alive until shutdown.
INSTANCE = new Bootstrap();
Bootstrap()
// Latch with a count of one: the keep-alive thread blocks on it until the shutdown hook counts it down.
private final CountDownLatch keepAliveLatch = new CountDownLatch(1);
// Creates the keep-alive thread (it is not started here) and registers the shutdown hook that releases it.
Bootstrap() {
keepAliveThread = new Thread(new Runnable() {
@Override
public void run() {
try {
// Block until the latch reaches zero.
keepAliveLatch.await();
} catch (InterruptedException e) {
// bail out
}
}
}, "elasticsearch[keepAlive/" + Version.CURRENT + "]");
// Mark it as a non-daemon (user) thread.
keepAliveThread.setDaemon(false);
// The thread stays alive until shutdown: this hook counts the latch down when the JVM shuts down,
// which lets await() above return.
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
keepAliveLatch.countDown();
}
});
}
keepAliveThread
线程本身不做具体的工作。主线程执行完启动流程后会退出,keepalive线程是唯一的用户线程,作用是保持进程运行。在Java程序中,至少要有一个用户线程。当用户线程为零时退出进程。
CountDownLatch
CountDownLatch是一个同步工具类,用来协调多个线程之间的同步,或者说起到线程之间的通信(而不是用作互斥的作用)。
CountDownLatch能够使一个线程在等待另外一些线程完成各自工作之后,再继续执行。使用一个计数器进行实现。计数器初始值为线程的数量。当每一个线程完成自己任务后,计数器的值就会减一。当计数器的值为0时,表示所有的线程都已经完成一些任务,然后在CountDownLatch上等待的线程就可以恢复执行接下来的任务。
CountDownLatch典型用法:1、某一线程在开始运行前等待n个线程执行完毕。将CountDownLatch的计数器初始化为new CountDownLatch(n),每当一个任务线程执行完毕,就将计数器减1 countdownLatch.countDown(),当计数器的值变为0时,在CountDownLatch上await()的线程就会被唤醒。一个典型应用场景就是启动一个服务时,主线程需要等待多个组件加载完毕,之后再继续执行。
CountDownLatch典型用法:2、实现多个线程开始执行任务的最大并行性。注意是并行性,不是并发,强调的是多个线程在某一时刻同时开始执行。类似于赛跑,将多个线程放到起点,等待发令枪响,然后同时开跑。做法是初始化一个共享的CountDownLatch(1),将其计数器初始化为1,多个线程在开始执行任务前首先countdownlatch.await(),当主线程调用countDown()时,计数器变为0,多个线程同时被唤醒。
Bootstrap.init()(2)
// Bootstrap.init(), second excerpt (continues the method body).
// Load the keystore secure settings. Per the original note: the keystore file is created and saved
// when it does not exist; otherwise it is decrypted and upgraded.
// Then create the Environment.
final SecureSettings keystore = loadSecureSettings(initialEnv);
final Environment environment = createEnvironment(pidFile, keystore, initialEnv.settings(), initialEnv.configFile());
// LogConfigurator will replace System.out and System.err with redirects to our log files,
// so the stream objects must be captured before LogConfigurator is called, in order to be able
// to close them at the appropriate time.
final Runnable sysOutCloser = getSysOutCloser();
final Runnable sysErrorCloser = getSysErrorCloser();
LogConfigurator.setNodeName(Node.NODE_NAME_SETTING.get(environment.settings()));
try {
LogConfigurator.configure(environment);
} catch (IOException e) {
throw new BootstrapException(e);
}
// Create the pid file when one is configured.
if (environment.pidFile() != null) {
try {
PidFile.create(environment.pidFile(), true);
} catch (IOException e) {
throw new BootstrapException(e);
}
}
try {
// Standard streams are closed when not running in the foreground, or when quiet was requested.
final boolean closeStandardStreams = (foreground == false) || quiet;
if (closeStandardStreams) {
// Remove the console appender from the root logger, if there is one, then run the System.out closer.
final Logger rootLogger = LogManager.getRootLogger();
final Appender maybeConsoleAppender = Loggers.findAppender(rootLogger, ConsoleAppender.class);
if (maybeConsoleAppender != null) {
Loggers.removeAppender(rootLogger, maybeConsoleAppender);
}
sysOutCloser.run();
}
// Check the Lucene version.
checkLucene();
// Install the default uncaught-exception handler.
// This must be done before security is initialized, because we do not want to grant the
// runtime permission setDefaultUncaughtExceptionHandler.
Thread.setDefaultUncaughtExceptionHandler(new ElasticsearchUncaughtExceptionHandler());
INSTANCE.setup(true, environment);
keystore
keystore安全配置用于解决部分敏感信息不适合放在配置文件中的问题:配置文件以明文保存,虽然文件系统有基于用户权限的保护,但这仍然不够。因此ES把这些敏感配置信息加密,单独放到一个文件中:config/elasticsearch.keystore。
Bootstrap.setup()
// Prepares the process for running a node: spawns native controllers, initializes native resources
// and probes, optionally registers a shutdown hook, checks for jar hell, configures security,
// and finally creates the Node.
// Throws BootstrapException when one of the IO/URI/algorithm related steps fails.
private void setup(boolean addShutdownHook, Environment environment) throws BootstrapException {
Settings settings = environment.settings();
try {
// Per the original note: iterates over all modules and spawns a native controller for each of them.
spawner.spawnNativeControllers(environment, true);
} catch (IOException e) {
throw new BootstrapException(e);
}
// Initialize native resources.
initializeNatives(
environment.tmpFile(),
BootstrapSettings.MEMORY_LOCK_SETTING.get(settings),
BootstrapSettings.SYSTEM_CALL_FILTER_SETTING.get(settings),
BootstrapSettings.CTRLHANDLER_SETTING.get(settings));
// Initialize the probes before the security manager is installed.
initializeProbes();
// When Elasticsearch exits, close the necessary IO streams and the logging context.
if (addShutdownHook) {
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
try {
// Close the node (and the spawner).
// Per the original note, Node.close() calls doStop() and doClose() on the individual modules.
IOUtils.close(node, spawner);
LoggerContext context = (LoggerContext) LogManager.getContext(false);
Configurator.shutdown(context);
// Wait up to 10 seconds for the node to finish closing.
if (node != null && node.awaitClose(10, TimeUnit.SECONDS) == false) {
throw new IllegalStateException("Node didn't stop within 10 seconds. " +
"Any outstanding requests or tasks might get killed.");
}
} catch (IOException ex) {
throw new ElasticsearchException("failed to stop node", ex);
} catch (InterruptedException e) {
LogManager.getLogger(Bootstrap.class).warn("Thread got interrupted while waiting for the node to shutdown.");
// Restore the interrupt flag.
Thread.currentThread().interrupt();
}
}
});
}
try {
// Check for jar conflicts, i.e. whether the current classpath contains duplicate classes.
final Logger logger = LogManager.getLogger(JarHell.class);
JarHell.checkJarHell(logger::debug);
} catch (IOException | URISyntaxException e) {
throw new BootstrapException(e);
}
// Log the ifconfig output before the SecurityManager is installed.
IfConfig.logIfNecessary();
// Install the SecurityManager.
try {
Security.configure(environment, BootstrapSettings.SECURITY_FILTER_BAD_DEFAULTS_SETTING.get(settings));
} catch (IOException | NoSuchAlgorithmException e) {
throw new BootstrapException(e);
}
// Create the Node; validation before accepting requests is delegated to BootstrapChecks.
node = new Node(environment) {
@Override
protected void validateNodeBeforeAcceptingRequests(
final BootstrapContext context,
// NOTE(review): `List checks` appears to have lost its type argument during extraction
// (presumably List<BootstrapCheck>) — confirm against the upstream source.
final BoundTransportAddress boundTransportAddress, List checks) throws NodeValidationException {
BootstrapChecks.check(context, boundTransportAddress, checks);
}
};
}
Node()
// Node constructor (excerpt): loads plugins and modules, builds the final settings and environment,
// creates the thread pool and the NodeClient, then wires modules and services through the injector.
// Parts elided by the notes are marked with "......"; the excerpt stops before the end of the constructor.
// The generic type arguments below were restored after being lost in extraction.
protected Node(final Environment initialEnvironment,
               Collection<Class<? extends Plugin>> classpathPlugins, boolean forbidPrivateIndexSettings) {
    logger = LogManager.getLogger(Node.class);
    // Services and other resources that have to be closed as part of the close flow.
    final List<Closeable> resourcesToClose = new ArrayList<>();
    boolean success = false;
    try {
        // Node environment
        Settings tmpSettings = Settings.builder().put(initialEnvironment.settings())
            .put(Client.CLIENT_TYPE_SETTING_S.getKey(), CLIENT_TYPE).build();
        final JvmInfo jvmInfo = JvmInfo.jvmInfo();
        logger.info(...)
        // Check the version: warn when this is not a production release.
        if (Build.CURRENT.isProductionRelease() == false) {
            logger.warn(
                "version [{}] is a pre-release version of Elasticsearch and is not suitable for production",
                Build.CURRENT.getQualifiedVersion());
        }
        if (logger.isDebugEnabled()) {
            logger.debug("using config [{}], data [{}], logs [{}], plugins [{}]",
                initialEnvironment.configFile(), Arrays.toString(initialEnvironment.dataFiles()),
                initialEnvironment.logsFile(), initialEnvironment.pluginsFile());
        }
        // Read and load all plugins and modules.
        this.pluginsService = new PluginsService(tmpSettings, initialEnvironment.configFile(), initialEnvironment.modulesFile(),
            initialEnvironment.pluginsFile(), classpathPlugins);
        final Settings settings = pluginsService.updatedSettings();
        // Possible roles = built-in roles plus the roles contributed by plugins.
        final Set<DiscoveryNodeRole> possibleRoles = Stream.concat(
            DiscoveryNodeRole.BUILT_IN_ROLES.stream(),
            pluginsService.filterPlugins(Plugin.class)
                .stream()
                .map(Plugin::getRoles)
                .flatMap(Set::stream))
            .collect(Collectors.toSet());
        DiscoveryNode.setPossibleRoles(possibleRoles);
        // Create the environment based on the final view of the settings.
        // This makes sure components get the same setting values no matter where they ask for them.
        this.environment = new Environment(settings, initialEnvironment.configFile());
        Environment.assertEquivalent(initialEnvironment, this.environment);
        nodeEnvironment = new NodeEnvironment(tmpSettings, environment);
        logger.info("node name [{}], node ID [{}], cluster name [{}]",
            NODE_NAME_SETTING.get(tmpSettings), nodeEnvironment.nodeId(), ClusterName.CLUSTER_NAME_SETTING.get(tmpSettings).value());
        resourcesToClose.add(nodeEnvironment);
        localNodeFactory = new LocalNodeFactory(settings, nodeEnvironment.nodeId());
        final List<ExecutorBuilder<?>> executorBuilders = pluginsService.getExecutorBuilders(settings);
        // Create the thread pool.
        final ThreadPool threadPool = new ThreadPool(settings, executorBuilders.toArray(new ExecutorBuilder[0]));
        resourcesToClose.add(() -> ThreadPool.terminate(threadPool, 10, TimeUnit.SECONDS));
        final ResourceWatcherService resourceWatcherService = new ResourceWatcherService(settings, threadPool);
        resourcesToClose.add(resourceWatcherService);
        // Add the thread context to the DeprecationLogger so that it does not need to be injected everywhere.
        HeaderWarning.setThreadContext(threadPool.getThreadContext());
        resourcesToClose.add(() -> HeaderWarning.removeThreadContext(threadPool.getThreadContext()));
        final List<Setting<?>> additionalSettings = new ArrayList<>(pluginsService.getPluginSettings());
        final List<String> additionalSettingsFilter = new ArrayList<>(pluginsService.getPluginSettingsFilter());
        for (final ExecutorBuilder<?> builder : threadPool.builders()) {
            additionalSettings.addAll(builder.getRegisteredSettings());
        }
        // Create the NodeClient.
        client = new NodeClient(settings, threadPool);
        // Create the various modules and services.
        final ScriptModule scriptModule = new ScriptModule(settings, pluginsService.filterPlugins(ScriptPlugin.class));
        final ScriptService scriptService = newScriptService(settings, scriptModule.engines, scriptModule.contexts);
        ......
        // The modules defined above are managed centrally by ModulesBuilder,
        // which is Elasticsearch's wrapper around Guice.
        ModulesBuilder modules = new ModulesBuilder();
        ......
        // Bind the dependencies (dependency injection).
        modules.add(b -> {
            b.bind(Node.class).toInstance(this);
            ......
        });
        // Instances of the service classes can be obtained through the injector.
        injector = modules.createInjector();
        // Copies of existing shards are allocated by looking for a viable copy of the shard in the cluster.
        // The search for viable copies is triggered by an allocation attempt (i.e. a reroute) and runs asynchronously.
        // When it completes, another reroute is triggered to try the allocation again.
        // This means there is a circular dependency: the allocation service needs access to the existing
        // shards allocators (e.g. the GatewayAllocator), which need to be able to trigger a reroute,
        // which needs to call into the allocation service. The loop is closed here:
        clusterModule.setExistingShardsAllocators(injector.getInstance(GatewayAllocator.class));
        List<LifecycleComponent> pluginLifecycleComponents = pluginComponents.stream()
            .filter(p -> p instanceof LifecycleComponent)
            .map(p -> (LifecycleComponent) p).collect(Collectors.toList());
        resourcesToClose.addAll(pluginLifecycleComponents);
        resourcesToClose.add(injector.getInstance(PeerRecoverySourceService.class));
        this.pluginLifecycleComponents = Collections.unmodifiableList(pluginLifecycleComponents);
        // NOTE(review): this line was cut off after "new Key" in the notes; the type argument is restored
        // from the upstream 7.x source and the remaining arguments are elided — confirm against the exact version.
        client.initialize(injector.getInstance(new Key<Map<ActionType, TransportAction>>() {}), ......);
        // (The excerpt ends here; the constructor continues in the upstream source.)
Bootstrap.init()(3)
// Bootstrap.init(), third excerpt (end of the method body).
// The keystore is no longer needed at this point: close it.
try {
IOUtils.close(keystore);
} catch (IOException e) {
throw new BootstrapException(e);
}
// Per the original note, INSTANCE.start() does two things:
// node.start() starts the node, and keepAliveThread.start() starts the keep-alive thread.
INSTANCE.start();
// When not running in the foreground, run the System.err closer as well.
if (foreground == false) {
sysErrorCloser.run();
}
} catch (NodeValidationException | RuntimeException e) {
// Logging of the failure (the handler body is elided in these notes).
}
}
Node.start()
// Starts the node: starts the plugin lifecycle components, then the individual services obtained
// from the injector. Returns this node. (Excerpt — elided parts are marked with "......".)
public Node start() throws NodeValidationException {
// If the lifecycle cannot move to the started state, return without starting anything.
if (!lifecycle.moveToStarted()) {
return this;
}
logger.info("starting ...");
pluginLifecycleComponents.forEach(LifecycleComponent::start);
// Obtain the instance of each class through the injector and call its start() method.
// Per the original note, start() mostly initializes internal data and creates/starts thread pools.
injector.getInstance(MappingUpdatedAction.class).setClient(client);
injector.getInstance(IndicesService.class).start(); // IndicesService: index management
injector.getInstance(IndicesClusterStateService.class).start(); // IndicesClusterStateService: described in the original note as cross-cluster synchronization — NOTE(review): confirm
injector.getInstance(SnapshotsService.class).start(); // SnapshotsService: creates snapshots
injector.getInstance(SnapshotShardsService.class).start(); // SnapshotShardsService: starts and stops shard-level snapshots
injector.getInstance(RepositoriesService.class).start();
injector.getInstance(SearchService.class).start(); // SearchService: search service
nodeService.getMonitorService().start(); // MonitorService: monitoring
......
return this;
}