官方pilot架构图
- discovery service:从Kubernetes apiserver list/watch service/endpoint/pod/node等资源信息,监听istio控制平面配置信息(Kubernetes CRD),翻译为envoy可以使用的配置格式
- proxy:envoy连接discovery service,间接获取集群中微服务的注册信息
- agent:pilot-agent生成envoy配置文件,管理envoy进程的生命周期
- service A/B:istio的应用进出网络流量会被proxy转发
Istio Pilot agent runs in the sidecar or gateway container and bootstraps Envoy.
Usage:
pilot-agent [command]
Available Commands:
help Help about any command
proxy Envoy proxy agent
request Makes an HTTP request to the Envoy admin API
version Prints out build version information
Flags:
-h, --help help for pilot-agent
--log_as_json Whether to format output as JSON or in plain console-friendly format
--log_caller string Comma-separated list of scopes for which to include caller information, scopes can be any of [default, model, rbac]
--log_output_level string Comma-separated minimum per-scope logging level of messages to output, in the form of <scope>:<level>,<scope>:<level>,... where scope can be one of [default, model, rbac] and level can be one of [debug, info, warn, error, fatal, none] (default "default:info")
--log_rotate string The path for the optional rotating log file
--log_rotate_max_age int The maximum age in days of a log file beyond which the file is rotated (0 indicates no limit) (default 30)
--log_rotate_max_backups int The maximum number of log file backups to keep before older files are deleted (0 indicates no limit) (default 1000)
--log_rotate_max_size int The maximum size in megabytes of a log file beyond which the file is rotated (default 104857600)
--log_stacktrace_level string Comma-separated minimum per-scope logging level at which stack traces are captured, in the form of <scope>:<level>,<scope>:<level>,... where scope can be one of [default, model, rbac] and level can be one of [debug, info, warn, error, fatal, none] (default "default:none")
--log_target stringArray The set of paths where to output the log. This can be any path as well as the special values stdout and stderr (default [stdout])
sidecar-injector方式启动的应用,pilot-agent命令行
/usr/local/bin/pilot-agent
proxy sidecar
--configPath /etc/istio/proxy
--binaryPath /usr/local/bin/envoy
--serviceCluster istio-proxy
--drainDuration 45s
--parentShutdownDuration 1m0s
--discoveryAddress istio-pilot.istio-system:15007
--discoveryRefreshDelay 1s
--zipkinAddress zipkin.istio-system:9411
--connectTimeout 10s
--statsdUdpAddress istio-statsd-prom-bridge.istio-system:9125
--proxyAdminPort 15000
--controlPlaneAuthPolicy NONE
// rootCmd is the top-level "pilot-agent" cobra command; its Long text is the
// description shown in the command's help output. SilenceUsage is enabled,
// which in cobra suppresses printing the usage text when a command returns an
// error.
rootCmd = &cobra.Command{
Use: "pilot-agent",
Short: "Istio Pilot agent.",
Long: "Istio Pilot agent runs in the sidecar or gateway container and bootstraps Envoy.",
SilenceUsage: true,
}
proxyCmd = &cobra.Command{
Use: "proxy",
Short: "Envoy proxy agent",
RunE: func(c *cobra.Command, args []string) error {
cmd.PrintFlags(c.Flags())
if err := log.Configure(loggingOptions); err != nil {
return err
}
log.Infof("Version %s", version.Info.String())
// Sidecar type is used for sidecar proxies in the application containers Sidecar NodeType = "sidecar" // Ingress type is used for cluster ingress proxies Ingress NodeType = "ingress" // Router type is used for standalone proxies acting as L7/L4 routers Router NodeType = "router"
role.Type = model.Sidecar
if len(args) > 0 {
role.Type = model.NodeType(args[0])
if !model.IsApplicationNodeType(role.Type) {
log.Errorf("Invalid role Type: %#v", role.Type)
return fmt.Errorf("Invalid role Type: " + string(role.Type))
}
}
Kubernetes是默认使用的服务注册中心:此时IP取自环境变量INSTANCE_IP;使用其他注册中心时则取本机私有网卡IP,再不行的话127.0.0.1也得将就使用
// set values from registry platform
if len(role.IPAddress) == 0 {
if registry == serviceregistry.KubernetesRegistry {
role.IPAddress = os.Getenv("INSTANCE_IP")
} else {
if ipAddr, ok := proxy.GetPrivateIP(context.Background()); ok {
log.Infof("Obtained private IP %v", ipAddr)
role.IPAddress = ipAddr.String()
} else {
role.IPAddress = "127.0.0.1"
}
}
}
if len(role.ID) == 0 {
if registry == serviceregistry.KubernetesRegistry {
role.ID = os.Getenv("POD_NAME") + "." + os.Getenv("POD_NAMESPACE")
} else if registry == serviceregistry.ConsulRegistry {
role.ID = role.IPAddress + ".service.consul"
} else {
role.ID = role.IPAddress
}
}
注册服务方式 | Domain | pilotDomain |
Kubernetes | POD_NAMESPACE.svc.cluster.local | cluster.local |
Consul | service.consul | "" |
其他方式 | "" | "" |
pilotDomain := role.Domain
if len(role.Domain) == 0 {
if registry == serviceregistry.KubernetesRegistry {
role.Domain = os.Getenv("POD_NAMESPACE") + ".svc.cluster.local"
pilotDomain = "cluster.local"
} else if registry == serviceregistry.ConsulRegistry {
role.Domain = "service.consul"
} else {
role.Domain = ""
}
}
构造ProxyConfig实例,包括配置路径,proxy可执行文件(也就是envoy)路径等等
// DefaultProxyConfig returns a meshconfig.ProxyConfig populated with the
// package's default values for an individual proxy.
func DefaultProxyConfig() meshconfig.ProxyConfig {
	var defaults meshconfig.ProxyConfig

	// Paths and identity come from package-level constants.
	defaults.ConfigPath = ConfigPathDir
	defaults.BinaryPath = BinaryPathFilename
	defaults.ServiceCluster = ServiceClusterName
	defaults.DiscoveryAddress = DiscoveryPlainAddress

	// Timing: 2s drain, 3s parent shutdown, 1s connect timeout.
	defaults.DrainDuration = types.DurationProto(2 * time.Second)
	defaults.ParentShutdownDuration = types.DurationProto(3 * time.Second)
	defaults.ConnectTimeout = types.DurationProto(1 * time.Second)

	// Optional integrations are left empty by default.
	defaults.ZipkinAddress = ""
	defaults.StatsdUdpAddress = ""
	defaults.CustomConfigFile = ""

	// Admin port 15000, no control-plane authentication.
	defaults.ProxyAdminPort = 15000
	defaults.ControlPlaneAuthPolicy = meshconfig.AuthenticationPolicy_NONE

	defaults.Concurrency = 0
	defaults.StatNameLength = 189

	return defaults
}
启动的配置如下所示:
// set all flags
proxyConfig.CustomConfigFile = customConfigFile
proxyConfig.ConfigPath = configPath
proxyConfig.BinaryPath = binaryPath
proxyConfig.ServiceCluster = serviceCluster
proxyConfig.DrainDuration = types.DurationProto(drainDuration)
proxyConfig.ParentShutdownDuration = types.DurationProto(parentShutdownDuration)
proxyConfig.DiscoveryAddress = discoveryAddress
proxyConfig.ZipkinAddress = zipkinAddress
proxyConfig.ConnectTimeout = types.DurationProto(connectTimeout)
proxyConfig.StatsdUdpAddress = statsdUDPAddress
proxyConfig.ProxyAdminPort = int32(proxyAdminPort)
proxyConfig.Concurrency = int32(concurrency)
controlPlaneAuthPolicy有两种取值:NONE(不使用认证)和MUTUAL_TLS(使用双向TLS认证);使用MUTUAL_TLS时还需要确定pilot所在的namespace,用来生成pilotSAN
var pilotSAN []string
switch controlPlaneAuthPolicy {
case meshconfig.AuthenticationPolicy_NONE.String():
proxyConfig.ControlPlaneAuthPolicy = meshconfig.AuthenticationPolicy_NONE
case meshconfig.AuthenticationPolicy_MUTUAL_TLS.String():
var ns string
proxyConfig.ControlPlaneAuthPolicy = meshconfig.AuthenticationPolicy_MUTUAL_TLS
if registry == serviceregistry.KubernetesRegistry {
partDiscoveryAddress := strings.Split(discoveryAddress, ":")
discoveryHostname := partDiscoveryAddress[0]
parts := strings.Split(discoveryHostname, ".")
if len(parts) == 1 {
// namespace of pilot is not part of discovery address use
// pod namespace e.g. istio-pilot:15005
ns = os.Getenv("POD_NAMESPACE")
} else if len(parts) == 2 {
// namespace is found in the discovery address
// e.g. istio-pilot.istio-system:15005
ns = parts[1]
} else {
// discovery address is a remote address. For remote clusters
// only support the default config, or env variable
ns = os.Getenv("ISTIO_NAMESPACE")
if ns == "" {
ns = model.IstioSystemNamespace
}
}
}
pilotSAN = envoy.GetPilotSAN(pilotDomain, ns)
}
certs := []envoy.CertSource{
{
Directory: model.AuthCertsPath,
Files: []string{model.CertChainFilename, model.KeyFilename, model.RootCertFilename},
},
}
if role.Type == model.Ingress {
certs = append(certs, envoy.CertSource{
Directory: model.IngressCertsPath,
Files: []string{model.IngressCertFilename, model.IngressKeyFilename},
})
}
当指定了templateFile且没有指定customConfigFile时,会根据模板渲染出静态配置文件envoy.yaml;sidecar-injector默认不使用这种方式,简单看看即可
if templateFile != "" && proxyConfig.CustomConfigFile == "" {
opts := make(map[string]string)
opts["PodName"] = os.Getenv("POD_NAME")
opts["PodNamespace"] = os.Getenv("POD_NAMESPACE")
// protobuf encoding of IP_ADDRESS type
opts["PodIP"] = base64.StdEncoding.EncodeToString(net.ParseIP(os.Getenv("INSTANCE_IP")))
if proxyConfig.ControlPlaneAuthPolicy == meshconfig.AuthenticationPolicy_MUTUAL_TLS {
opts["ControlPlaneAuth"] = "enable"
}
if disableInternalTelemetry {
opts["DisableReportCalls"] = "true"
}
tmpl, err := template.ParseFiles(templateFile)
if err != nil {
return err
}
var buffer bytes.Buffer
err = tmpl.Execute(&buffer, opts)
if err != nil {
return err
}
content := buffer.Bytes()
log.Infof("Static config:\n%s", string(content))
proxyConfig.CustomConfigFile = proxyConfig.ConfigPath + "/envoy.yaml"
err = ioutil.WriteFile(proxyConfig.CustomConfigFile, content, 0644)
if err != nil {
return err
}
}
如果设置了statusPort,则启动状态探测服务:实现路径istio/pilot/cmd/pilot-agent/status/server.go,API为/healthz/ready,回调handler函数为handleReadyProbe
// If a status port was provided, start handling status probes.
if statusPort > 0 {
parsedPorts, err := parseApplicationPorts()
if err != nil {
return err
}
statusServer := status.NewServer(status.Config{
AdminPort: proxyAdminPort,
StatusPort: statusPort,
ApplicationPorts: parsedPorts,
})
go statusServer.Run(ctx)
}
NewProxy创建envoy实例,保存配置、启动参数等,envoy启动参数大致如下:
/usr/local/bin/envoy -c /etc/istio/proxy/envoy-rev0.json --restart-epoch 0 --drain-time-s 45 --parent-shutdown-time-s 60 --service-cluster istio-proxy --service-node sidecar~172.30.45.4~httpserver-d-78f6cdd6b5-dwt98.default~default.svc.cluster.local --max-obj-name-len 189 --allow-unknown-fields -l warn --v2-config-only
// NewProxy creates the envoy-backed implementation of proxy.Proxy from the
// given proxy config, node string and pilot SANs.
//
// A non-empty logLevel is recorded as the extra arguments "-l", logLevel;
// an empty logLevel leaves the extra arguments nil.
func NewProxy(config meshconfig.ProxyConfig, node string, logLevel string, pilotSAN []string) proxy.Proxy {
	e := &envoy{
		config:   config,
		node:     node,
		pilotSAN: pilotSAN,
	}
	if logLevel == "" {
		return e
	}
	e.extraArgs = []string{"-l", logLevel}
	return e
}
// NewAgent builds an agent around the given proxy and retry settings, with
// empty per-epoch bookkeeping maps and unbuffered config/status channels.
func NewAgent(proxy Proxy, retry Retry) Agent {
	a := new(agent)
	a.proxy, a.retry = proxy, retry

	// Per-epoch state: the config each epoch runs and its abort channel.
	a.epochs = map[int]interface{}{}
	a.abortCh = map[int]chan error{}

	// Unbuffered channels for incoming configs and proxy exit statuses.
	a.configCh = make(chan interface{})
	a.statusCh = make(chan exitStatus)

	return a
}
watcher := envoy.NewWatcher(proxyConfig, role, certs, pilotSAN, agent.ConfigCh())
// NewWatcher returns a Watcher that holds the proxy config, the proxy role,
// the certificate sources to monitor (directories with files in them), the
// pilot SANs, and the send-only channel on which updates are published.
func NewWatcher(
	config meshconfig.ProxyConfig,
	role model.Proxy,
	certs []CertSource,
	pilotSAN []string,
	updates chan<- interface{}) Watcher {
	w := new(watcher)
	w.config = config
	w.role = role
	w.certs = certs
	w.pilotSAN = pilotSAN
	w.updates = updates
	return w
}
路径istio/pilot/pkg/proxy/agent.go
func (a *agent) Run(ctx context.Context) {
log.Info("Starting proxy agent")
// Throttle processing up to smoothed 1 qps with bursts up to 10 qps.
// High QPS is needed to process messages on all channels.
rateLimiter := rate.NewLimiter(1, 10)
计算距离下一次重启的等待时间(没有安排重启时取最大时长),并据此重建定时器
for {
err := rateLimiter.Wait(ctx)
if err != nil {
a.terminate()
return
}
// maximum duration or duration till next restart
var delay time.Duration = 1<<63 - 1
if a.retry.restart != nil {
delay = time.Until(*a.retry.restart)
}
if reconcileTimer != nil {
reconcileTimer.Stop()
}
reconcileTimer = time.NewTimer(delay)
比较配置,有变更则更新配置,reconcile调整函数(第3.5章节讲解)
select {
case config := <-a.configCh:
if !reflect.DeepEqual(a.desiredConfig, config) {
log.Infof("Received new config, resetting budget")
a.desiredConfig = config
// reset retry budget if and only if the desired config changes
a.retry.budget = a.retry.MaxRetries
a.reconcile()
}
处理envoy进程退出
case status := <-a.statusCh:
// delete epoch record and update current config
// avoid self-aborting on non-abort error
delete(a.epochs, status.epoch)
delete(a.abortCh, status.epoch)
a.currentConfig = a.epochs[a.latestEpoch()]
if status.err == errAbort {
log.Infof("Epoch %d aborted", status.epoch)
} else if status.err != nil {
log.Warnf("Epoch %d terminated with an error: %v", status.epoch, status.err)
// NOTE: due to Envoy hot restart race conditions, an error from the
// process requires aggressive non-graceful restarts by killing all
// existing proxy instances
a.abortAll()
} else {
log.Infof("Epoch %d exited normally", status.epoch)
}
// cleanup for the epoch
a.proxy.Cleanup(status.epoch)
case <-reconcileTimer.C:
a.reconcile()
case _, more := <-ctx.Done():
if !more {
a.terminate()
return
}
}
递增epoch值,设置当前最新的配置,调用proxy Run主要执行体(第5章节讲解),runWait运行的结果再塞到statusCh channel中
// reconcile launches a new proxy epoch for the desired config: it picks the
// next epoch number, records the desired config and a buffered abort channel
// under that epoch, marks the desired config as current, and starts runWait
// in a goroutine.
// NOTE: the opening part of the function is elided ("......") in this excerpt.
func (a *agent) reconcile() {
......
// discover and increment the latest running epoch
epoch := a.latestEpoch() + 1
// buffer aborts to prevent blocking on failing proxy
abortCh := make(chan error, maxAborts)
// remember which config this epoch runs and the channel used to abort it
a.epochs[epoch] = a.desiredConfig
a.abortCh[epoch] = abortCh
a.currentConfig = a.desiredConfig
// run the proxy in the background; runWait sends its exit status on statusCh
go a.runWait(a.desiredConfig, epoch, abortCh)
}
// runWait is meant to run as a goroutine: it logs the epoch start, blocks in
// the proxy's Run until it returns, and then sends the epoch number together
// with Run's error on statusCh.
func (a *agent) runWait(config interface{}, epoch int, abortCh <-chan error) {
	log.Infof("Epoch %d starting", epoch)

	runErr := a.proxy.Run(config, epoch, abortCh)

	result := exitStatus{err: runErr, epoch: epoch}
	a.statusCh <- result
}
// watcher holds the values handed to NewWatcher; its Run method watches the
// certificate directories and SendConfig publishes a hash of the certificates.
type watcher struct {
// role is the proxy role passed to NewWatcher.
role model.Proxy
// config is the proxy configuration passed to NewWatcher.
config meshconfig.ProxyConfig
// certs lists the certificate sources; Run watches each source's Directory
// and SendConfig hashes each source's Directory and Files.
certs []CertSource
// pilotSAN is the SAN list passed to NewWatcher.
pilotSAN []string
// updates is the send-only channel on which SendConfig publishes the hash.
updates chan<- interface{}
}
Run是watcher的主要执行体
// Run sends an initial config, starts a goroutine that watches every
// certificate directory and calls SendConfig on changes, and then blocks
// until ctx is done.
func (w *watcher) Run(ctx context.Context) {
	// Publish once up front so the proxy starts even if no file
	// notification ever arrives.
	w.SendConfig()

	// Collect the directory of each certificate source.
	dirs := make([]string, len(w.certs))
	for i := range w.certs {
		dirs[i] = w.certs[i].Directory
	}

	go watchCerts(ctx, dirs, watchFileEvents, defaultMinDelay, w.SendConfig)

	<-ctx.Done()
}
4.1.1 SendConfig函数
做的事情比较简单:把各证书目录下的文件生成sha256哈希值,发送到watcher的updates channel中。这个updates由谁来消费呢?看2.1.3 NewWatcher函数的调用,发现传入的是agent的ConfigCh(),再结合第3章节agent的Run方法就一目了然了
// SendConfig feeds every certificate source (directory plus file list) into
// a single SHA-256 hasher via generateCertHash and sends the resulting digest
// on the updates channel.
func (w *watcher) SendConfig() {
	hasher := sha256.New()
	for i := range w.certs {
		src := w.certs[i]
		generateCertHash(hasher, src.Directory, src.Files)
	}
	w.updates <- hasher.Sum(nil)
}
4.1.2 watchCerts函数
工作是watch证书的更新,阻塞方式,然后调用updateFunc回调函数处理,还是SendConfig函数
// watchCerts registers a file-system watch on every directory in certsDirs
// and then hands the watcher's event channel, minDelay and updateFunc to
// watchFileEventsFn. It blocks for as long as watchFileEventsFn does, so it
// should be run as a goroutine.
//
// If the watcher cannot be created, or any directory cannot be watched, a
// warning is logged and the function returns without watching anything.
// The watcher is closed when the function returns.
//
// watchFileEventsFn is expected to call updateFunc no more than once per
// minDelay.
func watchCerts(ctx context.Context, certsDirs []string, watchFileEventsFn watchFileEventsFn,
	minDelay time.Duration, updateFunc func()) {
	notifier, err := fsnotify.NewWatcher()
	if err != nil {
		log.Warnf("failed to create a watcher for certificate files: %v", err)
		return
	}
	defer func() {
		closeErr := notifier.Close()
		if closeErr != nil {
			log.Warnf("closing watcher encounters an error %v", closeErr)
		}
	}()

	// Register every directory; give up entirely on the first failure.
	for i := range certsDirs {
		dir := certsDirs[i]
		watchErr := notifier.Watch(dir)
		if watchErr == nil {
			continue
		}
		log.Warnf("watching %s encounters an error %v", dir, watchErr)
		return
	}

	watchFileEventsFn(ctx, notifier.Event, minDelay, updateFunc)
}
路径 istio/pilot/pkg/proxy/envoy/proxy.go
// envoy is the proxy implementation returned by NewProxy; its Run method
// starts the Envoy binary named in config.
type envoy struct {
// config is the proxy configuration; Run reads CustomConfigFile and
// BinaryPath from it.
config meshconfig.ProxyConfig
// node is the node string passed to NewProxy and forwarded to
// bootstrap.WriteBootstrap.
node string
// extraArgs holds "-l", <logLevel> when NewProxy was given a log level.
extraArgs []string
// pilotSAN is the SAN list passed to NewProxy and forwarded to
// bootstrap.WriteBootstrap.
pilotSAN []string
// opts is forwarded to bootstrap.WriteBootstrap by Run.
opts map[string]interface{}
// errChan is not referenced in the visible code — NOTE(review): confirm its use.
errChan chan error
}
func (e *envoy) Run(config interface{}, epoch int, abort <-chan error) error {
var fname string
// Note: the cert checking still works, the generated file is updated if certs are changed.
// We just don't save the generated file, but use a custom one instead. Pilot will keep
// monitoring the certs and restart if the content of the certs changes.
if len(e.config.CustomConfigFile) > 0 {
// there is a custom configuration. Don't write our own config - but keep watching the certs.
fname = e.config.CustomConfigFile
} else {
out, err := bootstrap.WriteBootstrap(&e.config, e.node, epoch, e.pilotSAN, e.opts)
if err != nil {
log.Errora("Failed to generate bootstrap config", err)
os.Exit(1) // Prevent infinite loop attempting to write the file, let k8s/systemd report
return err
}
fname = out
}
5.1.1 启动一个新的envoy进程
直接使用exec包调用envoy启动命令
// spin up a new Envoy process
args := e.args(fname, epoch)
if len(e.config.CustomConfigFile) == 0 {
args = append(args, "--v2-config-only")
}
log.Infof("Envoy command: %v", args)
/* #nosec */
cmd := exec.Command(e.config.BinaryPath, args...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
return err
}