k8s DNS
参考:
dns-pod-service
kubernetes-dns
dns name
同一个namespace下,dns name就是service name;不同namespace下,dns name就是 `svc.ns`。比如在ns `ns1` 下有svc `svc1` 和 `svc2`,在ns `ns2` 下有svc `svc3`。那么 `svc1` 访问 `svc2` 就可以通过dns `svc2` 访问,而 `svc3` 访问 `svc2` 就需要通过dns `svc2.ns1` 访问。具体原因下面分析。
Resource Records
定义
`<service>`: k8s svc name
`<ns>`: k8s ns name
`<zone>`: kubedns domain
`<ttl>`: time-to-live for record
svc
kubernetes.default.svc.cluster.local. 4 IN A 10.3.0.1
headless svc
DNS直接解析ep,客户端将轮询这些Pod IPs。
headless.default.svc.cluster.local. 4 IN A 10.3.0.1
headless.default.svc.cluster.local. 4 IN A 10.3.0.2
headless.default.svc.cluster.local. 4 IN A 10.3.0.3
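named ports
`_<port>._<proto>.<service>.<ns>.svc.<zone>. <ttl> IN SRV <weight> <priority> <port-number> <hostname>.<service>.<ns>.svc.<zone>.`
多port场景
- `<port>`: port name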
_https._tcp.headless.default.svc.cluster.local. 4 IN SRV 10 100 443 my-pet.headless.default.svc.cluster.local.
_https._tcp.headless.default.svc.cluster.local. 4 IN SRV 10 100 443 my-pet-2.headless.default.svc.cluster.local.
_https._tcp.headless.default.svc.cluster.local. 4 IN SRV 10 100 443 438934893.headless.default.svc.cluster.local.
DNS
kubelet
--cluster_dns=169.169.0.2 --cluster_domain=cluster.local
cluster-dns: kubedns svc的clusterIP。Comma-separated list of DNS server IP address. This value is used for containers DNS server in case of Pods with "dnsPolicy=ClusterFirst". Note: all DNS servers appearing in the list MUST serve the same set of records otherwise name resolution within the cluster may not work correctly. There is no guarantee as to which DNS server may be contacted for name resolution.
cluster-domain: kubedns的domain。Domain for this cluster. If set, kubelet will configure all containers to search this domain in addition to the host's search domains.
/etc/resolv.conf
nameserver 169.169.0.2
search default.svc.cluster.local svc.cluster.local cluster.local
options ndots:5
架构
kubedns
`kubedns` 容器 watch k8s svc & ep,并在内存中维护 lookup 结构用于服务DNS请求。It watches the Kubernetes master for changes in Services and Endpoints, and maintains in-memory lookup structures to serve DNS requests.
dnsmasq
`dnsmasq` 容器增加DNS缓存,从而提升性能。It adds DNS caching to improve performance. 封装了dnsmasq软件。
sidecar
`sidecar` is a daemon that exports metrics and performs healthchecks on DNS systems. It provides a single health check endpoint to perform healthchecks for `dnsmasq` and `kubedns`. 封装了sidecar软件。
Running
`sidecar` is configured through command line flags, the defaults of which can be found by executing it with `--help`. Important flags to configure:
| Flag | Description |
| ---- | ---- |
| `--dnsmasq-{addr,port}` | endpoint of dnsmasq DNS service |
| `--prometheus-{addr,port}` | endpoint used to export metrics |
| `--probe label,server,domain name,interval` | probe DNS server with domain name every interval seconds, reporting its health under `healthcheck/label`. |
源码
k8s.io\dns\pkg\dns\dns.go
// NewKubeDNS builds the KubeDNS object, the type that does the actual DNS
// work.
//
// The client, cluster domain, initial-sync timeout and config syncer are
// stored on the new object, its caches and lookup maps start out empty, and
// domainPath holds the labels of clusterDomain (trailing dots trimmed, split
// on ".") after passing them through util.ReverseArray. Before returning, the
// Endpoints and Services stores are set up on the object.
func NewKubeDNS(client clientset.Interface, clusterDomain string, timeout time.Duration, configSync config.Sync) *KubeDNS {
	// Split the domain into its labels up front; a trailing "." is ignored.
	domainLabels := strings.Split(strings.TrimRight(clusterDomain, "."), ".")

	dnsServer := &KubeDNS{
		kubeClient:          client,
		domain:              clusterDomain,
		cache:               treecache.NewTreeCache(),
		cacheLock:           sync.RWMutex{},
		nodesStore:          kcache.NewStore(kcache.MetaNamespaceKeyFunc),
		reverseRecordMap:    make(map[string]*skymsg.Service),
		clusterIPServiceMap: make(map[string]*v1.Service),
		domainPath:          util.ReverseArray(domainLabels),
		initialSyncTimeout:  timeout,
		configLock:          sync.RWMutex{},
		configSync:          configSync,
	}

	// Key step: KubeDNS derives its DNS records by watching Services and
	// Endpoints, so both stores are wired up here (Endpoints first).
	dnsServer.setEndpointsStore()
	dnsServer.setServicesStore()

	return dnsServer
}
// setServicesStore creates the informer for Service objects and stores the
// resulting store and controller on kd.
//
// The informer lists and watches "services" across all namespaces with no
// field filtering, and routes add/delete/update events to kd.newService,
// kd.removeService and kd.updateService respectively.
func (kd *KubeDNS) setServicesStore() {
	// List/watch source covering every Service in the cluster.
	source := kcache.NewListWatchFromClient(
		kd.kubeClient.Core().RESTClient(),
		"services",
		v1.NamespaceAll,
		fields.Everything(),
	)

	handlers := kcache.ResourceEventHandlerFuncs{
		AddFunc:    kd.newService,
		DeleteFunc: kd.removeService,
		UpdateFunc: kd.updateService,
	}

	kd.servicesStore, kd.serviceController = kcache.NewInformer(source, &v1.Service{}, resyncPeriod, handlers)
}
// setEndpointsStore creates the informer for Endpoints objects and stores the
// resulting store and controller on kd.
//
// The informer lists and watches "endpoints" across all namespaces with no
// field filtering, and routes add/update/delete events to
// kd.handleEndpointAdd, kd.handleEndpointUpdate and kd.handleEndpointDelete.
func (kd *KubeDNS) setEndpointsStore() {
	// List/watch source covering every Endpoints object in the cluster.
	source := kcache.NewListWatchFromClient(
		kd.kubeClient.Core().RESTClient(),
		"endpoints",
		v1.NamespaceAll,
		fields.Everything(),
	)

	handlers := kcache.ResourceEventHandlerFuncs{
		AddFunc:    kd.handleEndpointAdd,
		UpdateFunc: kd.handleEndpointUpdate,
		// If Service is named headless need to remove the reverse dns entries.
		DeleteFunc: kd.handleEndpointDelete,
	}

	kd.endpointsStore, kd.endpointsController = kcache.NewInformer(source, &v1.Endpoints{}, resyncPeriod, handlers)
}
创建ep和svc的ListWatch以及注册ev处理函数。
// Process applies every delta contained in obj to clientState and invokes the
// matching handler on h (OnAdd / OnUpdate / OnDelete). It returns the first
// error reported by a clientState mutation, which stops processing of the
// remaining deltas; otherwise it returns nil.
// NOTE(review): excerpt of a field from a larger literal; clientState and h
// are defined outside this excerpt.
Process: func(obj interface{}) error {
// from oldest to newest
for _, d := range obj.(Deltas) {
switch d.Type {
case Sync, Added, Updated:
// An object already present in clientState is handled as an update.
// If it is absent, or the lookup itself returned an error, it is
// handled as an add.
if old, exists, err := clientState.Get(d.Object); err == nil && exists {
if err := clientState.Update(d.Object); err != nil {
return err
}
h.OnUpdate(old, d.Object)
} else {
if err := clientState.Add(d.Object); err != nil {
return err
}
h.OnAdd(d.Object)
}
case Deleted:
// Remove the object from clientState first, then notify the handler.
if err := clientState.Delete(d.Object); err != nil {
return err
}
h.OnDelete(d.Object)
}
}
return nil
}
process将刚才注册的ev处理函数串起来。
通常都是先创建svc,如果svc有selector,则创建对应的ep,所以先看svc的处理逻辑。
// newService is the add-event handler for Services. It dispatches on the kind
// of service to create the corresponding DNS records:
//
//   - ExternalName services go to newExternalNameService;
//   - services without a ClusterIP set go to newHeadlessService (a failure
//     there is logged, not returned);
//   - everything else goes to newPortalService.
//
// Objects that assertIsService rejects are ignored.
func (kd *KubeDNS) newService(obj interface{}) {
	service, ok := assertIsService(obj)
	if !ok {
		return
	}

	glog.V(3).Infof("New service: %v", service.Name)
	glog.V(4).Infof("Service details: %v", service)

	switch {
	case service.Spec.Type == v1.ServiceTypeExternalName:
		// ExternalName services are a special kind that return CNAME records.
		kd.newExternalNameService(service)

	case !v1.IsServiceIPSet(service):
		// No ClusterIP set: handle it as a headless service instead of
		// creating a ClusterIP-based entry.
		if err := kd.newHeadlessService(service); err != nil {
			glog.Errorf("Could not create new headless service %v: %v", service.Name, err)
		}

	default:
		// A service without ports is unexpected; warn, but still create the
		// record for it.
		if len(service.Spec.Ports) == 0 {
			glog.Warningf("Service with no ports, this should not have happened: %v",
				service)
		}
		kd.newPortalService(service)
	}
}
根据svc的类型,创建不同的DNS记录,主要看普通svc和headless svc。
// newPortalService creates the DNS entry for a service that has a ClusterIP.
// NOTE(review): excerpt only; the rest of the body is elided with "..." in
// these notes, so only the visible first step is described here.
func (kd *KubeDNS) newPortalService(service *v1.Service) {
subCache := treecache.NewTreeCache()
// Build the DNS record from the service's ClusterIP and store it in subCache
// under the label returned by util.GetSkyMsg.
recordValue, recordLabel := util.GetSkyMsg(service.Spec.ClusterIP, 0)
subCache.SetEntry(recordLabel, recordValue, kd.fqdn(service, recordLabel))
...
}
// newHeadlessService generates skydns records for a headless service.
//
// The records are derived from the Endpoints object found in the endpoints
// store under the key that kcache.MetaNamespaceKeyFunc computes for the
// service. Per the original note, the intent is an A record for every pod in
// the service: for a service x with pods a and b, the records a.x.ns.domain.
// and b.x.ns.domain., which must be periodically updated.
//
// It returns an error when the key cannot be computed or the store lookup
// fails. It returns nil without creating records when no Endpoints object
// exists yet, or when the stored object is not a *v1.Endpoints.
func (kd *KubeDNS) newHeadlessService(service *v1.Service) error {
	storeKey, err := kcache.MetaNamespaceKeyFunc(service)
	if err != nil {
		return err
	}

	// A headless service's records come from its Endpoints. When none exist
	// yet there is nothing to generate here; the log message below records
	// that the records are expected once endpoints show up.
	item, found, err := kd.endpointsStore.GetByKey(storeKey)
	switch {
	case err != nil:
		return fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
	case !found:
		glog.V(1).Infof("Could not find endpoints for service %q in namespace %q. DNS records will be created once endpoints show up.",
			service.Name, service.Namespace)
		return nil
	}

	endpoints, ok := item.(*v1.Endpoints)
	if !ok {
		// Anything other than *v1.Endpoints in the store is silently skipped.
		return nil
	}
	return kd.generateRecordsForHeadlessService(endpoints, service)
}
// generateRecordsForHeadlessService builds the DNS records of headless service
// svc from the addresses listed in Endpoints object e.
// NOTE(review): excerpt only; the loop body and the rest of the function are
// elided with "..." in these notes, so how the records are stored is not
// visible here.
func (kd *KubeDNS) generateRecordsForHeadlessService(e *v1.Endpoints, svc *v1.Service) error {
subCache := treecache.NewTreeCache()
glog.V(4).Infof("Endpoints Annotations: %v", e.Annotations)
generatedRecords := map[string]*skymsg.Service{}
// Walk every address of every subset and build one record per address IP
// (the pod IPs, per the original note), so a service yields multiple records.
for idx := range e.Subsets {
for subIdx := range e.Subsets[idx].Addresses {
address := &e.Subsets[idx].Addresses[subIdx]
endpointIP := address.IP
recordValue, endpointName := util.GetSkyMsg(endpointIP, 0)
...
}
}
...
}
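对于ep的处理,只需要考虑Headless svc这一种情况,因为Headless svc的DNS记录需要由ep生成。这就解决了上面的疑问:如果Headless svc没有找到对应的ep,就直接返回,交由ep add处理该条DNS记录。这里其实可以优化,svc add函数可以直接不处理Headless svc,连判断都不需要。(注意:这个优化成立的前提是ep事件总是晚于svc事件到达;addDNSUsingEndpoints在找不到对应svc(svc == nil)时同样直接返回,若ep事件先到,则仍需由svc add来生成记录。)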
// handleEndpointAdd is the add-event handler for Endpoints. It passes the
// object to addDNSUsingEndpoints and logs any error that call returns.
// Objects that are not *v1.Endpoints are ignored.
func (kd *KubeDNS) handleEndpointAdd(obj interface{}) {
	endpoints, ok := obj.(*v1.Endpoints)
	if !ok {
		return
	}

	err := kd.addDNSUsingEndpoints(endpoints)
	if err != nil {
		glog.Errorf("Error in addDNSUsingEndpoints(%v): %v", endpoints.Name, err)
	}
}
// addDNSUsingEndpoints generates headless-service records for Endpoints e.
//
// It looks up the service for e via getServiceFromEndpoints and returns that
// lookup's error if there is one. It returns nil without generating anything
// when no service is found, when the service has a ClusterIP set, or when it
// is an ExternalName service. Otherwise it returns the result of
// generateRecordsForHeadlessService.
func (kd *KubeDNS) addDNSUsingEndpoints(e *v1.Endpoints) error {
	svc, err := kd.getServiceFromEndpoints(e)
	switch {
	case err != nil:
		return err
	case svc == nil,
		v1.IsServiceIPSet(svc),
		svc.Spec.Type == v1.ServiceTypeExternalName:
		// No headless service found corresponding to endpoints object.
		// The conditions are checked in order, so svc is only dereferenced
		// once it is known to be non-nil.
		return nil
	}
	return kd.generateRecordsForHeadlessService(e, svc)
}