目录
一:架构图
二:资源分配
三:部署过程
1)elasticsearch-7.17.5 安装
2)修改配置文件
3) 分别启动elasticsearch
4) 安装apache-skywalking-apm
5) 启动apache-skywalking-apm
6) 服务器节点配置agent
7)在服务器节点上启动应用服务
8)访问skywalking webui
四:配置文件解释
1)集群配置
2)core核心配置
3)storage数据存储配置
4)agent-analyzer :接收探针代理配置
5)log日志分析
6)远程接收
7)configuration:远程配置
8)其他
五:采样率
六:k8s配置skywalking
官方中文文档:
document-cn-translation-of-skywalking/README.md at master · SkyAPM/document-cn-translation-of-skywalking · GitHub
序号 | 主机 | 角色 | 版本 |
1 | 10.19.9.84 | elasticsearch1 | 7.17.5 |
2 | 10.19.9.85 | elasticsearch1 | 7.17.5 |
3 | 10.19.9.86 | elasticsearch1 | 7.17.5 |
4 | 10.19.9.99 | apache-skywalking-apm | 9.1.0 |
下载elasticsearch-7.17.5-x86_64.rpm包,三个节点分别执行
yum install elasticsearch-7.17.5-x86_64.rpm
vim /etc/elasticsearch/elasticsearch.yml
cluster.name: skywalking
node.name: node2
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
network.host: 10.19.9.85
http.port: 9200
discovery.seed_hosts: ["10.19.9.84", "10.19.9.85", "10.19.9.86"]
cluster.initial_master_nodes: ["10.19.9.84", "10.19.9.85", "10.19.9.86"]
systemctl start elasticsearch
下载apache-skywalking-apm-9.1.0.tar.gz 二进制包
解压修稿配置文件
vim apache-skywalking-apm-bin/config/application.yml
storage:
selector: ${SW_STORAGE:elasticsearch} #选择elasticsearch存储方式
elasticsearch:
namespace: ${SW_NAMESPACE:""}
clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:10.19.9.85:9200} #elasticsearch存储地址
protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
user: ${SW_ES_USER:""}
password: ${SW_ES_PASSWORD:""}
trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""}
apache-skywalking-apm-bin/bin/startup.sh
下载skywalking-agent.jar
nohup java -Xms512m -Xmx512m -Dserver.max-http-header-size=65536 -javaagent:/home/dev/skywalking-agent/skywalking-agent.jar -DSW_AGENT_NAME=gofun-api2 -DSW_AGENT_COLLECTOR_BACKEND_SERVICES=10.19.9.99:11800 -jar SERVICE-NAME.jar >/dev/null 2>&1 &
#集群配置
cluster:
# 选择哪一种集群模式
selector: ${SW_CLUSTER:standalone}
# 单机版
standalone:
# zk 注册集群配置,zk 版本保证在3.5以上,oap-libs中也对3.4进行了支持
zookeeper:
nameSpace: ${SW_NAMESPACE:""}
hostPort: ${SW_CLUSTER_ZK_HOST_PORT:localhost:2181}
## Retry Policy 重试策略
# initial amount of time to wait between retries
# 初始化等待时间
baseSleepTimeMs: ${SW_CLUSTER_ZK_SLEEP_TIME:1000}
# max number of times to retry 最大重试次数
maxRetries: ${SW_CLUSTER_ZK_MAX_RETRIES:3}
# Enable ACL 开启访问控制
enableACL: ${SW_ZK_ENABLE_ACL:false} # disable ACL in default
schema: ${SW_ZK_SCHEMA:digest} # only support digest schema
expression: ${SW_ZK_EXPRESSION:skywalking:skywalking}
# k8s部署配置
kubernetes:
namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}
uidEnvName: ${SW_CLUSTER_K8S_UID:SKYWALKING_COLLECTOR_UID}
# consul 注册集群配置
consul:
serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
# Consul cluster nodes, example: 10.0.0.1:8500,10.0.0.2:8500,10.0.0.3:8500
hostPort: ${SW_CLUSTER_CONSUL_HOST_PORT:localhost:8500}
aclToken: ${SW_CLUSTER_CONSUL_ACLTOKEN:""}
# etcd 注册集群配置
etcd:
serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
# etcd cluster nodes, example: 10.0.0.1:2379,10.0.0.2:2379,10.0.0.3:2379
hostPort: ${SW_CLUSTER_ETCD_HOST_PORT:localhost:2379}
# nacos 注册集群配置
nacos:
serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
hostPort: ${SW_CLUSTER_NACOS_HOST_PORT:localhost:8848}
# Nacos Configuration namespace
namespace: ${SW_CLUSTER_NACOS_NAMESPACE:"public"}
# Nacos auth username
username: ${SW_CLUSTER_NACOS_USERNAME:""}
password: ${SW_CLUSTER_NACOS_PASSWORD:""}
# Nacos auth accessKey
accessKey: ${SW_CLUSTER_NACOS_ACCESSKEY:""}
secretKey: ${SW_CLUSTER_NACOS_SECRETKEY:""}
core:
#配置选择
selector: ${SW_CORE:default}
default:
# Mixed: Receive agent data, Level 1 aggregate, Level 2 aggregate
# Receiver: Receive agent data, Level 1 aggregate
# Aggregator: Level 2 aggregate
# 本服务的角色,包含3总类型,当数据量较大时,需要分角色部署,增加处理能力。
# Mixed: 混合模式,接收数据,1级汇总数据,2级汇总数据
# Receiver: 接收角色,数据接收,1级汇总数据,持久化,告警
# Aggregator: 聚合角色,2级汇总数据,持久化,告警
role: ${SW_CORE_ROLE:Mixed} # Mixed/Receiver/Aggregator
#接收ip
restHost: ${SW_CORE_REST_HOST:0.0.0.0}
#接收端口
restPort: ${SW_CORE_REST_PORT:12800}
#服务路径
restContextPath: ${SW_CORE_REST_CONTEXT_PATH:/}
#最小线程
restMaxThreads: ${SW_CORE_REST_MAX_THREADS:200}
#线程空闲时间,超过关闭
restIdleTimeOut: ${SW_CORE_REST_IDLE_TIMEOUT:30000}
#接收队列大小
restAcceptQueueSize: ${SW_CORE_REST_QUEUE_SIZE:0}
#http最大请求标头大小
httpMaxRequestHeaderSize: ${SW_CORE_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
#grpc服务接收ip
gRPCHost: ${SW_CORE_GRPC_HOST:0.0.0.0}
#grpc服务接收端口
gRPCPort: ${SW_CORE_GRPC_PORT:11800}
#最大并发连接数
maxConcurrentCallsPerConnection: ${SW_CORE_GRPC_MAX_CONCURRENT_CALL:0}
#最大消息体
maxMessageSize: ${SW_CORE_GRPC_MAX_MESSAGE_SIZE:0}
#gRpc线程队列大小
gRPCThreadPoolQueueSize: ${SW_CORE_GRPC_POOL_QUEUE_SIZE:-1}
#gRpc线程队列
gRPCThreadPoolSize: ${SW_CORE_GRPC_THREAD_POOL_SIZE:-1}
#gRpc安全协议是否开启
gRPCSslEnabled: ${SW_CORE_GRPC_SSL_ENABLED:false}
gRPCSslKeyPath: ${SW_CORE_GRPC_SSL_KEY_PATH:""}
gRPCSslCertChainPath: ${SW_CORE_GRPC_SSL_CERT_CHAIN_PATH:""}
gRPCSslTrustedCAPath: ${SW_CORE_GRPC_SSL_TRUSTED_CA_PATH:""}
#采样率
downsampling:
- Hour
- Day
#采样数据留存时间,超过自动删除
# Set a timeout on metrics data. After the timeout has expired, the metrics data will automatically be deleted.
#是否开启数据保持处理器,关闭则不删除采样数据
enableDataKeeperExecutor: ${SW_CORE_ENABLE_DATA_KEEPER_EXECUTOR:true} # Turn it off then automatically metrics data delete will be close.
#数据保持处理器执行频率,单位分钟,默认5分钟一次
dataKeeperExecutePeriod: ${SW_CORE_DATA_KEEPER_EXECUTE_PERIOD:5} # How often the data keeper executor runs periodically, unit is minute
#数据保留时间,单位天
recordDataTTL: ${SW_CORE_RECORD_DATA_TTL:3} # Unit is day
#分析数据保留时间,单位天
metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day
#L1聚合刷新到L2聚合的周期。 单位是毫秒。
# The period of L1 aggregation flush to L2 aggregation. Unit is ms.
l1FlushPeriod: ${SW_CORE_L1_AGGREGATION_FLUSH_PERIOD:500
#会话时间的阈值,单位是毫秒,默认值为 70 秒。
# The threshold of session time. Unit is ms. Default value is 70s.
storageSessionTimeout: ${SW_CORE_STORAGE_SESSION_TIMEOUT:70000}
#做数据持久化的时期,单位为秒,默认值为25s
# The period of doing data persistence. Unit is second.Default value is 25s
persistentPeriod: ${SW_CORE_PERSISTENT_PERIOD:25}
#如果OAP集群在一分钟内发生变化,缓存metrics数据以减少数据库查询
# Cache metrics data for 1 minute to reduce database queries, and if the OAP cluster changes within that minute,
#如果OAP集群在那一分钟内发生变化,那么在那一分钟内,这些度量可能不准确
# the metrics may not be accurate within that minute.
enableDatabaseSession: ${SW_CORE_ENABLE_DATABASE_SESSION:true}
#每个报告周期的前N条记录, unit is minute
topNReportPeriod: ${SW_CORE_TOPN_REPORT_PERIOD:10} # top_n record worker report cycle, unit is minute
#额外的模型字段,用于在可视化工具中查看es数据,开启会增加一定的性能损耗。
activeExtraModelColumns: ${SW_CORE_ACTIVE_EXTRA_MODEL_COLUMNS:false}
#服务名最大长度,服务名+实例名的最大长度必须小于200
# The max length of service + instance names should be less than 200
serviceNameMaxLength: ${SW_SERVICE_NAME_MAX_LENGTH:70}
#实例名最大长度,服务名+实例名的最大长度必须小于200
instanceNameMaxLength: ${SW_INSTANCE_NAME_MAX_LENGTH:70}
#端点名最大长度,服务名+端点名(api)的最大长度必须小于240
# The max length of service + endpoint names should be less than 240
endpointNameMaxLength: ${SW_ENDPOINT_NAME_MAX_LENGTH:150}
#定义一组span标记键,这些键可以通过GraphQL进行搜索
# Define the set of span tag keys, which should be searchable through the GraphQL.
searchableTracesTags: ${SW_SEARCHABLE_TAG_KEYS:http.method,http.status_code,rpc.status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker}
# Define the set of log tag keys, which should be searchable through the GraphQL.
searchableLogsTags: ${SW_SEARCHABLE_LOGS_TAG_KEYS:level}
# Define the set of alarm tag keys, which should be searchable through the GraphQL.
searchableAlarmTags: ${SW_SEARCHABLE_ALARM_TAG_KEYS:level}
# The max size of tags keys for autocomplete select.
autocompleteTagKeysQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_KEYS_QUERY_MAX_SIZE:100}
# The max size of tags values for autocomplete select.
autocompleteTagValuesQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_VALUES_QUERY_MAX_SIZE:100}
# The number of threads used to prepare metrics data to the storage.
prepareThreads: ${SW_CORE_PREPARE_THREADS:2}
# Turn it on then automatically grouping endpoint by the given OpenAPI definitions.
enableEndpointNameGroupingByOpenapi: ${SW_CORE_ENABLE_ENDPOINT_NAME_GROUPING_BY_OPAENAPI:true}
storage:
#选择数据存储类型
selector: ${SW_STORAGE:elasticsearch}
elasticsearch:
namespace: ${SW_NAMESPACE:""}
clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:10.19.9.85:9200}
protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
user: ${SW_ES_USER:""}
password: ${SW_ES_PASSWORD:""}
trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
#安全管理文件,内容包括用户名/密码,由第三方工具管理
secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""}
#索引步长,默认1,每天一个索引
dayStep: ${SW_STORAGE_DAY_STEP:1}
#索引分片数
indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1}
#索引副本数
indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1}
#大数据集配置,当查询包含超数据集,下面三种配置可以提高新能
#大数据存储索引保留天数,小于0则与dayStep一致
superDatasetDayStep: ${SW_SUPERDATASET_STORAGE_DAY_STEP:-1}
#大数据存储分片因子,分片数为indexShardsNumber*superDatasetIndexShardsFactor,此因素也会影响Zipkin和Jaeger的踪迹
superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:5}
#大数据索引副本数
superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0}
#索引模板的顺序
indexTemplateOrder: ${SW_STORAGE_ES_INDEX_TEMPLATE_ORDER:0}
#异步批量写库,默认5000条
bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:5000}
#无论请求数量如何,每 15 秒刷新一次批量
flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:15}
#并发请求数
concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:2}
#结果最大数据量
resultWindowMaxSize: ${SW_STORAGE_ES_QUERY_MAX_WINDOW_SIZE:10000}
#源数据查询最大数据量
metadataQueryMaxSize: ${SW_STORAGE_ES_QUERY_MAX_SIZE:10000}\
#滚动数据量大小
scrollingBatchSize: ${SW_STORAGE_ES_SCROLLING_BATCH_SIZE:5000}
#分段查询最大数据量
segmentQueryMaxSize: ${SW_STORAGE_ES_QUERY_SEGMENT_SIZE:200}
#任务查询最大条数
profileTaskQueryMaxSize: ${SW_STORAGE_ES_QUERY_PROFILE_TASK_SIZE:200}
#profile 数据查询批量大小
profileDataQueryBatchSize: ${SW_STORAGE_ES_QUERY_PROFILE_DATA_BATCH_SIZE:100}
#OAP分析器
oapAnalyzer: ${SW_STORAGE_ES_OAP_ANALYZER:"{\"analyzer\":{\"oap_analyzer\":{\"type\":\"stop\"}}}"} # the oap analyzer.
#OPA日志分析器
oapLogAnalyzer: ${SW_STORAGE_ES_OAP_LOG_ANALYZER:"{\"analyzer\":{\"oap_log_analyzer\":{\"type\":\"standard\"}}}"} # the oap log analyzer. It could be customized by the ES analyzer configuration to support more language log formats, such as Chinese log, Japanese log and etc.
advanced: ${SW_STORAGE_ES_ADVANCED:""}
h2:
driver: ${SW_STORAGE_H2_DRIVER:org.h2.jdbcx.JdbcDataSource}
url: ${SW_STORAGE_H2_URL:jdbc:h2:mem:skywalking-oap-db;DB_CLOSE_DELAY=-1}
user: ${SW_STORAGE_H2_USER:sa}
metadataQueryMaxSize: ${SW_STORAGE_H2_QUERY_MAX_SIZE:5000}
maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:100}
asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:1}
mysql:
properties:
jdbcUrl: ${SW_JDBC_URL:"jdbc:mysql://localhost:3306/swtest?rewriteBatchedStatements=true"}
dataSource.user: ${SW_DATA_SOURCE_USER:root}
dataSource.password: ${SW_DATA_SOURCE_PASSWORD:root@1234}
dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
tidb:
properties:
jdbcUrl: ${SW_JDBC_URL:"jdbc:mysql://localhost:4000/tidbswtest?rewriteBatchedStatements=true"}
dataSource.user: ${SW_DATA_SOURCE_USER:root}
dataSource.password: ${SW_DATA_SOURCE_PASSWORD:""}
dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
dataSource.useAffectedRows: ${SW_DATA_SOURCE_USE_AFFECTED_ROWS:true}
metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
postgresql:
properties:
jdbcUrl: ${SW_JDBC_URL:"jdbc:postgresql://localhost:5432/skywalking"}
dataSource.user: ${SW_DATA_SOURCE_USER:postgres}
dataSource.password: ${SW_DATA_SOURCE_PASSWORD:123456}
dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
banyandb:
host: ${SW_STORAGE_BANYANDB_HOST:127.0.0.1}
port: ${SW_STORAGE_BANYANDB_PORT:17912}
maxBulkSize: ${SW_STORAGE_BANYANDB_MAX_BULK_SIZE:5000}
flushInterval: ${SW_STORAGE_BANYANDB_FLUSH_INTERVAL:15}
metricsShardsNumber: ${SW_STORAGE_BANYANDB_METRICS_SHARDS_NUMBER:1}
recordShardsNumber: ${SW_STORAGE_BANYANDB_RECORD_SHARDS_NUMBER:1}
superDatasetShardsFactor: ${SW_STORAGE_BANYANDB_SUPERDATASET_SHARDS_FACTOR:2}
concurrentWriteThreads: ${SW_STORAGE_BANYANDB_CONCURRENT_WRITE_THREADS:15}
profileTaskQueryMaxSize: ${SW_STORAGE_BANYANDB_PROFILE_TASK_QUERY_MAX_SIZE:200} # the max number of fetch task in a request
agent-analyzer:
selector: ${SW_AGENT_ANALYZER:default}
default:
#采样率
traceSamplingPolicySettingsFile: ${SW_TRACE_SAMPLING_POLICY_SETTINGS_FILE:trace-sampling-policy-settings.yml}
#慢数据访问阀值,单位ms
slowDBAccessThreshold: ${SW_SLOW_DB_THRESHOLD:default:200,mongodb:100}
#开启采样率时,开启对错误分段数据全部保持,防止没有处理错误数据。
forceSampleErrorSegment: ${SW_FORCE_SAMPLE_ERROR_SEGMENT:true}
# 分段状态分析策略
# FROM_SPAN_STATUS:任何一个span状态决定分段状态,只要一个span为Error则为Error,默认
# FROM_ENTRY_SPAN:入口span状态决定分段状态
# FROM_FIRST_SPAN:第一个span状态决定分段状态
segmentStatusAnalysisStrategy: ${SW_SEGMENT_STATUS_ANALYSIS_STRATEGY:FROM_SPAN_STATUS}
#Nginx和外部代理无法获取到原始地址的,且端口不在范围内的,不会产生客户端实例关系
noUpstreamRealAddressAgents: ${SW_NO_UPSTREAM_REAL_ADDRESS:6000,9000}
# 可以被分析的文件,用“,”逗号分隔
meterAnalyzerActiveFiles:
${SW_METER_ANALYZER_ACTIVE_FILES:datasource,threadpool,satellite}
log-analyzer:
selector: ${SW_LOG_ANALYZER:default}
default:
lalFiles: ${SW_LOG_LAL_FILES:default}
malFiles: ${SW_LOG_MAL_FILES:""}
#共享服务器,为所有接收模块提供独立的jetty和grpc服务,开启减少内部ip/端口和线程池的共享占用
receiver-sharing-server:
selector: ${SW_RECEIVER_SHARING_SERVER:default}
default:
# For HTTP server
restHost: ${SW_RECEIVER_SHARING_REST_HOST:0.0.0.0}
restPort: ${SW_RECEIVER_SHARING_REST_PORT:0}
restContextPath: ${SW_RECEIVER_SHARING_REST_CONTEXT_PATH:/}
restMaxThreads: ${SW_RECEIVER_SHARING_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_RECEIVER_SHARING_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_RECEIVER_SHARING_REST_QUEUE_SIZE:0}
httpMaxRequestHeaderSize: ${SW_RECEIVER_SHARING_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
# For gRPC server
gRPCHost: ${SW_RECEIVER_GRPC_HOST:0.0.0.0}
gRPCPort: ${SW_RECEIVER_GRPC_PORT:0}
maxConcurrentCallsPerConnection: ${SW_RECEIVER_GRPC_MAX_CONCURRENT_CALL:0}
maxMessageSize: ${SW_RECEIVER_GRPC_MAX_MESSAGE_SIZE:0}
gRPCThreadPoolQueueSize: ${SW_RECEIVER_GRPC_POOL_QUEUE_SIZE:0}
gRPCThreadPoolSize: ${SW_RECEIVER_GRPC_THREAD_POOL_SIZE:0}
gRPCSslEnabled: ${SW_RECEIVER_GRPC_SSL_ENABLED:false}
gRPCSslKeyPath: ${SW_RECEIVER_GRPC_SSL_KEY_PATH:""}
gRPCSslCertChainPath: ${SW_RECEIVER_GRPC_SSL_CERT_CHAIN_PATH:""}
gRPCSslTrustedCAsPath: ${SW_RECEIVER_GRPC_SSL_TRUSTED_CAS_PATH:""}
authentication: ${SW_AUTHENTICATION:""}
#数据接收器,从其他监测系统获取上传的数据。
receiver-register:
selector: ${SW_RECEIVER_REGISTER:default}
default:
receiver-trace:
selector: ${SW_RECEIVER_TRACE:default}
default:
receiver-jvm:
selector: ${SW_RECEIVER_JVM:default}
default:
receiver-clr:
selector: ${SW_RECEIVER_CLR:default}
default:
receiver-profile:
selector: ${SW_RECEIVER_PROFILE:default}
default:
receiver-zabbix:
selector: ${SW_RECEIVER_ZABBIX:-}
default:
port: ${SW_RECEIVER_ZABBIX_PORT:10051}
host: ${SW_RECEIVER_ZABBIX_HOST:0.0.0.0}
activeFiles: ${SW_RECEIVER_ZABBIX_ACTIVE_FILES:agent}
configuration:
selector: ${SW_CONFIGURATION:none}
#不是用远程配置
none:
grpc:
host: ${SW_DCS_SERVER_HOST:""}
port: ${SW_DCS_SERVER_PORT:80}
clusterName: ${SW_DCS_CLUSTER_NAME:SkyWalking}
period: ${SW_DCS_PERIOD:20}
apollo:
apolloMeta: ${SW_CONFIG_APOLLO:http://localhost:8080}
apolloCluster: ${SW_CONFIG_APOLLO_CLUSTER:default}
apolloEnv: ${SW_CONFIG_APOLLO_ENV:""}
appId: ${SW_CONFIG_APOLLO_APP_ID:skywalking}
period: ${SW_CONFIG_APOLLO_PERIOD:60}
zookeeper:
period: ${SW_CONFIG_ZK_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
namespace: ${SW_CONFIG_ZK_NAMESPACE:/default}
hostPort: ${SW_CONFIG_ZK_HOST_PORT:localhost:2181}
# Retry Policy
baseSleepTimeMs: ${SW_CONFIG_ZK_BASE_SLEEP_TIME_MS:1000} # initial amount of time to wait between retries
maxRetries: ${SW_CONFIG_ZK_MAX_RETRIES:3} # max number of times to retry
etcd:
period: ${SW_CONFIG_ETCD_PERIOD:60} # Unit seconds, sync period. Default fetch every 60 seconds.
endpoints: ${SW_CONFIG_ETCD_ENDPOINTS:http://localhost:2379}
namespace: ${SW_CONFIG_ETCD_NAMESPACE:/skywalking}
authentication: ${SW_CONFIG_ETCD_AUTHENTICATION:false}
user: ${SW_CONFIG_ETCD_USER:}
password: ${SW_CONFIG_ETCD_password:}
consul:
# Consul host and ports, separated by comma, e.g. 1.2.3.4:8500,2.3.4.5:8500
hostAndPorts: ${SW_CONFIG_CONSUL_HOST_AND_PORTS:1.2.3.4:8500}
# Sync period in seconds. Defaults to 60 seconds.
period: ${SW_CONFIG_CONSUL_PERIOD:60}
# Consul aclToken
aclToken: ${SW_CONFIG_CONSUL_ACL_TOKEN:""}
k8s-configmap:
period: ${SW_CONFIG_CONFIGMAP_PERIOD:60}
namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}
nacos:
# Nacos Server Host
serverAddr: ${SW_CONFIG_NACOS_SERVER_ADDR:127.0.0.1}
# Nacos Server Port
port: ${SW_CONFIG_NACOS_SERVER_PORT:8848}
# Nacos Configuration Group
group: ${SW_CONFIG_NACOS_SERVER_GROUP:skywalking}
# Nacos Configuration namespace
namespace: ${SW_CONFIG_NACOS_SERVER_NAMESPACE:}
# Unit seconds, sync period. Default fetch every 60 seconds.
period: ${SW_CONFIG_NACOS_PERIOD:60}
# Nacos auth username
username: ${SW_CONFIG_NACOS_USERNAME:""}
password: ${SW_CONFIG_NACOS_PASSWORD:""}
# Nacos auth accessKey
accessKey: ${SW_CONFIG_NACOS_ACCESSKEY:""}
secretKey: ${SW_CONFIG_NACOS_SECRETKEY:""}
exporter:
selector: ${SW_EXPORTER:-}
grpc:
targetHost: ${SW_EXPORTER_GRPC_HOST:127.0.0.1}
targetPort: ${SW_EXPORTER_GRPC_PORT:9870}
health-checker:
selector: ${SW_HEALTH_CHECKER:-}
default:
checkIntervalSeconds: ${SW_HEALTH_CHECKER_INTERVAL_SECONDS:5}
configuration-discovery:
selector: ${SW_CONFIGURATION_DISCOVERY:default}
default:
disableMessageDigest: ${SW_DISABLE_MESSAGE_DIGEST:false}
receiver-event:
selector: ${SW_RECEIVER_EVENT:default}
default:
receiver-ebpf:
selector: ${SW_RECEIVER_EBPF:default}
default:
service-mesh:
selector: ${SW_SERVICE_MESH:default}
default:
envoy-metric:
selector: ${SW_ENVOY_METRIC:default}
default:
acceptMetricsService: ${SW_ENVOY_METRIC_SERVICE:true}
alsHTTPAnalysis: ${SW_ENVOY_METRIC_ALS_HTTP_ANALYSIS:""}
alsTCPAnalysis: ${SW_ENVOY_METRIC_ALS_TCP_ANALYSIS:""}
# k8sServiceNameRule 允许您通过 Kubernetes 元数据自定义 ALS 中的服务名称,
# 可用的变量是 `pod`, `service`,`${service.metadata.name},、${pod.metadata.labels.version}`将版本号附加到服务名称。
# 请注意,在使用环境变量传递此配置时,请使用单引号(`''`)以避免它被shell。
k8sServiceNameRule: ${K8S_SERVICE_NAME_RULE:"${pod.metadata.labels.(service.istio.io/canonical-name)}"}
#数据抓取
prometheus-fetcher:
selector: ${SW_PROMETHEUS_FETCHER:default}
default:
enabledRules: ${SW_PROMETHEUS_FETCHER_ENABLED_RULES:"self"}
maxConvertWorker: ${SW_PROMETHEUS_FETCHER_NUM_CONVERT_WORKER:-1}
#使用kafka获取agent数据,需要agent配置kafka相同地址,可以与grpc同时使用
kafka-fetcher:
selector: ${SW_KAFKA_FETCHER:-}
default:
bootstrapServers: ${SW_KAFKA_FETCHER_SERVERS:localhost:9092}
namespace: ${SW_NAMESPACE:""}
partitions: ${SW_KAFKA_FETCHER_PARTITIONS:3}
replicationFactor: ${SW_KAFKA_FETCHER_PARTITIONS_FACTOR:2}
enableNativeProtoLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_PROTO_LOG:true}
enableNativeJsonLog: ${SW_KAFKA_FETCHER_ENABLE_NATIVE_JSON_LOG:true}
consumers: ${SW_KAFKA_FETCHER_CONSUMERS:1}
kafkaHandlerThreadPoolSize: ${SW_KAFKA_HANDLER_THREAD_POOL_SIZE:-1}
kafkaHandlerThreadPoolQueueSize: ${SW_KAFKA_HANDLER_THREAD_POOL_QUEUE_SIZE:-1}
receiver-meter:
selector: ${SW_RECEIVER_METER:default}
default:
receiver-otel:
selector: ${SW_OTEL_RECEIVER:default}
default:
enabledHandlers: ${SW_OTEL_RECEIVER_ENABLED_HANDLERS:"oc"}
enabledOcRules: ${SW_OTEL_RECEIVER_ENABLED_OC_RULES:"istio-controlplane,k8s-node,oap,vm"}
receiver-zipkin:
selector: ${SW_RECEIVER_ZIPKIN:-}
default:
# For HTTP server
restHost: ${SW_RECEIVER_ZIPKIN_REST_HOST:0.0.0.0}
restPort: ${SW_RECEIVER_ZIPKIN_REST_PORT:9411}
restContextPath: ${SW_RECEIVER_ZIPKIN_REST_CONTEXT_PATH:/}
restMaxThreads: ${SW_RECEIVER_ZIPKIN_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_RECEIVER_ZIPKIN_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_RECEIVER_ZIPKIN_REST_QUEUE_SIZE:0}
searchableTracesTags: ${SW_ZIPKIN_SEARCHABLE_TAG_KEYS:http.method}
# The sample rate precision is 1/10000, should be between 0 and 10000
sampleRate: ${SW_ZIPKIN_SAMPLE_RATE:10000}
receiver-browser:
selector: ${SW_RECEIVER_BROWSER:default}
default:
# The sample rate precision is 1/10000. 10000 means 100% sample in default.
sampleRate: ${SW_RECEIVER_BROWSER_SAMPLE_RATE:10000}
receiver-log:
selector: ${SW_RECEIVER_LOG:default}
default:
query:
selector: ${SW_QUERY:graphql}
graphql:
# Enable the log testing API to test the LAL.
# NOTE: This API evaluates untrusted code on the OAP server.
# A malicious script can do significant damage (steal keys and secrets, remove files and directories, install malware, etc).
# As such, please enable this API only when you completely trust your users.
enableLogTestTool: ${SW_QUERY_GRAPHQL_ENABLE_LOG_TEST_TOOL:false}
# Maximum complexity allowed for the GraphQL query that can be used to
# abort a query if the total number of data fields queried exceeds the defined threshold.
maxQueryComplexity: ${SW_QUERY_MAX_QUERY_COMPLEXITY:1000}
# Allow user add, disable and update UI template
enableUpdateUITemplate: ${SW_ENABLE_UPDATE_UI_TEMPLATE:false}
# "On demand log" allows users to fetch Pod containers' log in real time,
# because this might expose secrets in the logs (if any), users need
# to enable this manually, and add permissions to OAP cluster role.
enableOnDemandPodLog: ${SW_ENABLE_ON_DEMAND_POD_LOG:false}
# This module is for Zipkin query API and support zipkin-lens UI
query-zipkin:
selector: ${SW_QUERY_ZIPKIN:-}
default:
# For HTTP server
restHost: ${SW_QUERY_ZIPKIN_REST_HOST:0.0.0.0}
restPort: ${SW_QUERY_ZIPKIN_REST_PORT:9412}
restContextPath: ${SW_QUERY_ZIPKIN_REST_CONTEXT_PATH:/zipkin}
restMaxThreads: ${SW_QUERY_ZIPKIN_REST_MAX_THREADS:200}
restIdleTimeOut: ${SW_QUERY_ZIPKIN_REST_IDLE_TIMEOUT:30000}
restAcceptQueueSize: ${SW_QUERY_ZIPKIN_REST_QUEUE_SIZE:0}
# Default look back for serviceNames, remoteServiceNames and spanNames, 1 day in millis
lookback: ${SW_QUERY_ZIPKIN_LOOKBACK:86400000}
# The Cache-Control max-age (seconds) for serviceNames, remoteServiceNames and spanNames
namesMaxAge: ${SW_QUERY_ZIPKIN_NAMES_MAX_AGE:300}
## The below config are OAP support for zipkin-lens UI
# Default traces query max size
uiQueryLimit: ${SW_QUERY_ZIPKIN_UI_QUERY_LIMIT:10}
# Default look back for search traces, 15 minutes in millis
uiDefaultLookback: ${SW_QUERY_ZIPKIN_UI_DEFAULT_LOOKBACK:900000}
alarm:
selector: ${SW_ALARM:default}
default:
telemetry:
selector: ${SW_TELEMETRY:prometheus}
none:
prometheus:
host: ${SW_TELEMETRY_PROMETHEUS_HOST:127.0.0.1}
port: ${SW_TELEMETRY_PROMETHEUS_PORT:1543}
sslEnabled: ${SW_TELEMETRY_PROMETHEUS_SSL_ENABLED:false}
sslKeyPath: ${SW_TELEMETRY_PROMETHEUS_SSL_KEY_PATH:""}
sslCertChainPath: ${SW_TELEMETRY_PROMETHEUS_SSL_CERT_CHAIN_PATH:""}
默认:
# 替代“agent-analyzer.default.sampleRate”的默认采样率
# 采样率精度为1/10000。 10000 表示默认为 100% 样本。
# 替换“agent-analyzer.default.slowTraceSegmentThreshold”的默认跟踪延迟时间
# 设置这个延迟阈值将使慢跟踪段在花费更多时间时被采样,即使采样机制被激活。 默认值为“-1”,这意味着不会对慢速跟踪进行采样。 单位,毫秒。
# rate: 1000 # 此特定服务的采样率
# duration: 10000 # 此特定服务的跟踪采样的跟踪延迟阈值
default:
rate: 10000
duration: -1
10000代表采样率为100% ,1/10000代表采样率为1%
apiVersion: v1
kind: Pod
metadata:
name: agent-as-sidecar
spec:
restartPolicy: Never
volumes:
- name: skywalking-agent
emptyDir: { }
initContainers:
- name: agent-container
image: apache/skywalking-java-agent:8.7.0-alpine
volumeMounts:
- name: skywalking-agent
mountPath: /agent
command: [ "/bin/sh" ]
args: [ "-c", "cp -R /skywalking/agent /agent/" ]
containers:
- name: app-container
image: springio/gs-spring-boot-docker
volumeMounts:
- name: skywalking-agent
mountPath: /skywalking
env:
- name: JAVA_TOOL_OPTIONS
value: "-javaagent:/skywalking/agent/skywalking-agent.jar -DSW_AGENT_NAME=spring-test -DSW_AGENT_COLLECTOR_BACKEND_SERVICES=10.19.9.99:11800"
~