机器分配
192.168.77.136 docker-compose
192.168.77.137 log-test cron
centos
yum -y install docker-ce docker-compose
debian/ubuntu
apt -y install docker-ce docker-compose
编写docker-compose.yaml
# Create the working directory for the ELK stack files; -p makes the step
# repeatable (no error if /opt/elk already exists).
mkdir -p /opt/elk
cd /opt/elk
vim logstash.yml
# Logstash settings file; it is bind-mounted into the container as
# /usr/share/logstash/config/logstash.yml.
# Bind address 0.0.0.0 = all interfaces inside the container.
http.host: "0.0.0.0"
# Elasticsearch endpoint used for X-Pack monitoring: port 9200 published on the
# docker-compose host (192.168.77.136).
xpack.monitoring.elasticsearch.hosts: [ "http://192.168.77.136:9200" ]
vim docker-compose.yaml
# Three-node Elasticsearch 7.14.0 cluster plus Logstash and Kibana on one Docker host.
# Indentation is significant in YAML: every key below must keep its nesting level.
# NOTE(review): Elasticsearch in Docker normally needs vm.max_map_count >= 262144
# on the host -- confirm it is set before starting the stack.
version: '3'
services:
  # Node 1: the only Elasticsearch node whose HTTP port (9200) is published on the host.
  elasticsearch1:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    container_name: elasticsearch1
    environment:
      - node.name=elasticsearch1
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=elasticsearch2,elasticsearch3
      - cluster.initial_master_nodes=elasticsearch1,elasticsearch2
      # Memory locking is requested here; the unlimited memlock ulimit below goes with it.
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      # Quoted so the mapping is always read as a string.
      - "9200:9200"

  # Node 2: no host port published.
  elasticsearch2:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    container_name: elasticsearch2
    environment:
      - node.name=elasticsearch2
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=elasticsearch1,elasticsearch3
      - cluster.initial_master_nodes=elasticsearch1,elasticsearch2
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data02:/usr/share/elasticsearch/data

  # Node 3: configured with node.master=false; cluster.initial_master_nodes
  # accordingly lists only nodes 1 and 2.
  elasticsearch3:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.14.0
    container_name: elasticsearch3
    environment:
      - node.name=elasticsearch3
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=elasticsearch1,elasticsearch2
      - cluster.initial_master_nodes=elasticsearch1,elasticsearch2
      - node.master=false
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data03:/usr/share/elasticsearch/data

  logstash:
    image: docker.elastic.co/logstash/logstash:7.14.0
    volumes:
      # Host logs are exposed to the container under /host/var/log.
      - /var/log:/host/var/log
      - /opt/elk/logstash.yml:/usr/share/logstash/config/logstash.yml
      - /opt/elk/pipeline:/usr/share/logstash/pipeline
    ports:
      # NOTE(review): the pipeline files in these notes only use file inputs;
      # port 5000 is published but nothing visible listens on it.
      - "5000:5000"

  kibana:
    image: docker.elastic.co/kibana/kibana:7.14.0
    ports:
      - "5601:5601"
    environment:
      # Kibana reaches Elasticsearch by service name.
      - ELASTICSEARCH_HOSTS=http://elasticsearch1:9200

# Named volumes holding each Elasticsearch node's data directory.
volumes:
  data01:
    driver: local
  data02:
    driver: local
  data03:
    driver: local
##被收集日志赋权
# Logstash reads the host's /var/log through a bind mount, so the directory
# must be traversable and the log files readable by "other" users.
# NOTE(review): o+r is applied recursively to everything under /var/log, which
# also exposes any sensitive logs stored there -- narrow the path if possible.
chmod +x /var/log
chmod -R o+r /var/log
## 创建配置文件
# Directory that is bind-mounted into the Logstash container as
# /usr/share/logstash/pipeline; -p avoids an error if it already exists.
mkdir -p /opt/elk/pipeline
cd /opt/elk/pipeline
[root@localhost pipeline]# cat logstash-cron.conf
# Pipeline for cron logs: read the log files, tag each event with the source
# host IP, and index the events into Elasticsearch.
input {
  file {
    # Adjust to the actual cron log path (host /var/log is mounted at /host/var/log).
    path           => "/host/var/log/cron*"
    start_position => "beginning"
    sincedb_path   => "/dev/null"
    # Adds a "type" field to every event; the conditionals below test it.
    type           => "cron"
  }
}

filter {
  if [type] == "cron" {
    grok {
      # Capture the whole line into cron_message.
      match     => { "message" => "%{GREEDYDATA:cron_message}" }
      # Change this to the real IP address of the source server.
      add_field => { "source_ip" => "192.168.77.136" }
    }
  }
}

output {
  if [type] == "cron" {
    elasticsearch {
      hosts => ["192.168.77.136:9200"]
      # One index per source IP per day.
      index => "cron_%{source_ip}-%{+YYYY.MM.dd}"
    }
  }
  # Also print every event to the container's stdout.
  stdout { codec => rubydebug }
}
[root@localhost pipeline]# cat logstash-nginx.conf
# Pipeline for nginx logs: read the log files, tag each event with the source
# host IP, and index the events into Elasticsearch.
input {
  file {
    # Adjust to the actual nginx log path (host /var/log is mounted at /host/var/log).
    path => "/host/var/log/nginx/*"
    start_position => "beginning"
    sincedb_path => "/dev/null"
    # Adds a "type" field to every event; the conditionals below test it.
    type => "nginx"
  }
}

filter {
  if [type] == "nginx" {
    grok {
      # Capture the whole line into nginx_message (named for this pipeline,
      # not the cron one it was copied from).
      match => { "message" => "%{GREEDYDATA:nginx_message}" }
      # Change this to the real IP address of the source server.
      add_field => { "source_ip" => "192.168.77.136" }
    }
  }
}

output {
  if [type] == "nginx" {
    elasticsearch {
      hosts => ["192.168.77.136:9200"]
      # One index per source IP per day.
      index => "nginx_%{source_ip}-%{+YYYY.MM.dd}"
    }
  }
  # Also print every event to the container's stdout.
  stdout { codec => rubydebug }
}
注意权限问题:Logstash 需要对日志文件夹有执行权限、对日志文件有读取权限,并且需要关闭 SELinux
# Run from the directory that holds docker-compose.yaml.
cd /opt/elk
# Start the whole stack in the background; stdout of the command goes to elk.log.
docker-compose up -d > elk.log
# Stop and remove the stack.
# NOTE(review): presumably listed here as the teardown command for reference,
# not meant to be run right after "up" -- confirm.
docker-compose down
## 在被收集日志的机器(192.168.77.137)上为日志赋权
# Logstash reads the host's /var/log through a bind mount, so the directory
# must be traversable and the log files readable by "other" users.
# NOTE(review): o+r is applied recursively to everything under /var/log, which
# also exposes any sensitive logs stored there -- narrow the path if possible.
chmod +x /var/log
chmod -R o+r /var/log
##从主集群机器上拷贝配置文件
# /opt/elk is a directory, so scp needs -r to copy it (with logstash.yml and
# pipeline/) to the remote host; without -r scp refuses non-regular files.
scp -r /opt/elk 192.168.77.137:/opt
##启动
# Start a standalone Logstash 7.14.0 container that reads the host's /var/log
# using the copied pipeline directory and logstash.yml.
docker run -d \
  --name elk_logstash_1 \
  -p 5000:5000 \
  -v /var/log:/host/var/log \
  -v /opt/elk/pipeline:/usr/share/logstash/pipeline \
  -v /opt/elk/logstash.yml:/usr/share/logstash/config/logstash.yml \
  docker.elastic.co/logstash/logstash:7.14.0
如果遇到以下问题
[root@localhost elk]# curl http://192.168.77.136:9200/_cluster/health?pretty
{
"cluster_name" : "es-docker-cluster",
"status" : "red",
"timed_out" : false,
"number_of_nodes" : 3,
"number_of_data_nodes" : 3,
"active_primary_shards" : 8,
"active_shards" : 8,
"relocating_shards" : 0,
"initializing_shards" : 0,
"unassigned_shards" : 18,
"delayed_unassigned_shards" : 0,
"number_of_pending_tasks" : 2,
"number_of_in_flight_fetch" : 3,
"task_max_waiting_in_queue_millis" : 1013,
"active_shards_percent_as_number" : 30.76923076923077
}
##
"status" : "red",
###
curl -X GET "192.168.77.136:9200/_cluster/allocation/explain?pretty"
查看
curl -X GET "192.168.77.136:9200/_cluster/allocation/explain?pretty"
{
"index" : ".geoip_databases",
"shard" : 0,
"primary" : false,
"current_state" : "unassigned",
"unassigned_info" : {
"reason" : "CLUSTER_RECOVERED",
"at" : "2023-12-10T10:43:38.178Z",
"last_allocation_status" : "no_attempt"
},
"can_allocate" : "throttled",
"allocate_explanation" : "allocation temporarily throttled",
"node_allocation_decisions" : [
{
"node_id" : "p0gVV46mSiaNR2eIxc8KRQ",
"node_name" : "elasticsearch2",
"transport_address" : "172.19.0.3:9300",
"node_attributes" : {
"ml.machine_memory" : "4072427520",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"ml.max_jvm_size" : "536870912",
"transform.node" : "true"
},
"node_decision" : "throttled",
"deciders" : [
{
"decider" : "throttling",
"decision" : "THROTTLE",
"explanation" : "reached the limit of outgoing shard recoveries [2] on the node [a1aLJNBxQ7G0kJw8SfI8xQ] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=2] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
}
]
},
{
"node_id" : "yRljcHJOS121zoG97QpEMA",
"node_name" : "elasticsearch3",
"transport_address" : "172.19.0.5:9300",
"node_attributes" : {
"ml.machine_memory" : "4072427520",
"ml.max_open_jobs" : "512",
"xpack.installed" : "true",
"ml.max_jvm_size" : "536870912",
"transform.node" : "true"
},
"node_decision" : "throttled",
"deciders" : [
{
"decider" : "throttling",
"decision" : "THROTTLE",
"explanation" : "reached the limit of outgoing shard recoveries [2] on the node [a1aLJNBxQ7G0kJw8SfI8xQ] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=2] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
}
]
},
{
"node_id" : "a1aLJNBxQ7G0kJw8SfI8xQ",
"node_name" : "elasticsearch1",
"transport_address" : "172.19.0.6:9300",
"node_attributes" : {
"ml.machine_memory" : "4072427520",
"xpack.installed" : "true",
"transform.node" : "true",
"ml.max_open_jobs" : "512",
"ml.max_jvm_size" : "536870912"
},
"node_decision" : "no",
"store" : {
"matching_size_in_bytes" : 40959603
},
"deciders" : [
{
"decider" : "same_shard",
"decision" : "NO",
"explanation" : "a copy of this shard is already allocated to this node [[.geoip_databases][0], node[a1aLJNBxQ7G0kJw8SfI8xQ], [P], s[STARTED], a[id=xg4XhDseQuuoRdFE9BQL_A]]"
},
{
"decider" : "throttling",
"decision" : "THROTTLE",
"explanation" : "reached the limit of outgoing shard recoveries [2] on the node [a1aLJNBxQ7G0kJw8SfI8xQ] which holds the primary, cluster setting [cluster.routing.allocation.node_concurrent_outgoing_recoveries=2] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])"
}
]
}
]
}
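这段 Elasticsearch 集群分片分配信息显示,.geoip_databases 索引的副本分片(shard 0)没有被分配("current_state" : "unassigned")。原因是:elasticsearch2 和 elasticsearch3 被暂时限流("node_decision" : "throttled"),而 elasticsearch1 已经持有该分片的主分片,同一分片的副本不能再分配到同一个节点("node_decision" : "no",decider 为 same_shard)。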
这主要是由于 cluster.routing.allocation.node_concurrent_outgoing_recoveries 集群设置的限制。这个设置限制了单个节点可以同时进行的迁出分片恢复操作的数量,其默认值是 2。在你的集群中,持有主分片的节点 elasticsearch1(a1aLJNBxQ7G0kJw8SfI8xQ)已经达到了这个限制,因此副本分片暂时无法从它恢复到其他节点。
对于这个问题,你有两个可能的解决方法:
临时增大 cluster.routing.allocation.node_concurrent_outgoing_recoveries 的值。你可以用"未分配分片数 ÷ 节点数"来估算这个值。例如,你的集群有 18 个未分配的分片和 3 个可用节点,18 ÷ 3 = 6,你可以将 cluster.routing.allocation.node_concurrent_outgoing_recoveries 的值设为 6。请注意,增大这个值可能会增大集群的负载。
用以下的 API 指令可以增大这个值:
# Raise cluster.routing.allocation.node_concurrent_outgoing_recoveries to 6
# through the cluster settings API; the change is sent under the "transient" scope.
curl -X PUT "192.168.77.136:9200/_cluster/settings" -H 'Content-Type: application/json' -d'
{
"transient" :{
"cluster.routing.allocation.node_concurrent_outgoing_recoveries" : 6
}
}'
等待当前的恢复操作完成。一旦当前的恢复操作完成,新的分片就可以被分配了。在实际生产环境中,最好的做法是避免产生大量未分配的分片。你可以更频繁地检查你的集群健康状态,一旦问题发生,立即进行处理。