实现功能:每隔一分钟查询一次集群搜索结果中的 hits.total.value(即近一分钟内写入的文档数)。如果该值小于500,说明这一分钟内集群几乎没有收到logstash的数据写入请求,存在日志采集中断的可能。根据实际生产情况,本例采取重启logstash进程的方法解决,并进行邮件通知。
一、配置警报器
二、配置通知方法为webhook
三、webhook的脚本如下:
# -*- coding: utf-8 -*-
"""
====================================
@File Name :email_webhook.py
@Time : 2023/10/27 14:55
@Program IDE :PyCharm
@Create by Author :zayki
====================================
"""
import smtplib
import time
from os import popen
from email.mime.text import MIMEText
from email.header import Header
from flask import Flask, request
# Flask application object; the /send_mail webhook route is registered on it.
app = Flask(__name__)
def send_mail(content):
    """Send a plain-text alert e-mail through the SMTP relay.

    Args:
        content: Message body, encoded as UTF-8 plain text.

    Returns:
        None. Connection or delivery failures are printed instead of raised,
        so a mail outage does not turn the webhook response into an error.
    """
    sender = 'monitor@test.com.cn'
    receiver = 'zyjsuper@test.com.cn'
    subject = 'OpenSearch生产集群监控'

    message = MIMEText(content, 'plain', 'utf-8')
    message['From'] = Header('sysmonitor', 'utf-8')
    message['To'] = Header(receiver, 'utf-8')
    message['Subject'] = Header(subject, 'utf-8')

    try:
        # Open the connection inside the try block so that connect failures
        # are reported the same way as send failures; the context manager
        # issues QUIT and closes the socket on every exit path.
        with smtplib.SMTP('email.test.com.cn', port=25, timeout=10) as smtp:
            smtp.sendmail(sender, receiver, message.as_string())
    except (smtplib.SMTPException, OSError) as err:
        print(str(err))
    else:
        print("Send mail successfully.")
@app.route("/send_mail", methods=["POST"])
def send():
    """Webhook endpoint: react to an alert and send a notification e-mail.

    The alert type is read from the ``monopt`` query-string argument:

    * ``tookvalue`` - restart logstash, print its status, then send mail.
    * ``taskcount`` - send mail only.

    Returns:
        ``"Successfully."`` once the mail attempt has been made, or a 400
        response when ``monopt`` is missing or not one of the values above.
    """
    monopt = request.args.get("monopt")
    client = request.remote_addr

    if monopt == "tookvalue":
        content = "近一分钟内命中率低于500个,请检查集群状态。集群IP地址为:{0}".format(client)
        # Read the pipe to completion so the restart has finished before the
        # status query runs; leaving the block closes the pipe.
        with popen("systemctl restart logstash") as restart_pipe:
            restart_pipe.read()
        with popen("systemctl status logstash") as status_pipe:
            logstash_status = status_pipe.read().strip()
        print(logstash_status)
    elif monopt == "taskcount":
        content = "近一分钟内执行的task数量低于5个,请检查集群状态。集群IP地址为:{0}".format(client)
    else:
        # Without this branch `content` would be unbound and the request
        # would fail with an unhandled exception.
        return "Unsupported monopt.", 400

    send_mail(content)
    return "Successfully."
if __name__ == "__main__":
    # Start the development server on all interfaces, port 5000.
    app.run(host="0.0.0.0", port="5000")
复制
四、实现上述功能的python脚本如下:
#!/usr/bin/env python3
# -*-coding:utf-8-*-
# Author: zyjsuper
# Filename: mon_total_hits_alert.py
# CreateTime: 2023/11/1 10:42
import os
import random
import requests
import json
import smtplib
from email.mime.text import MIMEText
from email.header import Header
import base64 as b64
from warnings import filterwarnings
filterwarnings('ignore')
class monitor_total_hits_alert:
    """Count the documents indexed in the last minute and raise alerts.

    An instance holds the search URL (one cluster node chosen at random),
    the JSON search body and the HTTP Basic-auth headers. The methods run
    the search, send a notification e-mail and restart logstash.
    """

    # Nodes used when the caller does not supply its own list.
    DEFAULT_ES_CLUSTER = ('10.10.1.42', '10.10.1.43', '10.10.1.45')

    def __init__(self, username, password, es_cluster=None):
        """Prepare the request URL, body and headers.

        Args:
            username: Account used for HTTP Basic authentication.
            password: Password for ``username``.
            es_cluster: Optional iterable of node addresses; defaults to
                ``DEFAULT_ES_CLUSTER``.
        """
        credentials = '{0}:{1}'.format(username, password).encode('utf-8')
        auth = b64.b64encode(credentials).decode('utf-8')

        nodes = list(es_cluster) if es_cluster else list(self.DEFAULT_ES_CLUSTER)
        # random.choice follows the list length, unlike a fixed randint(0, 2).
        self.url = 'https://{0}:9200/*/_search?size=0'.format(random.choice(nodes))

        # Search body: range filter on @timestamp covering the last minute,
        # with exact hit counting enabled. Kept verbatim.
        self.json_request = '''{
"track_total_hits": true,
"query": {
"range": {
"@timestamp": {
"from": "now-1m",
"to": "now",
"format": "yyyy-MM-dd HH:mm:ss.SSS",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
}'''
        self.headers = {
            'Content-Type': 'application/json',
            'Authorization': 'Basic {0}'.format(auth),
        }

    def get_total_hits(self):
        """Run the search and return the decoded JSON response.

        Returns:
            The response body parsed into Python objects.

        Raises:
            requests.RequestException: on connection failure, timeout or a
                non-2xx HTTP status.
        """
        # NOTE(review): verify=False disables TLS certificate verification;
        # point `verify` at the cluster CA bundle if one is available.
        response = requests.post(
            self.url,
            headers=self.headers,
            data=self.json_request,
            verify=False,
            timeout=10,
        )
        print("本次请求得到的响应数据为:{0}".format(response.text))
        # Fail here on auth/server errors instead of later with a KeyError
        # when the caller looks for the 'hits' key.
        response.raise_for_status()
        return response.json()

    def send_mail(self, subject, content):
        """Send a plain-text notification e-mail.

        Args:
            subject: Subject line of the message.
            content: Message body, encoded as UTF-8 plain text.

        Connection or delivery failures are printed instead of raised.
        """
        sender = 'monitor@test.com.cn'
        receiver = 'zyjsuper@test.com.cn'

        message = MIMEText(content, 'plain', 'utf-8')
        message['From'] = Header('monitor', 'utf-8')
        message['To'] = Header(receiver, 'utf-8')
        message['Subject'] = Header(subject, 'utf-8')

        try:
            # NOTE(review): the relay host's domain differs from the sender
            # and receiver domain - confirm this is the intended server.
            # The context manager closes the connection on every exit path.
            with smtplib.SMTP('email.aeonlife.com.cn', port=25, timeout=10) as smtp:
                smtp.sendmail(sender, receiver, message.as_string())
        except (smtplib.SMTPException, OSError) as err:
            print(str(err))
        else:
            print("Send mail successfully.")

    def restart_logstash(self):
        """Restart the logstash service and wait for the command to finish."""
        # Reading to EOF waits for systemctl; leaving the block closes the pipe.
        with os.popen('systemctl restart logstash') as pipe:
            pipe.read()
if __name__ == '__main__':
    # One-shot check, meant to be run once per minute by a scheduler.
    monitor = monitor_total_hits_alert("admin", "admin")
    search_result = monitor.get_total_hits()
    total_hits = search_result['hits']['total']['value']
    print("本次查询的命中次数为:{0}".format(total_hits))

    # Fewer than 500 documents in the last minute is treated as a stall.
    # NOTE(review): restart_logstash() is defined on the class but is not
    # called here; only the e-mail is sent.
    if total_hits < 500:
        monitor.send_mail("OpenSearch集群监控", "近一分钟内命中率低于500个,请检查集群状态。")
复制
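五、涉及到以下三种查询方式的写法,请参考:(备注:此三种写法查询到的结果不一致,拜托看到的朋友指点一下是什么原理,是否写法有什么问题。)
说明:方法1没有query条件,hits.total.value统计的是所有索引的全部文档,近一分钟的数量要看聚合桶里的doc_count,而且now-1m/m会向下取整到整分钟,时间窗口会大于一分钟;方法2只查询了单个索引,字段名后缺少冒号,且范围的上下界写反了,正确写法应为 q=@timestamp:[now-1m TO now];方法3用range查询做过滤,hits.total.value才是近一分钟内的文档数。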
方法1
GET */_search?size=0
{
"track_total_hits": true,
"aggs": {
"range": {
"date_range": {
"field": "@timestamp",
"format": "yyyy-MM-dd HH:mm:ss",
"ranges": [
{
"from": "now-1m/m",
"to": "now"
}
]
}
}
}
}
方法2
GET logstash-2023.11.01/_search?size=0&track_total_hits=true&q=@timestamp[ "now" TO "now-1m" ]
方法3
GET */_search?size=0
{
"track_total_hits": true,
"query": {
"range": {
"@timestamp": {
"from": "now-1m",
"to": "now",
"format": "yyyy-MM-dd HH:mm:ss.SSS",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
}
复制
最后修改时间:2023-11-09 09:04:46
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。
评论
相关阅读
[MYSQL] 服务器出现大量的TIME_WAIT, 每天凌晨就清零了
大大刺猬
152次阅读
2025-04-01 16:20:44
mysql提升10倍count(*)的神器
大大刺猬
132次阅读
2025-03-21 16:54:21
演讲实录|分布式 Python 计算服务 MaxFrame 介绍及场景应用方案
阿里云大数据AI技术
127次阅读
2025-03-17 13:27:37
官宣,Milvus SDK v2发布!原生异步接口、支持MCP、性能提升
ZILLIZ
102次阅读
2025-04-02 09:34:13
[MYSQL] query_id和STATEMENT_ID在不同OS上的关系
大大刺猬
73次阅读
2025-03-26 19:08:13
DataWorks :Data+AI 一体化开发实战图谱
阿里云大数据AI技术
50次阅读
2025-03-19 11:00:55
国密算法介绍
漫步者
46次阅读
2025-03-21 09:20:39
如何使用 RisingWave 和 PuppyGraph 构建高性能实时图分析框架
RisingWave中文开源社区
43次阅读
2025-03-18 10:49:54
WingPro for Mac 强大的Python开发工具 v10.0.9注册激活版
一梦江湖远
36次阅读
2025-03-29 10:33:27
python操作MySQL数据库
怀念和想念
31次阅读
2025-03-30 23:22:07