open-im-server/config/instance-down-rules.yml
xuexihuang ceb669dfb8
Feature middleware (#1476)
* fix:fix error values&logs

* modify: add logs

* feature:add redis io retry logic

* feature:add redis error alert rule

* test:for test alert

* fix:fix prometheus rules

* del:del test code

---------

Co-authored-by: lin.huang <lin.huang@apulis.com>
2023-11-29 02:41:11 +00:00

23 lines
968 B
YAML

groups:
- name: instance_down
rules:
- alert: InstanceDown
expr: up == 0
for: 1m
labels:
severity: critical
annotations:
summary: "Instance {{ $labels.instance }} down"
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
- name: database_insert_failure_alerts
rules:
- alert: DatabaseInsertFailed
expr: (increase(msg_insert_redis_failed_total[5m]) > 0) or (increase(msg_insert_mongo_failed_total[5m]) > 0)
for: 1m
labels:
severity: critical
annotations:
summary: "Increase in MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter detected"
description: "Either MsgInsertRedisFailedCounter or MsgInsertMongoFailedCounter has increased in the last 5 minutes, indicating failures in message insert operations to Redis or MongoDB,maybe the redis or mongodb is crash."