diff --git a/docker/categraf/conf/config.toml b/docker/categraf/conf/config.toml index 2df2ba17..917c2bc8 100644 --- a/docker/categraf/conf/config.toml +++ b/docker/categraf/conf/config.toml @@ -73,3 +73,11 @@ timeout = 5000 dial_timeout = 2500 max_idle_conns_per_host = 100 +[ibex] +enable = true +## ibex flush interval +interval = "1000ms" +## n9e ibex server rpc address +servers = ["ibex:20090"] +## temp script dir +meta_dir = "./meta" diff --git a/docker/categraf/conf/logs.toml b/docker/categraf/conf/logs.toml deleted file mode 100644 index a4d8b721..00000000 --- a/docker/categraf/conf/logs.toml +++ /dev/null @@ -1,35 +0,0 @@ -[logs] -## key 占位符 -api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q" -## 是否开启日志采集 -enable = false -## 接受日志的server地址 -send_to = "127.0.0.1:17878" -## 发送日志的协议 http/tcp -send_type = "http" -## 是否压缩发送 -use_compress = false -## 是否采用ssl -send_with_tls = false -## -batch_wait = 5 -## 日志offset信息保存目录 -run_path = "/opt/categraf/run" -## 最多同时采集多少个日志文件 -open_files_limit = 100 -## 定期扫描目录下是否有新增日志 -scan_period = 10 -## -frame_size = 10 -## -collect_container_all = true - ## 全局的处理规则 - [[logs.Processing_rules]] - ## 单个日志采集配置 - [[logs.items]] - ## file/journald - type = "file" - ## type=file时 path必填,type=journald时 port必填 - path = "/opt/tomcat/logs/*.txt" - source = "tomcat" - service = "my_service" diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 876e56b4..f67837b7 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -12,7 +12,7 @@ services: hostname: mysql restart: always ports: - - "3306:3306" + - "3406:3306" environment: TZ: Asia/Shanghai MYSQL_ROOT_PASSWORD: 1234 @@ -80,7 +80,7 @@ services: sh -c "/wait && /app/ibex server" n9e: - image: flashcatcloud/nightingale:6.0.0-beta.1 + image: flashcatcloud/nightingale:latest container_name: n9e hostname: n9e restart: always @@ -122,31 +122,13 @@ services: - ./categraf/conf:/etc/categraf/conf - /:/hostfs - /var/run/docker.sock:/var/run/docker.sock - ports: - - "9100:9100/tcp" + # ports: + # - "9100:9100/tcp" networks: - nightingale depends_on: - n9e - links: - - n9e:n9e - - agentd: - image: ulric2019/ibex:0.3 - container_name: agentd - hostname: agentd - restart: always - environment: - GIN_MODE: release - TZ: Asia/Shanghai - volumes: - - ./ibexetc:/app/etc - networks: - - nightingale - depends_on: - ibex links: + - n9e:n9e - ibex:ibex - command: - - "/app/ibex" - - "agentd" \ No newline at end of file diff --git a/docker/ibexetc/agentd.conf b/docker/ibexetc/agentd.conf deleted file mode 100644 index 86b6d07c..00000000 --- a/docker/ibexetc/agentd.conf +++ /dev/null @@ -1,38 +0,0 @@ -# debug, release -RunMode = "release" - -# task meta storage dir -MetaDir = "./meta" - -[HTTP] -Enable = true -# http listening address -Host = "0.0.0.0" -# http listening port -Port = 2090 -# https cert file path -CertFile = "" -# https key file path -KeyFile = "" -# whether print access log -PrintAccessLog = true -# whether enable pprof -PProf = false -# http graceful shutdown timeout, unit: s -ShutdownTimeout = 30 -# max content length: 64M -MaxContentLength = 67108864 -# http server read timeout, unit: s -ReadTimeout = 20 -# http server write timeout, unit: s -WriteTimeout = 40 -# http server idle timeout, unit: s -IdleTimeout = 120 - -[Heartbeat] -# unit: ms -Interval = 1000 -# rpc servers -Servers = ["ibex:20090"] -# $ip or $hostname or specified string -Host = "categraf01" \ No newline at end of file diff --git a/docker/n9eetc/config.toml b/docker/n9eetc/config.toml index 9c91cb6f..a07c1b23 100644 --- a/docker/n9eetc/config.toml +++ b/docker/n9eetc/config.toml @@ -118,15 +118,6 @@ IP = "" Interval = 1000 ClusterName = "default" -# [Alert.SMTP] -# Host = "smtp.163.com" -# Port = 994 -# User = "username" -# Pass = "password" -# From = "username@163.com" -# InsecureSkipVerify = true -# Batch = 5 - # [Alert.Alerting] # NotifyConcurrency = 10 @@ -147,52 +138,7 @@ BasicAuthPass = "ibex" Timeout = 3000 [Pushgw] -# use target labels in database instead of in series LabelRewrite = true -# # default busigroup key name -# BusiGroupLabelKey = "busigroup" -# ForceUseServerTS = false - -# [Pushgw.DebugSample] -# ident = "xx" -# __name__ = "xx" - -# [Pushgw.WriterOpt] -# # Writer Options -# QueueCount = 1000 -# QueueMaxSize = 1000000 -# QueuePopSize = 1000 -# # ident or metric -# ShardingKey = "ident" [[Pushgw.Writers]] -# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write" -Url = "http://prometheus:9090/api/v1/write" -# Basic auth username -BasicAuthUser = "" -# Basic auth password -BasicAuthPass = "" -# timeout settings, unit: ms -Headers = ["X-From", "n9e"] -Timeout = 10000 -DialTimeout = 3000 -TLSHandshakeTimeout = 30000 -ExpectContinueTimeout = 1000 -IdleConnTimeout = 90000 -# time duration, unit: ms -KeepAlive = 30000 -MaxConnsPerHost = 0 -MaxIdleConns = 100 -MaxIdleConnsPerHost = 100 -## Optional TLS Config -# UseTLS = false -# TLSCA = "/etc/n9e/ca.pem" -# TLSCert = "/etc/n9e/cert.pem" -# TLSKey = "/etc/n9e/key.pem" -# InsecureSkipVerify = false -# [[Writers.WriteRelabels]] -# Action = "replace" -# SourceLabels = ["__address__"] -# Regex = "([^:]+)(?::\\d+)?" -# Replacement = "$1:80" -# TargetLabel = "__address__" \ No newline at end of file +Url = "http://prometheus:9090/api/v1/write" \ No newline at end of file diff --git a/integrations/linux/alerts/linux_by_categraf.json b/integrations/linux/alerts/linux_by_categraf.json index 525cc220..27101dec 100644 --- a/integrations/linux/alerts/linux_by_categraf.json +++ b/integrations/linux/alerts/linux_by_categraf.json @@ -1,45 +1,54 @@ [ - { - "name": "Lost connection with monitoring target - categraf", - "note": "", - "severity": 1, - "disabled": 0, - "prom_for_duration": 60, - "prom_ql": "max_over_time(target_up[130s]) == 0", - "prom_eval_interval": 15, - "enable_stime": "00:00", - "enable_etime": "23:59", - "enable_days_of_week": [ - "1", - "2", - "3", - "4", - "5", - "6", - "0" - ], - "notify_recovered": 1, - "notify_channels": [ - "email", - "dingtalk", - "wecom" - ], - "notify_repeat_step": 60, - "callbacks": [], - "runbook_url": "", - "append_tags": [] + { + "cate": "host", + "datasource_ids": null, + "name": "Lost connection with monitoring target - categraf", + "note": "", + "prod": "host", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "disabled": 0, + "prom_for_duration": 0, + "prom_ql": "", + "rule_config": { + "inhibit": false, + "queries": [ + { + "key": "all_hosts", + "op": "==", + "values": [] + } + ], + "triggers": [ + { + "duration": 60, + "severity": 2, + "type": "target_miss" + } + ] }, - { - "name": "Machine load - high CPU, please pay attention - categraf", - "note": "", - "severity": 3, - "disabled": 0, - "prom_for_duration": 60, - "prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25", - "prom_eval_interval": 15, - "enable_stime": "00:00", - "enable_etime": "23:59", - "enable_days_of_week": [ + "prom_eval_interval": 15, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "23:59", + "enable_etimes": [ + "23:59" + ], + "enable_days_of_week": [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ], + "enable_days_of_weeks": [ + [ "1", "2", "3", @@ -47,18 +56,19 @@ "5", "6", "0" - ], - "notify_recovered": 1, - "notify_channels": [ - "email", - "dingtalk", - "wecom" - ], - "notify_repeat_step": 60, - "callbacks": [], - "runbook_url": "", - "append_tags": [] - }, + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {} + }, { "name": "Machine load - high memory, please pay attention - categraf", "note": "", diff --git a/integrations/linux/alerts/linux_by_telegraf.json b/integrations/linux/alerts/linux_by_telegraf.json index b6ceeee2..208f670c 100644 --- a/integrations/linux/alerts/linux_by_telegraf.json +++ b/integrations/linux/alerts/linux_by_telegraf.json @@ -30,15 +30,44 @@ "append_tags": [] }, { + "cate": "host", + "datasource_ids": null, "name": "Lost connection with monitoring target - telegraf", "note": "", - "severity": 1, + "prod": "host", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, "disabled": 0, - "prom_for_duration": 60, - "prom_ql": "target_up != 1", + "prom_for_duration": 0, + "prom_ql": "", + "rule_config": { + "inhibit": false, + "queries": [ + { + "key": "all_hosts", + "op": "==", + "values": [] + } + ], + "triggers": [ + { + "duration": 60, + "severity": 2, + "type": "target_miss" + } + ] + }, "prom_eval_interval": 15, "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], "enable_etime": "23:59", + "enable_etimes": [ + "23:59" + ], "enable_days_of_week": [ "1", "2", @@ -48,16 +77,27 @@ "6", "0" ], - "notify_recovered": 1, - "notify_channels": [ - "email", - "dingtalk", - "wecom" + "enable_days_of_weeks": [ + [ + "1", + "2", + "3", + "4", + "5", + "6", + "0" + ] ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, "callbacks": [], "runbook_url": "", - "append_tags": [] + "append_tags": [], + "annotations": {} }, { "name": "Port detection failed, please pay attention - telegraf", diff --git a/memsto/notify_config.go b/memsto/notify_config.go index 7d2a9c3f..02a6d061 100644 --- a/memsto/notify_config.go +++ b/memsto/notify_config.go @@ -2,6 +2,7 @@ package memsto import ( "encoding/json" + "strings" "sync" "time" @@ -57,7 +58,14 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { if err != nil { return err } - json.Unmarshal([]byte(cval), &w.webhooks) + + if strings.TrimSpace(cval) != "" { + err = json.Unmarshal([]byte(cval), &w.webhooks) + if err != nil { + logger.Errorf("failed to unmarshal webhooks:%s config:", cval, err) + } + } + logger.Infof("timer: sync wbhooks done number: %d", len(w.webhooks)) cval, err = models.ConfigsGet(w.ctx, models.SMTP) @@ -65,9 +73,11 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { return err } - err = toml.Unmarshal([]byte(cval), &w.smtp) - if err != nil { - logger.Errorf("failed to unmarshal smtp:%s config:", cval, err) + if strings.TrimSpace(cval) != "" { + err = toml.Unmarshal([]byte(cval), &w.smtp) + if err != nil { + logger.Errorf("failed to unmarshal smtp:%s config:", cval, err) + } } logger.Infof("timer: sync smtp:%+v done", w.smtp) @@ -76,20 +86,28 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { if err != nil { return err } - err = json.Unmarshal([]byte(cval), &w.script) - if err != nil { - logger.Errorf("failed to unmarshal notify script:%s config:", cval, err) + + if strings.TrimSpace(cval) != "" { + err = json.Unmarshal([]byte(cval), &w.script) + if err != nil { + logger.Errorf("failed to unmarshal notify script:%s config:", cval, err) + } } + logger.Infof("timer: sync notify script done") cval, err = models.ConfigsGet(w.ctx, models.IBEX) if err != nil { return err } - err = toml.Unmarshal([]byte(cval), &w.ibex) - if err != nil { - logger.Errorf("failed to unmarshal ibex:%s config:", cval, err) + + if strings.TrimSpace(cval) != "" { + err = toml.Unmarshal([]byte(cval), &w.ibex) + if err != nil { + logger.Errorf("failed to unmarshal ibex:%s config:", cval, err) + } } + logger.Infof("timer: sync ibex done") return nil