| @@ -73,3 +73,11 @@ timeout = 5000 | |||
| dial_timeout = 2500 | |||
| max_idle_conns_per_host = 100 | |||
| [ibex] | |||
| enable = true | |||
| ## ibex flush interval | |||
| interval = "1000ms" | |||
| ## n9e ibex server rpc address | |||
| servers = ["ibex:20090"] | |||
| ## temp script dir | |||
| meta_dir = "./meta" | |||
| @@ -1,35 +0,0 @@ | |||
| [logs] | |||
| ## API key (placeholder) | |||
| api_key = "ef4ahfbwzwwtlwfpbertgq1i6mq0ab1q" | |||
| ## whether to enable log collection | |||
| enable = false | |||
| ## address of the server that receives the logs | |||
| send_to = "127.0.0.1:17878" | |||
| ## protocol used to send logs: http/tcp | |||
| send_type = "http" | |||
| ## whether to compress logs before sending | |||
| use_compress = false | |||
| ## whether to use SSL | |||
| send_with_tls = false | |||
| ## | |||
| batch_wait = 5 | |||
| ## directory where log offset information is saved | |||
| run_path = "/opt/categraf/run" | |||
| ## maximum number of log files collected at the same time | |||
| open_files_limit = 100 | |||
| ## periodically scan the directory for newly added logs | |||
| scan_period = 10 | |||
| ## | |||
| frame_size = 10 | |||
| ## | |||
| collect_container_all = true | |||
| ## global processing rules | |||
| [[logs.Processing_rules]] | |||
| ## configuration for a single log collection item | |||
| [[logs.items]] | |||
| ## file/journald | |||
| type = "file" | |||
| ## path is required when type=file; port is required when type=journald | |||
| path = "/opt/tomcat/logs/*.txt" | |||
| source = "tomcat" | |||
| service = "my_service" | |||
| @@ -12,7 +12,7 @@ services: | |||
| hostname: mysql | |||
| restart: always | |||
| ports: | |||
| - "3306:3306" | |||
| - "3406:3306" | |||
| environment: | |||
| TZ: Asia/Shanghai | |||
| MYSQL_ROOT_PASSWORD: 1234 | |||
| @@ -80,7 +80,7 @@ services: | |||
| sh -c "/wait && /app/ibex server" | |||
| n9e: | |||
| image: flashcatcloud/nightingale:6.0.0-beta.1 | |||
| image: flashcatcloud/nightingale:latest | |||
| container_name: n9e | |||
| hostname: n9e | |||
| restart: always | |||
| @@ -122,31 +122,13 @@ services: | |||
| - ./categraf/conf:/etc/categraf/conf | |||
| - /:/hostfs | |||
| - /var/run/docker.sock:/var/run/docker.sock | |||
| ports: | |||
| - "9100:9100/tcp" | |||
| # ports: | |||
| # - "9100:9100/tcp" | |||
| networks: | |||
| - nightingale | |||
| depends_on: | |||
| - n9e | |||
| links: | |||
| - n9e:n9e | |||
| agentd: | |||
| image: ulric2019/ibex:0.3 | |||
| container_name: agentd | |||
| hostname: agentd | |||
| restart: always | |||
| environment: | |||
| GIN_MODE: release | |||
| TZ: Asia/Shanghai | |||
| volumes: | |||
| - ./ibexetc:/app/etc | |||
| networks: | |||
| - nightingale | |||
| depends_on: | |||
| - ibex | |||
| links: | |||
| - n9e:n9e | |||
| - ibex:ibex | |||
| command: | |||
| - "/app/ibex" | |||
| - "agentd" | |||
| @@ -1,38 +0,0 @@ | |||
| # debug, release | |||
| RunMode = "release" | |||
| # task meta storage dir | |||
| MetaDir = "./meta" | |||
| [HTTP] | |||
| Enable = true | |||
| # http listening address | |||
| Host = "0.0.0.0" | |||
| # http listening port | |||
| Port = 2090 | |||
| # https cert file path | |||
| CertFile = "" | |||
| # https key file path | |||
| KeyFile = "" | |||
| # whether print access log | |||
| PrintAccessLog = true | |||
| # whether enable pprof | |||
| PProf = false | |||
| # http graceful shutdown timeout, unit: s | |||
| ShutdownTimeout = 30 | |||
| # max content length: 64M | |||
| MaxContentLength = 67108864 | |||
| # http server read timeout, unit: s | |||
| ReadTimeout = 20 | |||
| # http server write timeout, unit: s | |||
| WriteTimeout = 40 | |||
| # http server idle timeout, unit: s | |||
| IdleTimeout = 120 | |||
| [Heartbeat] | |||
| # unit: ms | |||
| Interval = 1000 | |||
| # rpc servers | |||
| Servers = ["ibex:20090"] | |||
| # $ip or $hostname or specified string | |||
| Host = "categraf01" | |||
| @@ -118,15 +118,6 @@ IP = "" | |||
| Interval = 1000 | |||
| ClusterName = "default" | |||
| # [Alert.SMTP] | |||
| # Host = "smtp.163.com" | |||
| # Port = 994 | |||
| # User = "username" | |||
| # Pass = "password" | |||
| # From = "username@163.com" | |||
| # InsecureSkipVerify = true | |||
| # Batch = 5 | |||
| # [Alert.Alerting] | |||
| # NotifyConcurrency = 10 | |||
| @@ -147,52 +138,7 @@ BasicAuthPass = "ibex" | |||
| Timeout = 3000 | |||
| [Pushgw] | |||
| # use target labels in database instead of in series | |||
| LabelRewrite = true | |||
| # # default busigroup key name | |||
| # BusiGroupLabelKey = "busigroup" | |||
| # ForceUseServerTS = false | |||
| # [Pushgw.DebugSample] | |||
| # ident = "xx" | |||
| # __name__ = "xx" | |||
| # [Pushgw.WriterOpt] | |||
| # # Writer Options | |||
| # QueueCount = 1000 | |||
| # QueueMaxSize = 1000000 | |||
| # QueuePopSize = 1000 | |||
| # # ident or metric | |||
| # ShardingKey = "ident" | |||
| [[Pushgw.Writers]] | |||
| # Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write" | |||
| Url = "http://prometheus:9090/api/v1/write" | |||
| # Basic auth username | |||
| BasicAuthUser = "" | |||
| # Basic auth password | |||
| BasicAuthPass = "" | |||
| # timeout settings, unit: ms | |||
| Headers = ["X-From", "n9e"] | |||
| Timeout = 10000 | |||
| DialTimeout = 3000 | |||
| TLSHandshakeTimeout = 30000 | |||
| ExpectContinueTimeout = 1000 | |||
| IdleConnTimeout = 90000 | |||
| # time duration, unit: ms | |||
| KeepAlive = 30000 | |||
| MaxConnsPerHost = 0 | |||
| MaxIdleConns = 100 | |||
| MaxIdleConnsPerHost = 100 | |||
| ## Optional TLS Config | |||
| # UseTLS = false | |||
| # TLSCA = "/etc/n9e/ca.pem" | |||
| # TLSCert = "/etc/n9e/cert.pem" | |||
| # TLSKey = "/etc/n9e/key.pem" | |||
| # InsecureSkipVerify = false | |||
| # [[Writers.WriteRelabels]] | |||
| # Action = "replace" | |||
| # SourceLabels = ["__address__"] | |||
| # Regex = "([^:]+)(?::\\d+)?" | |||
| # Replacement = "$1:80" | |||
| # TargetLabel = "__address__" | |||
| Url = "http://prometheus:9090/api/v1/write" | |||
| @@ -1,45 +1,54 @@ | |||
| [ | |||
| { | |||
| "name": "Lost connection with monitoring target - categraf", | |||
| "note": "", | |||
| "severity": 1, | |||
| "disabled": 0, | |||
| "prom_for_duration": 60, | |||
| "prom_ql": "max_over_time(target_up[130s]) == 0", | |||
| "prom_eval_interval": 15, | |||
| "enable_stime": "00:00", | |||
| "enable_etime": "23:59", | |||
| "enable_days_of_week": [ | |||
| "1", | |||
| "2", | |||
| "3", | |||
| "4", | |||
| "5", | |||
| "6", | |||
| "0" | |||
| ], | |||
| "notify_recovered": 1, | |||
| "notify_channels": [ | |||
| "email", | |||
| "dingtalk", | |||
| "wecom" | |||
| ], | |||
| "notify_repeat_step": 60, | |||
| "callbacks": [], | |||
| "runbook_url": "", | |||
| "append_tags": [] | |||
| { | |||
| "cate": "host", | |||
| "datasource_ids": null, | |||
| "name": "Lost connection with monitoring target - categraf", | |||
| "note": "", | |||
| "prod": "host", | |||
| "algorithm": "", | |||
| "algo_params": null, | |||
| "delay": 0, | |||
| "severity": 0, | |||
| "disabled": 0, | |||
| "prom_for_duration": 0, | |||
| "prom_ql": "", | |||
| "rule_config": { | |||
| "inhibit": false, | |||
| "queries": [ | |||
| { | |||
| "key": "all_hosts", | |||
| "op": "==", | |||
| "values": [] | |||
| } | |||
| ], | |||
| "triggers": [ | |||
| { | |||
| "duration": 60, | |||
| "severity": 2, | |||
| "type": "target_miss" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Machine load - high CPU, please pay attention - categraf", | |||
| "note": "", | |||
| "severity": 3, | |||
| "disabled": 0, | |||
| "prom_for_duration": 60, | |||
| "prom_ql": "cpu_usage_idle{cpu=\"cpu-total\"} < 25", | |||
| "prom_eval_interval": 15, | |||
| "enable_stime": "00:00", | |||
| "enable_etime": "23:59", | |||
| "enable_days_of_week": [ | |||
| "prom_eval_interval": 15, | |||
| "enable_stime": "00:00", | |||
| "enable_stimes": [ | |||
| "00:00" | |||
| ], | |||
| "enable_etime": "23:59", | |||
| "enable_etimes": [ | |||
| "23:59" | |||
| ], | |||
| "enable_days_of_week": [ | |||
| "1", | |||
| "2", | |||
| "3", | |||
| "4", | |||
| "5", | |||
| "6", | |||
| "0" | |||
| ], | |||
| "enable_days_of_weeks": [ | |||
| [ | |||
| "1", | |||
| "2", | |||
| "3", | |||
| @@ -47,18 +56,19 @@ | |||
| "5", | |||
| "6", | |||
| "0" | |||
| ], | |||
| "notify_recovered": 1, | |||
| "notify_channels": [ | |||
| "email", | |||
| "dingtalk", | |||
| "wecom" | |||
| ], | |||
| "notify_repeat_step": 60, | |||
| "callbacks": [], | |||
| "runbook_url": "", | |||
| "append_tags": [] | |||
| }, | |||
| ] | |||
| ], | |||
| "enable_in_bg": 0, | |||
| "notify_recovered": 1, | |||
| "notify_channels": [], | |||
| "notify_repeat_step": 60, | |||
| "notify_max_number": 0, | |||
| "recover_duration": 0, | |||
| "callbacks": [], | |||
| "runbook_url": "", | |||
| "append_tags": [], | |||
| "annotations": {} | |||
| }, | |||
| { | |||
| "name": "Machine load - high memory, please pay attention - categraf", | |||
| "note": "", | |||
| @@ -30,15 +30,44 @@ | |||
| "append_tags": [] | |||
| }, | |||
| { | |||
| "cate": "host", | |||
| "datasource_ids": null, | |||
| "name": "Lost connection with monitoring target - telegraf", | |||
| "note": "", | |||
| "severity": 1, | |||
| "prod": "host", | |||
| "algorithm": "", | |||
| "algo_params": null, | |||
| "delay": 0, | |||
| "severity": 0, | |||
| "disabled": 0, | |||
| "prom_for_duration": 60, | |||
| "prom_ql": "target_up != 1", | |||
| "prom_for_duration": 0, | |||
| "prom_ql": "", | |||
| "rule_config": { | |||
| "inhibit": false, | |||
| "queries": [ | |||
| { | |||
| "key": "all_hosts", | |||
| "op": "==", | |||
| "values": [] | |||
| } | |||
| ], | |||
| "triggers": [ | |||
| { | |||
| "duration": 60, | |||
| "severity": 2, | |||
| "type": "target_miss" | |||
| } | |||
| ] | |||
| }, | |||
| "prom_eval_interval": 15, | |||
| "enable_stime": "00:00", | |||
| "enable_stimes": [ | |||
| "00:00" | |||
| ], | |||
| "enable_etime": "23:59", | |||
| "enable_etimes": [ | |||
| "23:59" | |||
| ], | |||
| "enable_days_of_week": [ | |||
| "1", | |||
| "2", | |||
| @@ -48,16 +77,27 @@ | |||
| "6", | |||
| "0" | |||
| ], | |||
| "notify_recovered": 1, | |||
| "notify_channels": [ | |||
| "email", | |||
| "dingtalk", | |||
| "wecom" | |||
| "enable_days_of_weeks": [ | |||
| [ | |||
| "1", | |||
| "2", | |||
| "3", | |||
| "4", | |||
| "5", | |||
| "6", | |||
| "0" | |||
| ] | |||
| ], | |||
| "enable_in_bg": 0, | |||
| "notify_recovered": 1, | |||
| "notify_channels": [], | |||
| "notify_repeat_step": 60, | |||
| "notify_max_number": 0, | |||
| "recover_duration": 0, | |||
| "callbacks": [], | |||
| "runbook_url": "", | |||
| "append_tags": [] | |||
| "append_tags": [], | |||
| "annotations": {} | |||
| }, | |||
| { | |||
| "name": "Port detection failed, please pay attention - telegraf", | |||
| @@ -2,6 +2,7 @@ package memsto | |||
| import ( | |||
| "encoding/json" | |||
| "strings" | |||
| "sync" | |||
| "time" | |||
| @@ -57,7 +58,14 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { | |||
| if err != nil { | |||
| return err | |||
| } | |||
| json.Unmarshal([]byte(cval), &w.webhooks) | |||
| if strings.TrimSpace(cval) != "" { | |||
| err = json.Unmarshal([]byte(cval), &w.webhooks) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal webhooks:%s config:", cval, err) | |||
| } | |||
| } | |||
| logger.Infof("timer: sync wbhooks done number: %d", len(w.webhooks)) | |||
| cval, err = models.ConfigsGet(w.ctx, models.SMTP) | |||
| @@ -65,9 +73,11 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { | |||
| return err | |||
| } | |||
| err = toml.Unmarshal([]byte(cval), &w.smtp) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal smtp:%s config:", cval, err) | |||
| if strings.TrimSpace(cval) != "" { | |||
| err = toml.Unmarshal([]byte(cval), &w.smtp) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal smtp:%s config:", cval, err) | |||
| } | |||
| } | |||
| logger.Infof("timer: sync smtp:%+v done", w.smtp) | |||
| @@ -76,20 +86,28 @@ func (w *NotifyConfigCacheType) syncNotifyConfigs() error { | |||
| if err != nil { | |||
| return err | |||
| } | |||
| err = json.Unmarshal([]byte(cval), &w.script) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal notify script:%s config:", cval, err) | |||
| if strings.TrimSpace(cval) != "" { | |||
| err = json.Unmarshal([]byte(cval), &w.script) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal notify script:%s config:", cval, err) | |||
| } | |||
| } | |||
| logger.Infof("timer: sync notify script done") | |||
| cval, err = models.ConfigsGet(w.ctx, models.IBEX) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| err = toml.Unmarshal([]byte(cval), &w.ibex) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal ibex:%s config:", cval, err) | |||
| if strings.TrimSpace(cval) != "" { | |||
| err = toml.Unmarshal([]byte(cval), &w.ibex) | |||
| if err != nil { | |||
| logger.Errorf("failed to unmarshal ibex:%s config:", cval, err) | |||
| } | |||
| } | |||
| logger.Infof("timer: sync ibex done") | |||
| return nil | |||