Browse Source

docs: update integrations

main
ning 2 years ago
parent
commit
c5cd6c0337
5 changed files with 48 additions and 292 deletions
  1. +48
    -6
      integrations/Net_Response/markdown/README.md
  2. +0
    -73
      integrations/Network/alerts/net_response_by_categraf.json
  3. +0
    -131
      integrations/Network/dashboards/net_response_by_categraf.json
  4. BIN
      integrations/Network/icon/network.png
  5. +0
    -82
      integrations/Network/markdown/net.md

+ 48
- 6
integrations/Net_Response/markdown/README.md View File

@@ -1,18 +1,20 @@
# net_response

网络探测插件,通常用于监控本机某个端口是否在监听,或远端某个端口是否能连通

## code meanings
网络探测插件,通常用于监控本机某个端口是否在监听,或远端某个端口是否能连通。因为 Prometheus 生态的时序库只能存储 float64 类型的值,所以网络探测插件的探测结果也用 float64 类型的数值表示,不同的数值代表不同的探测结果,具体含义如下:

```
- 0: Success
- 1: Timeout
- 2: ConnectionFailed
- 3: ReadFailed
- 4: StringMismatch
```

如果一切正常,这个值是 0,如果有异常,这个值是 1-4 之间的值,具体含义如上。这个值对应的指标名字是 `net_response_result_code`。

## Configuration

最核心的配置就是 targets 部分,指定探测的目标,下面的例子:
配置文件是 categraf 的 `conf/input.net_response/net_response.toml`。最核心的配置是 targets 部分,用于指定探测的目标,例如:

```toml
[[instances]]
@@ -27,7 +29,7 @@ targets = [
- `localhost:6379` 表示探测本机的 6379 端口是否可以连通
- `:9090` 表示探测本机的 9090 端口是否可以连通

监控数据或告警事件中只是一个 IP 和端口,接收告警的人看到了,可能不清楚只是哪个业务的模块告警了,可以附加一些更有价值的信息放到标签里,比如例子中
监控数据或告警事件中只有一个 IP 和端口,接收告警的人看到了,可能不清楚这是哪个业务的模块告警了,可以附加一些更有价值的信息放到标签里,比如:

```toml
labels = { region="cloud", product="n9e" }
@@ -35,6 +37,46 @@ labels = { region="cloud", product="n9e" }

标识了这是 cloud 这个 region,n9e 这个产品,这俩标签会附到时序数据上,告警的时候自然也会报出来。

完整配置样例如下:

```toml
[mappings]
# "127.0.0.1:22"= {region="local",ssh="test"}
# "127.0.0.1:22"= {region="local",ssh="redis"}

[[instances]]
targets = [
# "127.0.0.1:22",
# "localhost:6379",
# ":9090"
]

# # append some labels for series
# labels = { region="cloud", product="n9e" }

# # interval = global.interval * interval_times
# interval_times = 1

## Protocol, must be "tcp" or "udp"
## NOTE: because the "udp" protocol does not respond to requests, it requires
## a send/expect string pair (see below).
# protocol = "tcp"

## Set timeout
# timeout = "1s"

## Set read timeout (only used if expecting a response)
# read_timeout = "1s"

## The following options are required for UDP checks. For TCP, they are
## optional. The plugin will send the given string to the server and then
## expect to receive the given 'expect' string back.
## string sent to the server
# send = "ssh"
## expected string in answer
# expect = "ssh"
```

## 监控大盘和告警规则

该 README 的同级目录下,提供了 dashboard.json 就是监控大盘的配置,alerts.json 是告警规则,可以导入夜莺使用。
夜莺内置了仪表盘和告警规则,克隆到自己的业务组即可使用。

+ 0
- 73
integrations/Network/alerts/net_response_by_categraf.json View File

@@ -1,73 +0,0 @@
[
{
"cate": "prometheus",
"datasource_ids": [
0
],
"name": "Network address probe failed",
"note": "",
"prod": "metric",
"algorithm": "",
"algo_params": null,
"delay": 0,
"severity": 2,
"severities": [
2
],
"disabled": 1,
"prom_for_duration": 60,
"prom_ql": "",
"rule_config": {
"algo_params": null,
"inhibit": false,
"prom_ql": "",
"queries": [
{
"prom_ql": "net_response_result_code != 0",
"severity": 2
}
],
"severity": 0
},
"prom_eval_interval": 15,
"enable_stime": "00:00",
"enable_stimes": [
"00:00"
],
"enable_etime": "23:59",
"enable_etimes": [
"23:59"
],
"enable_days_of_week": [
"1",
"2",
"3",
"4",
"5",
"6",
"0"
],
"enable_days_of_weeks": [
[
"1",
"2",
"3",
"4",
"5",
"6",
"0"
]
],
"enable_in_bg": 0,
"notify_recovered": 1,
"notify_channels": [],
"notify_repeat_step": 60,
"notify_max_number": 0,
"recover_duration": 0,
"callbacks": [],
"runbook_url": "",
"append_tags": [],
"annotations": null,
"extra_config": null
}
]

+ 0
- 131
integrations/Network/dashboards/net_response_by_categraf.json View File

@@ -1,131 +0,0 @@
{
"name": "TCP detection by UlricQin",
"tags": "",
"ident": "",
"configs": {
"panels": [
{
"type": "table",
"id": "73c6eaf9-1685-4a7a-bf53-3d52afa1792e",
"layout": {
"h": 15,
"w": 24,
"x": 0,
"y": 0,
"i": "73c6eaf9-1685-4a7a-bf53-3d52afa1792e",
"isResizable": true
},
"version": "3.0.0",
"datasourceCate": "prometheus",
"datasourceValue": "${prom}",
"targets": [
{
"expr": "max(net_response_result_code) by (target)",
"legend": "UP?",
"refId": "A"
},
{
"expr": "max(net_response_response_time) by (target) * 1000",
"legend": "Latency(ms)",
"refId": "C"
}
],
"transformations": [
{
"id": "organize",
"options": {
"indexByName": {
"target": 0
}
}
}
],
"name": "Targets",
"custom": {
"showHeader": true,
"colorMode": "background",
"calc": "lastNotNull",
"displayMode": "labelValuesToRows",
"aggrDimension": "target"
},
"options": {
"valueMappings": [],
"standardOptions": {}
},
"overrides": [
{
"matcher": {
"value": "A"
},
"properties": {
"standardOptions": {},
"valueMappings": [
{
"match": {
"special": 0
},
"result": {
"color": "#2c9d3d",
"text": "UP"
},
"type": "special"
},
{
"match": {
"from": 1,
"special": 1
},
"result": {
"color": "#e90f0f",
"text": "DOWN"
},
"type": "range"
}
]
}
},
{
"type": "special",
"matcher": {
"value": "C"
},
"properties": {
"valueMappings": [
{
"type": "range",
"result": {
"color": "#f10c0c"
},
"match": {
"from": 1
}
},
{
"type": "range",
"result": {
"color": "#2c9d3d"
},
"match": {
"to": 1
}
}
],
"standardOptions": {
"util": "milliseconds",
"decimals": 3
}
}
}
]
}
],
"var": [
{
"definition": "prometheus",
"name": "prom",
"type": "datasource"
}
],
"version": "3.0.0"
}
}

BIN
integrations/Network/icon/network.png View File

Before After
Width: 48  |  Height: 48  |  Size: 888 B

+ 0
- 82
integrations/Network/markdown/net.md View File

@@ -1,82 +0,0 @@
# net_response plugin

网络探测插件,通常用于监控本机某个端口是否在监听,或远端某个端口是否能连通。因为 Prometheus 生态的时序库只能存储 float64 类型的值,所以网络探测插件的探测结果也用 float64 类型的数值表示,不同的数值代表不同的探测结果,具体含义如下:

```
- 0: Success
- 1: Timeout
- 2: ConnectionFailed
- 3: ReadFailed
- 4: StringMismatch
```

如果一切正常,这个值是 0,如果有异常,这个值是 1-4 之间的值,具体含义如上。这个值对应的指标名字是 `net_response_result_code`。

## Configuration

配置文件是 categraf 的 `conf/input.net_response/net_response.toml`。最核心的配置是 targets 部分,用于指定探测的目标,例如:

```toml
[[instances]]
targets = [
"10.2.3.4:22",
"localhost:6379",
":9090"
]
```

- `10.2.3.4:22` 表示探测 10.2.3.4 这个机器的 22 端口是否可以连通
- `localhost:6379` 表示探测本机的 6379 端口是否可以连通
- `:9090` 表示探测本机的 9090 端口是否可以连通

监控数据或告警事件中只有一个 IP 和端口,接收告警的人看到了,可能不清楚这是哪个业务的模块告警了,可以附加一些更有价值的信息放到标签里,比如:

```toml
labels = { region="cloud", product="n9e" }
```

标识了这是 cloud 这个 region,n9e 这个产品,这俩标签会附到时序数据上,告警的时候自然也会报出来。

完整配置样例如下:

```toml
[mappings]
# "127.0.0.1:22"= {region="local",ssh="test"}
# "127.0.0.1:22"= {region="local",ssh="redis"}

[[instances]]
targets = [
# "127.0.0.1:22",
# "localhost:6379",
# ":9090"
]

# # append some labels for series
# labels = { region="cloud", product="n9e" }

# # interval = global.interval * interval_times
# interval_times = 1

## Protocol, must be "tcp" or "udp"
## NOTE: because the "udp" protocol does not respond to requests, it requires
## a send/expect string pair (see below).
# protocol = "tcp"

## Set timeout
# timeout = "1s"

## Set read timeout (only used if expecting a response)
# read_timeout = "1s"

## The following options are required for UDP checks. For TCP, they are
## optional. The plugin will send the given string to the server and then
## expect to receive the given 'expect' string back.
## string sent to the server
# send = "ssh"
## expected string in answer
# expect = "ssh"
```

## 监控大盘和告警规则

夜莺内置了仪表盘和告警规则,克隆到自己的业务组即可使用。

Loading…
Cancel
Save