diff --git a/integrations/Doris/alerts/doris_by_categraf.json b/integrations/Doris/alerts/doris_by_categraf.json new file mode 100644 index 00000000..aa59500e --- /dev/null +++ b/integrations/Doris/alerts/doris_by_categraf.json @@ -0,0 +1,2524 @@ +[ + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris JVM 线程数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "jvm_thread", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784164489700, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris JVM内存使用率", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + 
"delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "sum(jvm_heap_size_bytes{type=\"used\"})by(ident)/sum(jvm_heap_size_bytes{type=\"committed\"})by(ident)", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784169880800, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE 1 分钟 Load Avg", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_load_average{mode=\"1_minutes\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + 
"prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784171517400, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE 1 分钟新增tcp包接收错误的次数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "increase(doris_be_snmp_tcp_in_errs[1m])", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + 
"notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784172854000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE 15 分钟 Load Avg", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_load_average{mode=\"15_minutes\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + 
"create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784174515000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE 5 分钟 Load Avg", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_load_average{mode=\"5_minutes\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784175691000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE batch 的线程池队列积压", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + 
"disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_add_batch_task_queue_size > 20", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784177409800, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE CPU 使用率", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "(sum(doris_be_cpu)by(instance)-sum(doris_be_cpu{mode=~\"idle|iowait\"})by(instance))/sum(doris_be_cpu)by(instance)*100 > 70", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": 
"00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784179069200, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE OlapScanner 线程池积压", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_scanner_thread_pool_queue_size > 0", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + 
"notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784180644600, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_BE 发送数据包的线程池出现积压", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_be_send_batch_thread_pool_queue_size > 0", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + 
"update_at": 0, + "update_by": "", + "uuid": 1730292784181912800, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 95百分位查询延迟", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_query_latency_ms{quantile=\"0.95\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784184238600, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 99百分位查询延迟", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + 
"prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_query_latency_ms{quantile=\"0.99\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784185714400, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 事务 publish 耗时95分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_publish_latency_ms{quantile=\"0.95\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + 
"enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784187173400, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 事务 publish 耗时99分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_publish_latency_ms{quantile=\"0.99\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + 
"callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784188659500, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 事务执行耗时95分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_exec_latency_ms{quantile=\"0.95\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784190024000, + "cur_event_count": 
0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 事务执行耗时99分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_exec_latency_ms{quantile=\"0.99\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784191466000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 失败的事务数量", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + 
"labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_counter{type=\"failed\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784192987100, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 异常事务的数量", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "sum(doris_fe_txn_status{type=~\"aborted|unknown\"})by(type)", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + 
"6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784194383000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 日志写入延迟95分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_editlog_write_latency_ms{quantile=\"0.95\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": 
{ + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784195737900, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 日志写入延迟99分位", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_editlog_write_latency_ms{quantile=\"0.99\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784197151700, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + 
"datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 每秒查询数量", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_qps", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784198568000, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 每秒错误查询数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_query_err_rate", + "severity": 2 + } + ], + "version": "v1" 
+ }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784199959300, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 清理元数据文件失败的次数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "increase(doris_fe_image_clean[1m]) > 0", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], 
+ "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784201470700, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 清理元数据日志失败的次数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "increase(doris_fe_edit_log_clean[1m]) > 0", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + 
}, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784202892800, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 生成元数据镜像文件失败的次数", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "increase(doris_fe_image_write[1m]) > 0", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784204482600, + "cur_event_count": 0, + "update_by_nickname": "管理员" + }, + { + "id": 0, + "group_id": 0, + "cate": "prometheus", + "datasource_ids": [ + 0 + ], + "cluster": "", + "name": "Doris_FE 被拒绝的事务数量", + "note": "", + "prod": "metric", + "algorithm": "", + "algo_params": null, + "delay": 0, + "severity": 0, + "severities": [ + 
2 + ], + "disabled": 1, + "prom_for_duration": 60, + "prom_ql": "", + "rule_config": { + "queries": [ + { + "keys": { + "labelKey": "", + "metricKey": "", + "valueKey": "" + }, + "prom_ql": "doris_fe_txn_counter{type=\"reject\"}", + "severity": 2 + } + ], + "version": "v1" + }, + "event_relabel_config": null, + "prom_eval_interval": 30, + "enable_stime": "00:00", + "enable_stimes": [ + "00:00" + ], + "enable_etime": "00:00", + "enable_etimes": [ + "00:00" + ], + "enable_days_of_week": [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ], + "enable_days_of_weeks": [ + [ + "0", + "1", + "2", + "3", + "4", + "5", + "6" + ] + ], + "enable_in_bg": 0, + "notify_recovered": 1, + "notify_channels": [], + "notify_groups_obj": null, + "notify_groups": null, + "notify_repeat_step": 60, + "notify_max_number": 0, + "recover_duration": 0, + "callbacks": [], + "runbook_url": "", + "append_tags": [], + "annotations": {}, + "extra_config": { + "escalation": { + "for_duration": 60, + "new_severity": 2, + "notify_max_number": 0, + "notify_repeat_step": 60 + }, + "network_device_config": {}, + "notify_aggregation": { + "wait": 1 + } + }, + "create_at": 0, + "create_by": "", + "update_at": 0, + "update_by": "", + "uuid": 1730292784205710300, + "cur_event_count": 0, + "update_by_nickname": "管理员" + } +] \ No newline at end of file diff --git a/integrations/Doris/collect/prometheus/collect_doris_examples.toml b/integrations/Doris/collect/prometheus/collect_doris_examples.toml new file mode 100644 index 00000000..8d167afb --- /dev/null +++ b/integrations/Doris/collect/prometheus/collect_doris_examples.toml @@ -0,0 +1,22 @@ +# doris_fe +[[instances]] +# 配置 fe metrics 服务地址 +urls = [ + "http://127.0.0.1:8030/metrics" +] + +url_label_key = "instance" +url_label_value = "{{.Host}}" +# 指定 fe 服务 group 和 job 标签,这里是仪表盘变量调用,可根据实际需求修改。 +labels = { group = "fe",job = "doris_cluster01"} + +# doris_be +[[instances]] +# 配置 be metrics 服务地址 +urls = [ + "http://127.0.0.1:8040/metrics" +] +url_label_key = 
"instance" +url_label_value = "{{.Host}}" +# 指定 be 服务 group 和 job 标签,这里是仪表盘变量调用,可根据实际需求修改。 +labels = { group = "be",job = "doris_cluster01"} \ No newline at end of file diff --git a/integrations/Doris/dashboards/Doris_Overview.json b/integrations/Doris/dashboards/Doris_Overview.json new file mode 100644 index 00000000..c21d4163 --- /dev/null +++ b/integrations/Doris/dashboards/Doris_Overview.json @@ -0,0 +1,4700 @@ +[ + { + "name": "Doris Overview", + "tags": "", + "configs": { + "links": [], + "panels": [ + { + "collapsed": true, + "id": "5d9daaa1-b1de-476d-a924-c2a3abe4778f", + "layout": { + "h": 1, + "i": "5d9daaa1-b1de-476d-a924-c2a3abe4778f", + "isResizable": false, + "w": 24, + "x": 0, + "y": 0 + }, + "name": "Overview", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "calc": "avg", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Num of Doris Clusters", + "id": "2696e097-5c15-4c1a-81f5-58d5b923cfc6", + "layout": { + "h": 7, + "i": "2696e097-5c15-4c1a-81f5-58d5b923cfc6", + "isResizable": true, + "w": 4, + "x": 0, + "y": 1 + }, + "links": [], + "maxPerRow": 4, + "name": "Cluster Number", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "none" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69", + "type": "base", + "value": null + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "count(node_info{type=\"is_master\"})", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": 
"3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Dead Frontends will be shown as Colored points.\nIf all Frontends are alive, all points should be Green.", + "id": "80bdedc3-e2e4-4198-b1ce-a94c8aa417f1", + "layout": { + "h": 7, + "i": "80bdedc3-e2e4-4198-b1ce-a94c8aa417f1", + "isResizable": true, + "w": 10, + "x": 4, + "y": 1 + }, + "links": [], + "maxPerRow": 4, + "name": "Frontends Status", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "(up{group=\"fe\"} == 0) +0", + "legend": "{{job}}-{{instance}}: DEAD", + "refId": "B" + }, + { + "expr": "(up{group=\"fe\"} == 1) +0", + "legend": "{{job}}-{{instance}}: ALIVE", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Dead Backends will be shown as Colored points.\nIf all Backends are alive, all points should be Green.", + "id": "757fca81-5e3f-48de-ab61-ad8150422d50", + "layout": { + "h": 7, + "i": "757fca81-5e3f-48de-ab61-ad8150422d50", + "isResizable": true, + "w": 10, + "x": 14, + "y": 1 + }, + "links": [], + "maxPerRow": 4, + "name": "Backends status", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "(up{group=\"be\"} == 0) +0", + "legend": "{{job}}-{{instance}}: DEAD", + "refId": "B" + }, + { + "expr": "(up{group=\"be\"} == 1) +0", + "legend": "{{job}}-{{instance}}: ALIVE", + "refId": "A" + } + ], + "type": 
"timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The JVM heap usage percent of each Frontend of each Doris cluster.", + "id": "27d22825-5a24-4d53-87d1-2639f3e13a70", + "layout": { + "h": 7, + "i": "27d22825-5a24-4d53-87d1-2639f3e13a70", + "isResizable": true, + "w": 8, + "x": 0, + "y": 8 + }, + "links": [], + "maxPerRow": 4, + "name": "Cluster FE JVM Heap Stat", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(jvm_heap_size_bytes{group=\"fe\", type=\"used\"} * 100) by (instance, job) / sum(jvm_heap_size_bytes{group=\"fe\", type=\"max\"}) by (instance, job)", + "legend": "{{job}}-{{instance}}", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The Backend CPU idle overview of each Doris cluster.\nThe detail Backend CPU idle info can be seen in 'BE' section.", + "id": "df750dad-1be8-4b67-976d-91e751724193", + "layout": { + "h": 7, + "i": "df750dad-1be8-4b67-976d-91e751724193", + "isResizable": true, + "w": 8, + "x": 8, + "y": 8 + }, + "links": [], + "maxPerRow": 4, + "name": "Cluster BE CPU Idle", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "(sum(rate(doris_be_cpu{mode=\"idle\"}[$interval])) by (job)) 
/ (sum(rate(doris_be_cpu[$interval])) by (job))", + "legend": "{{job}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The Backend memory usage overview of each Doris cluster.\nThe detail backend memory usage can be seen in 'BE' section.", + "id": "dd16ac62-5af4-40e0-a449-b0a95f32b33b", + "layout": { + "h": 7, + "i": "dd16ac62-5af4-40e0-a449-b0a95f32b33b", + "isResizable": true, + "w": 8, + "x": 16, + "y": 8 + }, + "links": [], + "maxPerRow": 4, + "name": "Cluster BE Mem Stat", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "avg(doris_be_memory_allocated_bytes) by (job)", + "legend": "{{job}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "QPS statistic group by cluster.\nThe QPS of each cluster is the sum of all queries processed on all Frontends.", + "id": "df810a3f-79ae-4a56-868e-abd9dee23ecc", + "layout": { + "h": 7, + "i": "df810a3f-79ae-4a56-868e-abd9dee23ecc", + "isResizable": true, + "w": 8, + "x": 0, + "y": 15 + }, + "links": [], + "maxPerRow": 4, + "name": "Cluster QPS Stat", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum by 
(job)(rate(doris_fe_query_total{group=\"fe\"}[$interval]))", + "legend": "{{job}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The Disk state. GREEN point means this disk is ONLINE. RED point means this disk is OFFLINE", + "id": "dbb3fd31-5577-4d89-9c5d-801469286c35", + "layout": { + "h": 7, + "i": "dbb3fd31-5577-4d89-9c5d-801469286c35", + "isResizable": true, + "w": 8, + "x": 8, + "y": 15 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Disk State", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "(doris_be_disks_state{job=\"$cluster_name\"} == 0)+0", + "legend": "{{instance}}: {{path}} OFFLINE", + "refId": "A" + }, + { + "expr": "(doris_be_disks_state{job=\"$cluster_name\"} == 1)+0", + "legend": "{{instance}}: {{path}} ONLINE", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "4934c795-bb60-412d-8a5e-0aeb9db6905e", + "layout": { + "h": 1, + "i": "4934c795-bb60-412d-8a5e-0aeb9db6905e", + "isResizable": false, + "w": 24, + "x": 0, + "y": 22 + }, + "name": "Cluster Overview", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "calc": "avg", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total Frontends node number", + "id": "81e85606-4059-4728-8624-a1c3adaf4356", + "layout": { + "h": 6, + "i": "81e85606-4059-4728-8624-a1c3adaf4356", + "isResizable": true, + "w": 4, + "x": 0, + "y": 23 + }, + "links": 
[], + "maxPerRow": 4, + "name": "[$cluster_name] FE Node", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "none" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "count(up{group=\"fe\", job=\"$cluster_name\"})", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "calc": "avg", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total alive number of Frontends. Normally, it should be equal to the Total number of Frontends", + "id": "e469b68d-9351-4083-b4bc-1fb3f410efd9", + "layout": { + "h": 6, + "i": "e469b68d-9351-4083-b4bc-1fb3f410efd9", + "isResizable": true, + "w": 4, + "x": 4, + "y": 23 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] FE Alive", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "none" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "count(up{group=\"fe\", job=\"$cluster_name\"}==1)", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "calc": "avg", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total Backends node 
number", + "id": "332a4eca-3ca3-4f73-b800-4a88dffc8c1e", + "layout": { + "h": 6, + "i": "332a4eca-3ca3-4f73-b800-4a88dffc8c1e", + "isResizable": true, + "w": 4, + "x": 8, + "y": 23 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Node", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "none" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "count(up{group=\"be\", job=\"$cluster_name\"})", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "calc": "avg", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total alive number of Backends. 
Normally, it should be equal to the Total number of Backends.", + "id": "2303b720-98e1-421c-918d-e6b613b3036d", + "layout": { + "h": 6, + "i": "2303b720-98e1-421c-918d-e6b613b3036d", + "isResizable": true, + "w": 4, + "x": 12, + "y": 23 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Alive", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "none" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "count(up{group=\"be\", job=\"$cluster_name\"}==1)", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "calc": "lastNotNull", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total used disk capacity of all Backends.", + "id": "8ff1a193-dd17-40c8-a25d-39985edef8ee", + "layout": { + "h": 6, + "i": "8ff1a193-dd17-40c8-a25d-39985edef8ee", + "isResizable": true, + "w": 4, + "x": 16, + "y": 23 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Used Capacity", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "bytesSI" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "SUM(doris_be_disks_local_used_capacity{job=\"$cluster_name\"})", + "refId": "B" + } + ], + "type": "stat", + 
"version": "3.0.0" + }, + { + "custom": { + "calc": "lastNotNull", + "colorMode": "value", + "textMode": "value", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Total disk capacity of all Backends", + "id": "d3762969-ca1b-4794-b710-ceeee5820008", + "layout": { + "h": 6, + "i": "d3762969-ca1b-4794-b710-ceeee5820008", + "isResizable": true, + "w": 4, + "x": 20, + "y": 23 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Total Capacity", + "options": { + "legend": { + "displayMode": "list" + }, + "standardOptions": { + "util": "bytesSI" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#73BF69" + }, + { + "color": "#F2495C", + "value": 80 + } + ], + "style": "line" + }, + "tooltip": { + "mode": "all", + "sort": "none" + }, + "valueMappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ] + }, + "targets": [ + { + "expr": "SUM(doris_be_disks_total_capacity{job=\"$cluster_name\"})", + "refId": "A" + } + ], + "type": "stat", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The max replayed meta data journal id on Frontends.\nNormally, all Frontends should be same on this metrics, or just slightly different for a short period.", + "id": "9c5fb54f-2428-4ece-8057-b75cf4cbbef9", + "layout": { + "h": 6, + "i": "9c5fb54f-2428-4ece-8057-b75cf4cbbef9", + "isResizable": true, + "w": 6, + "x": 0, + "y": 29 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Max Replayed journal id", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + 
"targets": [ + { + "expr": "doris_fe_max_journal_id{job=\"$cluster_name\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The counter of meta data image generation on Master Frontend. And the counter of image successfully pushing to other Non-master Frontends.\nThese metrics is expected to increase at reasonable intervals. And normally, they should be equal.", + "id": "40779789-0758-4a7b-916e-d66f54c4d096", + "layout": { + "h": 6, + "i": "40779789-0758-4a7b-916e-d66f54c4d096", + "isResizable": true, + "w": 6, + "x": 6, + "y": 29 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Image counter", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_image_write{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "{{instance}}-write", + "refId": "A" + }, + { + "expr": "doris_fe_image_push{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "{{instance}}-push", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The left Y axes shows write latency of 99th. 
The right Y axis shows the journal writes per second.", + "id": "1c1d3ebe-182b-4182-ae0a-4e7339b1eb42", + "layout": { + "h": 6, + "i": "1c1d3ebe-182b-4182-ae0a-4e7339b1eb42", + "isResizable": true, + "w": 6, + "x": 12, + "y": 29 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BDBJE Write", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_editlog_write_latency_ms{job=\"$cluster_name\", instance=\"$fe_master\",quantile=\"0.99\"}", + "legend": "{{instance}}-99th", + "refId": "A" + }, + { + "expr": "rate(doris_fe_edit_log{job=\"$cluster_name\", type=\"write\"}[$interval])", + "legend": "{{instance}}-write-rate", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The left Y axis shows the journal reads per second.", + "id": "a4aee7a4-3acc-4259-827f-4e28a669ea18", + "layout": { + "h": 6, + "i": "a4aee7a4-3acc-4259-827f-4e28a669ea18", + "isResizable": true, + "w": 6, + "x": 18, + "y": 29 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BDBJE Read", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_fe_edit_log{job=\"$cluster_name\", type=\"read\"}[$interval])", + "legend": "{{instance}}-read-rate", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", +
"stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The edit log size for each FE", + "id": "2137be68-6db0-4ee3-a6b1-127b3d00c146", + "layout": { + "h": 6, + "i": "2137be68-6db0-4ee3-a6b1-127b3d00c146", + "isResizable": true, + "w": 6, + "x": 0, + "y": 35 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Edit Log Size", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_edit_log{job=\"$cluster_name\", type=\"bytes\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The edit log clean of each FE", + "id": "fff6178f-056a-4354-b714-71e00eb35b7e", + "layout": { + "h": 6, + "i": "fff6178f-056a-4354-b714-71e00eb35b7e", + "isResizable": true, + "w": 6, + "x": 6, + "y": 35 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Edit Log Clean", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_edit_log_clean{job=\"$cluster_name\", type=\"success\"}", + "legend": "{{instance}}_success", + "refId": "A" + }, + { + "expr": "doris_fe_edit_log_clean{job=\"$cluster_name\", type=\"failed\"}", + "legend": "{{instance}}_failed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": 
"linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The FE collect compaction score of each BE", + "id": "0e7a3912-ef46-442a-96a9-ca98f1b3ec1f", + "layout": { + "h": 6, + "i": "0e7a3912-ef46-442a-96a9-ca98f1b3ec1f", + "isResizable": true, + "w": 6, + "x": 12, + "y": 35 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] FE Collect Compaction Score", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_max_tablet_compaction_score{job=\"$cluster_name\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The compaction score of each BE", + "id": "96c4f66c-f8e5-4020-b13a-ad5f39b4c7bf", + "layout": { + "h": 6, + "i": "96c4f66c-f8e5-4020-b13a-ad5f39b4c7bf", + "isResizable": true, + "w": 6, + "x": 18, + "y": 35 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Compaction Score", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_tablet_max_compaction_score{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "{{backend}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": 
"prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The image Write of each FE", + "id": "09b36550-35e9-4a57-8384-e49b159d4bb6", + "layout": { + "h": 6, + "i": "09b36550-35e9-4a57-8384-e49b159d4bb6", + "isResizable": true, + "w": 6, + "x": 0, + "y": 41 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Image Write", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_image_write{job=\"$cluster_name\", type=\"success\"}", + "legend": "{{instance}}_success", + "refId": "A" + }, + { + "expr": "doris_fe_image_write{job=\"$cluster_name\", type=\"failed\"}", + "legend": "{{instance}}_failed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The image push of each FE", + "id": "490f2533-e2eb-4a49-9b51-878281a79b1a", + "layout": { + "h": 6, + "i": "490f2533-e2eb-4a49-9b51-878281a79b1a", + "isResizable": true, + "w": 6, + "x": 6, + "y": 41 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Image Push", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_image_push{job=\"$cluster_name\", type=\"success\"}", + "legend": "{{instance}}_success", + "refId": "A" + }, + { + "expr": "doris_fe_image_push{job=\"$cluster_name\", type=\"failed\"}", + "legend": "{{instance}}_failed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + 
"drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The image clean of each FE", + "id": "cffa363d-b514-42ed-be6b-67c93325d9e1", + "layout": { + "h": 6, + "i": "cffa363d-b514-42ed-be6b-67c93325d9e1", + "isResizable": true, + "w": 6, + "x": 12, + "y": 41 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Image Clean", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_image_clean{job=\"$cluster_name\", type=\"success\"}", + "legend": "{{instance}}_success", + "refId": "A" + }, + { + "expr": "doris_fe_image_clean{job=\"$cluster_name\", type=\"failed\"}", + "legend": "{{instance}}_failed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Number of tablets begin scheduled. 
These tablets may be in recovery process or balance process", + "id": "1cab833b-22db-494c-bd44-fe7e22b91321", + "layout": { + "h": 6, + "i": "1cab833b-22db-494c-bd44-fe7e22b91321", + "isResizable": true, + "w": 6, + "x": 18, + "y": 41 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Scheduling Tablets", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_scheduled_tablet_num{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "Scheduling tablet number", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The max IO util of each Backend", + "id": "854f7bb3-cf8e-48a3-81aa-50f83d89d6c0", + "layout": { + "h": 6, + "i": "854f7bb3-cf8e-48a3-81aa-50f83d89d6c0", + "isResizable": true, + "w": 6, + "x": 0, + "y": 47 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE IO Util", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_be_max_disk_io_util_percent{job=\"$cluster_name\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "016e25d8-b7ca-4492-95f2-9caaaeb9897a", + "layout": { + "h": 1, + "i": "016e25d8-b7ca-4492-95f2-9caaaeb9897a", + "isResizable": false, + "w": 24, + "x": 0, + "y": 53 + }, + "name": "Query Statistic", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { 
+ "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Requests per seconds on each Frontends.\nRequests include all requests sending to the Frontends.", + "id": "1d0ea05c-654a-4e55-a56a-f8cf29b3e109", + "layout": { + "h": 9, + "i": "1d0ea05c-654a-4e55-a56a-f8cf29b3e109", + "isResizable": true, + "w": 8, + "x": 0, + "y": 54 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] RPS", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_fe_request_total{job=\"$cluster_name\", group=\"fe\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Queries per seconds on each Frontends.\nQueries only include Select requests.", + "id": "8147ab91-ab8a-47c9-9049-08fbb77f9412", + "layout": { + "h": 9, + "i": "8147ab91-ab8a-47c9-9049-08fbb77f9412", + "isResizable": true, + "w": 8, + "x": 8, + "y": 54 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] QPS", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_fe_query_total{job=\"$cluster_name\", group=\"fe\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": 
"lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "99 quantiles of query latency on each Frontends.", + "id": "8de6fab5-27b8-4f40-8209-8a702ec9f665", + "layout": { + "h": 9, + "i": "8de6fab5-27b8-4f40-8209-8a702ec9f665", + "isResizable": true, + "w": 8, + "x": 16, + "y": 54 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] 99th Latency", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(doris_fe_query_latency_ms{job=\"$cluster_name\", quantile=\"0.99\"}) by (instance)", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates 95 to 99 quantiles of query latency on each Frontends.\nRight Y axes indicates the query rate per 1 min.", + "id": "6d6c3311-86dd-443f-97bf-2e9874b61650", + "layout": { + "h": 6, + "i": "6d6c3311-86dd-443f-97bf-2e9874b61650", + "isResizable": true, + "w": 8, + "x": 0, + "y": 63 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] Query Percentile", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_query_latency_ms{job=\"$cluster_name\", instance=\"$fe_instance\"}", + "legend": "{{quantile}}", + "refId": "A" + }, + { + "expr": 
"rate(doris_fe_query_latency_ms_count{job=\"$cluster_name\", instance=\"$fe_instance\"}[1m])", + "legend": "query rate", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the accumulated error queries number.\nRight Y axes indicates the error query rate per 1 min.\nNormally, the error query rate should be 0.", + "id": "dec70a12-df04-4763-8f51-30a5b1d4c2c5", + "layout": { + "h": 6, + "i": "dec70a12-df04-4763-8f51-30a5b1d4c2c5", + "isResizable": true, + "w": 8, + "x": 8, + "y": 63 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Query Error [1m]", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_query_err{job=\"$cluster_name\"}", + "legend": "Err Counter-{{instance}}", + "refId": "B" + }, + { + "expr": "rate(doris_fe_query_err{job=\"$cluster_name\"}[$interval])", + "legend": "Err Rate-{{instance}}", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The connections' number to each Frontends.", + "id": "5297af7c-771c-467f-a8c0-4d7311151492", + "layout": { + "h": 6, + "i": "5297af7c-771c-467f-a8c0-4d7311151492", + "isResizable": true, + "w": 8, + "x": 16, + "y": 63 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Connections", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ 
+ { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_connection_total{job=\"$cluster_name\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "3f88e434-af57-4b10-a36a-6af0c40b8fcd", + "layout": { + "h": 1, + "i": "3f88e434-af57-4b10-a36a-6af0c40b8fcd", + "isResizable": false, + "w": 24, + "x": 0, + "y": 69 + }, + "name": "Jobs", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Statistic of Broker load jobs's num in each Load State.", + "id": "36b2c233-f929-4d0a-af5a-1b4c433b8ba6", + "layout": { + "h": 6, + "i": "36b2c233-f929-4d0a-af5a-1b4c433b8ba6", + "isResizable": true, + "w": 6, + "x": 0, + "y": 70 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Broker Load Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"BROKER\", instance=\"$fe_master\"}", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Statistic of load jobs's num in each Load State which is generated by Insert Stmt.", + "id": "e4518ad0-a300-4e22-9114-7bcc2a770baa", + "layout": { + "h": 6, + "i": "e4518ad0-a300-4e22-9114-7bcc2a770baa", + "isResizable": true, + "w": 6, + "x": 6, + "y": 70 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Insert Load Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"INSERT\", instance=\"$fe_master\"}", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + 
"description": "Statistic of Routine load jobs's num in each Load State.", + "id": "90ad8b66-2b7d-4efc-9dc9-bc9fee72492c", + "layout": { + "h": 6, + "i": "90ad8b66-2b7d-4efc-9dc9-bc9fee72492c", + "isResizable": true, + "w": 6, + "x": 12, + "y": 70 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Routine Load Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"ROUTINE_LOAD\", instance=\"$fe_master\"}", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Statistic of Spark load jobs's num in each Load State.", + "id": "8e431597-7fef-4a81-9c60-caafc9150e75", + "layout": { + "h": 6, + "i": "8e431597-7fef-4a81-9c60-caafc9150e75", + "isResizable": true, + "w": 6, + "x": 18, + "y": 70 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Spark Load Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"SPARK\", instance=\"$fe_master\"}", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The trend report of broker load job", + "id": "6398fd57-672a-4104-bbf9-5b6a08a31d02", + "layout": { + "h": 6, + "i": "6398fd57-672a-4104-bbf9-5b6a08a31d02", + "isResizable": true, + "w": 6, + "x": 0, + "y": 76 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Broker load tendency", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": 
"doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"BROKER\", instance=\"$fe_master\", state=\"PENDING\"}", + "legend": "PENDING", + "refId": "A" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"BROKER\", instance=\"$fe_master\", state=\"ETL\"}", + "legend": "ETL", + "refId": "B" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"BROKER\", instance=\"$fe_master\", state=\"LOADING\"}", + "legend": "LOADING", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The trend report of insert load job", + "id": "2cf4e7c5-0edc-411d-bc33-863d6fda4107", + "layout": { + "h": 6, + "i": "2cf4e7c5-0edc-411d-bc33-863d6fda4107", + "isResizable": true, + "w": 6, + "x": 6, + "y": 76 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Insert load tendency", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"INSERT\", instance=\"$fe_master\", state=\"PENDING\"}", + "legend": "PENDING", + "refId": "A" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"INSERT\", instance=\"$fe_master\", state=\"ETL\"}", + "legend": "ETL", + "refId": "B" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"INSERT\", instance=\"$fe_master\", state=\"LOADING\"}", + "legend": "LOADING", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + 
"lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The trend report of routine load job", + "id": "0899fb9a-dcc6-4eb8-a8dc-96d3f2124b10", + "layout": { + "h": 6, + "i": "0899fb9a-dcc6-4eb8-a8dc-96d3f2124b10", + "isResizable": true, + "w": 6, + "x": 12, + "y": 76 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Routine load tendency", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"ROUTINE_LOAD\", instance=\"$fe_master\", state=\"NEED_SCHEDULE\"}", + "legend": "NEED_SCHEDULE", + "refId": "A" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"ROUTINE_LOAD\", instance=\"$fe_master\", state=\"RUNNING\"}", + "legend": "RUNNING", + "refId": "B" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"ROUTINE_LOAD\", instance=\"$fe_master\", state=\"PAUSED\"}", + "legend": "PAUSED", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The trend report of spark load job", + "id": "ea340542-58e0-4ced-a758-a0563d3346bd", + "layout": { + "h": 6, + "i": "ea340542-58e0-4ced-a758-a0563d3346bd", + "isResizable": true, + "w": 6, + "x": 18, + "y": 76 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Spark load tendency", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + 
} + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"SPARK\", instance=\"$fe_master\", state=\"PENDING\"}", + "legend": "PENDING", + "refId": "A" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"SPARK\", instance=\"$fe_master\", state=\"ETL\"}", + "legend": "ETL", + "refId": "B" + }, + { + "expr": "doris_fe_job{job=\"$cluster_name\", exported_job=\"load\", type=\"SPARK\", instance=\"$fe_master\", state=\"LOADING\"}", + "legend": "LOADING", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Number of running schema change jobs.", + "id": "f0df6aae-52a4-4a92-a4a9-b6a0679be656", + "layout": { + "h": 3, + "i": "f0df6aae-52a4-4a92-a4a9-b6a0679be656", + "isResizable": true, + "w": 6, + "x": 0, + "y": 82 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] SC Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", instance=\"$fe_master\", type=\"SCHEMA_CHANGE\"}", + "legend": "asds", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Queue size of report in Master FE.", + "id": "33d04fa7-1c77-4d0c-a43b-4b36539fc5e6", + "layout": { + "h": 6, + "i": "33d04fa7-1c77-4d0c-a43b-4b36539fc5e6", + "isResizable": true, + "w": 6, + "x": 6, + "y": 82 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Report queue size", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": 
"all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_report_queue_size{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "Report queue size", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": {}, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Number of running rollup jobs.", + "id": "aa2551f4-361d-498b-a7ff-ed9485346db2", + "layout": { + "h": 3, + "i": "aa2551f4-361d-498b-a7ff-ed9485346db2", + "isResizable": true, + "w": 6, + "x": 0, + "y": 85 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Rollup Job", + "options": {}, + "targets": [ + { + "expr": "doris_fe_job{job=\"$cluster_name\", instance=\"$fe_master\", type=\"ROLLUP\"}", + "refId": "A" + } + ], + "type": "unknown", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "b6ec71fc-8801-49d9-b418-80a56527637e", + "layout": { + "h": 1, + "i": "b6ec71fc-8801-49d9-b418-80a56527637e", + "isResizable": false, + "w": 24, + "x": 0, + "y": 88 + }, + "name": "Transaction", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Show the number and rate of txn begin and success", + "id": "03f0fe6c-9794-4bde-af91-d19caecd6166", + "layout": { + "h": 6, + "i": "03f0fe6c-9794-4bde-af91-d19caecd6166", + "isResizable": true, + "w": 4, + "x": 0, + "y": 89 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Txn Begin/Success on FE", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_txn_counter{type=\"begin\"}", + "legend": "txn begin", + "refId": "A" + 
}, + { + "expr": "doris_fe_txn_counter{type=\"success\"}", + "legend": "txn success", + "refId": "D" + }, + { + "expr": "irate(doris_fe_txn_counter{type=\"begin\"}[$interval])", + "legend": "txn begin rate", + "refId": "B" + }, + { + "expr": "irate(doris_fe_txn_counter{type=\"success\"}[$interval])", + "legend": "txn success rate", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Show the failed txn request. Including rejected request and failed txn", + "id": "fb50e00a-30c4-4ad4-9cbd-49defebfd090", + "layout": { + "h": 6, + "i": "fb50e00a-30c4-4ad4-9cbd-49defebfd090", + "isResizable": true, + "w": 5, + "x": 4, + "y": 89 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Txn Failed/Reject on FE", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_fe_txn_counter{type=\"reject\"}[$interval])", + "legend": "txn reject rate", + "refId": "C" + }, + { + "expr": "rate(doris_fe_txn_counter{type=\"failed\"}[$interval])", + "legend": "txn failed rate", + "refId": "D" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The number of total publish task request and error rate.", + "id": "cfc68c22-6619-478b-9e12-0a15314662f1", + "layout": { + "h": 6, + "i": "cfc68c22-6619-478b-9e12-0a15314662f1", + "isResizable": true, + "w": 5, + "x": 9, + "y": 89 + }, + "links": [], + "maxPerRow": 4, 
+ "name": "[$cluster_name] Publish Task on BE", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"publish\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"publish\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Show the txn status on FE", + "id": "0a89693c-6530-46dd-a01a-0f00cee1fc75", + "layout": { + "h": 6, + "i": "0a89693c-6530-46dd-a01a-0f00cee1fc75", + "isResizable": true, + "w": 5, + "x": 14, + "y": 89 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] fe_txn_status", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"prepare\"}", + "legend": "prepare", + "refId": "A" + }, + { + "expr": "doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"precommitted\"}", + "legend": "precommitted", + "refId": "B" + }, + { + "expr": "doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"committed\"}", + "legend": "committed", + "refId": "C" + }, + { + "expr": "doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"aborted\"}", + "legend": "aborted", + "refId": "D" + }, + { + "expr": 
"doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"visible\"}", + "legend": "visible", + "refId": "E" + }, + { + "expr": "doris_fe_txn_status{group=\"fe\", job=\"$cluster_name\", type=\"unknown\"}", + "legend": "unknown", + "refId": "F" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the total received bytes rate of txn. Right Y axes indicates the loaded rows rate of txn.", + "id": "a965e2f4-1210-4a9c-a60c-142e3ef20857", + "layout": { + "h": 6, + "i": "a965e2f4-1210-4a9c-a60c-142e3ef20857", + "isResizable": true, + "w": 5, + "x": 19, + "y": 89 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Txn Load Bytes/Rows rate", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(rate(doris_be_stream_load{group=\"be\", job=\"$cluster_name\", type=\"receive_bytes\"}[$interval]))", + "legend": "bytes", + "refId": "A" + }, + { + "expr": "sum(rate(doris_be_stream_load{group=\"be\", job=\"$cluster_name\", type=\"load_rows\"}[$interval]))", + "legend": "rows", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "d5660739-4863-45ab-8989-321806539fa2", + "layout": { + "h": 1, + "i": "d5660739-4863-45ab-8989-321806539fa2", + "isResizable": false, + "w": 24, + "x": 0, + "y": 95 + }, + "name": "FE JVM", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + 
"datasourceValue": "${DS_DORIS}", + "description": "JVM Heap usage of specified Frontend.\nLeft Y Axes shows the used/max heap size.\nRight Y Axes shows the used percentage.", + "id": "4a802abe-2eb9-43b2-80ef-83edc18d69b1", + "layout": { + "h": 6, + "i": "4a802abe-2eb9-43b2-80ef-83edc18d69b1", + "isResizable": true, + "w": 6, + "x": 0, + "y": 96 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Heap", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"used\"}", + "legend": "used", + "refId": "A" + }, + { + "expr": "jvm_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"max\"}", + "legend": "max", + "refId": "B" + }, + { + "expr": "sum(jvm_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"used\"}) * 100 / sum(jvm_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"max\"})", + "legend": "percentage", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "JVM Non Heap usage of specified Frontend.\nLeft Y Axes shows the used/committed non heap size.", + "id": "d9854424-b151-4c38-853a-4526cd31edb9", + "layout": { + "h": 6, + "i": "d9854424-b151-4c38-853a-4526cd31edb9", + "isResizable": true, + "w": 6, + "x": 6, + "y": 96 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Non Heap", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": 
null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_non_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"used\"}", + "legend": "used", + "refId": "A" + }, + { + "expr": "jvm_non_heap_size_bytes{instance=\"$fe_instance\", job=\"$cluster_name\", type=\"committed\"}", + "legend": "committed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "JVM old generation usage of specified Frontend. Left Y Axes shows the used/max old generation size. Right Y Axes shows the used percentage.\nNormally, the usage percentage should be less than 80%.", + "id": "c30e8716-0e03-4b28-a9a7-4c3ea862f458", + "layout": { + "h": 6, + "i": "c30e8716-0e03-4b28-a9a7-4c3ea862f458", + "isResizable": true, + "w": 6, + "x": 12, + "y": 96 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Old", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_old_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"used\"}", + "legend": "used", + "refId": "A" + }, + { + "expr": "jvm_old_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"max\"}", + "legend": "max", + "refId": "B" + }, + { + "expr": "sum(jvm_old_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"used\"}) * 100 / sum(jvm_old_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"max\"})", + "legend": "percentage", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + 
"fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "JVM full gc stat of specified Frontend. \nLeft Y Axes shows times of full gc.\nRight Y Axes shows the time cost of each full gc.", + "id": "1ae86a38-9421-4472-b23a-1f41828a24e4", + "layout": { + "h": 6, + "i": "1ae86a38-9421-4472-b23a-1f41828a24e4", + "isResizable": true, + "w": 6, + "x": 18, + "y": 96 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Old GC", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_old_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"count\"}", + "legend": "count", + "refId": "A" + }, + { + "expr": "sum(jvm_old_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"time\"}) / sum(jvm_old_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"count\"})", + "legend": "avg time", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "JVM young generation usage of specified Frontend.\nLeft Y Axes shows the used/max young generation size.\nRight Y Axes shows the used percentage.", + "id": "d6d6155c-470d-4fb6-a4db-fded915c5cef", + "layout": { + "h": 6, + "i": "d6d6155c-470d-4fb6-a4db-fded915c5cef", + "isResizable": true, + "w": 6, + "x": 0, + "y": 102 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Young", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": 
"base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_young_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"used\"}", + "legend": "used", + "refId": "A" + }, + { + "expr": "jvm_young_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"max\"}", + "legend": "max", + "refId": "B" + }, + { + "expr": "sum(jvm_young_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"used\"}) * 100 / sum(jvm_young_size_bytes{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"max\"})", + "legend": "percentage", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "JVM young gc stat of specified Frontend. \nLeft Y Axes shows times of young gc.\nRight Y Axes shows the time cost of each young gc.", + "id": "1e9c7d1d-f69d-4c95-bfa7-d46014151f21", + "layout": { + "h": 6, + "i": "1e9c7d1d-f69d-4c95-bfa7-d46014151f21", + "isResizable": true, + "w": 6, + "x": 6, + "y": 102 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] [$fe_instance] JVM Young GC", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_young_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"count\"}", + "legend": "count", + "refId": "A" + }, + { + "expr": "sum(jvm_young_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"time\"}) / sum(jvm_young_gc{job=\"$cluster_name\", instance=\"$fe_instance\", type=\"count\"})", + "legend": "avg time", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + 
"custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Num of threads of FE JVM", + "id": "dd35a648-f3f2-4ce5-8f3c-315ccc3ad655", + "layout": { + "h": 6, + "i": "dd35a648-f3f2-4ce5-8f3c-315ccc3ad655", + "isResizable": true, + "w": 6, + "x": 12, + "y": 102 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] JVM Threads", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "jvm_thread{job=\"$cluster_name\", group=\"fe\", type=\"count\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "e9f5be81-e476-4056-bb0f-065a9082b26c", + "layout": { + "h": 1, + "i": "e9f5be81-e476-4056-bb0f-065a9082b26c", + "isResizable": false, + "w": 24, + "x": 0, + "y": 108 + }, + "name": "BE", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "CPU idle stat of Backends.\nLow means CPU is busy.", + "id": "d174d1e4-3f66-45c6-b2c8-e0ad12662bd3", + "layout": { + "h": 9, + "i": "d174d1e4-3f66-45c6-b2c8-e0ad12662bd3", + "isResizable": true, + "w": 12, + "x": 0, + "y": 109 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE CPU Idle", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": 
"(sum(rate(doris_be_cpu{mode=\"idle\", job=\"$cluster_name\"}[$interval])) by (job, instance)) / (sum(rate(doris_be_cpu{job=\"$cluster_name\"}[$interval])) by (job, instance)) * 100", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Memory usage of Backends.", + "id": "6d71521c-9ef9-4204-aba6-cf2a7659b5cd", + "layout": { + "h": 9, + "i": "6d71521c-9ef9-4204-aba6-cf2a7659b5cd", + "isResizable": true, + "w": 12, + "x": 12, + "y": 109 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Mem", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_be_memory_allocated_bytes{job=\"$cluster_name\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Network send(Left Y)/receive(Right Y) bytes rate of all device except 'lo'", + "id": "48850636-2977-43c0-bac3-2860f6e95eb1", + "layout": { + "h": 7, + "i": "48850636-2977-43c0-bac3-2860f6e95eb1", + "isResizable": true, + "w": 8, + "x": 0, + "y": 118 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Net send/receive bytes", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + 
"targets": [ + { + "expr": "irate(doris_be_network_send_bytes{job=\"$cluster_name\", group=\"be\", device!=\"lo\"}[$interval])", + "legend": "{{instance}}-{{device}}-send", + "refId": "A" + }, + { + "expr": "irate(doris_be_network_receive_bytes{job=\"$cluster_name\", group=\"be\", device!=\"lo\"}[$interval])", + "legend": "{{instance}}-{{device}}-receive", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Disk capacity usage of Backends", + "id": "3e344063-09e3-44e2-a544-80879a68bb67", + "layout": { + "h": 7, + "i": "3e344063-09e3-44e2-a544-80879a68bb67", + "isResizable": true, + "w": 8, + "x": 8, + "y": 118 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Disk Usage", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "(SUM(doris_be_disks_total_capacity{job=\"$cluster_name\"}) by (instance, path) - SUM(doris_be_disks_avail_capacity{job=\"$cluster_name\"}) by (instance, path)) / SUM(doris_be_disks_total_capacity{job=\"$cluster_name\"}) by (instance, path)", + "legend": "{{instance}}:{{path}}", + "refId": "C" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Number of tablets of each Backends", + "id": "10bc7e4d-44f0-4ec7-b16a-72eae631fb50", + "layout": { + "h": 7, + "i": "10bc7e4d-44f0-4ec7-b16a-72eae631fb50", + "isResizable": true, + "w": 8, + "x": 16, + "y": 118 + }, + 
"links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Tablet Distribution", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_fe_tablet_num{job=\"$cluster_name\", instance=\"$fe_master\"}", + "legend": "{{backend}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The file descriptor usage of Backends. Left Y axes shows the used fd num. Right Y axes shows the soft limit open file number.", + "id": "06bfa07c-aacd-44b6-8059-1d859999457d", + "layout": { + "h": 7, + "i": "06bfa07c-aacd-44b6-8059-1d859999457d", + "isResizable": true, + "w": 8, + "x": 0, + "y": 125 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE FD count", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_be_process_fd_num_used{job=\"$cluster_name\", group=\"be\"}", + "legend": "{{instance}}-used", + "refId": "A" + }, + { + "expr": "doris_be_process_fd_num_limit_soft{job=\"$cluster_name\", group=\"be\"}", + "legend": "{{instance}}-soft limit", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The thread number of Backends", + "id": "d22e80d3-b055-4520-a667-aeee7b754d42", + 
"layout": { + "h": 7, + "i": "d22e80d3-b055-4520-a667-aeee7b754d42", + "isResizable": true, + "w": 8, + "x": 8, + "y": 125 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE thread num", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_be_process_thread_num{job=\"$cluster_name\", group=\"be\"}", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "IO util of Backends.\nHigh means I/O is busy.", + "id": "621fec2e-c115-4c11-9420-f96720c0d348", + "layout": { + "h": 7, + "i": "621fec2e-c115-4c11-9420-f96720c0d348", + "isResizable": true, + "w": 8, + "x": 16, + "y": 125 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Disk IO util", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "doris_be_max_disk_io_util_percent", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Base compaction rate of Backends.\nNormally, base compaction only runs between 20:00 to 4:00 and it is configurable.\nRight Y axes indicates the total base compaction bytes.", + "id": "5cf950b7-0b3d-4eab-8f23-b5623cb0c590", + 
"layout": { + "h": 5, + "i": "5cf950b7-0b3d-4eab-8f23-b5623cb0c590", + "isResizable": true, + "w": 12, + "x": 0, + "y": 132 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Compaction Base", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_be_compaction_bytes_total{type=\"base\", job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + }, + { + "expr": "sum(doris_be_compaction_bytes_total{type=\"base\", job=\"$cluster_name\"})", + "legend": "Total", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Cumulative compaction rate of Backends.\nRight Y axes indicates the total cumulative compaction bytes.", + "id": "111d1ab7-f1ad-4de3-be45-9015d0ca1967", + "layout": { + "h": 5, + "i": "111d1ab7-f1ad-4de3-be45-9015d0ca1967", + "isResizable": true, + "w": 12, + "x": 12, + "y": 132 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Compaction Cumulate", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_be_compaction_bytes_total{type=\"cumulative\", job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + }, + { + "expr": "SUM(doris_be_compaction_bytes_total{type=\"cumulative\", job=\"$cluster_name\"})", + "legend": "Total", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + 
"drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Loading rate of Backends.\nThis indicates the rate of file downloading in LOADING state of load job(MINI and BROKER load).\nRight Y axes indicates the total rate of file downloading.", + "id": "5ee12bf7-aaab-4f49-baa7-406c3782239d", + "layout": { + "h": 5, + "i": "5ee12bf7-aaab-4f49-baa7-406c3782239d", + "isResizable": true, + "w": 12, + "x": 0, + "y": 137 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Push Bytes", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_be_push_request_write_bytes{job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + }, + { + "expr": "sum(rate(doris_be_push_request_write_bytes{job=\"$cluster_name\"}[$interval]))", + "legend": "Total rate", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Loading rows rate of Backends.\nThis indicates the rate of rows loaded in LOADING state of load job. 
Right Y axes shows the total push rate of cluster.", + "id": "ba09e247-7df0-42f5-9f08-2352e54a0644", + "layout": { + "h": 5, + "i": "ba09e247-7df0-42f5-9f08-2352e54a0644", + "isResizable": true, + "w": 12, + "x": 12, + "y": 137 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Push Rows", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_be_push_request_write_rows{job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + }, + { + "expr": "sum(rate(doris_be_push_request_write_rows{job=\"$cluster_name\"}[$interval]))", + "legend": "Total", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes shows the write rate of tablet header saved in rocksdb. 
Right Y axes shows the duration of each write operation.", + "id": "579dbc8d-11a0-4eda-b5f0-3e8f89d26268", + "layout": { + "h": 10, + "i": "579dbc8d-11a0-4eda-b5f0-3e8f89d26268", + "isResizable": true, + "w": 12, + "x": 0, + "y": 142 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Tablet Meta Write", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "irate(doris_be_meta_request_total{job=\"$cluster_name\", type=\"write\"}[$interval])", + "legend": "{{instance}}-rate", + "refId": "B" + }, + { + "expr": "doris_be_meta_request_duration{job=\"$cluster_name\", type=\"write\"} / doris_be_meta_request_total{job=\"$cluster_name\", type=\"write\"}", + "legend": "{{instance}}-latency", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Scan rows rate of Backends.\nThis indicates the read rows rate when processing queries.", + "id": "24202a67-6d39-42b1-afb8-b1058ab6da0c", + "layout": { + "h": 5, + "i": "24202a67-6d39-42b1-afb8-b1058ab6da0c", + "isResizable": true, + "w": 12, + "x": 12, + "y": 142 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] BE Scan Rows", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "rate(doris_be_query_scan_rows{job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + 
"drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes shows the read rate of tablet header saved in rocksdb. Right Y axes shows the duration of each read operation.", + "id": "d00679d1-d426-4848-ad94-c51ddae6e46f", + "layout": { + "h": 5, + "i": "d00679d1-d426-4848-ad94-c51ddae6e46f", + "isResizable": true, + "w": 12, + "x": 12, + "y": 147 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Tablet Meta Read", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "irate(doris_be_meta_request_total{job=\"$cluster_name\", type=\"read\"}[$interval])", + "legend": "{{instance}}-rate", + "refId": "B" + }, + { + "expr": "doris_be_meta_request_duration{job=\"$cluster_name\", type=\"read\"} / doris_be_meta_request_total{job=\"$cluster_name\", type=\"read\"}", + "legend": "{{instance}}-latency", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "collapsed": true, + "id": "c0a6e025-7ed2-4a81-9459-ce37715aede9", + "layout": { + "h": 1, + "i": "c0a6e025-7ed2-4a81-9459-ce37715aede9", + "isResizable": false, + "w": 24, + "x": 0, + "y": 152 + }, + "name": "BE tasks", + "panels": [], + "type": "row", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "100bce80-d34a-4244-8b85-627499710294", + "layout": { + "h": 6, + "i": "100bce80-d34a-4244-8b85-627499710294", + "isResizable": true, + "w": 8, + "x": 0, + "y": 153 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Tablets Report", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"report_all_tablets\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"report_all_tablets\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "62b0082f-e377-43f5-a0e5-98049379d152", + "layout": { + "h": 6, + "i": "62b0082f-e377-43f5-a0e5-98049379d152", + "isResizable": true, + "w": 8, + "x": 8, + "y": 153 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Single Tablet Report", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"report_tablet\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"report_tablet\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "bd866f1a-7bfb-44d0-a4b4-4f4f2b2cd267", + "layout": { + "h": 6, + "i": "bd866f1a-7bfb-44d0-a4b4-4f4f2b2cd267", + "isResizable": true, + "w": 8, + "x": 16, + "y": 153 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Finish task report", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"finish_task\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"finish_task\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "b22c2e79-a7fa-4675-a256-6e032ecd9b74", + "layout": { + "h": 6, + "i": "b22c2e79-a7fa-4675-a256-6e032ecd9b74", + "isResizable": true, + "w": 8, + "x": 0, + "y": 159 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Push Task", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "sum(doris_be_push_requests_total{job=\"$cluster_name\", status=\"SUCCESS\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_push_requests_total{job=\"$cluster_name\", status=\"FAIL\"}[$interval])", + "legend": "{{instance}}-failed", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "The average cost time of push tasks on each Backend.", + "id": "94ca89bb-8c46-4f30-8ac3-14981b09208d", + "layout": { + "h": 6, + "i": "94ca89bb-8c46-4f30-8ac3-14981b09208d", + "isResizable": true, + "w": 8, + "x": 8, + "y": 159 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Push Task Cost Time", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "irate(doris_be_push_request_duration_us{job=\"$cluster_name\"}[$interval])", + "legend": "{{instance}}", + "refId": "A" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": 
"linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "d2df89c0-808c-4032-8f69-473e02645208", + "layout": { + "h": 6, + "i": "d2df89c0-808c-4032-8f69-473e02645208", + "isResizable": true, + "w": 8, + "x": 16, + "y": 159 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Delete", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"delete\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"delete\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "360dd4ef-b945-4948-8c3c-73311392380d", + "layout": { + "h": 6, + "i": "360dd4ef-b945-4948-8c3c-73311392380d", + "isResizable": true, + "w": 8, + "x": 0, + "y": 165 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Base Compaction", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"base_compaction\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"base_compaction\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "f532a20d-6207-4287-a656-a86347f84e69", + "layout": { + "h": 6, + "i": "f532a20d-6207-4287-a656-a86347f84e69", + "isResizable": true, + "w": 8, + "x": 8, + "y": 165 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Cumulative Compaction", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"cumulative_compaction\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"cumulative_compaction\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "e38c661a-ebd5-461d-8821-e2a29fc5fffb", + "layout": { + "h": 6, + "i": "e38c661a-ebd5-461d-8821-e2a29fc5fffb", + "isResizable": true, + "w": 8, + "x": 16, + "y": 165 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Clone", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"clone\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"clone\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "6f07f466-a195-4232-ba97-4694e4d74f4c", + "layout": { + "h": 6, + "i": "6f07f466-a195-4232-ba97-4694e4d74f4c", + "isResizable": true, + "w": 8, + "x": 0, + "y": 171 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Create rollup", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"create_rollup\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"create_rollup\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "f79f8151-34dc-4f8c-a08c-1c0af04cd0b7", + "layout": { + "h": 6, + "i": "f79f8151-34dc-4f8c-a08c-1c0af04cd0b7", + "isResizable": true, + "w": 8, + "x": 8, + "y": 171 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Schema change", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"schema_change\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"schema_change\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + }, + { + "custom": { + "drawStyle": "lines", + "fillOpacity": 0, + "lineInterpolation": "linear", + "stack": "off", + "version": "3.0.0" + }, + "datasourceCate": "prometheus", + "datasourceValue": "${DS_DORIS}", + "description": "Left Y axes indicates the failure rate of specified tasks. 
Normally, it should be 0.\nRight Y axes indicates the total number of specified tasks in all Backends.", + "id": "fe07e069-210e-410c-9781-d71bf4086803", + "layout": { + "h": 6, + "i": "fe07e069-210e-410c-9781-d71bf4086803", + "isResizable": true, + "w": 8, + "x": 16, + "y": 171 + }, + "links": [], + "maxPerRow": 4, + "name": "[$cluster_name] Create tablet", + "options": { + "legend": { + "displayMode": "hidden" + }, + "thresholds": { + "steps": [ + { + "color": "#6C53B1", + "type": "base", + "value": null + } + ] + }, + "tooltip": { + "mode": "all", + "sort": "none" + } + }, + "targets": [ + { + "expr": "SUM(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"create_tablet\", status=\"total\"})", + "legend": "Total", + "refId": "A" + }, + { + "expr": "irate(doris_be_engine_requests_total{job=\"$cluster_name\", type=\"create_tablet\", status=\"failed\"}[$interval])", + "legend": "{{instance}}", + "refId": "B" + } + ], + "type": "timeseries", + "version": "3.0.0" + } + ], + "var": [ + { + "definition": "prometheus", + "name": "DS_DORIS", + "type": "datasource" + }, + { + "allOption": false, + "datasource": { + "cate": "prometheus", + "value": "${DS_DORIS}" + }, + "definition": "label_values(up, job)", + "hide": false, + "multi": false, + "name": "cluster_name", + "reg": "", + "type": "query" + }, + { + "allOption": false, + "datasource": { + "cate": "prometheus", + "value": "${DS_DORIS}" + }, + "definition": "query_result(node_info{group=\"fe\", job=\"$cluster_name\", type=\"is_master\"})", + "hide": false, + "multi": false, + "name": "fe_master", + "reg": "/instance=\"(.+:\\d+)\"/", + "type": "query" + }, + { + "allOption": false, + "datasource": { + "cate": "prometheus", + "value": "${DS_DORIS}" + }, + "definition": "up{group=\"fe\", job=\"$cluster_name\"}", + "hide": false, + "multi": false, + "name": "fe_instance", + "reg": "/instance=\"(.+:\\d+)/", + "type": "query" + }, + { + "allOption": false, + "datasource": { + "cate": "prometheus", + "value": 
"${DS_DORIS}" + }, + "definition": "up{group=\"be\", job=\"$cluster_name\"}", + "hide": false, + "multi": false, + "name": "be_instance", + "reg": "/instance=\"(.+:\\d+)/", + "type": "query" + }, + { + "definition": "1s,5s,1m,5m,1h,6h,1d", + "hide": false, + "name": "interval", + "type": "custom" + } + ], + "version": "3.0.0" + }, + "uuid": 1731586085431795000 + } +] diff --git a/integrations/Doris/icon/doris.svg b/integrations/Doris/icon/doris.svg new file mode 100644 index 00000000..82a04ae4 --- /dev/null +++ b/integrations/Doris/icon/doris.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + diff --git a/integrations/Doris/markdown/README.md b/integrations/Doris/markdown/README.md new file mode 100644 index 00000000..f40ae13f --- /dev/null +++ b/integrations/Doris/markdown/README.md @@ -0,0 +1,39 @@ +# Doris + +Doris 的进程都会暴露 `/metrics` 接口,通过这个接口暴露 Prometheus 协议的监控数据。 + +## 采集配置 + +采集配置文件为 categraf 的 `conf/input.prometheus/prometheus.toml`。因为 Doris 暴露的是 Prometheus 协议的监控数据,所以使用 categraf 的 prometheus 插件即可采集。 + +```toml +# doris_fe +[[instances]] +urls = [ + "http://127.0.0.1:8030/metrics" +] + +url_label_key = "instance" +url_label_value = "{{.Host}}" + +labels = { group = "fe",job = "doris_cluster01"} + +# doris_be +[[instances]] +urls = [ + "http://127.0.0.1:8040/metrics" +] +url_label_key = "instance" +url_label_value = "{{.Host}}" +labels = { group = "be",job = "doris_cluster01"} +``` + +## 告警规则 + +夜莺内置了 Doris 的告警规则,克隆到自己的业务组下即可使用。 + +## 仪表盘 + +夜莺内置了 Doris 的仪表盘,克隆到自己的业务组下即可使用。 + +