{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "676be61a-bd65-4510-8357-94859f596330", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json" ] }, { "cell_type": "code", "execution_count": 2, "id": "94f312ae-87d2-4d5e-ae75-9fc85a2a980c", "metadata": {}, "outputs": [], "source": [ "data = pd.read_json('../data/SIR_test_set.json')" ] }, { "cell_type": "code", "execution_count": 3, "id": "7d949b04-4929-4921-a818-4e8cbb57826b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " CVE_ID Issue_Url_old \\\n", "0 CVE-2021-45822 https://github.com/btiteam/xbtit-3.1/issues/7 \n", "1 CVE-2021-45769 https://github.com/mz-automation/libiec61850/i... \n", "2 CVE-2021-45773 https://github.com/mz-automation/lib60870/issu... \n", "3 CVE-2022-25014 https://github.com/gamonoid/icehrm/issues/283 \n", "4 CVE-2022-25013 https://github.com/gamonoid/icehrm/issues/284 \n", ".. ... ... \n", "705 CVE-2022-32417 https://github.com/Snakinya/Vuln/issues/1 \n", "706 CVE-2021-34485 https://github.com/github/advisory-database/is... \n", "707 CVE-2021-44906 https://github.com/minimistjs/minimist/issues/11 \n", "708 CVE-2020-8927 https://github.com/github/advisory-database/is... \n", "709 CVE-2021-31402 https://github.com/cfug/dio/issues/1752 \n", "\n", " Issue_Url_new \\\n", "0 https://github.com/btiteam/xbtit-3.1/issues/7 \n", "1 https://github.com/mz-automation/libiec61850/i... \n", "2 https://github.com/mz-automation/lib60870/issu... \n", "3 https://github.com/gamonoid/icehrm/issues/283 \n", "4 https://github.com/gamonoid/icehrm/issues/284 \n", ".. ... \n", "705 https://github.com/snakinya/vuln/issues/1 \n", "706 https://github.com/github/advisory-database/is... \n", "707 https://github.com/minimistjs/minimist/issues/11 \n", "708 https://github.com/github/advisory-database/is... \n", "709 https://github.com/cfug/dio/issues/1752 \n", "\n", " Repo_new Issue_Created_At \\\n", "0 btiteam/xbtit-3.1 2021-12-22 20:25:58+00:00 \n", "1 mz-automation/libiec61850 2021-12-23 00:53:55+00:00 \n", "2 mz-automation/lib60870 2021-12-23 06:01:26+00:00 \n", "3 gamonoid/icehrm 2021-12-23 08:09:18+00:00 \n", "4 gamonoid/icehrm 2021-12-23 08:13:20+00:00 \n", ".. ... ... \n", "705 Snakinya/Vuln 2022-08-04 10:38:48+00:00 \n", "706 github/advisory-database 2022-10-12 20:44:32+00:00 \n", "707 minimistjs/minimist 2022-10-19 14:23:14+00:00 \n", "708 github/advisory-database 2022-10-31 20:04:11+00:00 \n", "709 cfug/dio 2023-03-21 16:54:52+00:00 \n", "\n", " description \\\n", "0 Stored & Reflected XSS affecting Xbtit NUMBERT... \n", "1 NULL Pointer Dereference in APITAG NULL Pointe... \n", "2 NULL Pointer Dereference in APITAG NULL Pointe... \n", "3 Reflected XSS vulnerability NUMBERTAG in icehr... \n", "4 Reflected XSS vulnerabilities NUMBERTAG in ice... \n", ".. ... \n", "705 pboot cms NUMBERTAG RCE. 漏洞详情: URLTAG 声明 APITA... \n", "706 .NET CVE backfill round NUMBERTAG Hello, Pleas... \n", "707 Backport of NUMBERTAG fixes to NUMBERTAG Thank... \n", "708 Update impacted packages for CVETAG . Hi, This... \n", "709 CVE Dio NUMBERTAG Google OVS Scanner. Package ... \n", "\n", " vectorString severity baseScore \\\n", "0 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N MEDIUM 6.1 \n", "1 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H HIGH 7.5 \n", "2 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H HIGH 7.5 \n", "3 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N MEDIUM 6.1 \n", "4 CVSS:3.1/AV:N/AC:L/PR:N/UI:R/S:C/C:L/I:L/A:N MEDIUM 6.1 \n", ".. ... ... ... \n", "705 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CRITICAL 9.8 \n", "706 CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:N/A:N MEDIUM 5.5 \n", "707 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H CRITICAL 9.8 \n", "708 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:L MEDIUM 6.5 \n", "709 CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N HIGH 7.5 \n", "\n", " impactScore exploitabilityScore \n", "0 2.7 2.8 \n", "1 3.6 3.9 \n", "2 3.6 3.9 \n", "3 2.7 2.8 \n", "4 2.7 2.8 \n", ".. ... ... \n", "705 5.9 3.9 \n", "706 3.6 1.8 \n", "707 5.9 3.9 \n", "708 2.5 3.9 \n", "709 3.6 3.9 \n", "\n", "[710 rows x 11 columns]\n" ] } ], "source": [ "train_data_temp = pd.DataFrame()\n", "print(data)" ] }, { "cell_type": "code", "execution_count": 4, "id": "d4272d23-2c40-416a-aa83-40b09817ea0a", "metadata": {}, "outputs": [], "source": [ "train_data_temp['description'] = data['description']" ] }, { "cell_type": "code", "execution_count": 5, "id": "101f87d6-38d7-4562-a572-49ab74eec58d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", " ... \n", "705 False\n", "706 False\n", "707 False\n", "708 False\n", "709 False\n", "Name: description, Length: 710, dtype: bool\n" ] } ], "source": [ "print(train_data_temp['description'].isna())" ] }, { "cell_type": "code", "execution_count": 6, "id": "85b0cd35-3862-43fb-b4ab-d88c0ceae6da", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Empty DataFrame\n", "Columns: [description]\n", "Index: []\n" ] } ], "source": [ "# 获取 NaN 值的行索引\n", "nan_rows = train_data_temp[train_data_temp['description'].isna()]\n", "print(nan_rows)" ] }, { "cell_type": "code", "execution_count": 7, "id": "8eaf202e-b96b-4f79-8b3b-89e4757add04", "metadata": {}, "outputs": [], "source": [ "vectorString = data['vectorString']" ] }, { "cell_type": "code", "execution_count": 8, "id": "49331613-93f9-4d86-9e43-384c16ff8813", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AV AC PR UI S C I A\n", "0 N L N R C L L N\n", "1 N L N N U N N H\n", "2 N L N N U N N H\n", "3 N L N R C L L N\n", "4 N L N R C L L N\n", ".. .. .. .. .. .. .. .. ..\n", "705 N L N N U H H H\n", "706 L L L N U H N N\n", "707 N L N N U H H H\n", "708 N L N N U N L L\n", "709 N L N N U H N N\n", "\n", "[710 rows x 8 columns]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\lx\\AppData\\Local\\Temp\\ipykernel_38864\\3052899741.py:14: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n", " train_data = train_data.applymap(transform_value)\n" ] } ], "source": [ "#转换数据\n", "def transform_value(val):\n", " return val.split(':')[1]\n", " \n", "columns = ['AV', 'AC', 'PR', 'UI', 'S', 'C', 'I', 'A']\n", "\n", "temp = []\n", "\n", "for i in range(vectorString.size):\n", " part = vectorString[i].split('/')\n", " list_items = part[1::]\n", " temp.append(list_items)\n", "train_data = pd.DataFrame(temp, columns=columns)\n", "train_data = train_data.applymap(transform_value)\n", "print(train_data)" ] }, { "cell_type": "code", "execution_count": 9, "id": "79a6f3ee-0517-4a4f-b26a-6f2dabf9d3b0", "metadata": {}, "outputs": [], "source": [ "def calculate_cvss_score(params):\n", " # 字典映射分值\n", " AV = {'N': 0.85, 'A': 0.62, 'L': 0.55, 'P': 0.2}\n", " AC = {'L': 0.77, 'H': 0.44}\n", " PR = {'N': 0.85, 'L': 0.68, 'H': 0.5}\n", " UI = {'N': 0.85, 'R': 0.62}\n", " S = {'U': 1, 'C': 1.08}\n", " C = {'N': 0, 'L': 0.22, 'H': 0.56}\n", " I = {'N': 0, 'L': 0.22, 'H': 0.56}\n", " A = {'N': 0, 'L': 0.22, 'H': 0.56}\n", "\n", " # 获取参数值\n", " av = AV[params['AV']]\n", " ac = AC[params['AC']]\n", " pr = PR[params['PR']]\n", " ui = UI[params['UI']]\n", " s = S[params['S']]\n", " c = C[params['C']]\n", " i = I[params['I']]\n", " a = A[params['A']]\n", "\n", " # 计算临时分数\n", " impact = 1 - (1 - c) * (1 - i) * (1 - a)\n", " exploitability = 8.22 * av * ac * pr * ui\n", "\n", " if impact == 0:\n", " base_score = 0\n", " else:\n", " if s == 1: # 未改变\n", " base_score = round(min(1.176 * (exploitability + impact), 10), 1)\n", " else: # 改变\n", " base_score = round(min(1.08 * (exploitability + impact), 10), 1)\n", "\n", " return base_score" ] }, { "cell_type": "code", "execution_count": 10, "id": "622cf1dd-082c-4d2a-a880-34d22e96d053", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AV AC PR UI S C I A score\n", "0 N L N R C L L N 3.5\n", "1 N L N N U N N H 5.2\n", "2 N L N N U N N H 5.2\n", "3 N L N R C L L N 3.5\n", "4 N L N R C L L N 3.5\n", ".. .. .. .. .. .. .. .. .. ...\n", "705 N L N N U H H H 5.6\n", "706 L L L N U H N N 3.0\n", "707 N L N N U H H H 5.6\n", "708 N L N N U N L L 5.0\n", "709 N L N N U H N N 5.2\n", "\n", "[710 rows x 9 columns]\n" ] } ], "source": [ "# 为每一行创建字典\n", "train_dicts = train_data.apply(lambda row: {col: row[col][0] for col in train_data.columns}, axis=1)\n", "train_score = train_dicts.apply(calculate_cvss_score)\n", "train_data['score'] = train_score\n", "print(train_data)" ] }, { "cell_type": "code", "execution_count": 11, "id": "f767e3c9-634b-4c0d-9145-eb4c013e1a6e", "metadata": {}, "outputs": [], "source": [ "dict = {\n", " 'AV': {\n", " 'N': 'NETWORK',\n", " 'A': 'ADJACENT',\n", " 'L': 'LOCAL',\n", " 'P': 'PHYSICAL'\n", " },\n", " 'AC': {\n", " 'L': 'LOW',\n", " 'H': 'HIGH'\n", " }, \n", " 'PR': {\n", " 'N': 'NONE',\n", " 'L': 'LOW',\n", " 'H': 'HIGH'\n", " }, \n", " 'UI': {\n", " 'N': 'NONE',\n", " 'R': 'REQUIRED'\n", " },\n", " 'S': {\n", " 'U': 'UNCHANGED',\n", " 'C': 'CHANGED'\n", " },\n", " 'C': {\n", " 'N': 'NONE',\n", " 'L': 'LOW',\n", " 'H': 'HIGH'\n", " },\n", " 'I': {\n", " 'N': 'NONE',\n", " 'L': 'LOW',\n", " 'H': 'HIGH'\n", " },\n", " 'A': {\n", " 'N': 'NONE', \n", " 'L': 'LOW',\n", " 'H': 'HIGH'\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 12, "id": "d42106a3-9eb5-4580-9143-d7ae061b6d4c", "metadata": {}, "outputs": [ { "data": { "text/plain": " AV AC PR UI S C I A score\n0 NETWORK LOW NONE REQUIRED CHANGED LOW LOW NONE 3.5\n1 NETWORK LOW NONE NONE UNCHANGED NONE NONE HIGH 5.2\n2 NETWORK LOW NONE NONE UNCHANGED NONE NONE HIGH 5.2\n3 NETWORK LOW NONE REQUIRED CHANGED LOW LOW NONE 3.5\n4 NETWORK LOW NONE REQUIRED CHANGED LOW LOW NONE 3.5\n.. ... ... ... ... ... ... ... ... ...\n705 NETWORK LOW NONE NONE UNCHANGED HIGH HIGH HIGH 5.6\n706 LOCAL LOW LOW NONE UNCHANGED HIGH NONE NONE 3.0\n707 NETWORK LOW NONE NONE UNCHANGED HIGH HIGH HIGH 5.6\n708 NETWORK LOW NONE NONE UNCHANGED NONE LOW LOW 5.0\n709 NETWORK LOW NONE NONE UNCHANGED HIGH NONE NONE 5.2\n\n[710 rows x 9 columns]", "text/html": "
| \n | AV | \nAC | \nPR | \nUI | \nS | \nC | \nI | \nA | \nscore | \n
|---|---|---|---|---|---|---|---|---|---|
| 0 | \nNETWORK | \nLOW | \nNONE | \nREQUIRED | \nCHANGED | \nLOW | \nLOW | \nNONE | \n3.5 | \n
| 1 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nNONE | \nNONE | \nHIGH | \n5.2 | \n
| 2 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nNONE | \nNONE | \nHIGH | \n5.2 | \n
| 3 | \nNETWORK | \nLOW | \nNONE | \nREQUIRED | \nCHANGED | \nLOW | \nLOW | \nNONE | \n3.5 | \n
| 4 | \nNETWORK | \nLOW | \nNONE | \nREQUIRED | \nCHANGED | \nLOW | \nLOW | \nNONE | \n3.5 | \n
| ... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
| 705 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nHIGH | \nHIGH | \nHIGH | \n5.6 | \n
| 706 | \nLOCAL | \nLOW | \nLOW | \nNONE | \nUNCHANGED | \nHIGH | \nNONE | \nNONE | \n3.0 | \n
| 707 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nHIGH | \nHIGH | \nHIGH | \n5.6 | \n
| 708 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nNONE | \nLOW | \nLOW | \n5.0 | \n
| 709 | \nNETWORK | \nLOW | \nNONE | \nNONE | \nUNCHANGED | \nHIGH | \nNONE | \nNONE | \n5.2 | \n
710 rows × 9 columns
\n