|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263 |
- import requests
- import pandas as pd
- from event_classifier import is_promoted
- from dateutil.parser import parse
-
- # 设置好时间参数即可自动爬取并分析这段时间的issue数据
- # 时间参数格式:YYYY/MM/DD
- start_date = '2021/05/01'
- end_date = '2021/05/31'
-
- token = '' # Add Your Token Here
-
- # Data collection and analysis
- date_range = pd.date_range(start=start_date, end=end_date, freq="D")
-
- issue_list = []
- for date in date_range:
- print(f'Getting issues created at {str(date.strftime("%Y-%m-%d"))}:')
- date_str = str(date.strftime("%Y%m%d"))
- issue_list_payload = {'access_token': token,
- 'state': 'all',
- 'sort': 'created',
- 'direction': 'asc', # desc/asc
- 'per_page': 100,
- 'created_at': date_str
- }
- r = requests.get("https://gitee.com/api/v5/repos/mindspore/mindspore/issues", params=issue_list_payload)
- daily_issues = r.json()
-
- for issue in daily_issues:
- created_at = issue['created_at']
-
- if parse(created_at).replace(tzinfo=None).date() != date:
- break
-
- issue_id = issue['id']
- number = issue['number'] # Gitee issue URL
- comments_url = issue['comments_url']
- owner = issue['user']
- owner_id = owner['id']
- owner_login = owner['login']
- owner_name = owner['name']
-
- issue_operate_logs = requests.get(
- f"https://gitee.com/api/v5/repos/mindspore/issues/{number}/operate_logs",
- params={'access_token': token, 'repo': 'mindspore', 'sort': 'desc'}).json()
- issue_comments = requests.get(
- comments_url, params={'access_token': token, 'per_page': 100}).json()
-
- issue_status = is_promoted(owner_id, issue_operate_logs, issue_comments)
-
- print(number, owner_id, owner_login, owner_name, created_at,
- issue_status[1], issue_status[2]) # 1 label_flag, 2 assign_flag
-
- issue_list.append((number, owner_id, owner_login, owner_name, created_at,
- issue_status[1], issue_status[2]))
-
- # 输出已处理的issue列表
- name = ['issue_number', 'owner_id', 'owner_login', 'owner_name', 'created_at',
- 'is_labeled', 'is_assigned']
- issue_list_df = pd.DataFrame(columns=name, data=issue_list)
- issue_list_df.drop_duplicates(['issue_number'])
- issue_list_df.to_csv('data/MindSpore_issue_list(20210601).csv')
|