You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

issue_data_analyzer.py 2.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. import requests
  2. import pandas as pd
  3. from event_classifier import is_promoted
  4. from dateutil.parser import parse
  5. # 设置好时间参数即可自动爬取并分析这段时间的issue数据
  6. # 时间参数格式:YYYY/MM/DD
  7. start_date = '2021/05/01'
  8. end_date = '2021/05/31'
  9. token = '' # Add Your Token Here
  10. # Data collection and analysis
  11. date_range = pd.date_range(start=start_date, end=end_date, freq="D")
  12. issue_list = []
  13. for date in date_range:
  14. print(f'Getting issues created at {str(date.strftime("%Y-%m-%d"))}:')
  15. date_str = str(date.strftime("%Y%m%d"))
  16. issue_list_payload = {'access_token': token,
  17. 'state': 'all',
  18. 'sort': 'created',
  19. 'direction': 'asc', # desc/asc
  20. 'per_page': 100,
  21. 'created_at': date_str
  22. }
  23. r = requests.get("https://gitee.com/api/v5/repos/mindspore/mindspore/issues", params=issue_list_payload)
  24. daily_issues = r.json()
  25. for issue in daily_issues:
  26. created_at = issue['created_at']
  27. if parse(created_at).replace(tzinfo=None).date() != date:
  28. break
  29. issue_id = issue['id']
  30. number = issue['number'] # Gitee issue URL
  31. comments_url = issue['comments_url']
  32. owner = issue['user']
  33. owner_id = owner['id']
  34. owner_login = owner['login']
  35. owner_name = owner['name']
  36. issue_operate_logs = requests.get(
  37. f"https://gitee.com/api/v5/repos/mindspore/issues/{number}/operate_logs",
  38. params={'access_token': token, 'repo': 'mindspore', 'sort': 'desc'}).json()
  39. issue_comments = requests.get(
  40. comments_url, params={'access_token': token, 'per_page': 100}).json()
  41. issue_status = is_promoted(owner_id, issue_operate_logs, issue_comments)
  42. print(number, owner_id, owner_login, owner_name, created_at,
  43. issue_status[1], issue_status[2]) # 1 label_flag, 2 assign_flag
  44. issue_list.append((number, owner_id, owner_login, owner_name, created_at,
  45. issue_status[1], issue_status[2]))
  46. # 输出已处理的issue列表
  47. name = ['issue_number', 'owner_id', 'owner_login', 'owner_name', 'created_at',
  48. 'is_labeled', 'is_assigned']
  49. issue_list_df = pd.DataFrame(columns=name, data=issue_list)
  50. issue_list_df.drop_duplicates(['issue_number'])
  51. issue_list_df.to_csv('data/MindSpore_issue_list(20210601).csv')