Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

features added from downstream #1073

Open
wants to merge 37 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e9ab620
add release infomation
lishengbao Aug 15, 2022
26befd4
add comments data without bot
shanchenqi Aug 18, 2022
9d4fb8b
add pr-linked-issue message
shanchenqi Aug 18, 2022
54cc266
debug for review_data is None
shanchenqi Aug 18, 2022
cd0ef46
Merge pull request #1 from lishengbao/release
eyehwan Aug 19, 2022
b1140c4
Merge pull request #2 from shanchenqi/open-metrics-code
eyehwan Aug 19, 2022
7e00e64
fix release bug
lishengbao Aug 31, 2022
f49e8d6
Merge branch 'chaoss:master' into master
eyehwan Sep 9, 2022
77d4099
support opensearch 2.X and add commits_data for pr
shanchenqi Sep 19, 2022
5b5bf73
Merge pull request #4 from shanchenqi/main
eyehwan Sep 19, 2022
72d3ba3
Merge pull request #3 from lishengbao/fix_release
eyehwan Sep 19, 2022
936eb7c
raw phase add release mapping
lishengbao Dec 15, 2022
8b44171
Merge pull request #5 from lishengbao/lsb_dev
eyehwan Dec 15, 2022
57f7eaf
Add 'email' field
lishengbao Jan 20, 2023
638d2e5
Merge pull request #6 from lishengbao/lsb_dev
eyehwan Jan 20, 2023
7c27dd8
Fix the bug of reaction['user'] being NoneType in issues
lishengbao Jun 6, 2023
fb7bbbc
Merge pull request #8 from lishengbao/fix_issue_bug
eyehwan Jun 6, 2023
6038493
Add more event types
lishengbao Aug 17, 2023
eed43d3
Add stargazer and fork category to the githubql.py
lishengbao Aug 17, 2023
93f0a7c
Merge branch 'master' of github.com:lishengbao/grimoirelab-elk into e…
lishengbao Aug 17, 2023
50937f5
Merge pull request #9 from lishengbao/event
eyehwan Aug 17, 2023
001c16e
Add archived_at time in github:repo
lishengbao Aug 26, 2023
f021f54
Fix release author may be None
lishengbao Aug 26, 2023
df9b87c
Merge branch 'master' of github.com:lishengbao/grimoirelab-elk into e…
lishengbao Aug 26, 2023
5734ab3
Merge pull request #11 from lishengbao/event
eyehwan Aug 28, 2023
d770408
Adding the 'committer_email' and 'parents' fields to git
lishengbao Sep 25, 2023
aa395c3
Adding the 'assignees_login', 'requested_reviewers_login', 'reviewers…
lishengbao Sep 25, 2023
9885f22
Remove LockedEvent
lishengbao Oct 26, 2023
9fc15b1
Add UnassignedEvent, DemilestonedEvent, RenameTitleEvent
lishengbao Oct 26, 2023
73f7491
Merge pull request #12 from lishengbao/event
eyehwan Oct 27, 2023
c427377
Fix NoneType bug
lishengbao Nov 30, 2023
83c6676
Merge branch 'master' of github.com:lishengbao/grimoirelab-elk into e…
lishengbao Nov 30, 2023
1332602
Merge pull request #13 from lishengbao/event
eyehwan Nov 30, 2023
2cb947d
Add the 'topics' field to the repo stage
lishengbao Mar 6, 2024
364eaaf
Fix git branch bug
lishengbao May 15, 2024
0d7acb5
Merge pull request #15 from lishengbao/oh
eyehwan May 15, 2024
b74abf0
Merge pull request #14 from lishengbao/event
eyehwan May 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions grimoire_elk/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def get_bulk_url(self):
"""Get the bulk URL endpoint"""

if (self.major == '7' and self.distribution == 'elasticsearch') or \
(self.major == '2' and self.distribution == 'opensearch') or \
(self.major == '1' and self.distribution == 'opensearch'):
bulk_url = self.index_url + '/_bulk'
else:
Expand All @@ -312,6 +313,7 @@ def get_mapping_url(self, _type=None):
:param _type: type of the mapping. In case of ES7, it is None
"""
if (self.major == '7' and self.distribution == 'elasticsearch') or \
(self.major == '2' and self.distribution == 'opensearch') or \
(self.major == '1' and self.distribution == 'opensearch'):
mapping_url = self.index_url + "/_mapping"
else:
Expand Down
9 changes: 8 additions & 1 deletion grimoire_elk/enriched/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ def get_rich_item(self, item):

if 'refs' in commit:
eitem["commit_tags"] = list(filter(lambda r: "tag: " in r, commit['refs']))


eitem['parents'] = commit['parents']
eitem['hash_short'] = eitem['hash'][0:6]
# Enrich dates
author_date = str_to_datetime(commit["AuthorDate"])
Expand Down Expand Up @@ -293,11 +294,13 @@ def get_rich_item(self, item):
# author_name and author_domain are added always
identity = self.get_sh_identity(commit["Author"])
eitem["author_name"] = identity['name']
eitem["author_email"] = identity['email']
eitem["author_domain"] = self.get_identity_domain(identity)

# committer data
identity = self.get_sh_identity(commit["Commit"])
eitem["committer_name"] = identity['name']
eitem["committer_email"] = identity['email']
eitem["committer_domain"] = self.get_identity_domain(identity)

# title from first line
Expand Down Expand Up @@ -1040,6 +1043,10 @@ def add_commit_branches(self, git_repo, enrich_backend):
if commit_count:
self.__process_commits_in_branch(enrich_backend, git_repo.uri, branch_name, to_process)

# reset the counter
to_process = []
commit_count = 0

except Exception as e:
logger.error("[git] Skip adding branch info for repo {} due to {}".format(git_repo.uri, e))
return
Expand Down
126 changes: 124 additions & 2 deletions grimoire_elk/enriched/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,11 @@ def get_time_to_first_attention(self, item):
comment_dates = [str_to_datetime(comment['created_at']) for comment in item['comments_data']
if item['user']['login'] != comment['user']['login']]
reaction_dates = [str_to_datetime(reaction['created_at']) for reaction in item['reactions_data']
if item['user']['login'] != reaction['user']['login']]
if item.get('user') is not None and
item['user'].get('login') is not None and
reaction.get('user') is not None and
reaction['user'].get('login') is not None and
item['user']['login'] != reaction['user']['login']]
reaction_dates.extend(comment_dates)
if reaction_dates:
return min(reaction_dates)
Expand All @@ -203,6 +207,58 @@ def get_time_to_merge_request_response(self, item):

return None

# get the number of comments, excluding those made by bots
def get_num_of_comments_without_bot(self, item):
    """Count the comments on an issue made by neither its author nor a bot.

    A comment is ignored when:

    * its ``user`` entry is empty or has no login (handled the same way
      as the "ghost users" skipped in the review-comment counter),
    * it was written by the author of the issue, or
    * the commenter login contains the substring ``'bot'``.

    :param item: raw issue dict; ``item['user']['login']`` and
        ``item['comments_data']`` are read
    :returns: number of qualifying comments; ``0`` when the author login
        or the comments list is missing
    """
    author_login = (item.get('user') or {}).get('login')
    if author_login is None:
        # Without a known author we cannot tell which comments to exclude
        return 0

    total = 0
    for comment in item.get('comments_data') or []:
        login = (comment.get('user') or {}).get('login')
        if not login:
            # comment from a user that is no longer available
            continue
        if login == author_login or 'bot' in login:
            continue
        total += 1

    return total

def get_num_of_reviews_without_bot(self, item):
    """Count the review comments on a pull request made by neither its
    author nor a bot.

    :param item: raw pull request dict; ``item['user']['login']`` and
        ``item['review_comments_data']`` are read
    :returns: number of qualifying review comments; ``0`` when the author
        login is missing or when ``review_comments_data`` is absent,
        ``None`` or empty
    """
    author_login = (item.get('user') or {}).get('login')
    if author_login is None:
        # Without a known author we cannot tell which comments to exclude
        return 0

    total = 0
    # The key may be missing or None, so fall back to an empty list
    for comment in item.get('review_comments_data') or []:
        # skip comments of ghost users
        login = (comment.get('user') or {}).get('login')
        if not login:
            continue

        # skip comments of bots and of the pull request creator
        if 'bot' in login or login == author_login:
            continue

        total += 1

    return total

# get the date of the first attention, excluding bots
def get_time_to_first_attention_without_bot(self, item):
    """Get the first date at which a comment was made to the issue by
    someone other than the user who created the issue and other than a bot.

    Comments whose ``user`` entry is empty or has no login are ignored, as
    are comments whose login contains the substring ``'bot'``.

    :param item: raw issue dict; ``item['user']['login']`` and
        ``item['comments_data']`` are read
    :returns: the earliest ``created_at`` of the qualifying comments, as
        converted by ``str_to_datetime``, or ``None`` when no comment
        qualifies or the author login is missing
    """
    author_login = (item.get('user') or {}).get('login')
    if author_login is None:
        # Without a known author we cannot tell which comments to exclude
        return None

    comment_dates = []
    for comment in item.get('comments_data') or []:
        login = (comment.get('user') or {}).get('login')
        if not login or login == author_login or 'bot' in login:
            continue
        comment_dates.append(str_to_datetime(comment['created_at']))

    return min(comment_dates) if comment_dates else None

# get the date of the first review attention, excluding bots
def get_time_to_first_review_attention_without_bot(self, item):
    """Get the first date at which a review comment was made to the pull
    request by someone other than its creator and other than a bot.

    Review comments whose ``user`` entry is empty or has no login are
    ignored, as are comments whose login contains the substring ``'bot'``.

    :param item: raw pull request dict; ``item['user']['login']`` and
        ``item['review_comments_data']`` are read
    :returns: the earliest ``created_at`` of the qualifying review
        comments, as converted by ``str_to_datetime``, or ``None`` when
        there are no review comments, none qualifies, or the author login
        is missing
    """
    author_login = (item.get('user') or {}).get('login')
    if author_login is None:
        # Without a known author we cannot tell which comments to exclude
        return None

    comment_dates = []
    # The key may be missing or None, so fall back to an empty list
    for comment in item.get('review_comments_data') or []:
        login = (comment.get('user') or {}).get('login')
        if not login or login == author_login or 'bot' in login:
            continue
        comment_dates.append(str_to_datetime(comment['created_at']))

    return min(comment_dates) if comment_dates else None

def get_latest_comment_date(self, item):
"""Get the date of the latest comment on the issue/pr"""

Expand Down Expand Up @@ -457,6 +513,7 @@ def __get_rich_pull(self, item):
if self.__has_user(user):
rich_pr['user_name'] = user['name']
rich_pr['author_name'] = user['name']
rich_pr['user_email'] = user.get('email', None)
rich_pr["user_domain"] = self.get_email_domain(user['email']) if user['email'] else None
rich_pr['user_org'] = user['company']
rich_pr['user_location'] = user['location']
Expand All @@ -468,6 +525,7 @@ def __get_rich_pull(self, item):
rich_pr['user_location'] = None
rich_pr['user_geolocation'] = None
rich_pr['author_name'] = None
rich_pr['user_email'] = None

merged_by = pull_request.get('merged_by_data', None)
if merged_by and merged_by is not None:
Expand All @@ -484,6 +542,20 @@ def __get_rich_pull(self, item):
rich_pr['merge_author_org'] = None
rich_pr['merge_author_location'] = None
rich_pr['merge_author_geolocation'] = None
assignees_login = set()
for assignee in pull_request.get('assignees', []):
assignees_login.add(assignee.get('login'))
rich_pr['assignees_login'] = list(assignees_login)
requested_reviewers_login = set()
for requested_reviewer in pull_request.get('requested_reviewers', []):
requested_reviewers_login.add(requested_reviewer.get('login'))
rich_pr['requested_reviewers_login'] = list(requested_reviewers_login)
reviewers_login = set()
for reviewer in pull_request.get('reviews_data', []):
reviewer_user = reviewer.get('user')
if reviewer_user is not None:
reviewers_login.add(reviewer_user.get('login'))
rich_pr['reviewers_login'] = list(reviewers_login)

rich_pr['id'] = pull_request['id']
rich_pr['id_in_repo'] = pull_request['html_url'].split("/")[-1]
Expand All @@ -500,6 +572,7 @@ def __get_rich_pull(self, item):
rich_pr['additions'] = pull_request['additions']
rich_pr['deletions'] = pull_request['deletions']
rich_pr['changed_files'] = pull_request['changed_files']
rich_pr['merge_commit_sha'] = pull_request['merge_commit_sha']
# Adding this field for consistency with the rest of github-related enrichers
rich_pr['issue_url'] = pull_request['html_url']
labels = []
Expand All @@ -524,6 +597,15 @@ def __get_rich_pull(self, item):
min_review_date = self.get_time_to_merge_request_response(pull_request)
rich_pr['time_to_merge_request_response'] = \
get_time_diff_days(str_to_datetime(pull_request['created_at']), min_review_date)
rich_pr['num_review_comments_without_bot'] = \
self.get_num_of_reviews_without_bot(pull_request)
rich_pr['time_to_first_attention_without_bot'] = \
get_time_diff_days(str_to_datetime(pull_request['created_at']),
self.get_time_to_first_review_attention_without_bot(pull_request))

if 'linked_issues_data' in pull_request:
rich_pr['linked_issues_count'] = pull_request['linked_issues_data']
rich_pr['commits_data'] = pull_request['commits_data']

if self.prjs_map:
rich_pr.update(self.get_item_project(rich_pr))
Expand Down Expand Up @@ -560,6 +642,7 @@ def __get_rich_issue(self, item):
if self.__has_user(user):
rich_issue['user_name'] = user['name']
rich_issue['author_name'] = user['name']
rich_issue['user_email'] = user.get('email', None)
rich_issue["user_domain"] = self.get_email_domain(user['email']) if user['email'] else None
rich_issue['user_org'] = user['company']
rich_issue['user_location'] = user['location']
Expand All @@ -571,6 +654,7 @@ def __get_rich_issue(self, item):
rich_issue['user_location'] = None
rich_issue['user_geolocation'] = None
rich_issue['author_name'] = None
rich_issue['user_email'] = None

assignee = issue.get('assignee_data', None)
if self.__has_user(assignee):
Expand Down Expand Up @@ -614,6 +698,7 @@ def __get_rich_issue(self, item):
rich_issue['github_repo'] = rich_issue['repository'].replace(GITHUB, '')
rich_issue['github_repo'] = re.sub('.git$', '', rich_issue['github_repo'])
rich_issue["url_id"] = rich_issue['github_repo'] + "/issues/" + rich_issue['id_in_repo']
rich_issue['body'] = issue['body']

if self.prjs_map:
rich_issue.update(self.get_item_project(rich_issue))
Expand All @@ -622,11 +707,17 @@ def __get_rich_issue(self, item):
rich_issue['project'] = item['project']

rich_issue['time_to_first_attention'] = None
rich_issue['num_of_comments_without_bot'] = None
if issue['comments'] + issue['reactions']['total_count'] != 0:
rich_issue['time_to_first_attention'] = \
get_time_diff_days(str_to_datetime(issue['created_at']),
self.get_time_to_first_attention(issue))

rich_issue['num_of_comments_without_bot'] = \
self.get_num_of_comments_without_bot(issue)
rich_issue['time_to_first_attention_without_bot'] = \
get_time_diff_days(str_to_datetime(issue['created_at']),
self.get_time_to_first_attention_without_bot(issue))

rich_issue.update(self.get_grimoire_fields(issue['created_at'], "issue"))

item[self.get_field_date()] = rich_issue[self.get_field_date()]
Expand All @@ -646,6 +737,37 @@ def __get_rich_repo(self, item):
rich_repo['stargazers_count'] = repo['stargazers_count']
rich_repo['fetched_on'] = repo['fetched_on']
rich_repo['url'] = repo['html_url']
rich_repo['archived'] = repo['archived']
rich_repo['archivedAt'] = repo.get('archivedAt')
rich_repo['created_at'] = repo['created_at']
rich_repo['updated_at'] = repo['updated_at']


rich_releases = []
releases = repo.get('releases')
if releases:
for release in releases:
rich_releases_dict = {}
rich_releases_dict['id'] = release['id']
rich_releases_dict['tag_name'] = release['tag_name']
rich_releases_dict['target_commitish'] = release['target_commitish']
rich_releases_dict['prerelease'] = release['prerelease']
rich_releases_dict['name'] = release['name']
rich_releases_dict['body'] = ''
rich_releases_dict['created_at'] = release['created_at']
release_author = release['author']
rich_releases_author_dict = {}
if release_author is None:
rich_releases_author_dict['login'] = ''
else:
rich_releases_author_dict['login'] = release_author['login']
rich_releases_author_dict['name'] = ''
rich_releases_dict['author'] = rich_releases_author_dict
rich_releases.append(rich_releases_dict)
rich_repo['releases'] = rich_releases
rich_repo['releases_count'] = len(rich_releases)

rich_repo['topics'] = repo.get("topics", [])

if self.prjs_map:
rich_repo.update(self.get_item_project(rich_repo))
Expand Down
4 changes: 4 additions & 0 deletions grimoire_elk/enriched/github2.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,7 @@ def __get_rich_pull(self, item):
if user is not None and user:
rich_pr['user_name'] = user['name']
rich_pr['author_name'] = user['name']
rich_pr['user_email'] = user.get('email', None)
rich_pr["user_domain"] = self.get_email_domain(user['email']) if user['email'] else None
rich_pr['user_org'] = user['company']
rich_pr['user_location'] = user['location']
Expand All @@ -521,6 +522,7 @@ def __get_rich_pull(self, item):
rich_pr['user_location'] = None
rich_pr['user_geolocation'] = None
rich_pr['author_name'] = None
rich_pr['user_email'] = None

merged_by = pull_request.get('merged_by_data', None)
if merged_by and merged_by != USER_NOT_AVAILABLE:
Expand Down Expand Up @@ -618,6 +620,7 @@ def __get_rich_issue(self, item):
if user is not None and user:
rich_issue['user_name'] = user['name']
rich_issue['author_name'] = user['name']
rich_issue['user_email'] = user.get('email', None)
rich_issue["user_domain"] = self.get_email_domain(user['email']) if user['email'] else None
rich_issue['user_org'] = user['company']
rich_issue['user_location'] = user['location']
Expand All @@ -629,6 +632,7 @@ def __get_rich_issue(self, item):
rich_issue['user_location'] = None
rich_issue['user_geolocation'] = None
rich_issue['author_name'] = None
rich_issue['user_email'] = None

assignee = issue.get('assignee_data', None)
if assignee and assignee != USER_NOT_AVAILABLE:
Expand Down
Loading