Skip to content

Commit

Permalink
update according to suggestions
Browse files Browse the repository at this point in the history
Signed-off-by: Venu Vardhan Reddy Tekula <[email protected]>
  • Loading branch information
vchrombie committed Jun 11, 2020
1 parent c2a40cb commit 1e30dfe
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 85 deletions.
93 changes: 32 additions & 61 deletions grimoire_elk/enriched/gitlabcomments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2019 Bitergia
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -19,6 +19,7 @@
# Venu Vardhan Reddy Tekula <[email protected]>
#


import logging
import re
import collections
Expand Down Expand Up @@ -88,12 +89,7 @@ def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=N
super().__init__(db_sortinghat, db_projects_map, json_projects_map,
db_user, db_password, db_host)

self.users = {} # cache users

self.studies = []
# self.studies.append(self.enrich_geolocation)
# self.studies.append(self.enrich_feelings)
# self.studies.append(self.enrich_extra_data)

def set_elastic(self, elastic):
self.elastic = elastic
Expand Down Expand Up @@ -147,6 +143,8 @@ def get_sh_identity(self, item, identity_field=None):
user = item # by default a specific user dict is expected
if isinstance(item, dict) and 'data' in item:
user = item['data'][identity_field]
elif identity_field:
user = item[identity_field]

if not user:
return identity
Expand Down Expand Up @@ -177,13 +175,13 @@ def get_time_to_first_attention(self, item):
return None

def get_time_to_merge_request_response(self, item):
"""Get the first date at which a review was made on the PR by someone
other than the user who created the PR
"""Get the first date at which a review was made on the MR by someone
other than the user who created the MR
"""
review_dates = []

for comment in item['notes_data']:
# skip comments of the pull request creator
# skip comments of the merge request creator
if item['author']['username'] == comment['author']['username']:
continue

Expand All @@ -196,12 +194,13 @@ def get_time_to_merge_request_response(self, item):

def __get_reactions(self, item):
item_reactions = item.get('award_emoji_data', [])
reactions_total_count = len(item_reactions)
if item_reactions:
reactions_counter = collections.Counter([reaction["name"] for reaction in item_reactions])
item_reactions = [{"type": reaction, "count": reactions_counter[reaction]} for reaction in
reactions_counter]

return {"reactions": item_reactions}
return {"reactions": item_reactions, "reactions_total_count": reactions_total_count}

def __get_rich_issue(self, item):
rich_issue = {}
Expand All @@ -220,41 +219,27 @@ def __get_rich_issue(self, item):
else:
rich_issue['time_open_days'] = rich_issue['time_to_close_days']

rich_issue['user_login'] = issue['author']['username']

user = issue.get('author', None)
if user is not None and user:
rich_issue['user_name'] = user['name']
rich_issue['author_name'] = user['name']
rich_issue["user_domain"] = self.get_email_domain(user['email']) if user.get('email') else None
rich_issue['user_org'] = user.get('company', None)
rich_issue['user_location'] = user.get('location', None)
rich_issue['user_geolocation'] = None
author = issue.get('author', None)
if author is not None and author:
rich_issue['author_login'] = author['username']
rich_issue['author_name'] = author['name']
rich_issue['author_domain'] = self.get_email_domain(author['email']) if author.get('email') else None
else:
rich_issue['user_name'] = None
rich_issue["user_domain"] = None
rich_issue['user_org'] = None
rich_issue['user_location'] = None
rich_issue['user_geolocation'] = None
rich_issue['author_login'] = None
rich_issue['author_name'] = None
rich_issue['author_domain'] = None

assignee = issue.get('assignee', None)
if assignee is not None and assignee:
rich_issue['assignee_login'] = assignee['username']
rich_issue['assignee_name'] = assignee['name']
rich_issue["assignee_domain"] = self.get_email_domain(assignee['email']) if assignee.get('email') else None
rich_issue['assignee_org'] = assignee.get('company', None)
rich_issue['assignee_location'] = assignee.get('location'), None
rich_issue['assignee_geolocation'] = None
rich_issue['assignee_domain'] = self.get_email_domain(assignee['email']) if assignee.get('email') else None
else:
rich_issue['assignee_name'] = None
rich_issue['assignee_login'] = None
rich_issue["assignee_domain"] = None
rich_issue['assignee_org'] = None
rich_issue['assignee_location'] = None
rich_issue['assignee_geolocation'] = None
rich_issue['assignee_name'] = None
rich_issue['assignee_domain'] = None

rich_issue['id'] = issue['id']
rich_issue['id'] = str(issue['id'])
rich_issue['issue_id'] = issue['id']
rich_issue['issue_id_in_repo'] = issue['web_url'].split("/")[-1]
rich_issue['repository'] = self.get_project_repository(rich_issue)
Expand Down Expand Up @@ -316,42 +301,28 @@ def __get_rich_merge(self, item):
else:
rich_mr['time_open_days'] = rich_mr['time_to_close_days']

rich_mr['user_login'] = merge_request['author']['username']

user = merge_request.get('author', None)
if user is not None and user:
rich_mr['user_name'] = user['name']
rich_mr['author_name'] = user['name']
rich_mr["user_domain"] = self.get_email_domain(user['email']) if user.get('email') else None
rich_mr['user_org'] = user.get('company', None)
rich_mr['user_location'] = user.get('location', None)
rich_mr['user_geolocation'] = None
author = merge_request.get('author', None)
if author is not None and author:
rich_mr['author_login'] = author['username']
rich_mr['author_name'] = author['name']
rich_mr['author_domain'] = self.get_email_domain(author['email']) if author.get('email') else None
else:
rich_mr['user_name'] = None
rich_mr["user_domain"] = None
rich_mr['user_org'] = None
rich_mr['user_location'] = None
rich_mr['user_geolocation'] = None
rich_mr['author_login'] = None
rich_mr['author_name'] = None
rich_mr['author_domain'] = None

merged_by = merge_request.get('merged_by', None)
if merged_by is not None and merged_by:
rich_mr['merge_author_login'] = merged_by['username']
rich_mr['merge_author_name'] = merged_by['name']
rich_mr["merge_author_domain"] = \
rich_mr['merge_author_domain'] = \
self.get_email_domain(merged_by['email']) if merged_by.get('email') else None
rich_mr['merge_author_org'] = merged_by.get('company', None)
rich_mr['merge_author_location'] = merged_by.get('location', None)
rich_mr['merge_author_geolocation'] = None
else:
rich_mr['merge_author_name'] = None
rich_mr['merge_author_login'] = None
rich_mr["merge_author_domain"] = None
rich_mr['merge_author_org'] = None
rich_mr['merge_author_location'] = None
rich_mr['merge_author_geolocation'] = None
rich_mr['merge_author_name'] = None
rich_mr['merge_author_domain'] = None

rich_mr['id'] = merge_request['id']
rich_mr['id'] = str(merge_request['id'])
rich_mr['merge_id'] = merge_request['id']
rich_mr['merge_id_in_repo'] = merge_request['web_url'].split("/")[-1]
rich_mr['repository'] = self.get_project_repository(rich_mr)
Expand Down Expand Up @@ -490,7 +461,7 @@ def get_rich_merge_reviews(self, comments, eitem):

self.copy_raw_fields(self.RAW_FIELDS_COPY, eitem, ecomment)

# Copy data from the enriched pull
# Copy data from the enriched merge request
ecomment['merge_labels'] = eitem['merge_labels']
ecomment['merge_id'] = eitem['merge_id']
ecomment['merge_id_in_repo'] = eitem['merge_id_in_repo']
Expand Down
9 changes: 4 additions & 5 deletions schema/gitlabcomments_issues.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ assignee_org_name,keyword,true,"Assignee organization name."
assignee_user_name,keyword,true,"Assignee user name from SortingHat."
assignee_uuid,keyword,true,"Assignee UUID from SortingHat."
author_bot,boolean,true,"True if the given author is identified as a bot, from SortingHat profile."
author_domain,keyword,true,"Domain associated to the author in SortingHat profile."
author_gender,keyword,true,"Author gender, based on her name (disabled by default)."
author_gender_acc,long,true,"Author gender accuracy (disabled by default)."
author_id,keyword,true,"Author ID from SortingHat."
author_login,keyword,true,"Author's login name from GitLab."
author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile."
author_name,keyword,true,"Author name."
author_org_name,keyword,true,"Author organization name."
Expand Down Expand Up @@ -48,8 +48,9 @@ metadata__updated_on,date,true,"Date when the item was updated on its original d
origin,keyword,true,"The original URL from which the repository was retrieved."
project,keyword,true,"Project name."
project_1,keyword,true,"Used if more than one project levels are allowed in the project hierarchy."
reactions.count,long,true,"The number of reactions to comment/issue."
reactions.type,keyword,true,"The name of reaction to comment/issue."
reactions.count,long,true,"Number of reactions of a given type."
reactions.type,keyword,true,"Name of reaction to comment/issue."
reactions_total_count,long,true,"Total number of reactions to comment/issue."
repository,keyword,true,"Repository name."
sub_type,keyword,true,"Type of the comment (issue comment)."
tag,keyword,true,"Perceval tag."
Expand All @@ -58,6 +59,4 @@ time_to_close_days,float,true,"Time to close an issue counted in days."
time_to_first_attention,float,true,"Time to first attention to an issue counted in days."
title_analyzed,text,true,"Issue title split by terms to allow searching."
url,keyword,true,"Url of the issue/comment."
user_login,keyword,true,"User's login name from GitLab."
user_name,keyword,true,"User's name."
uuid,keyword,true,"Perceval UUID."
9 changes: 4 additions & 5 deletions schema/gitlabcomments_merges.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name,type,aggregatable,description
author_bot,boolean,true,"True if the given author is identified as a bot."
author_domain,keyword,true,"Domain associated to the author in SortingHat profile."
author_gender,keyword,true,"Author gender, based on her name (disabled by default)."
author_gender_acc,long,true,"Author gender accuracy (disabled by default)."
author_id,keyword,true,"Author ID from SortingHat."
author_login,keyword,true,"Author's login name from GitLab."
author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile."
author_name,keyword,true,"Author name."
author_org_name,keyword,true,"Author organization name."
Expand Down Expand Up @@ -54,8 +54,9 @@ num_versions,long,true,"Number of versions"
origin,keyword,true,"The original URL from which the repository was retrieved."
project,keyword,true,"Project name."
project_1,keyword,true,"Used if more than one project levels are allowed in the project hierarchy."
reactions.count,long,true,"The number of reactions to comment/merge request."
reactions.type,keyword,true,"The name of reaction to comment/merge request."
reactions.count,long,true,"Number of reactions of a given type."
reactions.type,keyword,true,"Name of reaction to comment/merge request."
reactions_total_count,long,true,"Total number of reactions to comment/merge request."
repository,keyword,true,"Repository name."
sub_type,keyword,true,"Type of the comment (merge request comment)."
tag,keyword,true,"Perceval tag."
Expand All @@ -64,6 +65,4 @@ time_to_close_days,float,true,"Time to close a merge request counted in days."
time_to_merge_request_response,float,true,"Time to merge a merge request counted in days."
title_analyzed,text,true,"Issue title split by terms to allow searching."
url,keyword,true,"Url of the merge request/comment."
user_login,keyword,true,"User's login name from GitLab."
user_name,keyword,true,"User's name."
uuid,keyword,true,"Perceval UUID."
25 changes: 11 additions & 14 deletions tests/test_gitlabcomments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2019 Bitergia
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand Down Expand Up @@ -58,7 +58,6 @@ def test_raw_to_enrich(self):

self.assertGreater(result['raw'], 0)
self.assertGreater(result['enrich'], 0)
# self.assertEqual(result['raw'], result['enrich'])

enrich_backend = self.connectors[self.connector][2]()

Expand All @@ -67,37 +66,40 @@ def test_raw_to_enrich(self):
self.assertEqual(item['category'], 'issue')
self.assertNotEqual(eitem['issue_state'], 'closed')
self.assertEqual(eitem['issue_labels'], ['UI', 'enhancement', 'feature'])
self.assertEqual(eitem['reactions_total_count'], 1)
self.assertEqual(eitem['reactions'][0]['type'], 'thumbsdown')
self.assertEqual(eitem['reactions'][0]['count'], 1)
# self.assertEqual(eitem['time_open_days'], 63.74)
self.assertEqual(eitem['issue_id_in_repo'], '25')
self.assertEqual(eitem['issue_created_at'], '2020-04-07T11:31:36.167Z')

item = self.items[1]
eitem = enrich_backend.get_rich_item(item)
self.assertEqual(item['category'], 'merge_request')
self.assertEqual(eitem['merge_state'], 'closed')
self.assertEqual(eitem['merge_labels'], [])
self.assertEqual(eitem['reactions_total_count'], 0)
self.assertEqual(eitem['reactions'], [])
self.assertNotIn(eitem['time_to_merge_request_response'], eitem)
self.assertEqual(eitem['time_to_close_days'], eitem['time_open_days'])
self.assertEqual(eitem['merge_id_in_repo'], '41')
self.assertEqual(eitem['merge_created_at'], '2020-04-20T19:17:54.576Z')

item = self.items[2]
eitem = enrich_backend.get_rich_item(item)
self.assertEqual(item['category'], 'merge_request')
self.assertEqual(eitem['merge_state'], 'merged')
self.assertEqual(eitem['reactions_total_count'], 1)
self.assertEqual(eitem['reactions'][0]['type'], 'rocket')
self.assertEqual(eitem['reactions'][0]['count'], 1)
# self.assertEqual(eitem['time_open_days'], 49.04)

item = self.items[3]
eitem = enrich_backend.get_rich_item(item)
self.assertEqual(item['category'], 'issue')
self.assertEqual(eitem['issue_state'], 'closed')
self.assertEqual(eitem['user_name'], 'Shashank Priyadarshi')
self.assertIsNone(eitem['user_domain'])
self.assertIsNone(eitem['user_org'])
self.assertEqual(eitem['author_name'], 'Shashank Priyadarshi')
self.assertIsNone(eitem['author_domain'])
self.assertIsNone(eitem['assignee_domain'])
self.assertIsNone(eitem['assignee_org'])
self.assertEqual(eitem['reactions_total_count'], 0)
self.assertEqual(eitem['reactions'], [])
self.assertEqual(eitem['time_to_first_attention'], 13.07)
self.assertEqual(eitem['time_to_close_days'], eitem['time_open_days'])
Expand All @@ -106,14 +108,9 @@ def test_raw_to_enrich(self):
eitem = enrich_backend.get_rich_item(item)
self.assertEqual(item['category'], 'merge_request')
self.assertEqual(eitem['merge_state'], 'merged')
self.assertEqual(eitem['user_name'], 'Vaishnav')
self.assertIsNone(eitem['user_domain'])
self.assertIsNone(eitem['user_org'])
self.assertIsNone(eitem['user_location'])
self.assertIsNone(eitem['user_location'])
self.assertEqual(eitem['author_name'], 'Vaishnav')
self.assertIsNone(eitem['author_domain'])
self.assertIsNone(eitem['merge_author_domain'])
self.assertIsNone(eitem['merge_author_org'])
self.assertEqual(eitem['num_versions'], 1)
self.assertEqual(eitem['num_merge_comments'], 3)
self.assertEqual(eitem['time_to_merge_request_response'], 0)
Expand Down

0 comments on commit 1e30dfe

Please sign in to comment.