From 10bf53e43b5293b2e8ad9b234836babdc66ca956 Mon Sep 17 00:00:00 2001 From: Venu Vardhan Reddy Tekula Date: Thu, 11 Jun 2020 02:08:13 +0530 Subject: [PATCH] update according to suggestions Signed-off-by: Venu Vardhan Reddy Tekula --- grimoire_elk/enriched/gitlabcomments.py | 93 +++++++++---------------- schema/gitlabcomments_issues.csv | 8 +-- schema/gitlabcomments_merges.csv | 8 +-- tests/test_gitlabcomments.py | 25 +++---- 4 files changed, 51 insertions(+), 83 deletions(-) diff --git a/grimoire_elk/enriched/gitlabcomments.py b/grimoire_elk/enriched/gitlabcomments.py index 12d512b73..543858bb5 100644 --- a/grimoire_elk/enriched/gitlabcomments.py +++ b/grimoire_elk/enriched/gitlabcomments.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2015-2019 Bitergia +# Copyright (C) 2015-2020 Bitergia # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,6 +19,7 @@ # Venu Vardhan Reddy Tekula # + import logging import re import collections @@ -88,12 +89,7 @@ def __init__(self, db_sortinghat=None, db_projects_map=None, json_projects_map=N super().__init__(db_sortinghat, db_projects_map, json_projects_map, db_user, db_password, db_host) - self.users = {} # cache users - self.studies = [] - # self.studies.append(self.enrich_geolocation) - # self.studies.append(self.enrich_feelings) - # self.studies.append(self.enrich_extra_data) def set_elastic(self, elastic): self.elastic = elastic @@ -147,6 +143,8 @@ def get_sh_identity(self, item, identity_field=None): user = item # by default a specific user dict is expected if isinstance(item, dict) and 'data' in item: user = item['data'][identity_field] + elif identity_field: + user = item[identity_field] if not user: return identity @@ -177,13 +175,13 @@ def get_time_to_first_attention(self, item): return None def get_time_to_merge_request_response(self, item): - """Get the first date at which a review was made on the PR by 
someone - other than the user who created the PR + """Get the first date at which a review was made on the MR by someone + other than the user who created the MR """ review_dates = [] for comment in item['notes_data']: - # skip comments of the pull request creator + # skip comments of the merge request creator if item['author']['username'] == comment['author']['username']: continue @@ -196,12 +194,13 @@ def get_time_to_merge_request_response(self, item): def __get_reactions(self, item): item_reactions = item.get('award_emoji_data', []) + reactions_total_count = len(item_reactions) if item_reactions: reactions_counter = collections.Counter([reaction["name"] for reaction in item_reactions]) item_reactions = [{"type": reaction, "count": reactions_counter[reaction]} for reaction in reactions_counter] - return {"reactions": item_reactions} + return {"reactions": item_reactions, "reactions_total_count": reactions_total_count} def __get_rich_issue(self, item): rich_issue = {} @@ -220,41 +219,27 @@ def __get_rich_issue(self, item): else: rich_issue['time_open_days'] = rich_issue['time_to_close_days'] - rich_issue['user_login'] = issue['author']['username'] - - user = issue.get('author', None) - if user is not None and user: - rich_issue['user_name'] = user['name'] - rich_issue['author_name'] = user['name'] - rich_issue["user_domain"] = self.get_email_domain(user['email']) if user.get('email') else None - rich_issue['user_org'] = user.get('company', None) - rich_issue['user_location'] = user.get('location', None) - rich_issue['user_geolocation'] = None + author = issue.get('author', None) + if author is not None and author: + rich_issue['author_login'] = author['username'] + rich_issue['author_name'] = author['name'] + rich_issue['author_domain'] = self.get_email_domain(author['email']) if author.get('email') else None else: - rich_issue['user_name'] = None - rich_issue["user_domain"] = None - rich_issue['user_org'] = None - rich_issue['user_location'] = None - 
rich_issue['user_geolocation'] = None + rich_issue['author_login'] = None rich_issue['author_name'] = None + rich_issue['author_domain'] = None assignee = issue.get('assignee', None) if assignee is not None and assignee: rich_issue['assignee_login'] = assignee['username'] rich_issue['assignee_name'] = assignee['name'] - rich_issue["assignee_domain"] = self.get_email_domain(assignee['email']) if assignee.get('email') else None - rich_issue['assignee_org'] = assignee.get('company', None) - rich_issue['assignee_location'] = assignee.get('location'), None - rich_issue['assignee_geolocation'] = None + rich_issue['assignee_domain'] = self.get_email_domain(assignee['email']) if assignee.get('email') else None else: - rich_issue['assignee_name'] = None rich_issue['assignee_login'] = None - rich_issue["assignee_domain"] = None - rich_issue['assignee_org'] = None - rich_issue['assignee_location'] = None - rich_issue['assignee_geolocation'] = None + rich_issue['assignee_name'] = None + rich_issue['assignee_domain'] = None - rich_issue['id'] = issue['id'] + rich_issue['id'] = str(issue['id']) rich_issue['issue_id'] = issue['id'] rich_issue['issue_id_in_repo'] = issue['web_url'].split("/")[-1] rich_issue['repository'] = self.get_project_repository(rich_issue) @@ -316,42 +301,28 @@ def __get_rich_merge(self, item): else: rich_mr['time_open_days'] = rich_mr['time_to_close_days'] - rich_mr['user_login'] = merge_request['author']['username'] - - user = merge_request.get('author', None) - if user is not None and user: - rich_mr['user_name'] = user['name'] - rich_mr['author_name'] = user['name'] - rich_mr["user_domain"] = self.get_email_domain(user['email']) if user.get('email') else None - rich_mr['user_org'] = user.get('company', None) - rich_mr['user_location'] = user.get('location', None) - rich_mr['user_geolocation'] = None + author = merge_request.get('author', None) + if author is not None and author: + rich_mr['author_login'] = author['username'] + rich_mr['author_name'] = 
author['name'] + rich_mr['author_domain'] = self.get_email_domain(author['email']) if author.get('email') else None else: - rich_mr['user_name'] = None - rich_mr["user_domain"] = None - rich_mr['user_org'] = None - rich_mr['user_location'] = None - rich_mr['user_geolocation'] = None + rich_mr['author_login'] = None rich_mr['author_name'] = None + rich_mr['author_domain'] = None merged_by = merge_request.get('merged_by', None) if merged_by is not None and merged_by: rich_mr['merge_author_login'] = merged_by['username'] rich_mr['merge_author_name'] = merged_by['name'] - rich_mr["merge_author_domain"] = \ + rich_mr['merge_author_domain'] = \ self.get_email_domain(merged_by['email']) if merged_by.get('email') else None - rich_mr['merge_author_org'] = merged_by.get('company', None) - rich_mr['merge_author_location'] = merged_by.get('location', None) - rich_mr['merge_author_geolocation'] = None else: - rich_mr['merge_author_name'] = None rich_mr['merge_author_login'] = None - rich_mr["merge_author_domain"] = None - rich_mr['merge_author_org'] = None - rich_mr['merge_author_location'] = None - rich_mr['merge_author_geolocation'] = None + rich_mr['merge_author_name'] = None + rich_mr['merge_author_domain'] = None - rich_mr['id'] = merge_request['id'] + rich_mr['id'] = str(merge_request['id']) rich_mr['merge_id'] = merge_request['id'] rich_mr['merge_id_in_repo'] = merge_request['web_url'].split("/")[-1] rich_mr['repository'] = self.get_project_repository(rich_mr) @@ -490,7 +461,7 @@ def get_rich_merge_reviews(self, comments, eitem): self.copy_raw_fields(self.RAW_FIELDS_COPY, eitem, ecomment) - # Copy data from the enriched pull + # Copy data from the enriched merge request ecomment['merge_labels'] = eitem['merge_labels'] ecomment['merge_id'] = eitem['merge_id'] ecomment['merge_id_in_repo'] = eitem['merge_id_in_repo'] diff --git a/schema/gitlabcomments_issues.csv b/schema/gitlabcomments_issues.csv index f85b95701..9578d2f10 100644 --- a/schema/gitlabcomments_issues.csv +++ 
b/schema/gitlabcomments_issues.csv @@ -15,6 +15,7 @@ author_domain,keyword,true,"Domain associated to the author in SortingHat profil author_gender,keyword,true,"Author gender, based on her name (disabled by default)." author_gender_acc,long,true,"Author gender accuracy (disabled by default)." author_id,keyword,true,"Author ID from SortingHat." +author_login,keyword,true,"Author's login name from GitLab." author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile." author_name,keyword,true,"Author name." author_org_name,keyword,true,"Author organization name." @@ -48,8 +49,9 @@ metadata__updated_on,date,true,"Date when the item was updated on its original d origin,keyword,true,"The original URL from which the repository was retrieved from." project,keyword,true,"Project name." project_1,keyword,true,"Used if more than one project levels are allowed in the project hierarchy." -reactions.count,long,true,"The number of reactions to comment/issue." -reactions.type,keyword,true,"The name of reaction to comment/issue." +reactions.count,long,true,"Number of reactions of a given type." +reactions.type,keyword,true,"Name of reaction to comment/issue." +reactions_total_count,long,true,"Total number of reactions to comment/issue." repository,keyword,true,"Repository name." sub_type,keyword,true,"Type of the comment (issue comment)." tag,keyword,true,"Perceval tag." @@ -58,6 +60,4 @@ time_to_close_days,float,true,"Time to close an issue counted in days." time_to_first_attention,float,true,"Time to first attention to an issue counted in days." title_analyzed,text,true,"Issue title split by by terms to allow searching." url,keyword,true,"Url of the issue/comment." -user_login,keyword,true,"User's login name from GitLab." -user_name,keyword,true,"User's name." uuid,keyword,true,"Perceval UUID." 
diff --git a/schema/gitlabcomments_merges.csv b/schema/gitlabcomments_merges.csv index f6fba79b4..fc348970d 100644 --- a/schema/gitlabcomments_merges.csv +++ b/schema/gitlabcomments_merges.csv @@ -4,6 +4,7 @@ author_domain,keyword,true,"Domain associated to the author in SortingHat profil author_gender,keyword,true,"Author gender, based on her name (disabled by default)." author_gender_acc,long,true,"Author gender accuracy (disabled by default)." author_id,keyword,true,"Author ID from SortingHat." +author_login,keyword,true,"Author's login name from GitLab." author_multi_org_names,keyword,true,"List of the author organizations from SortingHat profile." author_name,keyword,true,"Author name." author_org_name,keyword,true,"Author organization name." @@ -54,8 +55,9 @@ num_versions,long,true,"Number of versions" origin,keyword,true,"The original URL from which the repository was retrieved from." project,keyword,true,"Project name." project_1,keyword,true,"Used if more than one project levels are allowed in the project hierarchy." -reactions.count,long,true,"The number of reactions to comment/merge request." -reactions.type,keyword,true,"The name of reaction to comment/merge request." +reactions.count,long,true,"Number of reactions of a given type." +reactions.type,keyword,true,"Name of reaction to comment/merge request." +reactions_total_count,long,true,"Total number of reactions to comment/merge request." repository,keyword,true,"Repository name." sub_type,keyword,true,"Type of the comment (merge request comment)." tag,keyword,true,"Perceval tag." @@ -64,6 +66,4 @@ time_to_close_days,float,true,"Time to close a merge request counted in days." time_to_merge_request_response,float,true,"Time to merge a merge request counted in days." title_analyzed,text,true,"Issue title split by by terms to allow searching." url,keyword,true,"Url of the merge request/comment." -user_login,keyword,true,"User's login name from GitLab." -user_name,keyword,true,"User's name."
uuid,keyword,true,"Perceval UUID." diff --git a/tests/test_gitlabcomments.py b/tests/test_gitlabcomments.py index 70b5ef60d..c78f153fb 100644 --- a/tests/test_gitlabcomments.py +++ b/tests/test_gitlabcomments.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2015-2019 Bitergia +# Copyright (C) 2015-2020 Bitergia # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,7 +58,6 @@ def test_raw_to_enrich(self): self.assertGreater(result['raw'], 0) self.assertGreater(result['enrich'], 0) - # self.assertEqual(result['raw'], result['enrich']) enrich_backend = self.connectors[self.connector][2]() @@ -67,37 +66,40 @@ def test_raw_to_enrich(self): self.assertEqual(item['category'], 'issue') self.assertNotEqual(eitem['issue_state'], 'closed') self.assertEqual(eitem['issue_labels'], ['UI', 'enhancement', 'feature']) + self.assertEqual(eitem['reactions_total_count'], 1) self.assertEqual(eitem['reactions'][0]['type'], 'thumbsdown') self.assertEqual(eitem['reactions'][0]['count'], 1) - # self.assertEqual(eitem['time_open_days'], 63.74) + self.assertEqual(eitem['issue_id_in_repo'], '25') + self.assertEqual(eitem['issue_created_at'], '2020-04-07T11:31:36.167Z') item = self.items[1] eitem = enrich_backend.get_rich_item(item) self.assertEqual(item['category'], 'merge_request') self.assertEqual(eitem['merge_state'], 'closed') self.assertEqual(eitem['merge_labels'], []) + self.assertEqual(eitem['reactions_total_count'], 0) self.assertEqual(eitem['reactions'], []) self.assertNotIn(eitem['time_to_merge_request_response'], eitem) self.assertEqual(eitem['time_to_close_days'], eitem['time_open_days']) + self.assertEqual(eitem['merge_id_in_repo'], '41') + self.assertEqual(eitem['merge_created_at'], '2020-04-20T19:17:54.576Z') item = self.items[2] eitem = enrich_backend.get_rich_item(item) self.assertEqual(item['category'], 'merge_request') self.assertEqual(eitem['merge_state'], 
'merged') + self.assertEqual(eitem['reactions_total_count'], 1) self.assertEqual(eitem['reactions'][0]['type'], 'rocket') self.assertEqual(eitem['reactions'][0]['count'], 1) - # self.assertEqual(eitem['time_open_days'], 49.04) item = self.items[3] eitem = enrich_backend.get_rich_item(item) self.assertEqual(item['category'], 'issue') self.assertEqual(eitem['issue_state'], 'closed') - self.assertEqual(eitem['user_name'], 'Shashank Priyadarshi') - self.assertIsNone(eitem['user_domain']) - self.assertIsNone(eitem['user_org']) self.assertEqual(eitem['author_name'], 'Shashank Priyadarshi') + self.assertIsNone(eitem['author_domain']) self.assertIsNone(eitem['assignee_domain']) - self.assertIsNone(eitem['assignee_org']) + self.assertEqual(eitem['reactions_total_count'], 0) self.assertEqual(eitem['reactions'], []) self.assertEqual(eitem['time_to_first_attention'], 13.07) self.assertEqual(eitem['time_to_close_days'], eitem['time_open_days']) @@ -106,14 +108,9 @@ def test_raw_to_enrich(self): eitem = enrich_backend.get_rich_item(item) self.assertEqual(item['category'], 'merge_request') self.assertEqual(eitem['merge_state'], 'merged') - self.assertEqual(eitem['user_name'], 'Vaishnav') - self.assertIsNone(eitem['user_domain']) - self.assertIsNone(eitem['user_org']) - self.assertIsNone(eitem['user_location']) - self.assertIsNone(eitem['user_location']) self.assertEqual(eitem['author_name'], 'Vaishnav') + self.assertIsNone(eitem['author_domain']) self.assertIsNone(eitem['merge_author_domain']) - self.assertIsNone(eitem['merge_author_org']) self.assertEqual(eitem['num_versions'], 1) self.assertEqual(eitem['num_merge_comments'], 3) self.assertEqual(eitem['time_to_merge_request_response'], 0)