I am using the following code to get the bug fixing commits in a list of GitHub repositories.
def get_commit_bug_fixing(self):
EMPTY_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
detected_sml = []
selected_projects_by_version = pd.DataFrame()
curr_path = ''
bug_fixing_commits = self.selected_projects_commits[self.selected_projects_commits['is_bug_fixing'] == True]
for _, row in bug_fixing_commits.iterrows():
if row['path'] != curr_path:
curr_path = row['path']
g_repo = git.Git(curr_path)
g_repo.init()
if row['parent_sha'] == EMPTY_TREE_SHA:
continue
g_repo.checkout(row['parent_sha'])
if row['old_object']:
detected_sml += util.compute_file_metrics(row['path'], row['old_object'], row['parent_sha'])
else:
detected_sml += util.compute_file_metrics(row['path'], row['object'], row['parent_sha'])
sml_dict = dict(Counter([sml[4] for sml in detected_sml]))
pre_dict = {}
pre_dict['sml_name'] = list(sml_dict.keys())
pre_dict['sml_occs'] = list(sml_dict.values())
df = pd.DataFrame(pre_dict)
df['repoName'] = row['repoName']
df['repoOrg'] = row['repoOrg']
df['tag_commit_sha'] = row['tag_commit_sha']
df['tag'] = row['tag']
selected_projects_by_version = pd.concat([selected_projects_by_version, df])
selected_projects_by_version.to_csv('bug_fixing_commits.csv', index=False)
However, after getting to the g_repo.checkout(row['parent_sha'])
line I get the following error:
git.exc.GitCommandError: Cmd('git') failed due to: exit code(1)
cmdline: git checkout 6ada1a2e125f9b40bc38f9d6b69c60e4fe3b7f4e
stderr: 'error: Your local changes to the following files would be overwritten by checkout:
...list of files path...
Please commit your changes or stash them before you switch branches.
Aborting'
How can I resolve this issue? I am not sure what am I doing wrong?