Jira Issue Creation
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import random
from scipy.stats import truncnorm
import argparse
# Configuration
estimated_hrs_max = 24
estimated_hrs_min = 4
estimated_mul = 1.25
start_range_days = 7
df_ffill_slices = ['key','name']
df_interpolate_slices = ['Earliest Start', 'Story Factor', 'Bug Factor', 'Worklog Factor']
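# Changelog templates: each key matches an event column produced later (created, worklog start/end,
# resolution) and describes the Jira status or resolution transition recorded for that event.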
history_items_config = {
'created':{'fieldType':'jira','field':'status','from':10000,'fromString':'Backlog','to':10001,'toString':'Selected for Development'},
'WL startTime':{'fieldType':'jira','field':'status','from':10001,'fromString':'Selected for Development','to':3,'toString':'In Progress'},
'WL endTime':{'fieldType':'jira','field':'status','from':3,'fromString':'In Progress','to':10002,'toString':'Done'},
'resolution':{'fieldType':'jira','field':'resolution','from':None,'fromString':None,'to':10000,'toString':'Done'}
}
resolution_done = 'Done'
workhrs = 8
previous = None
previous_user = None
parser = argparse.ArgumentParser()
parser.add_argument('file')
parser.add_argument('-s', '--sheet', required=True)
parser.add_argument('-t', '--time', required=True, help='yyyy-mm-dd of the last worklog day')
parser.add_argument('-w', '--worklogs', default="yes", help='"no" will remove worklogs from export, default="yes"')
parser.add_argument('-k', '--key', required=True, help='Project key')
parser.add_argument('-n', '--name', required=True, help='Project name')
parser.add_argument('-c', '--count', default=200, type=int, help='Issue count')
parser.add_argument('-z', '--tzone', default='Europe/Berlin', help='Timezone, default="Europe/Berlin"')
parser.add_argument('--reporter', required=True, help='Reporter username')
parser.add_argument('--firstuser', default=1, type=int, help='Number of the first user. Eg. 1 for user1. Default=1')
parser.add_argument('--lastuser', default=16, type=int, help='Number of the last user. Eg. 16 for user16. Default=16')
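# Distribute one assignee's daily capacity (workhrs * 60 * Worklog Factor minutes) across the issues
# that still have remaining effort. 's' is one day of one assignee: its index holds the issue counters,
# its values the original estimates (hours) of issues starting that day. The global 'previous' series
# carries the remaining minutes per issue between calls; the returned series holds the minutes logged
# per issue on that day.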
def worklogs(s):
global previous
global previous_user
if previous_user != s.name[0]:
previous = pd.Series([0]*len(s), index=s.index)
# Remove all remaining entries less than 15 minutes
previous.loc[previous < 15] = 0
# Add current to remaining
previous = previous.add(s.mul(estimated_mul).mul(60))
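# Daily capacity in minutes: workhrs * 60 * Worklog Factor (s.name[2]), split evenly across open issues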
t_count = previous.loc[previous > 0].count()
t = workhrs*60*s.name[2]/t_count if t_count > 0 else 0
current = previous.copy()
current = current.map(lambda x: t-x if x > 0 and x < t else 0)
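# Redistribute the capacity left over from issues needing less than 't' to the issues with more remaining work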
dis_count = previous.loc[previous >= t].count()
dis = current.sum()/dis_count+t if dis_count > 0 else t
current = previous.copy()
current = current.map(lambda x: dis-np.max([dis-x, 0]))
current = current.map(lambda x: np.floor(x))
previous = previous.sub(current)
previous_user = s.name[0]
return current
def main():
args = parser.parse_args()
df = pd.read_excel(args.file, engine='odf', sheet_name=args.sheet)
df['key'] = args.key
df['name'] = args.name
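# Build the assignee pool (user<firstuser> .. user<lastuser>) and a weight per user so that
# lower-numbered users are picked more often.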
assignees = np.vectorize(lambda x: 'user'+str(x))(np.arange(args.firstuser, args.lastuser+1))
assignees_bias = (np.arange(len(assignees)-1,-1,-1)**1.4)+10
reporter = args.reporter
import_tz = args.tzone
# Get the earliest start and latest end from the imported data
earliest_day = pd.offsets.MonthBegin().rollback(np.min([pd.Timestamp(x) for x in df['created']]))
latest_day = pd.offsets.MonthBegin().rollback(np.max([pd.Timestamp(x) for x in df['created']]))
# Fill the time dimension with days.
# resample ignores/removes the timezone. We will set it later. Take care that all date-time data is in the same timezone.
# interpolate is applied to a subset of columns. For all other columns ffill is used.
df = df.set_index(['created'])
_df_ffill = df[df_ffill_slices].resample('D').ffill()
_df_interpolate = df[df_interpolate_slices].resample('D').interpolate(method='linear')
s_worklog_factor = _df_interpolate['Worklog Factor']
df = _df_ffill.merge(_df_interpolate, left_index=True, right_index=True)
# Calculate the issue count per issue type for each day.
# For each issue type we create a new column holding the number of issues for that day.
# melt lets us group issues by issue type and keep a single column with the number of issues per day.
df = df.reset_index()
df['Last Day'] = (df['created'] + pd.offsets.MonthEnd(0)).dt.day
df = df.set_index(['created'])
cols = df.columns.to_list()
df['Story'] = (args.count * df['Story Factor'] / df['Last Day']).astype(dtype='int64')
df['Bug'] = (args.count * df['Bug Factor'] / df['Last Day']).astype(dtype='int64')
df = df.melt(id_vars=cols, var_name='issueType', value_name='Issue Type Count', ignore_index=False)
df = df.drop(columns=['Story Factor', 'Bug Factor'])
# Create one row per issue.
# No need to handle issue types separately since that information was melted before and is now available as an index level.
# We only keep the index with dates and issue types, and the column with the number of issues for that day.
df = df.reset_index()
df = df.set_index(['created', 'issueType']).sort_index(level=0)
_idx = df.index.values.tolist()
_icount = df['Issue Type Count'].to_list()
# We duplicate each tuple (original index) by the number of issues for that day. One tuple represents one day.
_idx_new = [_idx[i] for i in range(0, len(_idx)) for j in range(0, _icount[i])]
# This is our issue counter. KA-1, KA-2, and so on.
_icount_new = [i for i in range(1, len(_idx_new)+1)]
_df_issues = pd.DataFrame({'Issue Counter':_icount_new}, index=pd.MultiIndex.from_tuples(_idx_new, names=['created','issueType']))
# By merging both results with an inner join we drop all unrelated days
df = df.merge(_df_issues, how='inner', on=['created','issueType']).ffill()
df = df.drop(columns=['Issue Type Count'])
# Add issue fields
df = df.reset_index()
df['summary'] = df['key'] + '-' + df['Issue Counter'].astype(dtype='string') + ' ' + df['issueType']
df['reporter'] = reporter
df['priority'] = 'Medium'
_estimates_min_max_mean = (estimated_hrs_max-estimated_hrs_min)/2+estimated_hrs_min
# For estimates we draw from a truncated normal (Gaussian) distribution to get a more realistic spread. The factor 0.8 shifts the mean towards lower values.
_estimates_floats = truncnorm((estimated_hrs_min-_estimates_min_max_mean*0.8)/4, (estimated_hrs_max-_estimates_min_max_mean*0.8)/4, loc=_estimates_min_max_mean*0.8, scale=4).rvs(df['Issue Counter'].size)
# Dividing by four, rounding, and multiplying by four again snaps all estimates to multiples of four hours.
df['originalEstimate'] = [int(round(i/4))*4 for i in _estimates_floats]
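# Pick an assignee per issue, weighted by assignees_bias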
df['assignee'] = random.choices(assignees, weights=assignees_bias, k=df['summary'].size)
# Calculate Worklogs
df = df.reset_index()
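# Work starts 'Earliest Start' days after creation plus a random offset of up to start_range_days days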
df['Start Time'] = df['created'] + pd.Series([pd.Timedelta(days=(i+random.randint(1, start_range_days))) for i in df['Earliest Start']])
df['Start Time'] = df['Start Time'].dt.to_period('D')
# Create a separate table for worklogs
wl = df[['Issue Counter','Start Time','originalEstimate','assignee']]
wl_concat_frames = []
wl_groups = wl.groupby('assignee')
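# Align the Worklog Factor with a daily PeriodIndex spanning from the first month to the last worklog
# day given via --time; fill gaps at both ends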
s_worklog_factor.index = s_worklog_factor.index.to_period()
frame_period_idx = pd.Index(pd.period_range(start=earliest_day, end=args.time, freq='D', name='Start Time').to_list())
s_worklog_factor = s_worklog_factor.reindex(frame_period_idx).ffill().bfill()
for name, frame in wl_groups:
frame = frame.copy()
# The only value column is originalEstimate; everything else goes into the index.
# By unstacking 'Issue Counter' the 'Start Time' becomes unique.
frame = frame.set_index(['Issue Counter','assignee','Start Time']).unstack(0)
# Add name (assignee) to the index as level 0
frame_idx = pd.MultiIndex.from_product([[name], frame_period_idx.to_list()])
frame = frame.reindex(frame_idx).droplevel(0, axis=1).fillna(0)
# The reindexing already happened before the loop.
# We can therefore overwrite the existing one-level index with the two-level index.
s_worklog_factor.index = frame_idx
frame['Worklog Factor'] = s_worklog_factor
frame = frame.set_index('Worklog Factor', append=True)
frame = frame.apply(worklogs, axis=1)
# Remove all days containing zeros only
frame = frame.loc[frame.sum(axis=1) > 0]
# Remove Worklog Factor
frame = frame.droplevel(2).stack().reset_index()
frame = frame.rename(columns={'level_0':'author','level_1':'startDate','Issue Counter':'issue',0:'timeSpent'})
# Remove all issues with timeSpent == 0
frame = frame.loc[frame['timeSpent'] > 0]
# Convert the daily start period back to a timestamp; the workday starts at 09:00
frame['startDate'] = [x.to_timestamp()+(pd.Timedelta(hours=9)) for x in frame['startDate']]
frame_starttime = frame.groupby('issue')['startDate'].min()
frame_endtime = frame.groupby('issue')['startDate'].max()
frame['startDate'] = frame['startDate'].dt.tz_localize(import_tz)
frame['timeSpent'] = [pd.Timedelta(minutes=x) for x in frame['timeSpent']]
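# Collapse all worklog rows of an issue into a single 'worklogs' list of dicts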
frame_issues = frame.set_index('issue').groupby('issue')
frame_timespent_sum = frame_issues['timeSpent'].sum()
frame = frame_issues.apply(lambda x: x.to_dict(orient='records')).to_frame()
frame = frame.rename(columns={0:'worklogs'})
# Add columns to the issue
frame['WL startTime'] = frame_starttime
frame['WL endTime'] = frame_endtime+pd.Timedelta(hours=9)
frame['WL timeSpent sum'] = frame_timespent_sum
wl_concat_frames.append(frame)
# 'issue' remains as the index and will be used for merging
wl = pd.concat(wl_concat_frames)
df = df.set_index('Issue Counter')
df = df.merge(wl, how='left', left_index=True, right_index=True)
df['WL timeSpent sum'] = df['WL timeSpent sum'].fillna(pd.Timedelta(0))
# Changelog
df = df.reset_index()
df['resolution'] = None
# We only need a truthy flag here instead of the entire worklogs list
df['wl started'] = df['worklogs'].notnull()
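# An issue counts as finished once the logged time reaches the adjusted estimate (originalEstimate *
# estimated_mul) minus 15 minutes; finished issues get the 'Done' resolution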
df['wl finished'] = [pd.Timedelta(x*estimated_mul-0.25, unit='hours') for x in df['originalEstimate']]
df['wl finished'] = (df['wl finished'].sub(df['WL timeSpent sum'])) < pd.Timedelta(0, unit='hours')
df.loc[df['wl finished'] == True,'resolution'] = resolution_done
chlog = df[['created','assignee','Issue Counter','WL startTime','wl started','WL endTime','wl finished']].copy()
# Mark all lines not started yet
chlog.loc[chlog['wl started'] == False,'WL startTime'] = np.nan
# Mark all lines not finished yet
chlog.loc[chlog['wl finished'] == False,'WL endTime'] = np.nan
chlog = chlog.drop(columns=['wl started','wl finished'])
# Use 'WL endTime' for 'resolution'
chlog['resolution'] = chlog['WL endTime']
# Make the keys from history_items_config match the chlog columns
chlog = chlog.rename(columns={'assignee':'author'})
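# One changelog row per event: issue created, work started, work finished, resolution set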
chlog = chlog.melt(id_vars=['Issue Counter','author'], value_vars=['created','WL startTime','WL endTime','resolution'])
# Change the author of the first change to reporter
chlog = chlog.set_index('variable')
chlog.loc['created','author'] = reporter
chlog = chlog.reset_index()
history_items = pd.DataFrame(history_items_config).T
# 'variable' is the matching column after the melt of chlog
history_items.index.name = 'variable'
history_items = history_items.reset_index()
chlog = chlog.merge(history_items, how='inner', on='variable')
chlog = chlog.drop(columns=['variable']).rename(columns={'value':'created'})
# Remove all lines with np.nan
chlog = chlog.loc[chlog['created'].notnull(),:]
chlog.loc[:,['created']] = [pd.Timestamp(x).tz_localize(import_tz) for x in chlog['created']]
latest_status = chlog.set_index('Issue Counter').sort_values(by=['created']).groupby('Issue Counter')['toString'].tail(1).reset_index().rename(columns={'toString':'status'})
latest_date = chlog.set_index('Issue Counter').sort_values(by=['created']).groupby('Issue Counter')['created'].tail(1).reset_index().rename(columns={'created':'updated'})
chlog = chlog.merge(latest_status, how='inner', on='Issue Counter')
chlog = chlog.merge(latest_date, how='inner', on='Issue Counter')
# Creation of 'items':[]
chlog = chlog.set_index(['Issue Counter','status','updated','author','created']).groupby(by=['Issue Counter','status','updated','author','created']).apply(lambda x: x.to_dict(orient='records')).to_frame()
chlog = chlog.rename(columns={0:'items'})
chlog = chlog.reset_index(['author','created'])
# Creation of 'history':[]
chlog['history'] = 'history'
chlog = chlog.set_index(['history'], append=True).groupby(by=['Issue Counter','status','updated','history']).apply(lambda x: x.to_dict(orient='records')).to_frame()
# Drop the helper levels and keep 'Issue Counter' as the index for merging chlog with df
chlog = chlog.droplevel(3)
chlog = chlog.reset_index(['status','updated'])
chlog = chlog.rename(columns={0:'history'})
df = df.set_index('Issue Counter')
df = df.merge(chlog, how='left', left_index=True, right_index=True)
# Last changes before generating the JSON
df = df.reset_index()
df = df.drop(columns=['index', 'Worklog Factor', 'Earliest Start', 'Last Day', 'Start Time', 'Issue Counter','wl started','WL endTime','wl finished','WL startTime'])
df['created'] = df['created'].dt.tz_localize(import_tz)
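# The remaining estimate is the original estimate minus the time already spent, clamped at zero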
df['originalEstimate'] = [pd.Timedelta(hours=x) for x in df['originalEstimate']]
df['estimate'] = [pd.Timedelta(np.max([x.total_seconds(),0]), unit='seconds') for x in df['originalEstimate'].sub(df['WL timeSpent sum'])]
df = df.rename(columns={'WL timeSpent sum':'timeSpent'})
df = df.drop(columns=['timeSpent'])
if args.worklogs == 'no':
df = df.drop(columns=['worklogs','originalEstimate','estimate'])
# Build import JSON
# Setting an index with project key and name preserves these two keys, and we get them back with a reset.
# Everything under project key and name becomes a dict of issues.
# Remove pairs whose value is np.nan (or otherwise falsy) from all dicts. np.nan == np.nan is False.
df = df.set_index(['key', 'name']).groupby(by=['key', 'name']).apply(lambda x: [{k:v for k,v in y.items() if v==v and v} for y in x.to_dict(orient='records')])
df.name = 'issues'
df = df.reset_index()
# Create a new group and produce a dict with everything under it
df['projects'] = 'projects'
df = df.set_index('projects').groupby(by=['projects']).apply(lambda x: x.to_dict(orient='records'))
print(df.to_json(orient='index', indent=4, date_format='iso'))
if __name__ == "__main__":
main()
Reach out to me for professional support!