Jira Issue Creation

#!/usr/bin/env python3

import pandas as pd
import numpy as np
import random
from scipy.stats import truncnorm
import argparse

# Configuration
estimated_hrs_max = 24
estimated_hrs_min = 4
estimated_mul = 1.25
start_range_days = 7
df_ffill_slices = ['key','name']
df_interpolate_slices = ['Earliest Start', 'Story Factor', 'Bug Factor', 'Worklog Factor']
history_items_config = {
    'created':{'fieldType':'jira','field':'status','from':10000,'fromString':'Backlog','to':10001,'toString':'Selected for Development'},
    'WL startTime':{'fieldType':'jira','field':'status','from':10001,'fromString':'Selected for Development','to':3,'toString':'In Progress'},
    'WL endTime':{'fieldType':'jira','field':'status','from':3,'fromString':'In Progress','to':10002,'toString':'Done'},
resolution_done = 'Done'

workhrs = 8
previous = None
previous_user = None

parser = argparse.ArgumentParser()
parser.add_argument('-s', '--sheet', required=True)
parser.add_argument('-t', '--time', required=True, help='yyyy-mm-dd for last worklogs')
parser.add_argument('-w', '--worklogs', default="yes", help='"no" will remove worklogs from export, default="yes"')
parser.add_argument('-k', '--key', required=True, help='Project key')
parser.add_argument('-n', '--name', required=True, help='Project name')
parser.add_argument('-c', '--count', default=200, type=int, help='Issue count')
parser.add_argument('-z', '--tzone', default='Europe/Berlin', help='Timezone, default="Europe/Berlin"')
parser.add_argument('--reporter', required=True, help='Reporter username')
parser.add_argument('--firstuser', default=1, type=int, help='Number of the first user. Eg. 1 for user1. Default=1')
parser.add_argument('--lastuser', default=16, type=int, help='Number of the last user. Eg. 16 for user16. Default=16')

def worklogs(s):
    global previous
    global previous_user

    if previous_user != s.name[0]:
        previous = pd.Series([0]*len(s), index=s.index)

    # Remove all remaining entries less than 15 minutes
    previous.loc[previous < 15] = 0

    # Add current to remaining
    previous = previous.add(s.mul(estimated_mul).mul(60))

    t_count = previous.loc[previous > 0].count()
    t = workhrs*60*s.name[2]/t_count if t_count > 0 else 0

    current = previous.copy()
    current = current.map(lambda x: t-x if x > 0 and x < t else 0)

    dis_count = previous.loc[previous >= t].count()
    dis = current.sum()/dis_count+t if dis_count > 0 else t

    current = previous.copy()
    current = current.map(lambda x: dis-np.max([dis-x, 0]))

    current = current.map(lambda x: np.floor(x))

    previous = previous.sub(current)
    previous_user = s.name[0]

    return current

def main():
    args = parser.parse_args()
    df = pd.read_excel(args.file, engine='odf', sheet_name=args.sheet)

    df['key'] = args.key
    df['name'] = args.name

    assignees = np.vectorize(lambda x: 'user'+str(x))(np.arange(args.firstuser, args.lastuser+1))
    assignees_bias = (np.arange(len(assignees)-1,-1,-1)**1.4)+10
    reporter = args.reporter

    import_tz = args.tzone

    # Get earlist start and latest end from imported data
    earliest_day = pd.offsets.MonthBegin().rollback(np.min([pd.Timestamp(x) for x in df['created']]))
    latest_day = pd.offsets.MonthBegin().rollback(np.max([pd.Timestamp(x) for x in df['created']]))

    # Fill time dimension with days.
    # resample ignores / remove timezone. We will set it later. Take care all date-time data is in the same timezone.
    # interpolate is applied on a sub-set of columns. All other columns ffill is used.
    df = df.set_index(['created'])
    _df_ffill = df[df_ffill_slices].resample('D').ffill()
    _df_interpolate = df[df_interpolate_slices].resample('D').interpolate(method='linear')
    s_worklog_factor = _df_interpolate['Worklog Factor']
    df = _df_ffill.merge(_df_interpolate, left_index=True, right_index=True)

    # Calculate issue type issue count for a day each.
    # For each issue type we create a new column with a value of the amount of issues for that day.
    # melt helps us to group issues by issue types, and for having one column only for the amount of issues for that day.
    df = df.reset_index()
    df['Last Day'] = (df['created'] + pd.offsets.MonthEnd(0)).dt.day
    df = df.set_index(['created'])
    cols = df.columns.to_list()
    df['Story'] = (args.count * df['Story Factor'] / df['Last Day']).astype(dtype='int64')
    df['Bug'] = (args.count * df['Bug Factor'] / df['Last Day']).astype(dtype='int64')
    df = df.melt(id_vars=cols, var_name='issueType', value_name='Issue Type Count', ignore_index=False)
    df = df.drop(columns=['Story Factor', 'Bug Factor'])

    # Create a row per issue.
    # No need to take care of issue types since that piece of information got melted before, and now is available as an index.
    # We only get the index with dates and issues types, and the column with the amount of issues for that day.
    df = df.reset_index()
    df = df.set_index(['created', 'issueType']).sort_index(level=0)
    _idx = df.index.values.tolist()
    _icount = df['Issue Type Count'].to_list()
    # We duplicate all tuples (original index) for the amount of issues for that day. One tuple represents one day.
    _idx_new = [_idx[i] for i in range(0, len(_idx)) for j in range(0, _icount[i])]
    # This is our issue counter. KA-1, KA-2, and so on.
    _icount_new = [i for i in range(1, len(_idx_new)+1)]
    _df_issues = pd.DataFrame({'Issue Counter':_icount_new}, index=pd.MultiIndex.from_tuples(_idx_new, names=['created','issueType']))
    # By merging both results by inner we drop all unrelated days
    df = df.merge(_df_issues, how='inner', on=['created','issueType']).ffill()
    df = df.drop(columns=['Issue Type Count'])

    # Add issue fields
    df = df.reset_index()
    df['summary'] = df['key'] + '-' + df['Issue Counter'].astype(dtype='string') + ' ' + df['issueType']
    df['reporter'] = reporter
    df['priority'] = 'Medium'
    _estimates_min_max_mean = (estimated_hrs_max-estimated_hrs_min)/2+estimated_hrs_min
    # For estimates we apply a gaussian curve to get a more realistic distribution. 0.8 shifts mean a bit to lower values.
    _estimates_floats = truncnorm((estimated_hrs_min-_estimates_min_max_mean*0.8)/4, (estimated_hrs_max-_estimates_min_max_mean*0.8)/4, loc=_estimates_min_max_mean*0.8, scale=4).rvs(df['Issue Counter'].size)
    # A division by four and round multiplied by four creates a distance of four between all values.
    df['originalEstimate'] = [int(round(i/4))*4 for i in _estimates_floats]
    df['assignee'] = random.choices(assignees, weights=assignees_bias, k=df['summary'].size)

    # Calculate Worklogs
    df = df.reset_index()
    df['Start Time'] = df['created'] + pd.Series([pd.Timedelta(days=(i+random.randint(1, start_range_days))) for i in df['Earliest Start']])
    df['Start Time'] = df['Start Time'].dt.to_period('D')
    # Create an own table for worklogs
    wl = df[['Issue Counter','Start Time','originalEstimate','assignee']]
    wl_concat_frames = []
    wl_groups = wl.groupby('assignee')
    s_worklog_factor.index = s_worklog_factor.index.to_period()
    frame_period_idx = pd.Index(pd.period_range(start=earliest_day, end=args.time, freq='D', name='Start Time').to_list())
    s_worklog_factor = s_worklog_factor.reindex(frame_period_idx).ffill().bfill()
    for name, frame in wl_groups:
        frame = frame.copy()
        # The only value is originalEstimate, the rest goes to the index.
        # By unstacking 'Issue Counter' the 'Start Time' becomes unique.
        frame = frame.set_index(['Issue Counter','assignee','Start Time']).unstack(0)
        # Add name (assignee) to the index as level 0
        frame_idx = pd.MultiIndex.from_product([[name], frame_period_idx.to_list()])
        frame = frame.reindex(frame_idx).droplevel(0, axis=1).fillna(0)
        # The reindexing already happened before the loop
        # We can overwrite the existing one level index with the two level index
        s_worklog_factor.index = frame_idx
        frame['Worklog Factor'] = s_worklog_factor
        frame = frame.set_index('Worklog Factor', append=True)
        frame = frame.apply(worklogs, axis=1)
        # Remove all days containing zeros only
        frame = frame.loc[frame.sum(axis=1) > 0]
        # Remove Worklog Factor
        frame = frame.droplevel(2).stack().reset_index()
        frame = frame.rename(columns={'level_0':'author','level_1':'startDate','Issue Counter':'issue',0:'timeSpent'})
        # Remove all issues with timeSpent == 0
        frame = frame.loc[frame['timeSpent'] > 0]
        # frame_endtime will be a period of 'D'
        frame['startDate'] = [x.to_timestamp()+(pd.Timedelta(hours=9)) for x in frame['startDate']]
        frame_starttime = frame.groupby('issue')['startDate'].min()
        frame_endtime = frame.groupby('issue')['startDate'].max()
        frame['startDate'] = frame['startDate'].dt.tz_localize(import_tz)
        frame['timeSpent'] = [pd.Timedelta(minutes=x) for x in frame['timeSpent']]
        frame_issues = frame.set_index('issue').groupby('issue')
        frame_timespent_sum = frame_issues['timeSpent'].sum()
        frame = frame_issues.apply(lambda x: x.to_dict(orient='records')).to_frame()
        frame = frame.rename(columns={0:'worklogs'})
        # Add columns to the issue
        frame['WL startTime'] = frame_starttime
        frame['WL endTime'] = frame_endtime+pd.Timedelta(hours=9)
        frame['WL timeSpent sum'] = frame_timespent_sum

    # 'issue' as index remaines and will be used for merging
    wl = pd.concat(wl_concat_frames)
    df = df.set_index('Issue Counter')
    df = df.merge(wl, how='left', left_index=True, right_index=True)
    df['WL timeSpent sum'] = df['WL timeSpent sum'].fillna(pd.Timedelta(0))

    # Changelog
    df = df.reset_index()
    df['resolution'] = None
    # We only regard Truthy instead the entire worklogs
    df['wl started'] = df['worklogs'].notnull()
    df['wl finished'] = [pd.Timedelta(x*estimated_mul-0.25, unit='hours') for x in df['originalEstimate']]
    df['wl finished'] = (df['wl finished'].sub(df['WL timeSpent sum'])) < pd.Timedelta(0, unit='hours')
    df.loc[df['wl finished'] == True,'resolution'] = resolution_done
    chlog = df[['created','assignee','Issue Counter','WL startTime','wl started','WL endTime','wl finished']]
    # Mark all lines not started yet
    chlog.loc[chlog['wl started'] == False,'WL startTime'] = np.nan
    # Mark all lines not started yet
    chlog.loc[chlog['wl finished'] == False,'WL endTime'] = np.nan
    chlog = chlog.drop(columns=['wl started','wl finished'])
    # Use 'WL endTime' for 'resolution'
    chlog['resolution'] = chlog['WL endTime']
    # Make keys from history_items_config match to chlog columns
    chlog = chlog.rename(columns={'assignee':'author'})
    chlog = chlog.melt(id_vars=['Issue Counter','author'], value_vars=['created','WL startTime','WL endTime','resolution'])
    # Change the author of the first change to reporter
    chlog = chlog.set_index('variable')
    chlog.loc['created','author'] = reporter
    chlog = chlog.reset_index()
    history_items = pd.DataFrame(history_items_config).T
    # 'variable' is the matching column after the melt of chlog
    history_items.index.name = 'variable'
    history_items = history_items.reset_index()
    chlog = chlog.merge(history_items, how='inner', on='variable')
    chlog = chlog.drop(columns=['variable']).rename(columns={'value':'created'})
    # Remove all lines with np.nan
    chlog = chlog.loc[chlog['created'].notnull(),:]
    chlog.loc[:,['created']] = [pd.Timestamp(x).tz_localize(import_tz) for x in chlog['created']]
    lastest_status = chlog.set_index('Issue Counter').sort_values(by=['created']).groupby('Issue Counter')['toString'].tail(1).reset_index().rename(columns={'toString':'status'})
    lastest_date = chlog.set_index('Issue Counter').sort_values(by=['created']).groupby('Issue Counter')['created'].tail(1).reset_index().rename(columns={'created':'updated'})
    chlog = chlog.merge(lastest_status, how='inner', on='Issue Counter')
    chlog = chlog.merge(lastest_date, how='inner', on='Issue Counter')
    # Creation of 'items':[]
    chlog = chlog.set_index(['Issue Counter','status','updated','author','created']).groupby(by=['Issue Counter','status','updated','author','created']).apply(lambda x: x.to_dict(orient='records')).to_frame()
    chlog = chlog.rename(columns={0:'items'})
    chlog = chlog.reset_index(['author','created'])
    # Creation of 'history':[]
    chlog['history'] = 'history'
    chlog = chlog.set_index(['history'], append=True).groupby(by=['Issue Counter','status','updated','history']).apply(lambda x: x.to_dict(orient='records')).to_frame()
    # Leave and take 'Issue Counter' as index for merging chlog with df
    chlog = chlog.droplevel(3)
    chlog = chlog.reset_index(['status','updated'])
    chlog = chlog.rename(columns={0:'history'})
    df = df.set_index('Issue Counter')
    df = df.merge(chlog, how='left', left_index=True, right_index=True)

    # Last changes before generating a json
    df = df.reset_index()
    df = df.drop(columns=['index', 'Worklog Factor', 'Earliest Start', 'Last Day', 'Start Time', 'Issue Counter','wl started','WL endTime','wl finished','WL startTime'])
    df['created'] = df['created'].dt.tz_localize(import_tz)
    df['originalEstimate'] = [pd.Timedelta(hours=x) for x in df['originalEstimate']]
    df['estimate'] = [pd.Timedelta(np.max([x.total_seconds(),0]), unit='seconds') for x in df['originalEstimate'].sub(df['WL timeSpent sum'])]
    df = df.rename(columns={'WL timeSpent sum':'timeSpent'})
    df = df.drop(columns=['timeSpent'])

    if args.worklogs == 'no':
        df = df.drop(columns=['worklogs','originalEstimate','estimate'])

    # Build import JSON
    # Setting an index with project key and name preserve these two keys, and we get them back with a reset.
    # Anything under project key and name becomes a dict of issues
    # Remove pairs with a value of np.nan from all dicts. np.nan == np.nan is False.
    df = df.set_index(['key', 'name']).groupby(by=['key', 'name']).apply(lambda x: [{k:v for k,v in y.items() if v==v and v} for y in x.to_dict(orient='records')])
    df.name = 'issues'
    df = df.reset_index()
    # Create a new group and produce a dict with anything under it
    df['projects'] = 'projects'
    df = df.set_index('projects').groupby(by=['projects']).apply(lambda x: x.to_dict(orient='records'))

    print(df.to_json(orient='index', indent=4, date_format='iso'))

if __name__ == "__main__":

Reach out to me for professional support!