@@ -83,14 +83,15 @@ def contributor_team(contributions, metadata, forks, stars, axs):
83
83
# user is active contributor if made at least one commit in last 12 weeks
84
84
windowed_team_df = team_df .groupby (level = "author" ).rolling (window = 12 , min_periods = 0 ).sum ().droplevel (0 )
85
85
windowed_team_df ["active contributors" ] = windowed_team_df .commits > 0
86
+ windowed_team_df ["active contributors" ] = windowed_team_df ["active contributors" ].map ({True : "active" , False : "inactive" })
86
87
# plot per-user status
87
88
sns .scatterplot (
88
89
ax = axs [0 ],
89
90
data = windowed_team_df ,
90
91
x = "week_since_repo_creation" ,
91
92
y = "author" ,
92
93
hue = "active contributors" ,
93
- hue_order = [False , True ],
94
+ hue_order = ["inactive" , "active" ],
94
95
palette = ['#d62728' , '#2ca02c' ],
95
96
marker = "|" ,
96
97
s = 500 ,
@@ -99,14 +100,14 @@ def contributor_team(contributions, metadata, forks, stars, axs):
99
100
users = contributions .author .unique ()
100
101
engagement_user_highlights (users , metadata , forks , stars , axs [0 ])
101
102
# team size
102
- team_size = windowed_team_df .groupby (level = "week_since_repo_creation" )["active contributors" ].value_counts ()[:,True ].reindex (windowed_team_df .index .levels [1 ], fill_value = 0 )
103
+ team_size = windowed_team_df .groupby (level = "week_since_repo_creation" )["active contributors" ].value_counts ()[:,"active" ].reindex (windowed_team_df .index .levels [1 ], fill_value = 0 )
103
104
# plot
104
105
team_size .plot (
105
106
ax = axs [1 ],
106
107
color = "black" ,
107
108
lw = 2 ,
108
109
# xlabel="week since repo creation",
109
- # ylabel="contributor team size",
110
+ ylabel = "contributor team size" ,
110
111
)
111
112
112
113
def no_open_and_closed_issues (issues , metadata , ax ):
@@ -133,7 +134,7 @@ def no_open_and_closed_issues(issues, metadata, ax):
133
134
y = ["open issues" , "closed issues" ],
134
135
lw = 2 ,
135
136
# xlabel="week since repo creation",
136
- # ylabel="count"
137
+ ylabel = "issue count"
137
138
)
138
139
139
140
def engagement (forks , stars , metadata , ax ):
@@ -151,10 +152,20 @@ def engagement(forks, stars, metadata, ax):
151
152
engagement_df = engagement_df .cumsum ()
152
153
engagement_df .plot (
153
154
ax = ax ,
154
- lw = 2
155
+ lw = 2 ,
156
+ ylabel = "count"
155
157
)
156
158
157
- def date_highlights (readme_history , contents , metadata , paper_data , ax ):
159
def calc_y_timeline(data):
    """Compute stacked y offsets for timeline markers that share an x value.

    Each x value is given a y offset of ``-k``, where ``k`` is the number of
    times that same x value has already been seen, counted across all
    earlier series and earlier positions in the current series. The first
    marker at a given x gets 0, the next one -1, and so on, so markers at
    the same x do not overlap.

    Args:
        data: Iterable of iterables of x values (one inner iterable per
            marker series). The x values must be hashable.

    Returns:
        A list of lists of ints with the same nesting as ``data``, holding
        the y offset for every x value.
    """
    # Running tally of how often each x value has occurred so far; a dict
    # lookup replaces a linear scan of every previously seen value.
    occurrences = {}
    ys = []
    for series in data:
        offsets = []
        for x in series:
            already_seen = occurrences.get(x, 0)
            offsets.append(-already_seen)
            occurrences[x] = already_seen + 1
        ys.append(offsets)
    return ys
167
+
168
+ def date_highlights (readme_history , contents , metadata , paper_data , ax , overlay_ax ):
158
169
df = pd .merge (metadata , readme_history , on = "github_user_cleaned_url" )
159
170
df .dropna (subset = ["author_date" ], inplace = True )
160
171
df ["authored_in_week_since_creation" ] = (df .author_date - df .created_at ).dt .days // 7
@@ -164,34 +175,34 @@ def date_highlights(readme_history, contents, metadata, paper_data, ax):
164
175
paper_df = pd .merge (metadata , paper_data , on = "github_user_cleaned_url" )
165
176
paper_df .date = (paper_df .date - paper_df .created_at ).dt .days // 7
166
177
# headings
167
- df = analyse_headings (df )
168
- # plotting
169
- prop_cycle = plt .rcParams ['axes.prop_cycle' ]
170
- colors = prop_cycle .by_key ()['color' ]
171
- max_y = ax .get_ylim ()[1 ]
172
- dist = max_y / 25
178
+ df = analyse_headings (df )
173
179
ownership_added = df [df .ownership_addition ].authored_in_week_since_creation
174
- ax .vlines (ownership_added , - 1 * dist , max_y , linestyles = 'dashed' , color = colors [0 ])
175
- ax .scatter (ownership_added , (- 1 * dist * np .ones ((len (ownership_added ),))), marker = 10 , s = 100 , label = "ownership heading" , color = colors [0 ])
176
180
usage_added = df [df .usage_addition ].authored_in_week_since_creation
177
- ax .vlines (usage_added , - 2 * dist , max_y , linestyles = 'dashed' , color = colors [1 ])
178
- ax .scatter (usage_added , (- 2 * dist * np .ones ((len (usage_added ),))), marker = 10 , s = 100 , label = "usage heading" , color = colors [1 ])
179
181
# citation in README
180
182
citation_added = df [(df .added_cites != "[]" ) & (df .added_cites .notna ())].authored_in_week_since_creation
181
- ax .vlines (citation_added , - 3 * dist , max_y , linestyles = 'dashed' , color = colors [2 ])
182
- ax .scatter (citation_added , (- 3 * dist * np .ones ((len (citation_added ),))), marker = 10 , s = 100 , label = "citation in README" , color = colors [2 ])
183
183
# citation file
184
184
citation_file_added = contents_df [contents_df .citation_added .notna ()].citation_added
185
- ax .vlines (citation_file_added , - 4 * dist , max_y , linestyles = 'dashed' , color = colors [3 ])
186
- ax .scatter (citation_file_added , (- 4 * dist * np .ones ((len (citation_file_added ),))), marker = 10 , s = 100 , label = "citation file" , color = colors [3 ])
187
185
# contributing file
188
186
contributing_file_added = contents_df [contents_df .contributing_added .notna ()].contributing_added
189
- ax .vlines (contributing_file_added , - 5 * dist , max_y , linestyles = 'dashed' , color = colors [4 ])
190
- ax .scatter (contributing_file_added , (- 5 * dist * np .ones ((len (contributing_file_added ),))), marker = 10 , s = 100 , label = "contributing file" , color = colors [4 ])
191
187
# paper publication
192
188
paper_published = paper_df [paper_df .date .notna ()].date
193
- ax .vlines (paper_published , - 6 * dist , max_y , linestyles = 'dashed' , color = colors [5 ])
194
- ax .scatter (paper_published , (- 6 * dist * np .ones ((len (paper_published ),))), marker = 10 , s = 100 , label = "mention in publication" , color = colors [5 ])
189
+ # plotting
190
+ ax .set (ylim = (- 6 , 0.4 ), yticks = [])
191
+ ax .set_xlabel ("weeks since repository creation" , loc = "right" )
192
+ ax .xaxis .set_label_position ('top' )
193
+ ax .xaxis .tick_top ()
194
+ ax .spines ['right' ].set_visible (False )
195
+ ax .spines ['bottom' ].set_visible (False )
196
+ ax .spines ['left' ].set_visible (False )
197
+ data = [ownership_added , usage_added , citation_added , citation_file_added , contributing_file_added , paper_published ]
198
+ ys = calc_y_timeline (data )
199
+ labels = ["ownership heading" , "usage heading" , "citation in README" , "citation file" , "contributing file" , "mention in publication" ]
200
+ prop_cycle = plt .rcParams ['axes.prop_cycle' ]
201
+ colors = prop_cycle .by_key ()['color' ]
202
+ ymax = 86
203
+ for i in range (len (data )):
204
+ ax .scatter (data [i ], ys [i ], marker = "^" , s = 100 , label = labels [i ], color = colors [i ])
205
+ overlay_ax .vlines (data [i ], ys [i ], ymax , linestyles = 'dashed' , color = colors [i ])
195
206
196
207
def main (repo , dir , output_dir , verbose ):
197
208
info (verbose , f"Loading data for repo { repo } ..." )
@@ -209,25 +220,37 @@ def main(repo, dir, output_dir, verbose):
209
220
info (verbose , f"Not enough data available for { repo } ." )
210
221
exit ()
211
222
212
- fig , axs = plt .subplots (nrows = 3 , figsize = (20 , 10 ), sharex = True )
223
+ fig = plt .figure (figsize = (20 , 20 ))
224
+ overlay_axis = fig .subplots ()
225
+ overlay_axis .axis ('off' )
226
+ axs = fig .subplots (nrows = 6 , sharex = True , height_ratios = [3 , 3 , 2 , 2 , 2 , 1 ])
227
+ for ax in axs :
228
+ ax .patch .set_alpha (0 )
213
229
info (verbose , "Crunching data..." )
214
230
user_type_wrt_issues (issues , metadata , forks , stars , axs [0 ])
215
231
axs [0 ].legend (loc = "upper right" )
216
232
axs [0 ].grid (True , axis = "x" )
217
- contributor_team (contributions , metadata , forks , stars , axs [1 :])
233
+ contributor_team (contributions , metadata , forks , stars , axs [1 :3 ])
218
234
axs [1 ].grid (True , axis = "x" )
219
235
axs [1 ].legend ()
220
- no_open_and_closed_issues (issues , metadata , axs [2 ])
221
- engagement (forks , stars , metadata , axs [2 ])
222
- date_highlights (readme_history , contents , metadata , paper_data , axs [2 ])
223
236
axs [2 ].legend (loc = "upper right" )
224
237
axs [2 ].grid (True )
225
- _ , right = plt .xlim ()
226
- plt .xlim (- 5 , right + 15 )
227
- plt .xlabel ("week since repository creation" )
238
+ no_open_and_closed_issues (issues , metadata , axs [3 ])
239
+ axs [3 ].legend (loc = "upper right" )
240
+ axs [3 ].grid (True )
241
+ engagement (forks , stars , metadata , axs [4 ])
242
+ axs [4 ].legend (loc = "upper right" )
243
+ axs [4 ].grid (True )
244
+ date_highlights (readme_history , contents , metadata , paper_data , axs [5 ], overlay_axis )
245
+ axs [5 ].legend (loc = "upper right" , ncols = 2 )
246
+ # final adjustments
247
+ ymax = 86
248
+ _ , xr = plt .xlim ()
249
+ plt .xlim (- 5 , xr + 15 )
250
+ overlay_axis .set (xlim = (- 5 , xr + 15 ), ylim = (- 6 , ymax ))
228
251
fig .suptitle (repo )
229
252
s = repo .replace ("/" , "-" )
230
- fig .tight_layout ()
253
+ fig .tight_layout (rect = [ 0 , 0.03 , 1 , 0.98 ] )
231
254
outpath = os .path .join (dir , output_dir )
232
255
os .makedirs (outpath , exist_ok = True )
233
256
plt .savefig (os .path .join (outpath , f"{ s } .png" ), bbox_inches = "tight" )
0 commit comments