Skip to content

Commit ff7f389

Browse files
committed
fix #46
1 parent bbb7f2e commit ff7f389

File tree

2 files changed

+57
-34
lines changed

2 files changed

+57
-34
lines changed

analysis/github.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": 3,
13+
"execution_count": 1,
1414
"metadata": {},
1515
"outputs": [],
1616
"source": [

analysis/repository_timeline.py

Lines changed: 56 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,15 @@ def contributor_team(contributions, metadata, forks, stars, axs):
8383
# user is active contributor if made at least one commit in last 12 weeks
8484
windowed_team_df = team_df.groupby(level="author").rolling(window=12, min_periods=0).sum().droplevel(0)
8585
windowed_team_df["active contributors"] = windowed_team_df.commits > 0
86+
windowed_team_df["active contributors"] = windowed_team_df["active contributors"].map({True: "active", False: "inactive"})
8687
# plot per-user status
8788
sns.scatterplot(
8889
ax=axs[0],
8990
data=windowed_team_df,
9091
x="week_since_repo_creation",
9192
y="author",
9293
hue="active contributors",
93-
hue_order=[False, True],
94+
hue_order=["inactive", "active"],
9495
palette=['#d62728', '#2ca02c'],
9596
marker="|",
9697
s=500,
@@ -99,14 +100,14 @@ def contributor_team(contributions, metadata, forks, stars, axs):
99100
users = contributions.author.unique()
100101
engagement_user_highlights(users, metadata, forks, stars, axs[0])
101102
# team size
102-
team_size = windowed_team_df.groupby(level="week_since_repo_creation")["active contributors"].value_counts()[:,True].reindex(windowed_team_df.index.levels[1], fill_value=0)
103+
team_size = windowed_team_df.groupby(level="week_since_repo_creation")["active contributors"].value_counts()[:,"active"].reindex(windowed_team_df.index.levels[1], fill_value=0)
103104
# plot
104105
team_size.plot(
105106
ax=axs[1],
106107
color="black",
107108
lw=2,
108109
# xlabel="week since repo creation",
109-
# ylabel="contributor team size",
110+
ylabel="contributor team size",
110111
)
111112

112113
def no_open_and_closed_issues(issues, metadata, ax):
@@ -133,7 +134,7 @@ def no_open_and_closed_issues(issues, metadata, ax):
133134
y=["open issues", "closed issues"],
134135
lw=2,
135136
# xlabel="week since repo creation",
136-
# ylabel="count"
137+
ylabel="issue count"
137138
)
138139

139140
def engagement(forks, stars, metadata, ax):
@@ -151,10 +152,20 @@ def engagement(forks, stars, metadata, ax):
151152
engagement_df = engagement_df.cumsum()
152153
engagement_df.plot(
153154
ax=ax,
154-
lw=2
155+
lw=2,
156+
ylabel="count"
155157
)
156158

157-
def date_highlights(readme_history, contents, metadata, paper_data, ax):
159+
def calc_y_timeline(data):
    """Compute stacked y offsets for timeline markers sharing an x position.

    The series in ``data`` are walked in order. An x value gets offset ``0``
    the first time it is encountered anywhere in ``data``, ``-1`` the second
    time, ``-2`` the third time, and so on. Occurrences are counted across
    all series (and within a single series), so markers that fall on the
    same x are pushed one step further down instead of overlapping.

    Args:
        data: Iterable of series, each an iterable of hashable x values.

    Returns:
        A list with one inner list per series; each inner list holds the
        non-positive integer y offset for the corresponding x value.
    """
    # Running occurrence count per x value. A dict lookup replaces the
    # original ``list.count`` call, which rescanned every previously seen
    # point for each new point.
    seen = {}
    ys = []
    for series in data:
        offsets = []
        for x in series:
            count = seen.get(x, 0)
            offsets.append(-count)
            seen[x] = count + 1
        ys.append(offsets)
    return ys
167+
168+
def date_highlights(readme_history, contents, metadata, paper_data, ax, overlay_ax):
158169
df = pd.merge(metadata, readme_history, on="github_user_cleaned_url")
159170
df.dropna(subset=["author_date"], inplace=True)
160171
df["authored_in_week_since_creation"] = (df.author_date - df.created_at).dt.days // 7
@@ -164,34 +175,34 @@ def date_highlights(readme_history, contents, metadata, paper_data, ax):
164175
paper_df = pd.merge(metadata, paper_data, on="github_user_cleaned_url")
165176
paper_df.date = (paper_df.date - paper_df.created_at).dt.days // 7
166177
# headings
167-
df = analyse_headings(df)
168-
# plotting
169-
prop_cycle = plt.rcParams['axes.prop_cycle']
170-
colors = prop_cycle.by_key()['color']
171-
max_y = ax.get_ylim()[1]
172-
dist = max_y/25
178+
df = analyse_headings(df)
173179
ownership_added = df[df.ownership_addition].authored_in_week_since_creation
174-
ax.vlines(ownership_added, -1*dist, max_y, linestyles='dashed', color=colors[0])
175-
ax.scatter(ownership_added, (-1*dist * np.ones((len(ownership_added),))), marker=10, s=100, label="ownership heading", color=colors[0])
176180
usage_added = df[df.usage_addition].authored_in_week_since_creation
177-
ax.vlines(usage_added, -2*dist, max_y, linestyles='dashed', color=colors[1])
178-
ax.scatter(usage_added, (-2*dist * np.ones((len(usage_added),))), marker=10, s=100, label="usage heading", color=colors[1])
179181
# citation in README
180182
citation_added = df[(df.added_cites != "[]") & (df.added_cites.notna())].authored_in_week_since_creation
181-
ax.vlines(citation_added, -3*dist, max_y, linestyles='dashed', color=colors[2])
182-
ax.scatter(citation_added, (-3*dist * np.ones((len(citation_added),))), marker=10, s=100, label="citation in README", color=colors[2])
183183
# citation file
184184
citation_file_added = contents_df[contents_df.citation_added.notna()].citation_added
185-
ax.vlines(citation_file_added, -4*dist, max_y, linestyles='dashed', color=colors[3])
186-
ax.scatter(citation_file_added, (-4*dist* np.ones((len(citation_file_added),))), marker=10, s=100, label="citation file", color=colors[3])
187185
# contributing file
188186
contributing_file_added = contents_df[contents_df.contributing_added.notna()].contributing_added
189-
ax.vlines(contributing_file_added, -5*dist, max_y, linestyles='dashed', color=colors[4])
190-
ax.scatter(contributing_file_added, (-5*dist* np.ones((len(contributing_file_added),))), marker=10, s=100, label="contributing file", color=colors[4])
191187
# paper publication
192188
paper_published = paper_df[paper_df.date.notna()].date
193-
ax.vlines(paper_published, -6*dist, max_y, linestyles='dashed', color=colors[5])
194-
ax.scatter(paper_published, (-6*dist* np.ones((len(paper_published),))), marker=10, s=100, label="mention in publication", color=colors[5])
189+
# plotting
190+
ax.set(ylim=(-6, 0.4), yticks=[])
191+
ax.set_xlabel("weeks since repository creation", loc="right")
192+
ax.xaxis.set_label_position('top')
193+
ax.xaxis.tick_top()
194+
ax.spines['right'].set_visible(False)
195+
ax.spines['bottom'].set_visible(False)
196+
ax.spines['left'].set_visible(False)
197+
data = [ownership_added, usage_added, citation_added, citation_file_added, contributing_file_added, paper_published]
198+
ys = calc_y_timeline(data)
199+
labels = ["ownership heading", "usage heading", "citation in README", "citation file", "contributing file", "mention in publication"]
200+
prop_cycle = plt.rcParams['axes.prop_cycle']
201+
colors = prop_cycle.by_key()['color']
202+
ymax = 86
203+
for i in range(len(data)):
204+
ax.scatter(data[i], ys[i], marker="^", s=100, label=labels[i], color=colors[i])
205+
overlay_ax.vlines(data[i], ys[i], ymax, linestyles='dashed', color=colors[i])
195206

196207
def main(repo, dir, output_dir, verbose):
197208
info(verbose, f"Loading data for repo {repo}...")
@@ -209,25 +220,37 @@ def main(repo, dir, output_dir, verbose):
209220
info(verbose, f"Not enough data available for {repo}.")
210221
exit()
211222

212-
fig, axs = plt.subplots(nrows=3, figsize=(20, 10), sharex=True)
223+
fig = plt.figure(figsize=(20, 20))
224+
overlay_axis = fig.subplots()
225+
overlay_axis.axis('off')
226+
axs = fig.subplots(nrows=6, sharex=True, height_ratios=[3, 3, 2, 2, 2, 1])
227+
for ax in axs:
228+
ax.patch.set_alpha(0)
213229
info(verbose, "Crunching data...")
214230
user_type_wrt_issues(issues, metadata, forks, stars, axs[0])
215231
axs[0].legend(loc="upper right")
216232
axs[0].grid(True, axis="x")
217-
contributor_team(contributions, metadata, forks, stars, axs[1:])
233+
contributor_team(contributions, metadata, forks, stars, axs[1:3])
218234
axs[1].grid(True, axis="x")
219235
axs[1].legend()
220-
no_open_and_closed_issues(issues, metadata, axs[2])
221-
engagement(forks, stars, metadata, axs[2])
222-
date_highlights(readme_history, contents, metadata, paper_data, axs[2])
223236
axs[2].legend(loc="upper right")
224237
axs[2].grid(True)
225-
_, right = plt.xlim()
226-
plt.xlim(-5, right+15)
227-
plt.xlabel("week since repository creation")
238+
no_open_and_closed_issues(issues, metadata, axs[3])
239+
axs[3].legend(loc="upper right")
240+
axs[3].grid(True)
241+
engagement(forks, stars, metadata, axs[4])
242+
axs[4].legend(loc="upper right")
243+
axs[4].grid(True)
244+
date_highlights(readme_history, contents, metadata, paper_data, axs[5], overlay_axis)
245+
axs[5].legend(loc="upper right", ncols=2)
246+
# final adjustments
247+
ymax = 86
248+
_, xr = plt.xlim()
249+
plt.xlim(-5, xr+15)
250+
overlay_axis.set(xlim=(-5, xr+15), ylim=(-6, ymax))
228251
fig.suptitle(repo)
229252
s = repo.replace("/", "-")
230-
fig.tight_layout()
253+
fig.tight_layout(rect=[0, 0.03, 1, 0.98])
231254
outpath = os.path.join(dir, output_dir)
232255
os.makedirs(outpath, exist_ok=True)
233256
plt.savefig(os.path.join(outpath, f"{s}.png"), bbox_inches="tight")

0 commit comments

Comments
 (0)