-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsolution.py
36 lines (25 loc) · 1.09 KB
/
solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
# Location and layout of the space-separated log. "None" and "None1" name
# filler columns that are dropped right after loading.
LOG_PATH = './sample.log'
LOG_COLUMNS = ["datestamp", "GUID", "action",
               "url", "None", "status", "None1", "type"]


def load_log(path=LOG_PATH):
    """Read the log at *path* into a DataFrame, minus the filler columns.

    Args:
        path: Location of the space-separated, header-less log file.

    Returns:
        A DataFrame with the columns datestamp, GUID, action, url, status
        and type.
    """
    frame = pd.read_csv(path, sep=" ", header=None, names=LOG_COLUMNS)
    return frame.drop(columns=['None', 'None1'])


def find_slow_servers(df):
    """Return the distinct culprits, ordered from slowest to fastest.

    Within each GUID the rows are ordered by time. Every row whose ``url``
    is "-" is charged with the time elapsed since the previous row of the
    same GUID, and the ``type`` of that previous row is recorded as the
    culprit. Rows whose ``url`` is not "-" (the first frontend entry of a
    request, per the original comments) carry no measurement and are
    ignored, as are rows that have no predecessor inside their GUID.

    Args:
        df: DataFrame with at least the columns ``datestamp``, ``GUID``,
            ``url`` and ``type``. It is not modified.

    Returns:
        A Series named ``culprit`` holding each culprit once, sorted by its
        largest elapsed time in descending order. Empty when nothing could
        be measured.
    """
    if df.empty:
        return pd.Series([], dtype=object, name='culprit')

    frame = df.copy()
    # Parse first so the sort below is chronological, not lexical.
    frame['datestamp'] = pd.to_datetime(frame['datestamp'])
    frame = frame.sort_values(['GUID', 'datestamp']).reset_index(drop=True)

    # Look one row back *within the same GUID* so that neither the elapsed
    # time nor the culprit can leak across request boundaries.
    previous = frame.groupby('GUID', sort=False)[['datestamp', 'type']].shift(1)
    frame['difference'] = frame['datestamp'] - previous['datestamp']
    frame['culprit'] = previous['type']

    # Keep only rows that carry a real measurement instead of tagging the
    # others with a sentinel and trimming the tail afterwards.
    measured = frame[(frame['url'] == "-")
                     & frame['difference'].notna()
                     & frame['culprit'].notna()]

    # Slowest first; a stable sort keeps ties in chronological order. After
    # that the first occurrence of each culprit is its slowest response.
    slowest_first = measured.sort_values('difference', ascending=False,
                                         kind='mergesort')
    return slowest_first.drop_duplicates(subset=['culprit'])['culprit']


def main():
    """Print the slow servers found in the default log, one per line."""
    slow_servers = find_slow_servers(load_log())
    if not slow_servers.empty:
        print(slow_servers.to_string(index=False))


if __name__ == "__main__":
    main()