-
Notifications
You must be signed in to change notification settings - Fork 0
/
SQL_CreateEdgeList
176 lines (168 loc) · 7.08 KB
/
SQL_CreateEdgeList
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
USE practice
-- N202002171140, rsc
-- not using these string splits because they cause too many problems in the case of "strength and/& conditioning"
-- get stats to remove parenthetical strings from person ; filter out NA values
;WITH tRemovingParenthesis_Stats_Name AS
(
SELECT *,
CHARINDEX('(',person) AS Parenthesis_Start,
CHARINDEX(')',person) AS Parenthesis_End,
CHARINDEX(')',person) - CHARINDEX('(',person) AS Parenthesis_Length
FROM TBL_GRAPH_PositionYearTeam003
WHERE person != 'NA'
--AND person != 'NULL'
--AND position != 'NA'
--AND position != 'NULL'
),
-- remove parenthetical strings from person ; string split position on existence of /
tStringSplit_Position AS
(
SELECT ID,
tPosition_CA_slash.value AS position,
CASE WHEN Parenthesis_Length > 0 THEN SUBSTRING(person,1,Parenthesis_Start - 1) ELSE person END AS person,
department,
year,
team
FROM tRemovingParenthesis_Stats_Name
CROSS APPLY STRING_SPLIT(position, '/') tPosition_CA_slash
-- CROSS APPLY STRING_SPLIT(position, 'and') tPosition_CA_and --N202002171140
-- CROSS APPLY STRING_SPLIT(position, '&') tPosition_CA_ampersand --N202002171140
),
-- string split person on existence of / ; string split perosn on existence of " and ", using replace method ; string split person on existence of "," using replace method
tStringSplit_Person AS
(
SELECT ID,
position,
tPerson_CA_slash.value AS person,
department,
year,
team
FROM tStringSplit_Position
CROSS APPLY STRING_SPLIT(REPLACE(REPLACE(person,',',' and '),' and ', '/'),'/') tPerson_CA_slash
),
-- trim and replace superfluous values from person
tTrimAndReplace_Person AS
(
SELECT ID,
position,
RTRIM(LTRIM(
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(person,'.',''),'Jr','II'),'Dr ',''),'†',''),'*',''),'[1]','')
)) AS person,
department,
year,
team
FROM tStringSplit_Person
),
-- add in department ID for vertical and horizontal connections; filter out team - years with superfluous entries
tDepartmentID AS
(
SELECT tTrimAndReplace_Person.*,
ROW_NUMBER() OVER(PARTITION BY department, year, team ORDER BY ID) AS ID_Department
FROM tTrimAndReplace_Person
WHERE 1=1
AND NOT (team = 'Baltimore_Ravens' AND year < 1996)
AND NOT (team = 'Cleveland_Browns' AND year IN (1996,1997,1998))
AND NOT (team = 'Houston_Texans' AND year < 2002)
AND NOT (team = 'Houston_Oilers' AND year > 1996)
AND NOT (team = 'Jacksonville_Jaguars' AND year < 1995)
AND NOT (team = 'Tennessee_Titans' AND year < 1999)
AND NOT (team = 'Tennessee_Oilers' AND year NOT IN (1997,1998))
AND NOT (team = 'Oakland_Raiders' AND year < 1995)
AND NOT (team = 'Los_Angeles_Raiders' AND year > 1994)
AND NOT (team = 'San_Diego_Chargers' AND year > 2016)
AND NOT (team = 'Los_Angeles_Chargers' AND year < 2017)
AND NOT (team = 'Carolina_Panthers' AND year < 1995)
AND NOT (team = 'Los_Angeles_Rams' AND year > 1994 AND year < 2016)
AND NOT (team = 'St._Louis_Rams' AND year > 2015)
AND NOT (team = 'St._Louis_Rams' AND year < 1995)
AND NOT (team = 'Arizona_Cardinals' AND year < 1994)
AND NOT (team = 'Phoenix_Cardinals' AND year > 1993)
),
tDepartmentHiearchy AS
(
SELECT tDepartmentID.*,
TBL_GRAPH_DepartmentHierarchy.ID_DepartmentHierarchy,
TBL_GRAPH_DepartmentHierarchy.Hierarchy
FROM tDepartmentID
LEFT JOIN TBL_GRAPH_DepartmentHierarchy
ON tDepartmentID.department = TBL_GRAPH_DepartmentHierarchy.department
),
tFullTable AS
(
SELECT tDepartmentHiearchy.ID,
tDepartmentHiearchy.position,
REPLACE(UPPER(tDepartmentHiearchy.person),' ','_') AS person,
tDepartmentHiearchy.department,
tDepartmentHiearchy.year,
tDepartmentHiearchy.team,
COALESCE(tConnections_Vertical.ID,tConnections_Horizontal.ID) AS ID_associated,
REPLACE(UPPER(COALESCE(tConnections_Vertical.person,tConnections_Horizontal.person)),' ','_') AS person_associated,
COALESCE(tConnections_Vertical.position,tConnections_Horizontal.position) AS position_associated,
COALESCE(tConnections_Vertical.department,tConnections_Horizontal.department) AS department_associated,
CASE WHEN tDepartmentHiearchy.Hierarchy < COALESCE(tConnections_Vertical.Hierarchy,tConnections_Horizontal.Hierarchy) THEN 1
WHEN tDepartmentHiearchy.Hierarchy > COALESCE(tConnections_Vertical.Hierarchy,tConnections_Horizontal.Hierarchy) THEN -1
ELSE 0
END AS graphDirection
FROM tDepartmentHiearchy
LEFT JOIN
(
SELECT tDepartmentHiearchy.*
FROM tDepartmentHiearchy
WHERE ID_Department = 1
)
tConnections_Vertical
ON tDepartmentHiearchy.department = tConnections_Vertical.department -- same department
AND tDepartmentHiearchy.year = tConnections_Vertical.year -- same year
AND tDepartmentHiearchy.team = tConnections_Vertical.team -- same team
--AND (tDepartmentHiearchy.ID != tConnections_Vertical.ID OR tDepartmentHiearchy.ID_Department = tConnections_Vertical.ID_Department)
AND tDepartmentHiearchy.ID != tConnections_Vertical.ID -- connect with department head
LEFT JOIN
(
SELECT tDepartmentHiearchy.*
FROM tDepartmentHiearchy
WHERE ID_Department = 1
)
tConnections_Horizontal
ON tDepartmentHiearchy.year = tConnections_Horizontal.year -- same year
AND tDepartmentHiearchy.team = tConnections_Horizontal.team -- same team
AND tDepartmentHiearchy.ID != tConnections_Horizontal.ID -- connect with other department heads
AND tDepartmentHiearchy.ID_Department = tConnections_Horizontal.ID_Department -- ensure only department heads (unnecessary)
AND tDepartmentHiearchy.Hierarchy <= COALESCE(tConnections_Vertical.Hierarchy,tConnections_Horizontal.Hierarchy) -- enforce hierarchy rules / remove repetitive edges
INNER JOIN TBL_GRAPH_EdgeList_Department tEdgeDept
ON tDepartmentHiearchy.department = tEdgeDept.[Department_1]
AND COALESCE(tConnections_Vertical.department,tConnections_Horizontal.department) = tEdgeDept.[Department_2]
WHERE LTRIM(RTRIM(tDepartmentHiearchy.[person])) != ''
AND tDepartmentHiearchy.[person] IS NOT NULL
AND LTRIM(RTRIM(tDepartmentHiearchy.[person])) != ''
AND tDepartmentHiearchy.[person] IS NOT NULL
AND COALESCE(tConnections_Vertical.person,tConnections_Horizontal.person) != ''
AND COALESCE(tConnections_Vertical.person,tConnections_Horizontal.person) IS NOT NULL
)
SELECT person,
person_associated,
CAST(CAST(1 AS DECIMAL(12,8))/CAST(COUNT(*) AS DECIMAL(12,8)) AS DECIMAL(12,8)) AS Weight
FROM tFullTable
WHERE year >= 2008
--AND (position = 'Head coach' OR position_associated = 'Head coach')
--AND department NOT IN ('front office','strength and conditioning')
--AND department_associated NOT IN ('front office','strength and conditioning')
GROUP BY person,
person_associated
/*
SELECT DISTINCT REPLACE(person,' ','_') AS person,
REPLACE(person_associated,' ','_') AS person_associated
FROM tFullTable
WHERE person IS NOT NULL
AND person_associated IS NOT NULL
AND year >= 2016
AND (position = 'Head coach' OR position_associated = 'Head coach')
AND department NOT IN ('front office','strength and conditioning')
AND department_associated NOT IN ('front office','strength and conditioning')
AND graphDirection >= 0
*/
--AND tDepartmentHiearchy.year >= 2012
--WHERE --position = 'Head coach'
--tDepartmentHiearchy.year != 2099
--AND tDepartmentHiearchy.team = 'Minnesota_Vikings'
--ORDER BY tDepartmentHiearchy.ID ASC
--SELECT * FROM tFullTable