-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathedgar-8k105.py
200 lines (158 loc) · 7.89 KB
/
edgar-8k105.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# 05-29-2024: Initial release of edgar-8k105.py
# Kulkan Security - www.kulkan.com
import requests, json, re
from datetime import datetime
import pandas as pd
import yfinance as yf
# The SEC requests callers to identify themselves, as described at https://www.sec.gov/os/accessing-edgar-data
# This value is sent as the User-Agent header on every SEC request made by this script.
# NOTE(review): the value below is a placeholder and its e-mail part looks
# obfuscated/mangled - replace it with your real company name and contact
# address before running.
USER_AGENT = 'Your Company Name ([email protected])'
def _ticker_from_display_name(display_name):
    """Return the first ticker found in an EDGAR display name, or 'N/A'.

    The first parenthesised group of the display name is taken as a
    comma-separated ticker list, unless that group starts with "CIK "
    (presumably the entity then has no ticker - verify against live data).
    """
    groups = re.findall(r'\((.*?)\)', display_name)
    if not groups or groups[0].startswith("CIK "):
        return 'N/A'
    # Only the first symbol of a multi-ticker entry is kept; an empty group
    # "()" is reported as 'N/A' rather than as an empty ticker.
    ticker = groups[0].split(",")[0].strip()
    return ticker or 'N/A'


def get_filings(filing_type, start_date, end_date, max_results=999999):
    """Query EDGAR full-text search for Item 1.05 / cybersecurity filings.

    Pages through the search endpoint 100 hits at a time and, for every hit,
    also fetches the filing index page to read its publication timestamp.

    Args:
        filing_type: Form type passed as the ``forms`` query parameter
            (e.g. ``'8-K'``).
        start_date: Lower bound of the date range, passed as ``startdt``.
        end_date: Upper bound of the date range, passed as ``enddt``.
        max_results: Maximum number of filings to return. The returned list
            never holds more than this many entries.

    Returns:
        A list of dicts with the keys ``company_name``, ``cik``,
        ``filing_date``, ``form_type``, ``filing_href``, ``document_href``,
        ``ticker`` and ``published_timestamp``.

    Raises:
        Exception: If the search endpoint (or a filing index page) answers
            with a non-200 status. Network errors and timeouts raised by
            ``requests`` propagate unchanged.
    """
    base_url = "https://efts.sec.gov/LATEST/search-index"
    page_size = 100
    request_timeout = 30  # seconds; without it a stalled connection hangs forever
    headers = {
        'User-Agent': USER_AGENT,
        'Accept': 'application/json'
    }
    results = []
    fetched_results = 0  # hits consumed so far; doubles as the paging offset
    total_available_results = None  # total hit count as reported by the server

    while fetched_results < max_results:
        query = {
            # We don't only search for Item 1.05 because in some cases it's not properly indexed
            'q': '"Material Cybersecurity Incidents" OR "Item 1.05"',
            'forms': filing_type,
            'startdt': start_date,
            'enddt': end_date,
            'from': str(fetched_results),
            'size': page_size,
            'sort': 'desc'
        }
        response = requests.get(base_url, headers=headers, params=query, timeout=request_timeout)
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve data from SEC: {response.status_code}")

        data = response.json()
        filings = data['hits']['hits']
        if total_available_results is None:
            total_available_results = data['hits']['total']['value']
        if not filings:
            break

        # Honour max_results exactly: do not process (and do not issue the
        # per-filing index request for) hits beyond the requested cap.
        remaining = max_results - len(results)
        for filing in filings[:remaining]:
            source = filing['_source']

            display_names = source.get('display_names', [])
            company_name = display_names[0] if display_names else 'N/A'
            ticker = _ticker_from_display_name(display_names[0]) if display_names else 'N/A'

            ciks = source.get('ciks', [])
            cik = ciks[0] if ciks else 'N/A'
            filing_date = source.get('file_date', 'N/A')
            form_type = source.get('form', 'N/A')

            adsh = source.get('adsh', 'N/A')
            archive_dir = f"https://www.sec.gov/Archives/edgar/data/{cik}/{adsh.replace('-', '')}"
            filing_href = f"{archive_dir}/{adsh}-index.htm"

            # The document name is whatever follows the first ':' in the hit
            # id; a missing id or an id without ':' falls back to 'N/A'
            # instead of raising IndexError.
            raw_id = filing.get('_id')
            doc_id = (raw_id.partition(':')[2] if raw_id else '') or 'N/A'
            document_href = f"{archive_dir}/{doc_id}"

            published_timestamp = get_timestamp_from_index(filing_href)

            results.append({
                'company_name': company_name,
                'cik': cik,
                'filing_date': filing_date,
                'form_type': form_type,
                'filing_href': filing_href,
                'document_href': document_href,
                'ticker': ticker,
                'published_timestamp': published_timestamp,
            })

        fetched_results += len(filings)
        print(f"Fetched {len(results)}/{total_available_results} filings...", end='\r', flush=True)

        # Stop once everything the server has was consumed, or on a short (last) page.
        if fetched_results >= total_available_results or len(filings) < page_size:
            break

    print("\nCompleted fetching filings.")  # Ensure a new line is started after the loop
    return results
def get_timestamp_from_index(index_href):
    """Fetch a filing index page and return its second "info" field.

    The page is scanned for ``<div class="info">...</div>`` elements and the
    text of the second one is returned. Callers treat that value as the
    publication timestamp (presumably the "Accepted" field of the EDGAR
    index page - confirm against a live page).

    Args:
        index_href: URL of the filing's ``-index.htm`` page.

    Returns:
        The text of the second "info" div, or ``None`` when the page has
        fewer than two such divs.

    Raises:
        Exception: If the page is answered with a non-200 status. Network
            errors and timeouts raised by ``requests`` propagate unchanged.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(index_href, headers={'User-Agent': USER_AGENT}, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to retrieve data from SEC: {response.status_code}")

    info_fields = re.findall(r'<div class="info">(.*?)</div>', response.text)
    return info_fields[1] if len(info_fields) >= 2 else None
def get_stock_data(ticker, start_date, end_date):
    """Download price data for *ticker* through yfinance.

    Both dates must offer ``strftime``; they are handed to ``yf.download``
    as ``YYYY-MM-DD`` strings, with the progress bar disabled.
    NOTE(review): the caller adds one day to the end date, which suggests
    yfinance treats ``end`` as exclusive - confirm.
    """
    day_format = '%Y-%m-%d'
    first_day = start_date.strftime(day_format)
    last_day = end_date.strftime(day_format)
    return yf.download(ticker, start=first_day, end=last_day, progress=False)
def _close_price(stock_data, day):
    """Return the 'Close' value of *stock_data* on *day* as a scalar."""
    price = stock_data.loc[day, 'Close']
    # NOTE(review): some yfinance releases appear to return multi-level
    # columns, in which case the lookup yields a one-element Series instead
    # of a scalar - confirm against the installed version.
    if isinstance(price, pd.Series):
        price = price.iloc[0]
    return price


def analyze_impact(ticker, filing_date):
    """Estimate the closing-price change of *ticker* around a filing date.

    A window of business days is built around the filing date and widened
    step by step (while the two day-offsets sum to less than 10) until the
    filing date lies strictly inside it and closing prices exist for both
    window edges.

    Args:
        ticker: Symbol passed on to ``get_stock_data``.
        filing_date: Anything ``pd.to_datetime`` accepts; the time of day
            is discarded.

    Returns:
        ``(before_price, after_price, pct_change)`` where the prices are
        the closes on the first and last day of the window and
        ``pct_change`` is their relative difference in percent, or
        ``(None, None, None)`` when the filing is from today or no usable
        window was found.
    """
    filing_date = pd.to_datetime(filing_date).normalize()  # Normalize to remove time
    if filing_date.date() == datetime.now().date():  # We won't have Candle data yet.
        print("Filing is very recent, skipping Stock analysis for now.")
        return None, None, None

    start_delta = 1
    end_delta = 1
    while (start_delta + end_delta) < 10:
        # Business days between (filing - start_delta) and (filing + end_delta)
        date_range = pd.bdate_range(
            start=filing_date - pd.Timedelta(days=start_delta),
            end=filing_date + pd.Timedelta(days=end_delta),
        ).normalize()
        window_start = date_range.min()
        window_end = date_range.max()

        # The window must reach past the filing date on both sides. These
        # checks only need the calendar, so they run before any download.
        # Using <= / >= (rather than ==) also covers filing dates that are
        # not business days, which would otherwise produce a one-day window
        # and a meaningless 0% change.
        if window_end <= filing_date:
            end_delta += 1
            continue
        if window_start >= filing_date:
            start_delta += 1
            continue

        stock_data = get_stock_data(ticker, window_start, window_end + pd.Timedelta(days=1))
        if stock_data.empty:
            print(f"No data available for {ticker} between {window_start} and {window_end}")
            start_delta += 1
            end_delta += 1
            continue  # Continue to expand the date range

        # No candle for the last window day (e.g. a market holiday): widen.
        if window_end not in stock_data.index:
            start_delta += 1
            end_delta += 1
            continue

        try:
            before_price = _close_price(stock_data, window_start)
            after_price = _close_price(stock_data, window_end)
            pct_change = (after_price - before_price) / before_price * 100
        except Exception:
            # Deliberately best-effort: a missing candle for the first day
            # (KeyError) or any other lookup problem just widens the window.
            start_delta += 1
            end_delta += 1
            continue

        print(stock_data)  # We print the data here prior to returning
        return before_price, after_price, pct_change

    return None, None, None
def main():
    """Fetch Item 1.05 8-K filings since 2023-12-01 and print a report.

    For every filing the company, URLs and publication timestamp are
    printed; when a ticker and a timestamp are both known, the approximate
    price change around the publication date is printed as well. Any
    exception is reported as a single "Error: ..." line.
    """
    print("Fetching filings from the SEC with Item 1.05 or Material Cybersecurity Incidents...")
    try:
        # Define the date range for the query
        end_date = datetime.now().strftime('%Y-%m-%d')
        # The first filing making use of Item 1.05 for Cybersecurity is from (VFC) on 2023-12-18.
        start_date = "2023-12-01"

        filings = get_filings('8-K', start_date, end_date)
        if not filings:
            print("No filings found.")
            return

        print(f"\r\nPrinting {len(filings)} filings:")
        for filing in filings:
            print(f"Company: {filing['company_name']}, CIK: {filing['cik']}, Date: {filing['filing_date']}, Form Type: {filing['form_type']}")
            print(f"Filing URL: {filing['filing_href']}")
            print(f"Document URL: {filing['document_href']}")
            print(f"Published at: {filing['published_timestamp']} Eastern Time")
            print("")

            ticker = filing['ticker']
            published = filing['published_timestamp']
            # The timestamp is None when the index page did not expose one;
            # skip the analysis then instead of failing on None.split() and
            # aborting the report for all remaining filings.
            if ticker != "N/A" and published:
                # let's only provide dates, no timestamp here to analyze full day candles.
                before, _after, p_change = analyze_impact(ticker, published.split(" ")[0])
                if before is not None:
                    print("")
                    print(f"Symbol/Ticker: {ticker}")
                    print(f"Approximate Price Change (%): {p_change}")
            print("-" * 80)
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    main()