-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcache.py
executable file
·424 lines (345 loc) · 12.4 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import time
import datetime
import threading
import os
try:
import cPickle as pickle
except ImportError:
import pickle
try:
import hashlib
except ImportError:
# python 2.4
import md5 as hashlib
try:
import fcntl
except ImportError:
# Probably on a windows system
# TODO: use win32file
pass
class Cache(object):
"""Cache interface"""
def __init__(self, timeout=60):
"""Initialize the cache
timeout: number of seconds to keep a cached entry
"""
self.timeout = timeout
def store(self, key, value):
"""Add new record to cache
key: entry key
value: data of entry
"""
raise NotImplementedError
def get(self, key, timeout=None):
"""Get cached entry if exists and not expired
key: which entry to get
timeout: override timeout with this value [optional]
"""
raise NotImplementedError
def count(self):
"""Get count of entries currently stored in cache"""
raise NotImplementedError
def cleanup(self):
"""Delete any expired entries in cache."""
raise NotImplementedError
def flush(self):
"""Delete all cached entries"""
raise NotImplementedError
class MemoryCache(Cache):
"""In-memory cache"""
def __init__(self, timeout=60):
Cache.__init__(self, timeout)
self._entries = {}
self.lock = threading.Lock()
def __getstate__(self):
# pickle
return {'entries': self._entries, 'timeout': self.timeout}
def __setstate__(self, state):
# unpickle
self.lock = threading.Lock()
self._entries = state['entries']
self.timeout = state['timeout']
def _is_expired(self, entry, timeout):
return timeout > 0 and (time.time() - entry[0]) >= timeout
def store(self, key, value):
self.lock.acquire()
self._entries[key] = (time.time(), value)
self.lock.release()
def get(self, key, timeout=None):
self.lock.acquire()
try:
# check to see if we have this key
entry = self._entries.get(key)
if not entry:
# no hit, return nothing
return None
# use provided timeout in arguments if provided
# otherwise use the one provided during init.
if timeout is None:
timeout = self.timeout
# make sure entry is not expired
if self._is_expired(entry, timeout):
# entry expired, delete and return nothing
del self._entries[key]
return None
# entry found and not expired, return it
return entry[1]
finally:
self.lock.release()
def count(self):
return len(self._entries)
def cleanup(self):
self.lock.acquire()
try:
for k, v in self._entries.items():
if self._is_expired(v, self.timeout):
del self._entries[k]
finally:
self.lock.release()
def flush(self):
self.lock.acquire()
self._entries.clear()
self.lock.release()
class FileCache(Cache):
"""File-based cache"""
# locks used to make cache thread-safe
cache_locks = {}
def __init__(self, cache_dir, timeout=60):
Cache.__init__(self, timeout)
if os.path.exists(cache_dir) is False:
os.mkdir(cache_dir)
self.cache_dir = cache_dir
if cache_dir in FileCache.cache_locks:
self.lock = FileCache.cache_locks[cache_dir]
else:
self.lock = threading.Lock()
FileCache.cache_locks[cache_dir] = self.lock
if os.name == 'posix':
self._lock_file = self._lock_file_posix
self._unlock_file = self._unlock_file_posix
elif os.name == 'nt':
self._lock_file = self._lock_file_win32
self._unlock_file = self._unlock_file_win32
else:
print 'Warning! FileCache locking not supported on this system!'
self._lock_file = self._lock_file_dummy
self._unlock_file = self._unlock_file_dummy
def _get_path(self, key):
md5 = hashlib.md5()
md5.update(key)
return os.path.join(self.cache_dir, md5.hexdigest())
def _lock_file_dummy(self, path, exclusive=True):
return None
def _unlock_file_dummy(self, lock):
return
def _lock_file_posix(self, path, exclusive=True):
lock_path = path + '.lock'
if exclusive is True:
f_lock = open(lock_path, 'w')
fcntl.lockf(f_lock, fcntl.LOCK_EX)
else:
f_lock = open(lock_path, 'r')
fcntl.lockf(f_lock, fcntl.LOCK_SH)
if os.path.exists(lock_path) is False:
f_lock.close()
return None
return f_lock
def _unlock_file_posix(self, lock):
lock.close()
def _lock_file_win32(self, path, exclusive=True):
# TODO: implement
return None
def _unlock_file_win32(self, lock):
# TODO: implement
return
def _delete_file(self, path):
os.remove(path)
if os.path.exists(path + '.lock'):
os.remove(path + '.lock')
def store(self, key, value):
path = self._get_path(key)
self.lock.acquire()
try:
# acquire lock and open file
f_lock = self._lock_file(path)
datafile = open(path, 'wb')
# write data
pickle.dump((time.time(), value), datafile)
# close and unlock file
datafile.close()
self._unlock_file(f_lock)
finally:
self.lock.release()
def get(self, key, timeout=None):
return self._get(self._get_path(key), timeout)
def _get(self, path, timeout):
if os.path.exists(path) is False:
# no record
return None
self.lock.acquire()
try:
# acquire lock and open
f_lock = self._lock_file(path, False)
datafile = open(path, 'rb')
# read pickled object
created_time, value = pickle.load(datafile)
datafile.close()
# check if value is expired
if timeout is None:
timeout = self.timeout
if timeout > 0 and (time.time() - created_time) >= timeout:
# expired! delete from cache
value = None
self._delete_file(path)
# unlock and return result
self._unlock_file(f_lock)
return value
finally:
self.lock.release()
def count(self):
c = 0
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
c += 1
return c
def cleanup(self):
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
self._get(os.path.join(self.cache_dir, entry), None)
def flush(self):
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
self._delete_file(os.path.join(self.cache_dir, entry))
class MemCacheCache(Cache):
"""Cache interface"""
def __init__(self, client, timeout=60):
"""Initialize the cache
client: The memcache client
timeout: number of seconds to keep a cached entry
"""
self.client = client
self.timeout = timeout
def store(self, key, value):
"""Add new record to cache
key: entry key
value: data of entry
"""
self.client.set(key, value, time=self.timeout)
def get(self, key, timeout=None):
"""Get cached entry if exists and not expired
key: which entry to get
timeout: override timeout with this value [optional]. DOES NOT WORK HERE
"""
return self.client.get(key)
def count(self):
"""Get count of entries currently stored in cache. RETURN 0"""
raise NotImplementedError
def cleanup(self):
"""Delete any expired entries in cache. NO-OP"""
raise NotImplementedError
def flush(self):
"""Delete all cached entries. NO-OP"""
raise NotImplementedError
class RedisCache(Cache):
'''Cache running in a redis server'''
def __init__(self, client, timeout=60, keys_container = 'tweepy:keys', pre_identifier = 'tweepy:'):
Cache.__init__(self, timeout)
self.client = client
self.keys_container = keys_container
self.pre_identifier = pre_identifier
def _is_expired(self, entry, timeout):
# Returns true if the entry has expired
return timeout > 0 and (time.time() - entry[0]) >= timeout
def store(self, key, value):
'''Store the key, value pair in our redis server'''
# Prepend tweepy to our key, this makes it easier to identify tweepy keys in our redis server
key = self.pre_identifier + key
# Get a pipe (to execute several redis commands in one step)
pipe = self.client.pipeline()
# Set our values in a redis hash (similar to python dict)
pipe.set(key, pickle.dumps((time.time(), value)))
# Set the expiration
pipe.expire(key, self.timeout)
# Add the key to a set containing all the keys
pipe.sadd(self.keys_container, key)
# Execute the instructions in the redis server
pipe.execute()
def get(self, key, timeout=None):
'''Given a key, returns an element from the redis table'''
key = self.pre_identifier + key
# Check to see if we have this key
unpickled_entry = self.client.get(key)
if not unpickled_entry:
# No hit, return nothing
return None
entry = pickle.loads(unpickled_entry)
# Use provided timeout in arguments if provided
# otherwise use the one provided during init.
if timeout is None:
timeout = self.timeout
# Make sure entry is not expired
if self._is_expired(entry, timeout):
# entry expired, delete and return nothing
self.delete_entry(key)
return None
# entry found and not expired, return it
return entry[1]
def count(self):
'''Note: This is not very efficient, since it retreives all the keys from the redis
server to know how many keys we have'''
return len(self.client.smembers(self.keys_container))
def delete_entry(self, key):
'''Delete an object from the redis table'''
pipe = self.client.pipeline()
pipe.srem(self.keys_container, key)
pipe.delete(key)
pipe.execute()
def cleanup(self):
'''Cleanup all the expired keys'''
keys = self.client.smembers(self.keys_container)
for key in keys:
entry = self.client.get(key)
if entry:
entry = pickle.loads(entry)
if self._is_expired(entry, self.timeout):
self.delete_entry(key)
def flush(self):
'''Delete all entries from the cache'''
keys = self.client.smembers(self.keys_container)
for key in keys:
self.delete_entry(key)
class MongodbCache(Cache):
"""A simple pickle-based MongoDB cache sytem."""
def __init__(self, db, timeout=3600, collection='tweepy_cache'):
"""Should receive a "database" cursor from pymongo."""
Cache.__init__(self, timeout)
self.timeout = timeout
self.col = db[collection]
self.col.create_index('created', expireAfterSeconds=timeout)
def store(self, key, value):
from bson.binary import Binary
now = datetime.datetime.utcnow()
blob = Binary(pickle.dumps(value))
self.col.insert({'created': now, '_id': key, 'value': blob})
def get(self, key, timeout=None):
if timeout:
raise NotImplementedError
obj = self.col.find_one({'_id': key})
if obj:
return pickle.loads(obj['value'])
def count(self):
return self.col.find({}).count()
def delete_entry(self, key):
return self.col.remove({'_id': key})
def cleanup(self):
"""MongoDB will automatically clear expired keys."""
pass
def flush(self):
self.col.drop()
self.col.create_index('created', expireAfterSeconds=self.timeout)