54
54
from PAMI .frequentPattern .basic import abstract as _ab
55
55
from typing import List , Dict , Tuple , Set , Union , Any , Generator
56
56
from deprecated import deprecated
57
+ import numpy as np
57
58
58
59
class Apriori (_ab ._frequentPatterns ):
59
60
"""
@@ -221,56 +222,9 @@ def _convert(self, value: Union[int, float, str]) -> Union[int, float]:
221
222
else :
222
223
value = int (value )
223
224
return value
224
-
225
- def _candidateToFrequent (self , candidateList : List [set ]) -> Dict [frozenset , int ]:
226
- """
227
- Generates frequent patterns from the candidate patterns
228
-
229
- :param candidateList: Candidate pattern will be given as input
230
-
231
- :type candidateList: list
232
-
233
- :return: returning set of all frequent patterns
234
-
235
- :rtype: dict
236
-
237
- """
238
-
239
- candidateToFrequentList = {}
240
- for i in self ._Database :
241
- dictionary = {frozenset (j ): int (candidateToFrequentList .get (frozenset (j ), 0 )) + 1 for j in candidateList if
242
- j .issubset (i )}
243
- candidateToFrequentList .update (dictionary )
244
- candidateToFrequentList = {key : value for key , value in candidateToFrequentList .items () if
245
- value >= self ._minSup }
246
-
247
- return candidateToFrequentList
248
-
249
- @staticmethod
250
- def _frequentToCandidate (frequentList : Dict [frozenset , int ], length : int ) -> List [set ]:
251
- """
252
-
253
- Generates candidate patterns from the frequent patterns
254
-
255
- :param frequentList: set of all frequent patterns to generate candidate patterns of each of size is length
256
-
257
- :type frequentList: dict
258
-
259
- :param length: size of each candidate patterns to be generated
260
-
261
- :type length: int
262
-
263
- :return: set of candidate patterns in sorted order
264
-
265
- :rtype: list
266
-
267
- """
268
-
269
- frequentToCandidateList = []
270
- for i in frequentList :
271
- nextList = [i | j for j in frequentList if len (i | j ) == length and (i | j ) not in frequentToCandidateList ]
272
- frequentToCandidateList .extend (nextList )
273
- return sorted (frequentToCandidateList )
225
+
226
+ def _lowMemory (self ) -> None :
227
+ print ()
274
228
275
229
@deprecated ("It is recommended to use 'mine()' instead of 'startMine()' for mining process. Starting from January 2025, 'startMine()' will be completely terminated." )
276
230
def startMine (self ) -> None :
@@ -303,30 +257,130 @@ def startMine(self) -> None:
303
257
self ._memoryRSS = process .memory_info ().rss
304
258
print ("Frequent patterns were generated successfully using Apriori algorithm " )
305
259
260
def bitPacker(self, data, maxIndex):
    """
    Packs a collection of indices into a single integer bitmask

    :param data: indices to encode; index i sets bit (maxIndex - i)
    :type data: iterable of int
    :param maxIndex: offset the indices are subtracted from
    :type maxIndex: int
    :return: integer with one bit set per distinct index in data
    :rtype: int
    """
    mask = 0
    for position in data:
        mask = mask | (1 << (maxIndex - position))
    return mask
266
+
267
+ # @profile
268
def mineLowMemory(self) -> None:
    """
    Frequent pattern mining process will start from here (bitset implementation)

    Only one bitset per frequent single item is stored. The support of a longer
    candidate is recomputed by AND-ing the bitsets of its items, so no
    per-pattern transaction lists are kept. Results are written to
    self._finalPatterns as item tuples mapped to their support.

    Requires Python 3.10+ because of int.bit_count().
    """
    self._startTime = _ab._time.time()

    self._Database = []
    # presumably fills self._Database with one item collection per transaction
    self._creatingItemSets()

    self._minSup = self._convert(self._minSup)

    # Start from an empty result so repeated runs do not accumulate patterns.
    self._finalPatterns = {}

    # Map every single item (as a 1-tuple) to the transaction indices holding it.
    occurrences = {}
    for index, line in enumerate(self._Database):
        for item in line:
            tids = occurrences.setdefault((item,), [])
            # Indices arrive in increasing order, so a repeated item within one
            # transaction shows up as the last recorded index; count it once.
            if not tids or tids[-1] != index:
                tids.append(index)
    transactionCount = len(self._Database)

    # Most frequent items first: the scan stops at the first infrequent item.
    ranked = sorted(occurrences.items(), key=lambda entry: len(entry[1]), reverse=True)

    bitsets = {}
    cands = []
    for key, tids in ranked:
        if len(tids) < self._minSup:
            break
        self._finalPatterns[key] = len(tids)
        cands.append(key)
        bitsets[key] = self.bitPacker(tids, transactionCount)

    while cands:
        newCands = []
        for i in range(len(cands)):
            prefix = cands[i][:-1]
            for j in range(i + 1, len(cands)):
                # Candidates sharing a prefix are generated back to back, so
                # the first differing prefix ends the joinable run for i.
                if cands[j][:-1] != prefix:
                    break
                newCand = cands[i] + (cands[j][-1],)
                # Support = number of transactions containing every item.
                intersection = bitsets[(newCand[0],)]
                for k in range(1, len(newCand)):
                    intersection &= bitsets[(newCand[k],)]
                count = intersection.bit_count()
                # An infrequent join only drops this candidate; later partners
                # of cands[i] may still be frequent and must be examined.
                if count >= self._minSup:
                    newCands.append(newCand)
                    self._finalPatterns[newCand] = count

        cands = newCands

    self._endTime = _ab._time.time()
    process = _ab._psutil.Process(_ab._os.getpid())
    self._memoryUSS = process.memory_full_info().uss
    self._memoryRSS = process.memory_info().rss
    print("Frequent patterns were generated successfully using Apriori algorithm ")
328
+
306
329
def mine (self ) -> None :
307
330
"""
308
331
Frequent pattern mining process will start from here
309
332
"""
310
333
self ._Database = []
311
334
self ._startTime = _ab ._time .time ()
335
+
312
336
self ._creatingItemSets ()
313
- itemsList = sorted (list (set .union (* self ._Database ))) # because Database is list
314
- items = [{i } for i in itemsList ]
315
- itemsCount = len (items )
337
+
316
338
self ._minSup = self ._convert (self ._minSup )
317
- self ._finalPatterns = {}
318
- for i in range (1 , itemsCount ):
319
- frequentSet = self ._candidateToFrequent (items )
320
- for x , y in frequentSet .items ():
321
- sample = str ()
322
- for k in x :
323
- sample = sample + k + "\t "
324
- self ._finalPatterns [sample ] = y
325
- items = self ._frequentToCandidate (frequentSet , i + 1 )
326
- if len (items ) == 0 :
327
- break # finish apriori
328
- self ._endTime = _ab ._time .time ()
339
+
340
+ items = {}
341
+ index = 0
342
+ for line in self ._Database :
343
+ for item in line :
344
+ if tuple ([item ]) in items :
345
+ items [tuple ([item ])].append (index )
346
+ else :
347
+ items [tuple ([item ])] = [index ]
348
+ index += 1
349
+
350
+ # sort by length in descending order
351
+ items = dict (sorted (items .items (), key = lambda x : len (x [1 ]), reverse = True ))
352
+
353
+ cands = []
354
+ fileData = {}
355
+ for key in items :
356
+ if len (items [key ]) >= self ._minSup :
357
+ cands .append (tuple ([key ]))
358
+ self ._finalPatterns [tuple ([key ])] = len (items [key ])
359
+ fileData [tuple ([key ])] = set (items [key ])
360
+ else :
361
+ break
362
+
363
+ while cands :
364
+ newKeys = []
365
+ for i in range (len (cands )):
366
+ for j in range (i + 1 , len (cands )):
367
+ if cands [i ][:- 1 ] == cands [j ][:- 1 ]:
368
+ newCand = tuple (cands [i ] + tuple ([cands [j ][- 1 ]]))
369
+ intersection = fileData [tuple ([newCand [0 ]])]
370
+ for k in range (1 , len (newCand )):
371
+ intersection = intersection .intersection (fileData [tuple ([newCand [k ]])])
372
+
373
+ # intersection = fileData[cands[i]].intersection(fileData[cands[j]])
374
+ if len (intersection ) >= self ._minSup :
375
+ # fileData[newCand] = intersection
376
+ newKeys .append (newCand )
377
+ self ._finalPatterns [newCand ] = len (intersection )
378
+ del cands
379
+ cands = newKeys
380
+ del newKeys
381
+
329
382
process = _ab ._psutil .Process (_ab ._os .getpid ())
383
+ self ._endTime = _ab ._time .time ()
330
384
self ._memoryUSS = float ()
331
385
self ._memoryRSS = float ()
332
386
self ._memoryUSS = process .memory_full_info ().uss
@@ -448,3 +502,33 @@ def printResults(self) -> None:
448
502
else :
449
503
print ("Error! The number of input parameters do not match the total number of parameters provided" )
450
504
505
+
506
+ minUtils = [150 ]
507
+
508
+ for minUtil in minUtils :
509
+ file = "/Users/tarunsreepada/Downloads/Transactional_T10I4D100K.csv"
510
+ obj = Apriori (file , minUtil , sep = '\t ' )
511
+ obj .mineLowMemory ()
512
+ # obj.mine()
513
+ print ("Total number of Frequent Patterns:" , len (obj .getPatterns ()))
514
+ print ("Total Memory in USS:" , obj .getMemoryUSS ())
515
+ print ("Total Memory in RSS" , obj .getMemoryRSS ())
516
+ print ("Total ExecutionTime in seconds:" , obj .getRuntime ())
517
+
518
+ # print()
519
+
520
+ # obj.mine()
521
+ # print("Total number of Frequent Patterns:", len(obj.getPatterns()))
522
+ # print("Total Memory in USS:", obj.getMemoryUSS())
523
+ # print("Total Memory in RSS", obj.getMemoryRSS())
524
+ # print("Total ExecutionTime in seconds:", obj.getRuntime())
525
+
526
+ # print()
527
+
528
+ obj = Apriori (file , minUtil , sep = '\t ' )
529
+ obj .mine ()
530
+ # obj.mine()
531
+ print ("Total number of Frequent Patterns:" , len (obj .getPatterns ()))
532
+ print ("Total Memory in USS:" , obj .getMemoryUSS ())
533
+ print ("Total Memory in RSS" , obj .getMemoryRSS ())
534
+ print ("Total ExecutionTime in seconds:" , obj .getRuntime ())
0 commit comments