Skip to content

Commit 0b1b433

Browse files
committed
Split the log system out into its own module as a standalone feature
1 parent cfd23a1 commit 0b1b433

File tree

5 files changed

+318
-282
lines changed

5 files changed

+318
-282
lines changed

Diff for: src/PaperCrawlerUtil/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
import pathlib
44
from PaperCrawlerUtil.common_util import basic_config
55
sys.path.append(str(pathlib.Path(__file__).parent))
6-
__all__ = ["common_util", "crawler_util", "document_util", "office_util", "pdf_util", "research_util"]
6+
__all__ = ["common_util", "crawler_util", "document_util", "office_util", "pdf_util", "research_util", "log_util"]
77
__author__ = "[email protected]"
8-
__version__ = "0.1.34"
8+
__version__ = "0.1.35"
99
__github__ = "https://github.com/Liwu-di/PaperCrawlerUtil"
1010
__email__ = "[email protected]"
1111
__info__ = "I write this package just for interest, no interests. Welcome all friends cooperate with me!"

Diff for: src/PaperCrawlerUtil/base_util.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2023/6/29 11:35
3+
# @Author : 银尘
4+
# @FileName: base_util.py
5+
# @Software: PyCharm
6+
7+
# @Info : the most base object or function
8+
from PaperCrawlerUtil.constant import *
9+
from bs4 import Tag
10+
import time
11+
12+
13+
def verify_rule(rule: dict, origin: float or str or Tag) -> bool:
    """
    Check whether ``origin`` satisfies every rule in ``rule``.

    Each entry of ``rule`` maps an operand (the key) to an operator name (the
    value). The operator name is compared, as a string, against the constants
    ``IN``, ``NOT_IN``, ``EQUAL``, ``NOT_EQUAL``, ``LESS_THAN``,
    ``LESS_THAN_AND_EQUAL``, ``MORE_THAN`` and ``GREATER_AND_EQUAL`` imported
    from ``PaperCrawlerUtil.constant``. Entries whose operator matches none of
    these constants are ignored.

    * Textual operators (in / not in / equal / not equal) compare ``str(key)``
      with ``str(origin)``.
    * Numeric operators compare ``float(origin)`` with ``float(key)``; they
      fail (return False) when ``origin`` is not an int or float.

    example: {"href": "in"}

    :param rule: dictionary of ``operand -> operator name``. ``None`` or an
        empty dictionary means "no constraint" and always passes.
    :param origin: the value to verify (a number, a string or a bs4 ``Tag``).
        ``None`` never satisfies a non-empty rule set.
    :return: True if ``origin`` satisfies all rules, otherwise False
    :raises ValueError: propagated from ``float(key)`` when a numeric rule has
        a key that cannot be parsed as a number
    """
    if not rule:
        return True
    if origin is None:
        return False

    # Computed once: neither depends on the individual rule being checked.
    origin_text = str(origin)
    # bool is a subclass of int but is not a meaningful operand for ordering
    # rules, so it is excluded explicitly; other int/float subclasses pass.
    origin_is_number = isinstance(origin, (int, float)) and not isinstance(origin, bool)
    numeric_operators = (LESS_THAN, LESS_THAN_AND_EQUAL, MORE_THAN, GREATER_AND_EQUAL)

    for key, value in rule.items():
        operator_name = str(value)
        operand_text = str(key)
        if operator_name == IN:
            if operand_text not in origin_text:
                return False
        elif operator_name == NOT_IN:
            if operand_text in origin_text:
                return False
        elif operator_name == EQUAL:
            if operand_text != origin_text:
                return False
        elif operator_name == NOT_EQUAL:
            if operand_text == origin_text:
                return False
        elif operator_name in numeric_operators:
            if not origin_is_number:
                return False
            number = float(origin)
            threshold = float(key)
            if operator_name == LESS_THAN and number >= threshold:
                return False
            if operator_name == LESS_THAN_AND_EQUAL and number > threshold:
                return False
            if operator_name == GREATER_AND_EQUAL and number < threshold:
                return False
            if operator_name == MORE_THAN and number <= threshold:
                return False
    return True
51+
52+
53+
def get_timestamp(split: str or list = None, accuracy: int = 6) -> str:
    """
    Format the current local time using the first ``accuracy`` time fields.

    The fields are emitted in this fixed order, each as a ``time.strftime``
    directive:

    %Y  Year with century as a decimal number.
    %m  Month as a decimal number [01,12].
    %d  Day of the month as a decimal number [01,31].
    %H  Hour (24-hour clock) as a decimal number [00,23].
    %M  Minute as a decimal number [00,59].
    %S  Second as a decimal number [00,61].
    %z  Time zone offset from UTC.
    %a  Locale's abbreviated weekday name.
    %A  Locale's full weekday name.
    %B  Locale's full month name.
    %c  Locale's appropriate date and time representation.
    %I  Hour (12-hour clock) as a decimal number [01,12].
    %p  Locale's equivalent of either AM or PM.

    :param split: separator(s) placed between consecutive fields. A string is
        used between every pair of fields; a list (or other sequence) supplies
        one separator per gap and is padded with "-" when it is too short.
        ``None`` (the default) means ``["-", "-", " ", ":", ":"]``, which
        yields ``YYYY-mm-dd HH:MM:SS`` for the default accuracy.
        The argument is never modified.
    :param accuracy: how many fields to include, counted from the year.
        Values above the number of available fields are clamped to it;
        values below 1 produce an empty string.
    :return: the formatted timestamp
    """
    field_codes = ("Y", "m", "d", "H", "M", "S", "z", "a", "A", "B", "c", "I", "p")
    accuracy = max(0, min(accuracy, len(field_codes)))

    # Always work on a private list: the previous implementation appended the
    # padding to the argument itself, which grew the shared default list and
    # silently modified lists owned by the caller.
    if split is None:
        separators = ["-", "-", " ", ":", ":"]
    elif isinstance(split, str):
        separators = [split] * accuracy
    else:
        separators = list(split)
    shortfall = accuracy - len(separators)
    if shortfall > 0:
        separators.extend(["-"] * shortfall)

    pieces = []
    for index in range(accuracy):
        pieces.append("%" + field_codes[index])
        # No separator after the final field.
        if index < accuracy - 1:
            pieces.append(separators[index])
    return time.strftime("".join(pieces), time.localtime())
92+
93+
94+
def get_split(lens: int = 20, style: str = '=') -> str:
    """
    Build a separator line such as "======".

    :param lens: how many times ``style`` is repeated. Values below 1 are
        raised to 1, so the result always contains ``style`` at least once.
    :param style: the string (usually a single character) to repeat
    :return: ``style`` repeated ``max(lens, 1)`` times
    """
    # String repetition replaces the previous character-by-character
    # concatenation loop; the result is identical for string input.
    return style * max(lens, 1)
106+

0 commit comments

Comments
 (0)