-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrate.py
237 lines (202 loc) · 8.93 KB
/
rate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
import translations as tr
import artifacts as art
import re
from fuzzywuzzy import fuzz, process
from unidecode import unidecode
# Any number, with an optional decimal part separated by '.' or ','.
reg = re.compile(r'\d+(?:[.,]\d+)?')
# Text ending in "<digits>/1000"; parse() skips lines that match.
bad_reg = re.compile(r'\d+/1000$')
# A digit, a '.' or ',' separator, then three digits (ex. "4,780");
# parse() records a match as a flat HP value.
hp_reg = re.compile(r'\d[.,]\d{3}')
# The whole text is '+' followed by one or two digits (ex. "+16"): the level.
lvl_reg = re.compile(r'^\+\d\d?$')
# The whole text is one or two digits with an optional '+'. parse() does not
# remember the numbers of a line matching this as a candidate stat value.
bad_lvl_reg_1 = re.compile(r'^\+?\d\d?$')
# The whole text is four or more digits; treated by parse() like bad_lvl_reg_1.
bad_lvl_reg_2 = re.compile(r'^\d{4}\d*$')
# Data processing
def parse(text, lang=tr.en(), lang2=art.en()):
    """Extract artifact data from (OCR) text, one line at a time.

    Args:
        text: Multi-line string to analyse.
        lang: Translation object. The attributes read here are the stat
            names (anemo, elec, pyro, hydro, cryo, geo, dend, hp, heal, df,
            er, em, atk, cd, cr, phys), ``replace`` (mapping of substrings
            to substitute in every line), ``ignore`` (lines to skip once
            spaces are removed) and ``piece_set`` (text marking the start of
            the set description).
        lang2: Artifact-name object providing the iterables ``types`` and
            ``sets``.

    Returns:
        A tuple ``(type, level, set, results)``. ``type`` and ``set`` are
        entries of ``lang2.types`` / ``lang2.sets`` or None when not found,
        ``level`` is an int or None, and ``results`` is a list of
        ``[stat, value]`` pairs in reading order. A stat whose value has a
        decimal part gets a trailing '%' and a float value; other values
        are ints.
    """
    stat = None
    results = []
    level = None
    art_type = None
    art_set = None
    prev = None
    del_prev = True

    digits_reg = re.compile(r'\d+')

    # Prepare dictionary: normalised (ASCII, lower-case) name -> original name
    elements = [lang.anemo, lang.elec, lang.pyro, lang.hydro, lang.cryo, lang.geo, lang.dend]
    stat_names = elements + [lang.hp, lang.heal, lang.df, lang.er, lang.em,
                             lang.atk, lang.cd, lang.cr, lang.phys]
    choices = {unidecode(name).lower(): name for name in stat_names}
    choice_keys = list(choices)

    # Additional dictionaries for artifact sets & type
    types = {unidecode(x).lower(): x for x in lang2.types}
    type_keys = list(types)
    sets = {unidecode(x).lower(): x for x in lang2.sets}
    set_keys = list(sets)

    piece_set = unidecode(lang.piece_set).lower()

    for line in text.splitlines():
        if not line or len(line.replace(' ', '')) <= 1:
            continue

        # Numbers remembered from the previous line survive exactly one
        # iteration unless the bottom of the loop refreshes them.
        if del_prev:
            prev = None
        del_prev = True

        # Basic cleaning
        for k, v in lang.replace.items():
            line = line.replace(k, v)
        line = unidecode(line).lower()
        line = line.replace(':', '').replace('-', '').replace('0/0', '%')
        compact = line.replace(' ', '')

        if compact in lang.ignore or bad_reg.search(compact):
            continue
        if fuzz.partial_ratio(line, piece_set) > 80 and len(line) > 4:
            break

        # Search for level (ex. +16)
        if level is None or (len(results) == 1 and not stat):
            value = lvl_reg.search(compact)
            if value:
                print('1', line)
                level = int(value[0].replace('+', ''))
                continue

        # Search for HP (4,780)
        value = hp_reg.search(compact)
        if value:
            print('2', line)
            value = int(value[0].replace(',', '').replace('.', ''))
            results += [[lang.hp, value]]
            stat = None
            continue

        # Search for type (ex. Goblet)
        # Compare to full dictionary (get best match + confidence)
        if art_type is None and len(compact) > 3 + 2:
            extract = process.extractOne(line, type_keys)
            # If the best match has high conf
            if extract[1] > 80:
                art_type = types[extract[0]]
                continue

        # Search for set (ex. Gladiator)
        # Compare to full dictionary (get best match + confidence)
        if art_set is None and len(compact) > 3 + 2:
            extract = process.extractOne(line, set_keys)
            # If the best match has high conf
            if extract[1] > 80:
                print('0', extract[0])
                art_set = sets[extract[0]]
                # Prevent reading set description
                break

        # Search for substats & mainstat
        # Compare to full dictionary (get best match + confidence)
        alpha = digits_reg.sub(' ', line)  # Remove numbers
        extract = process.extractOne(alpha, choice_keys)
        # If low confidence, retry with another scorer
        if extract[1] <= 80:
            extract = process.extractOne(line, choice_keys, scorer=fuzz.partial_ratio)

        # If conf is high, or a stat name is still waiting for its value
        if (extract[1] > 80) or stat:
            print('3', line)
            if extract[1] > 80:
                stat = choices[extract[0]]
            # Remove the stat, thus only value is left
            line = line.replace(stat.lower(), '').replace(' ', '')
            value = reg.findall(line.replace(' ', '').replace(',', '.'))
            if not value:
                # Thus this is mainstat
                if not prev:
                    # The value is on next line
                    continue
                print('4', prev)
                value = prev
            value = max(value, key=len)
            if len(value) < 2:
                continue
            # A '%' after the number but no decimal separator: put the
            # separator back before the last digit.
            if line.find('%', line.find(value)) != -1 and '.' not in value:
                value = value[:-1] + '.' + value[-1]
            if '.' in value:
                value = float(value)
                stat += '%'
            else:
                value = int(value)
            results += [[stat, value]]
            stat = None
            if len(results) == 6:
                break
            continue

        # If conf is low, do special bad cases: remember the numbers on this
        # line for the next one unless it looks like a misread level.
        value = bad_lvl_reg_1.search(compact) or bad_lvl_reg_2.search(compact.replace('+', ''))
        if not value:
            line = line.replace(',', '')
            prev = reg.findall(line.replace(' ', ''))
            del_prev = False

    print("\n", art_type, level, art_set, results, "\n")
    return art_type, level, art_set, results
def validate(value, max_stat, percent):
    """Shrink a misread stat value until it fits under its maximum.

    While ``value`` is more than 5% above ``max_stat``, one character is
    dropped from its text form: the first of the right-most pair of equal
    adjacent characters if there is one, otherwise the digit just before
    the decimal point (``percent``) or the last digit (not ``percent``).
    Trimming stops early, leaving the value as it is, when the remaining
    text is no longer a number or when dropping a character does not change
    the value; this avoids a ValueError or an endless loop.

    Finally, a value whose integer part is 1 is increased by 10.

    Args:
        value: Parsed number (int, or float for percentage stats).
        max_stat: Largest expected value for the stat.
        percent: True to handle ``value`` as a float, False as an int.

    Returns:
        The corrected value (float if ``percent`` and trimmed, else int),
        or ``value`` itself when no correction was applied.
    """
    limit = max_stat * 1.05
    while value > limit:
        text = str(value)
        for i in reversed(range(1, len(text))):
            if text[i] == text[i - 1]:
                # Doubled character: drop one of the two
                text = text[:i - 1] + text[i:]
                break
        else:
            if percent:
                pos = text.find('.')
                text = text[:pos - 1] + text[pos:]
            else:
                text = text[:-1]
        try:
            trimmed = float(text) if percent else int(text)
        except ValueError:
            # Nothing numeric is left to trim
            break
        if trimmed == value:
            # No progress (ex. "0.9" -> ".9"); another pass would repeat it
            break
        value = trimmed
    if int(value) == 1:
        value += 10
    return value
def score(level, results, options=None, lang=tr.en()):
    """Rate an artifact from its level and parsed stats.

    The first entry of ``results`` is scored as the main stat, every other
    entry as a substat. Each value is first passed through ``validate`` and
    the corrected value is written back into ``results`` in place.

    Args:
        level: Artifact level (number).
        results: List of ``[stat, value]`` pairs; modified in place.
        options: Optional mapping of stat key to weight, overriding the
            default weights. None means no overrides.
        lang: Translation object providing the stat names.

    Returns:
        Three ``(score, percent)`` tuples: total, main stat and substats.

    Raises:
        KeyError: If a stat key is missing from the lookup tables.
    """
    # Weight = max potential at the level
    # Score = absolute artifact value
    # Score% = art value relative to max potential
    options = {} if options is None else options

    main_score = 0.0
    sub_score = 0.0
    sub_weight = 0
    main_weight = 3 + level / 4

    elements = [lang.anemo, lang.elec, lang.pyro, lang.hydro, lang.cryo, lang.geo, lang.dend]

    min_mains = {lang.hp: 717.0, lang.atk: 47.0, f'{lang.atk}%': 7.0, f'{lang.er}%': 7.8, lang.em: 28.0,
                 f'{lang.phys}%': 8.7, f'{lang.cr}%': 4.7, f'{lang.cd}%': 9.3, f'{lang.elem}%': 7.0,
                 f'{lang.hp}%': 7.0, f'{lang.df}%': 8.7, f'{lang.heal}%': 5.4}
    max_mains = {lang.hp: 4780, lang.atk: 311.0, f'{lang.atk}%': 46.6, f'{lang.er}%': 51.8, lang.em: 187.0,
                 f'{lang.phys}%': 58.3, f'{lang.cr}%': 31.1, f'{lang.cd}%': 62.2, f'{lang.elem}%': 46.6,
                 f'{lang.hp}%': 46.6, f'{lang.df}%': 58.3, f'{lang.heal}%': 35.9}
    max_subs = {lang.atk: 19.0, lang.em: 23.0, f'{lang.er}%': 6.5, f'{lang.atk}%': 5.8,
                f'{lang.cr}%': 3.9, f'{lang.cd}%': 7.8, lang.df: 23.0, lang.hp: 299.0,
                f'{lang.df}%': 7.3, f'{lang.hp}%': 5.8}
    weights = {lang.hp: 0, lang.atk: 0.5, f'{lang.atk}%': 1, f'{lang.er}%': 0.5, lang.em: 0.5,
               f'{lang.phys}%': 1, f'{lang.cr}%': 1, f'{lang.cd}%': 1, f'{lang.elem}%': 1,
               f'{lang.hp}%': 0, f'{lang.df}%': 0, lang.df: 0, f'{lang.heal}%': 0}

    # Replaces weights with options
    weights = {**weights, **options}

    for index, result in enumerate(results):
        stat, value = result
        # Every elemental bonus shares one table entry
        key = f'{lang.elem}%' if stat[:-1] in elements else stat

        if index == 0:
            # Main stat: its expected maximum is interpolated between the
            # min and max tables according to the level (max at level 20).
            max_main = max_mains[key] - (max_mains[key] - min_mains[key]) * (1 - level / 20.0)
            value = validate(value, max_main, '%' in key)
            main_score = value / max_main * weights[key] * main_weight
            if key in (lang.atk, lang.hp):
                main_weight *= weights[key]

            # Substat potential: the four highest-weighted possible
            # substats other than the main stat; only the best one is
            # scaled with the level.
            ranked = sorted(weights.items(), reverse=True, key=lambda item: item[1])
            count = 0
            for k, v in ranked:
                if k == key or k not in max_subs:
                    continue
                if count == 0:
                    sub_weight += v * (1 + level / 4)
                else:
                    sub_weight += v
                count += 1
                if count == 4:
                    break
        else:
            value = validate(value, max_subs[key] * 6, '%' in key)
            sub_score += value / max_subs[key] * weights[key]

        result[1] = value

    # Measure total score in terms of combined max potential
    total_score = main_score + sub_score
    total_weight = main_weight + sub_weight
    total_rel = total_score / total_weight * 100 if total_weight > 0 else 100

    # Only useful for 4* and below. Always 100 for 5*
    main_rel = main_score / main_weight * 100 if main_weight > 0 else 100
    main_rel = 100 if main_rel > 99 else main_rel

    # Compare score to max
    sub_rel = sub_score / sub_weight * 100 if sub_weight > 0 else 100

    print(f"Total: {total_score:.2f} {total_rel:.2f}% \n" +
          f" Main: {main_score:.2f}/{main_weight}={main_rel:.2f}% \n" +
          f" Sub: {sub_score:.2f}/{sub_weight}={sub_rel:.2f}% \n")
    return (total_score, total_rel), (main_score, main_rel), (sub_score, sub_rel)