-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_saving.py
174 lines (148 loc) · 6.08 KB
/
data_saving.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import csv
import html
import itertools
import json
import re
def save_answers_json(answers, output_path):
    """Write *answers* to *output_path* as indented JSON.

    The data is serialized before the file is opened, so an object that
    cannot be serialized raises without truncating a file that already
    exists at *output_path*.

    Args:
        answers: Any JSON-serializable object.
        output_path: Destination file path; overwritten if it exists.

    Raises:
        TypeError: If *answers* contains a value ``json`` cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Serialize first: opening with 'w' truncates immediately, and a failure
    # halfway through json.dump would leave a partial, invalid JSON file.
    serialized = json.dumps(answers, indent=4)
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(serialized)
def save_answers_csv(json_data, output_path):
    """Write question/answer records to *output_path* as a UTF-8 CSV file.

    The header is ``Question`` followed, for each answer of the FIRST record,
    by the model name and optional ``<model> LLM Duration`` /
    ``<model> RAG Duration`` columns (a duration column is emitted only when
    the corresponding value is not ``-1``).

    NOTE: each data row applies the same ``!= -1`` test to its own answers,
    so rows line up with the header only when every record has the same
    duration pattern as the first one.

    Args:
        json_data: Sequence of dicts with keys ``question`` and ``answers``;
            each answer is a dict with ``model``, ``answer``,
            ``llm_duration`` and ``rag_duration``. ``None`` or an empty
            sequence produces a header-only file.
        output_path: Destination file path; overwritten if it exists.

    Raises:
        KeyError: If a record or answer lacks one of the keys above.
        OSError: If the file cannot be opened or written.
    """
    records = json_data or []

    header = ['Question']
    if records:
        for answer in records[0]['answers']:
            model = answer['model']
            header.append(model)
            if answer['llm_duration'] != -1:
                header.append(f'{model} LLM Duration')
            if answer['rag_duration'] != -1:
                header.append(f'{model} RAG Duration')

    # Build every row before touching the file so a malformed record does not
    # leave a half-written CSV behind.
    rows = []
    for item in records:
        row = [item['question']]
        for answer in item['answers']:
            row.append(answer['answer'])
            if answer['llm_duration'] != -1:
                row.append(answer['llm_duration'])
            if answer['rag_duration'] != -1:
                row.append(answer['rag_duration'])
        rows.append(row)

    # Explicit UTF-8: the locale default (e.g. cp1252) cannot encode many of
    # the characters that show up in model answers.
    with open(output_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(rows)
def save_answers_html(json_data, output_path):
    """Write question/answer records to *output_path* as an HTML table.

    The table has a ``Questions`` column followed, for each answer of the
    FIRST record, by the model name and optional ``<model> LLM Duration`` /
    ``<model> RAG Duration`` columns (emitted only when the value is not
    ``-1``). All columns get the same percentage width.

    Question text and model names are HTML-escaped. Answer text is passed
    through ``format_html`` and inserted as returned.

    Args:
        json_data: Sequence of dicts with keys ``question`` and ``answers``;
            each answer is a dict with ``model``, ``answer``,
            ``llm_duration`` and ``rag_duration``. ``None`` or an empty
            sequence produces a table containing only the header row.
        output_path: Destination file path; overwritten if it exists.

    Raises:
        KeyError: If a record or answer lacks one of the keys above.
        OSError: If the file cannot be opened or written.
    """
    records = json_data or []

    # Column titles come from the first record only.
    titles = ['Questions']
    if records:
        for answer in records[0]['answers']:
            model = html.escape(str(answer['model']), quote=False)
            titles.append(model)
            if answer['llm_duration'] != -1:
                titles.append(f'{model} LLM Duration')
            if answer['rag_duration'] != -1:
                titles.append(f'{model} RAG Duration')

    # There is always at least the "Questions" column, so this never divides
    # by zero and is defined even when there are no records.
    col_width = 100 / len(titles)
    cell_style = (
        f'style="padding: 8px; vertical-align: top; width: {col_width}%;"'
    )

    # The width belongs inside the style attribute; as a bare token in the
    # tag it is not valid HTML and browsers ignore it.
    parts = [
        '<table border="1" style="width: 100%; border-collapse: collapse;">\n',
        '<tr>',
    ]
    parts.extend(f'<th {cell_style}>{title}</th>' for title in titles)
    parts.append('</tr>\n')

    for item in records:
        question = html.escape(str(item['question']), quote=False)
        parts.append(f'<tr><td {cell_style}>{question}</td>')
        for answer in item['answers']:
            parts.append(
                f'<td {cell_style}>{format_html(answer["answer"])}</td>'
            )
            if answer['llm_duration'] != -1:
                parts.append(f'<td {cell_style}>{answer["llm_duration"]}</td>')
            if answer['rag_duration'] != -1:
                parts.append(f'<td {cell_style}>{answer["rag_duration"]}</td>')
        parts.append('</tr>\n')

    parts.append('</table>')

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(''.join(parts))
# Line-level Markdown patterns (applied with .match, i.e. anchored at the
# start of a single line that contains no newline).
_FENCE_RE = re.compile(r'\s*```[^`]*$')
_HEADING_RE = re.compile(r'(#{1,6})\s+(.*)$')
_BULLET_RE = re.compile(r'\s*[*+-]\s+(.*)$')
_NUMBERED_RE = re.compile(r'\s*([0-9]+)\.\s+(.*)$')
_QUOTE_RE = re.compile(r'>\s?(.*)$')

# Inline Markdown patterns.
_CODE_SPAN_RE = re.compile(r'(?<!`)(`+)(?!`)(.+?)(?<!`)\1(?!`)')
_BOLD_ITALIC_RE = re.compile(r'\*\*\*(?=\S)(.+?)(?<=\S)\*\*\*')
_BOLD_RE = re.compile(
    r'\*\*(?=\S)(.+?)(?<=\S)\*\*|(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)'
)
# '<' and '>' are excluded from italic content so a match can never straddle
# a tag produced by the bold pass and yield mis-nested markup.
_ITALIC_RE = re.compile(
    r'\*(?=\S)([^*<>]+?)(?<=\S)\*|(?<!\w)_(?=\S)([^_<>]+?)(?<=\S)_(?!\w)'
)


def _format_inline(text):
    """Escape one line of text and convert inline code/bold/italic to HTML."""
    rendered = []
    # With two capture groups, split() yields
    # [text, backticks, code, text, backticks, code, ...].
    for index, segment in enumerate(_CODE_SPAN_RE.split(text)):
        role = index % 3
        if role == 1:
            continue  # the backtick delimiter itself
        if role == 2:
            # Code spans are escaped but never emphasis-processed.
            code = html.escape(segment.strip(), quote=False)
            rendered.append('<code>' + code + '</code>')
            continue
        segment = html.escape(segment, quote=False)
        segment = _BOLD_ITALIC_RE.sub(r'<b><i>\1</i></b>', segment)
        segment = _BOLD_RE.sub(
            lambda m: '<b>' + m.group(m.lastindex) + '</b>', segment
        )
        segment = _ITALIC_RE.sub(
            lambda m: '<i>' + m.group(m.lastindex) + '</i>', segment
        )
        rendered.append(segment)
    return ''.join(rendered)


def _classify_line(line):
    """Return ``(kind, payload)`` for one Markdown line outside a code fence."""
    match = _HEADING_RE.match(line)
    if match:
        return 'heading', (len(match.group(1)), match.group(2).strip())
    match = _BULLET_RE.match(line)
    if match:
        return 'ul', match.group(1).strip()
    match = _NUMBERED_RE.match(line)
    if match:
        return 'ol', (int(match.group(1)), match.group(2).strip())
    match = _QUOTE_RE.match(line)
    if match:
        return 'quote', match.group(1)
    return 'text', line


def format_html(text):
    """Convert a small subset of Markdown to an HTML fragment.

    Supported syntax: ``#``..``######`` headings, ``*``/``-``/``+`` bullet
    lists, ``N.`` numbered lists, ``>`` block quotes, fenced code blocks,
    inline code, ``**bold**`` / ``__bold__`` and ``*italic*`` / ``_italic_``.
    All other text is HTML-escaped; consecutive plain lines are joined with
    ``<br>``. Underscore emphasis is only recognised at word boundaries, so
    identifiers such as ``snake_case`` are left alone. Emphasis that spans
    an inline code span is not recognised and is left as literal text.

    Args:
        text: Markdown source. ``None`` or an empty string yields ``''``.

    Returns:
        The HTML fragment as a string.
    """
    if not text:
        return ''

    # Pass 1: tag every line with its block kind. Fence state is sequential,
    # so this has to be a loop rather than a per-line map.
    classified = []
    inside_fence = False
    normalized = text.replace('\r\n', '\n').replace('\r', '\n')
    for line in normalized.split('\n'):
        if _FENCE_RE.match(line):
            inside_fence = not inside_fence
            classified.append(('fence', None))
        elif inside_fence:
            classified.append(('pre', line))
        else:
            classified.append(_classify_line(line))

    # Pass 2: merge runs of same-kind lines into one HTML element each.
    fragments = []  # (html, is_block_element) pairs
    grouped = itertools.groupby(classified, key=lambda entry: entry[0])
    for kind, group in grouped:
        payloads = [payload for _, payload in group]
        if kind == 'fence':
            continue  # delimiters produce no output of their own
        if kind == 'pre':
            code = html.escape('\n'.join(payloads), quote=False)
            fragments.append(('<pre><code>' + code + '</code></pre>', True))
        elif kind == 'ul':
            items = ''.join(
                '<li>' + _format_inline(body) + '</li>' for body in payloads
            )
            fragments.append(('<ul>' + items + '</ul>', True))
        elif kind == 'ol':
            # Keep the source numbering when a list is resumed after a
            # blank line or other interruption.
            first_number = payloads[0][0]
            if first_number == 1:
                opening = '<ol>'
            else:
                opening = f'<ol start="{first_number}">'
            items = ''.join(
                '<li>' + _format_inline(body) + '</li>'
                for _, body in payloads
            )
            fragments.append((opening + items + '</ol>', True))
        elif kind == 'quote':
            quoted = '<br>'.join(_format_inline(body) for body in payloads)
            fragments.append(
                ('<blockquote>' + quoted + '</blockquote>', True)
            )
        elif kind == 'heading':
            for level, title in payloads:
                fragments.append(
                    (f'<h{level}>{_format_inline(title)}</h{level}>', True)
                )
        else:
            fragments.extend(
                (_format_inline(body), False) for body in payloads
            )

    # Pass 3: a <br> is only needed between two adjacent plain-text lines;
    # block elements already break the flow on their own.
    rendered = []
    previous_was_inline = False
    for fragment, is_block in fragments:
        if previous_was_inline and not is_block:
            rendered.append('<br>')
        rendered.append(fragment)
        previous_was_inline = not is_block
    return ''.join(rendered)
def save_answers_markdown(json_data, output_path):
    """Write question/answer records to *output_path* as a Markdown table.

    The header is ``Question`` followed, for each answer of the FIRST record,
    by the model name and optional ``<model> LLM Duration`` /
    ``<model> RAG Duration`` columns (emitted only when the value is not
    ``-1``). In data rows, durations are written with a `` ms`` suffix.

    Model names, questions and answers are passed through
    ``escape_markdown`` so pipes and line breaks cannot break the table.

    Args:
        json_data: Sequence of dicts with keys ``question`` and ``answers``;
            each answer is a dict with ``model``, ``answer``,
            ``llm_duration`` and ``rag_duration``. ``None`` or an empty
            sequence produces only the header and separator lines.
        output_path: Destination file path; overwritten if it exists.

    Raises:
        KeyError: If a record or answer lacks one of the keys above.
        OSError: If the file cannot be opened or written.
    """
    records = json_data or []

    columns = ['Question']
    if records:
        for answer in records[0]['answers']:
            model = escape_markdown(str(answer['model']))
            columns.append(model)
            if answer['llm_duration'] != -1:
                columns.append(f'{model} LLM Duration')
            if answer['rag_duration'] != -1:
                columns.append(f'{model} RAG Duration')

    lines = [
        '| ' + ' | '.join(columns) + ' |',
        # One separator cell per column. Counting '|' characters in the
        # header would miscount if a model name contained a pipe.
        '|' + '---|' * len(columns),
    ]

    for item in records:
        cells = [escape_markdown(item['question'])]
        for answer in item['answers']:
            cells.append(escape_markdown(answer['answer']))
            if answer['llm_duration'] != -1:
                cells.append(f"{answer['llm_duration']} ms")
            if answer['rag_duration'] != -1:
                cells.append(f"{answer['rag_duration']} ms")
        lines.append('| ' + ' | '.join(cells) + ' |')

    # Rows are fully built before the file is opened, so a malformed record
    # cannot leave a truncated table on disk.
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write('\n'.join(lines) + '\n')
def escape_markdown(text):
    """Return *text* made safe for use inside one Markdown table cell.

    Pipes are backslash-escaped, newlines become single spaces and carriage
    returns are dropped.
    """
    # The three substitutions never feed into one another, so a single
    # translate pass gives the same result as applying them one by one.
    cell_safe = {
        ord('|'): '\\|',
        ord('\n'): ' ',
        ord('\r'): None,
    }
    return text.translate(cell_safe)