-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeniee.py
140 lines (110 loc) · 3.68 KB
/
geniee.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
from dotenv import load_dotenv
import io
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from pytubefix import YouTube
from llama_index.core import (
Settings,
SimpleDirectoryReader,
VectorStoreIndex,
StorageContext,
load_index_from_storage
)
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Module-level configuration, executed once when this module is imported.
# The statements below assign attributes on the shared llama_index `Settings`
# object; their order is kept exactly as written.

# Called before the environment lookup below; presumably this lets a local
# .env file supply GROQ_API_KEY -- confirm against python-dotenv behaviour.
load_dotenv()
# os.getenv returns None when GROQ_API_KEY is unset; no validation is done here.
api_key = os.getenv("GROQ_API_KEY")
# LLM assigned to Settings.llm: a Groq client for the "gemma2-9b-it" model.
Settings.llm = Groq(
api_key=api_key,
# Alternative model id left in place by the author (currently disabled):
# model="llama-3.1-70b-versatile",
model="gemma2-9b-it",
temperature=0.6,
max_tokens=4096,
streaming=True,
)
# Embedding model assigned to Settings.embed_model: a multilingual
# sentence-transformers model, pinned to CPU, with "./embedding" passed as
# its cache folder.
Settings.embed_model = HuggingFaceEmbedding(
model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
cache_folder='./embedding',
device="cpu"
)
# NOTE(review): 8192 is presumably chosen to match the selected model's
# context limit -- confirm if the model above is changed.
Settings.context_window = 8192
def unduh_soal(soal_data):
    """Render exam questions to a PDF held in memory.

    Each question is drawn as ``"<number>. <question text>"`` followed by its
    answer options (if any), one per line and slightly indented.

    Args:
        soal_data: Iterable of question mappings. Every item must provide a
            ``"pertanyaan"`` entry (the question text) and may provide an
            ``"opsi"`` entry (an iterable of option labels).

    Returns:
        An ``io.BytesIO`` containing the finished PDF, rewound to position 0.

    Raises:
        KeyError: If a question has no ``"pertanyaan"`` entry.
    """
    font_name, font_size = "Helvetica", 12
    top_y, bottom_y = 750, 50

    pdf_buffer = io.BytesIO()
    c = canvas.Canvas(pdf_buffer, pagesize=letter)
    c.setFont(font_name, font_size)
    y_position = top_y

    def draw_line(x, text, step):
        """Draw one line of text, starting a new page first if needed."""
        nonlocal y_position
        # Checking before *every* line (not once per question) keeps option
        # lines from being drawn below the bottom margin, and avoids emitting
        # an empty trailing page when the last question ends near the bottom.
        if y_position < bottom_y:
            c.showPage()
            # The font is applied again after each page break, exactly as the
            # original code did after showPage().
            c.setFont(font_name, font_size)
            y_position = top_y
        c.drawString(x, y_position, text)
        y_position -= step

    for nomor, soal in enumerate(soal_data, start=1):
        draw_line(100, f"{nomor}. {soal['pertanyaan']}", 20)
        # A missing or empty "opsi" entry simply means no option lines.
        for opsi in soal.get("opsi") or ():
            draw_line(120, str(opsi), 15)
        # Extra vertical gap between consecutive questions.
        y_position -= 10

    c.showPage()
    c.save()
    pdf_buffer.seek(0)
    return pdf_buffer
def unduh_jawaban(soal_data):
    """Return the answer key as plain text, one numbered line per question.

    Args:
        soal_data: Iterable of question mappings; each must provide a
            ``"kunci_jawaban"`` entry.

    Returns:
        A string with one ``"<number>.: <answer>"`` line per question
        (numbering starts at 1, every line ends with a newline), or an empty
        string when there are no questions.
    """
    answer_lines = (
        f"{number}.: {item['kunci_jawaban']}\n"
        for number, item in enumerate(soal_data, start=1)
    )
    return "".join(answer_lines)
def youtube_dl(url, output_dir="temp/", filename="audio.mp3"):
    """Download the audio-only stream of a YouTube video.

    Args:
        url: URL of the YouTube video.
        output_dir: Directory the file is written to. Defaults to ``"temp/"``,
            the location this function always used before.
        filename: Name of the written file. Defaults to ``"audio.mp3"``.

    Returns:
        Whatever ``Stream.download`` returns (the original code returned this
        value unchanged as well).

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.get_audio_only()
    if stream is None:
        # Without this guard the failure surfaced as an opaque
        # "'NoneType' object has no attribute 'download'" AttributeError.
        raise ValueError(f"No audio-only stream available for {url!r}")
    # NOTE(review): the file is named "*.mp3" regardless of the container the
    # selected stream actually uses -- confirm downstream consumers do not
    # rely on the extension matching the real format.
    return stream.download(output_dir, filename=filename)
def Geniee(materi, pelajaran, jumlah, tipe):
    """Generate exam questions from uploaded material via a vector index.

    Steps performed:
      1. Ensure ``./temp/`` exists; if it is empty and ``materi`` is given,
         write every uploaded file into it.
      2. If ``./data/`` does not exist, build a vector index from the files in
         ``./temp/`` and persist it to ``./data/``. An existing ``./data/`` is
         reused as-is, so newly passed ``materi`` does not affect it.
      3. Load the persisted index and query it with a prompt asking for the
         questions in JSON form.

    Args:
        materi: Iterable of uploaded-file objects exposing ``.name`` and
            ``.getbuffer()``; may be empty or ``None``.
        pelajaran: Subject name interpolated into the prompt.
        jumlah: Number of questions interpolated into the prompt.
        tipe: Question type interpolated into the prompt (the prompt describes
            formats for "Pilihan Ganda" and "Essay").

    Returns:
        The response object returned by the query engine.

    Raises:
        ValueError: If an uploaded file's name does not contain a usable
            base name.
    """
    temp_dir = './temp/'
    data_dir = './data/'

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(temp_dir, exist_ok=True)

    # Uploads are only written when the temp directory is still empty.
    if materi and not os.listdir(temp_dir):
        for file in materi:
            # The name comes from the uploader: keep only its final component
            # so values such as "../../x" or "C:\\x\\y" cannot escape temp_dir.
            safe_name = os.path.basename(file.name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                raise ValueError(f"Invalid upload file name: {file.name!r}")
            with open(os.path.join(temp_dir, safe_name), "wb") as f:
                f.write(file.getbuffer())

    # Build and persist the index only once; later calls reuse ./data/.
    if not os.path.exists(data_dir):
        documents = SimpleDirectoryReader(input_dir=temp_dir).load_data()
        store = VectorStoreIndex.from_documents(documents)
        store.storage_context.persist(persist_dir=data_dir)

    context = StorageContext.from_defaults(persist_dir=data_dir)
    index = load_index_from_storage(context)

    # The prompt text is runtime data sent to the LLM and is kept exactly as
    # written (including the "kucni" spelling); doubled braces are literal
    # braces inside the f-string.
    query = f'''
Buatkan soal ujian untuk pelajaran {pelajaran} sebanyak {jumlah} soal dengan tipe soal {tipe}.
Hanya berikan hasil generate berupa JSON.
Format JSON untuk tipe soal Pilihan Ganda:
{{
"soal": [
{{
"pertanyaan": "",
"opsi": [
"A.",
"B.",
"C.",
"D.",
"E."
],
"kunci_jawaban": "(kunci jawaban untuk koreksi)"
}}
]
}}.
Format JSON untuk tipe soal Essay:
{{
"soal": [
{{
"pertanyaan": "",
"kunci_jawaban": "(kucni jawaban untuk koreksi. Tanpa penjelasan lanjutan!)"
}}
]
}}.
'''
    engine = index.as_query_engine(similarity_top_k=20)
    return engine.query(query)