Replies: 1 comment 2 replies
-
These models are not guaranteed to give perfectly structured JSON output. You should look into grammar constraints, the guidance library from Microsoft, or use a larger or paid model that has better instruct adherence. Otherwise, it's often just a matter of trying a few times. If the JSON output is not correct, you can try again.
Beta Was this translation helpful? Give feedback.
2 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
The code below was working fine and gave results:
# Script: read the first page of a contract-note PDF with PyPDF2, flatten the
# extracted text to a single line, and ask a local GGUF model (through
# llama-cpp-python) to return the listed fields as JSON.
from llama_cpp import Llama
import timeit
from PyPDF2 import PdfReader

cc_path1 = r'data\contract_note\15.11.2023 S_E_CONTRACT_8560017_20231115.PDF'
cc_path2 = r"D:\llama_cpp\data\sample_cc_zerodha_updated.pdf"  # second sample path; not used below

reader = PdfReader(cc_path1)
number_of_pages = len(reader.pages)
print(number_of_pages)

# Only the first page is extracted and sent to the model.
page = reader.pages[0]
text = page.extract_text()
import pdfplumber  # not used anywhere in this script; left at its original position
print(text)

# Collapse the page text into one space-separated line for the prompt.
text1 = text.splitlines()
new_text1 = " ".join(text1)
print(new_text1)

# The timer starts before the prompt is built and the model is loaded, so the
# printed duration includes model load time as well as generation.
start = timeit.default_timer()

# NOTE(review): the "Desired format" template below is not valid JSON (key/value
# pairs sit directly inside [ ] without an enclosing object) and its keys do not
# exactly match the field list in the first sentence — confirm the intended
# schema before changing the prompt text.
prompt1 = f'''Extract the Broker name, Client ID, Contract Note No., Trade Date, Exchange/Clg. Corp, Order No., Order Time, Trade No, Trade Time, Security/Contract Description, ISIN CODE, Buy/Sell, Quantity, Gross Rate/Trade Price per unit (Rs) @, SEBI Turnover Fee, Brokerage per Unit (Rs), Closing Rate per Unit (Only for Derivatives) (Rs), Net Total (Before Levies) (Rs), Payin/PayOut Obligation, Minimum Charges, IGST, CGST, SGST, STT, SEBI T/O Fees, Exchange Clearing Chrgs, Other Charges Cash 2, Demat Charges , Stamp Duty, Other Charges, IPF, Total Net and Remark from following text and give output in json.
Desired format:
Data: {{"Data":["Broker name":null,"Client ID":null,"Contract Note No.":null,"Trade Date":null,"Exchange/Clg. Corp":null, "Order No.":null,"Order Time":null,"Trade No":null,"Trade Time":null,"Security/Contract Description":null,"ISIN CODE":null,"Buy/Sell":null,"Quantity":null,"Security/Contract Description":null,"Gross Rate/Trade Price per unit (Rs) @":null,"SEBI Turnover Fee":null,"Brokerage per Unit (Rs)":null,"Net Total (Before Levies) (Rs)":null, "Payin/PayOut Obligation":null, "Minimum Charges":null, "Exchange Tr. Chrg":null, "IGST":null, "CGST":null,"SGST":null, "STT":null, "SEBI T/O Fees":null, "Exchange Clearing Chrgs":null, "Other Charges Cash 2":null, "Demat Charges":null, "Stamp Duty":null,"Clearing Charges": null, "IPF": null, "Total Net": null, "Remark": null]}}
Input: {new_text1}'''

# NOTE(review): chat_format is "chatml" while the model file name indicates a
# Llama-3 Instruct GGUF — confirm this is the intended chat template.
llm = Llama(
    model_path=r"models\Meta-Llama-3-8B-Instruct.Q5_K_S.gguf",
    n_ctx=4096,
    chat_format="chatml",
)

x = llm.create_chat_completion(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant that outputs in JSON.",
        },
        {"role": "user", "content": prompt1},
    ],
    # Fix: the original never closed this dict before the call's closing
    # parenthesis, which made the whole script a SyntaxError.
    response_format={
        "type": "json_object",
    },
)
print(x['choices'][0]['message']['content'])

end = timeit.default_timer()
print(end - start)
The PDF I was provided contains tabular data, and with this prompt I want to extract these values and fill in the JSON below:
{
"Data": [
{
"Broker name": null,
"Client ID": null,
"Contract Note No.": null,
"Trade Date": null,
"Exchange/Clg. Corp": null,
"Order No.": null,
"Order Time": null,
"Trade No.": null,
"Trade Time": null,
"Security/Contract Description": null,
"ISIN CODE": null,
"Buy/Sell": null,
"Quantity": null,
"Gross Rate/Trade Price per unit (Rs) @": null,
"SEBI Turnover Fee": null,
"Brokerage per Unit (Rs)": null,
"Closing Rate per Unit (Only for Derivatives) (Rs)": null,
"Net Total (Before Levies) (Rs)": null,
"Payin/PayOut Obligation": null,
"Minimum Charges": null,
"Exchange Tr. Chrg": null,
"IGST": null,
"CGST": null,
"SGST": null,
"STT": null,
"SEBI T/O Fees": null,
"Exchange Clearing Chrgs": null,
"Other Charges Cash 2": null,
"Demat Charges": null,
"Stamp Duty": null,
"Other Charges": null,
"Clearing Charges": null,
"IPF": null,
"Total Net": null,
"Remark": null
}
]
}
Now it is giving the result below:
{
"Data": [
{
"Broker name": "ITI Securities Broking Limited",
"Client ID": "8560017",
"Contract Note No.": "110071",
"Trade Date": "15-Nov-2023",
"Exchange/Clg. Corp": "NSE Capital Market",
"Order No.": "2023216",
"Order Time": "10:26:42",
"Trade No": "62293618",
"Trade Time": "10:26:42",
"Security/Contract Description": "TATA POWER CO LTD INE245A01021 N",
"ISIN CODE": "INE245A01021",
"Buy/Sell": "N",
"Quantity": 500,
"Gross Rate/Trade Price per unit (Rs) @": 260.9305,
"SEBI Turnover Fee": null,
"Brokerage per Unit (Rs)": null,
"Net Total (Before Levies) (Rs)": 130,
593.0463252999998. Please help
Beta Was this translation helpful? Give feedback.
All reactions