mohsenim
diff --git a/‎README.md
+94 b/‎README.md
+94
diff --git a/‎persianp-public-1.0-SNAPSHOT.jar
38 MB b/‎persianp-public-1.0-SNAPSHOT.jar
38 MB
diff --git a/‎res/c.m
10.5 MB b/‎res/c.m
10.5 MB
diff --git a/‎res/fi.t
672 Bytes b/‎res/fi.t
672 Bytes
diff --git a/‎res/fr.t
+2 b/‎res/fr.t
+2
diff --git a/‎res/l.m
29 MB b/‎res/l.m
29 MB
diff --git a/‎res/lr.t
2.38 KB b/‎res/lr.t
2.38 KB
diff --git a/‎res/m.t
302 KB b/‎res/m.t
302 KB
diff --git a/‎res/p.m
9.14 MB b/‎res/p.m
9.14 MB
diff --git a/‎res/pn.t
46.8 KB b/‎res/pn.t
46.8 KB
diff --git a/‎res/rr.t
+2 b/‎res/rr.t
+2
diff --git a/‎res/s.t
8.82 KB b/‎res/s.t
8.82 KB
diff --git a/‎res/v.t
17.4 KB b/‎res/v.t
17.4 KB
diff --git a/‎res/vr.t
994 KB b/‎res/vr.t
994 KB
diff --git a/‎res/wl.t
+1 b/‎res/wl.t
+1
diff --git a/‎res/wt.t
1 MB b/‎res/wt.t
1 MB
diff --git a/‎res/wtl.t
7.52 KB b/‎res/wtl.t
7.52 KB
@@ -0,0 +1,94 @@
+## Persianp Processing Toolbox
+
+Persianp is a text processing toolbox developed in Java for preprocessing Persian texts. The toolbox performs the following tasks:
+* Character-level normalization
+* Tokenization
+* Lemmatization
+* POS tagging
+* Stopword detection
+* Noun phrase chunking
+
+### Using Persianp from the command line
+Make sure the `res` folder is in the same directory as the JAR file.
+
+```bash
+$ java -cp persianp-toolbox-1.0.jar com.persianp.nlp.process.Process -input inputfile.txt -output outputfile.txt -task (tokenize|tag|lemmatize|taglemmatize) [-nostopword] [-prop propertyFile.properties]
+```
+
+At the moment, NP chunking is not supported from the command line.
+
+### Using the Persianp API
+Add the Persianp JAR to your program's libraries. The following example shows how to use the toolbox.
+
+```java
+public class TestPersianp {
+
+    public static void main(String[] args) {
+        TestPersianp testPersianp = new TestPersianp();
+        testPersianp.process();
+    }
+
+    private void process() {
+        try {
+            Properties properties = new Properties();
+            properties.load(this.getClass().getClassLoader().getResourceAsStream("persianp.properties"));
+            Process process = new Process(properties);
+            InputStream in = this.getClass().getClassLoader().getResourceAsStream("testText.txt");
+            BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+            String line;
+            while ((line = br.readLine()) != null) {
+                process.process(line);
+
+                System.out.println(process.getText());
+//                process.getTokens();
+//                process.getTokensText();
+//                process.getTags();
+//                process.getChunkTag();
+//                process.getLemmas();
+//                process.getNonStopwordTokens();
+
+                int sentenceSize = process.getSentencesSize();
+                for (int j = 0; j < sentenceSize; ++j) {
+//                    List tokensText = process.getTokensTextInSentence(j);
+//                    List tags = process.getTagsInSentence(j);
+//                    List lemmas = process.getLemmasInSentence(j);
+                    List tokens = process.getTokensInSentence(j);
+                    for (int k = 0; k < tokens.size(); ++k) {
+                        System.out.println(tokens.get(k).getText() + "\t\t\t" + tokens.get(k).getLemma() + "\t\t\t" + tokens.get(k).getTag());
+                    }
+                }
+            }
+            in.close();
+            br.close();
+        } catch (Exception e){
+            e.printStackTrace();
+        }
+    }
+}
+
+```
+
+### More Information / Citing This Toolbox
+Please cite the paper below if you use the Persianp toolbox in your research. It also provides more information about the toolbox.
+
+> Mahdi Mohseni, Javad Ghofrani, Heshaam Faili
+> Persianp: A Persian Text Processing Toolbox
+> International Conference on Intelligent Text Processing and Computational Linguistics
+> CICLing 2016: Computational Linguistics and Intelligent Text Processing, pp. 75-87
+
+Bibtex citation:
+
+```bibtex
+@InProceedings{Persianp2016,
+author="Mohseni, Mahdi
+and Ghofrani, Javad
+and Faili, Heshaam",
+title="Persianp: A Persian Text Processing Toolbox",
+booktitle="Computational Linguistics and Intelligent Text Processing",
+year="2018",
+publisher="Springer International Publishing",
+pages="75--87",
+isbn="978-3-319-75477-2"
+}
+```
+
@@ -0,0 +1,2 @@
+҄㸑䧇헵碃ġ䰔蕟噽꺈㺔뢣奂椮虣⪠俕䂉烾堑銀
+�>I���x�!L�_V}����� >���YBi.�c*�O�@�p�X��
@@ -0,0 +1,2 @@
+塬즛̍掩콳ం䐳脄磯驣輙ᔊ䦬遝ᣆ渖盶쵋誇뇝莳�ᑀⓞꍒ⋍㺆ឭ毀櫽✼ᄁ襬쏚�념๡⒵撨줮ꇱ�폰쵷Ⴟꅚ앀ࢶ옵ⱌ갉ꫩ쫏䒾쨚韷㢥ⵢ䞩◀頸፣쿞㋁㉞
+I��]�nv��K���݃�ޤ@$ޣR"�>��k�j�'<�l���o�Pa$�d��.���f���w��Z�@���5,L��	����D����8�-bG�%��8c��2�2^
 
@@ -0,0 +1 @@
+陓㕓퇞䎎�묒䋫⇺ᡧ⳻�ధ�媘迡觝蘭ٷ폞
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+҄㸑䧇헵碃ġ䰔蕟噽꺈㺔뢣奂椮虣⪠俕䂉烾堑銀`
	`2`	`+�>I��x�!L�_V}�� >��YBi.�c*�O�@�p�X��`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+塬즛̍掩콳ం䐳脄磯驣輙ᔊ䦬遝ᣆ渖盶쵋誇뇝莳�ᑀⓞꍒ⋍㺆ឭ毀櫽✼ᄁ襬쏚�념๡⒵撨줮ꇱ�폰쵷Ⴟꅚ앀ࢶ옵ⱌ갉ꫩ쫏䒾쨚韷㢥ⵢ䞩◀頸፣쿞㋁㉞`
	`2`	`+I��]�nv��K��݃�ޤ@$ޣR"�>��k�j�'<�l��o�Pa$�d��.��f��w��Z�@��5,L�� D��8�-bG�%��8c��2�2^`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+陓㕓퇞䎎�묒䋫⇺ᡧ⳻�ధ�媘迡觝蘭ٷ폞`