diff --git a/cmd/accuracy-reports/aggregated-accuracy-values.csv b/cmd/accuracy-reports/aggregated-accuracy-values.csv index f73a4dc..a951965 100644 --- a/cmd/accuracy-reports/aggregated-accuracy-values.csv +++ b/cmd/accuracy-reports/aggregated-accuracy-values.csv @@ -3,7 +3,7 @@ Afrikaans,51,21,39,92,55,22,46,98,64,38,62,93,79,58,81,97 Albanian,NaN,NaN,NaN,NaN,55,18,48,98,80,54,86,99,88,69,95,100 Arabic,89,77,91,99,90,79,92,100,94,88,96,99,98,96,99,100 Armenian,NaN,NaN,NaN,NaN,99,100,100,97,100,100,100,100,100,100,100,100 -Azerbaijani,64,45,58,91,81,62,82,99,82,71,78,96,90,77,92,99 +Azerbaijani,65,45,58,91,81,62,82,99,82,71,78,96,90,77,92,99 Basque,NaN,NaN,NaN,NaN,62,33,62,92,75,56,76,92,84,71,87,93 Belarusian,81,64,80,98,84,67,86,100,92,80,95,100,97,92,99,100 Bengali,100,100,100,100,99,98,99,99,100,100,100,100,100,100,100,100 @@ -16,7 +16,7 @@ Croatian,55,28,44,91,42,26,42,58,60,36,57,86,73,53,74,90 Czech,50,31,46,71,64,39,65,88,71,54,72,87,80,66,84,91 Danish,47,24,38,79,58,26,54,95,70,45,70,95,81,61,84,98 Dutch,47,22,36,82,58,29,47,97,64,36,61,94,77,55,81,96 -English,49,17,35,94,54,22,44,97,63,29,62,97,81,55,89,99 +English,49,18,35,94,54,22,44,97,63,29,62,97,81,55,89,99 Esperanto,52,25,45,88,57,22,51,98,66,44,61,93,84,67,85,98 Estonian,61,36,53,94,70,41,69,99,83,62,88,99,92,80,96,100 Finnish,71,45,70,98,80,58,84,99,91,77,95,100,96,90,98,100 @@ -27,7 +27,7 @@ German,65,38,60,97,66,40,62,98,80,57,84,99,89,74,94,100 Greek,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100 Gujarati,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100 Hebrew,90,76,94,99,NaN,NaN,NaN,NaN,100,100,100,100,100,100,100,100 -Hindi,52,27,40,88,58,34,45,95,33,11,20,67,73,61,64,95 +Hindi,52,26,40,88,58,34,45,95,33,11,20,67,73,61,64,95 Hungarian,62,37,53,95,76,53,76,99,90,77,94,100,95,87,98,100 Icelandic,NaN,NaN,NaN,NaN,71,42,70,99,88,72,92,99,93,83,97,100 Indonesian,67,39,66,95,46,26,45,66,47,25,46,71,61,39,61,83 @@ -39,7 +39,7 @@ Korean,100,100,100,100,99,100,100,98,100,100,100,100,100,100,100,100 Latin,NaN,NaN,NaN,NaN,62,44,58,83,73,49,76,94,87,72,93,97 Latvian,59,36,54,87,75,51,77,98,87,75,90,97,93,85,97,99 Lithuanian,62,38,56,92,72,42,75,99,87,76,89,98,95,86,98,100 -Macedonian,62,39,55,94,60,30,54,97,72,52,70,95,84,66,86,99 +Macedonian,62,39,54,94,60,30,54,97,72,52,70,95,84,66,86,99 Malay,NaN,NaN,NaN,NaN,22,11,22,34,31,22,36,35,31,26,38,28 Maori,NaN,NaN,NaN,NaN,52,22,43,91,82,62,87,98,91,82,92,99 Marathi,73,52,74,93,84,69,84,98,39,16,30,72,85,74,85,96 @@ -49,7 +49,7 @@ Persian,70,46,66,99,76,57,70,99,80,62,80,98,90,78,94,100 Polish,66,45,59,94,77,51,80,99,90,77,93,99,95,85,98,100 Portuguese,57,26,48,96,53,21,40,97,69,42,70,95,81,59,85,99 Punjabi,100,100,100,100,100,99,100,100,100,100,100,100,100,100,100,100 -Romanian,59,34,52,90,53,24,48,88,72,49,74,94,87,69,92,99 +Romanian,59,35,52,90,53,24,48,88,72,49,74,94,87,69,92,99 Russian,53,40,52,68,71,48,72,93,78,59,84,92,90,76,95,98 Serbian,57,34,51,86,78,63,75,95,78,62,80,91,88,74,90,99 Shona,68,44,65,95,76,51,79,99,81,56,86,100,91,78,96,100 @@ -74,3 +74,4 @@ Welsh,NaN,NaN,NaN,NaN,69,43,66,98,82,61,87,99,91,78,96,99 Xhosa,NaN,NaN,NaN,NaN,66,40,65,92,69,45,67,94,82,64,85,98 Yoruba,22,11,14,41,15,5,11,28,62,33,61,92,74,50,77,96 Zulu,70,44,68,98,63,35,63,92,70,45,72,94,81,62,83,97 +Malayalam,100,100,100,100,99,99,100,100,43,23,38,69,100,100,100,99 diff --git a/cmd/accuracy-reports/cld3/Malayalam.txt b/cmd/accuracy-reports/cld3/Malayalam.txt new file mode 100644 index 0000000..15b7be5 --- /dev/null +++ b/cmd/accuracy-reports/cld3/Malayalam.txt @@ -0,0 +1,16 @@ +##### Malayalam ##### + +>>> Accuracy on average: 99.47% + +>> Detection of 1000 single words (average length: 10 chars) +Accuracy: 99.10% +Erroneously classified as Unknown: 0.40%, Yoruba: 0.30%, Finnish: 0.10%, Hungarian: 0.10% + +>> Detection of 1000 word pairs (average length: 20 chars) +Accuracy: 99.80% +Erroneously classified as Marathi: 0.10%, Vietnamese: 0.10% + +>> Detection of 1000 sentences (average length: 127 chars) +Accuracy: 99.50% +Erroneously classified as Bengali: 0.20%, Japanese: 0.10%, Marathi: 0.10%, Vietnamese: 0.10% + diff --git a/cmd/accuracy-reports/lingua-high-accuracy/Malayalam.txt b/cmd/accuracy-reports/lingua-high-accuracy/Malayalam.txt new file mode 100644 index 0000000..82ce9d9 --- /dev/null +++ b/cmd/accuracy-reports/lingua-high-accuracy/Malayalam.txt @@ -0,0 +1,16 @@ +##### Malayalam ##### + +>>> Accuracy on average: 99.80% + +>> Detection of 1000 single words (average length: 10 chars) +Accuracy: 100.00% +Erroneously classified as + +>> Detection of 1000 word pairs (average length: 20 chars) +Accuracy: 100.00% +Erroneously classified as + +>> Detection of 1000 sentences (average length: 127 chars) +Accuracy: 99.40% +Erroneously classified as Unknown: 0.30%, Bengali: 0.20%, Arabic: 0.10% + diff --git a/cmd/accuracy-reports/lingua-low-accuracy/Malayalam.txt b/cmd/accuracy-reports/lingua-low-accuracy/Malayalam.txt new file mode 100644 index 0000000..fcf22d4 --- /dev/null +++ b/cmd/accuracy-reports/lingua-low-accuracy/Malayalam.txt @@ -0,0 +1,16 @@ +##### Malayalam ##### + +>>> Accuracy on average: 43.33% + +>> Detection of 1000 single words (average length: 10 chars) +Accuracy: 22.70% +Erroneously classified as Unknown: 77.30% + +>> Detection of 1000 word pairs (average length: 20 chars) +Accuracy: 37.90% +Erroneously classified as Unknown: 62.10% + +>> Detection of 1000 sentences (average length: 127 chars) +Accuracy: 69.40% +Erroneously classified as Unknown: 30.30%, Bengali: 0.20%, Arabic: 0.10% + diff --git a/cmd/accuracy-reports/whatlang/Afrikaans.txt b/cmd/accuracy-reports/whatlang/Afrikaans.txt index 48414b7..b9a5bfd 100644 --- a/cmd/accuracy-reports/whatlang/Afrikaans.txt +++ b/cmd/accuracy-reports/whatlang/Afrikaans.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 21.00% -Erroneously classified as Unknown: 16.00%, Dutch: 10.30%, German: 7.00%, Danish: 5.70%, Bokmal: 4.20%, Estonian: 4.20%, Nynorsk: 3.40%, French: 3.20%, Swedish: 1.90%, Finnish: 1.80%, Turkish: 1.70%, Italian: 1.50%, Latvian: 1.50%, Romanian: 1.50%, Spanish: 1.50%, Portuguese: 1.40%, Somali: 1.30%, English: 1.20%, Hungarian: 1.20%, Indonesian: 1.20%, Shona: 1.00%, Slovene: 1.00%, Zulu: 0.90%, Esperanto: 0.80%, Lithuanian: 0.80%, Polish: 0.80%, Czech: 0.60%, Tagalog: 0.50%, Croatian: 0.40%, Vietnamese: 0.30%, Azerbaijani: 0.20% +Erroneously classified as Unknown: 16.00%, Dutch: 10.30%, German: 7.00%, Danish: 5.60%, Bokmal: 4.20%, Estonian: 4.20%, Nynorsk: 3.40%, French: 3.20%, Swedish: 1.90%, Finnish: 1.80%, Turkish: 1.70%, Italian: 1.50%, Latvian: 1.50%, Romanian: 1.50%, Spanish: 1.50%, Portuguese: 1.40%, Hungarian: 1.30%, Somali: 1.30%, English: 1.20%, Indonesian: 1.20%, Shona: 1.00%, Slovene: 1.00%, Zulu: 0.90%, Esperanto: 0.80%, Lithuanian: 0.80%, Polish: 0.80%, Czech: 0.60%, Tagalog: 0.50%, Croatian: 0.40%, Vietnamese: 0.30%, Azerbaijani: 0.20% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 39.30% diff --git a/cmd/accuracy-reports/whatlang/Arabic.txt b/cmd/accuracy-reports/whatlang/Arabic.txt index 241a7b7..8d1f52f 100644 --- a/cmd/accuracy-reports/whatlang/Arabic.txt +++ b/cmd/accuracy-reports/whatlang/Arabic.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 6 chars) Accuracy: 77.30% -Erroneously classified as Unknown: 12.50%, Persian: 7.10%, Urdu: 3.10% +Erroneously classified as Unknown: 12.60%, Persian: 7.10%, Urdu: 3.00% >> Detection of 1000 word pairs (average length: 14 chars) Accuracy: 91.20% diff --git a/cmd/accuracy-reports/whatlang/Azerbaijani.txt b/cmd/accuracy-reports/whatlang/Azerbaijani.txt index 98091d4..ff37da0 100644 --- a/cmd/accuracy-reports/whatlang/Azerbaijani.txt +++ b/cmd/accuracy-reports/whatlang/Azerbaijani.txt @@ -1,16 +1,16 @@ ##### Azerbaijani ##### ->>> Accuracy on average: 64.50% +>>> Accuracy on average: 64.57% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 44.60% -Erroneously classified as Unknown: 24.00%, Turkish: 8.80%, Somali: 2.10%, Tagalog: 2.00%, Indonesian: 1.80%, Italian: 1.50%, Finnish: 1.40%, Croatian: 1.00%, French: 1.00%, Estonian: 0.90%, German: 0.90%, Lithuanian: 0.90%, Portuguese: 0.90%, Spanish: 0.90%, Afrikaans: 0.70%, English: 0.70%, Shona: 0.70%, Romanian: 0.60%, Zulu: 0.60%, Hungarian: 0.50%, Nynorsk: 0.50%, Swedish: 0.50%, Danish: 0.40%, Latvian: 0.40%, Slovene: 0.40%, Esperanto: 0.30%, Bokmal: 0.20%, Czech: 0.20%, Polish: 0.20%, Yoruba: 0.20%, Dutch: 0.10%, Vietnamese: 0.10% +Accuracy: 44.70% +Erroneously classified as Unknown: 23.80%, Turkish: 8.80%, Somali: 2.10%, Tagalog: 2.00%, Indonesian: 1.80%, Italian: 1.50%, Finnish: 1.40%, Croatian: 1.00%, French: 1.00%, Estonian: 0.90%, German: 0.90%, Lithuanian: 0.90%, Portuguese: 0.90%, Spanish: 0.90%, Afrikaans: 0.80%, English: 0.70%, Shona: 0.70%, Romanian: 0.60%, Zulu: 0.60%, Danish: 0.50%, Hungarian: 0.50%, Nynorsk: 0.50%, Swedish: 0.50%, Latvian: 0.40%, Slovene: 0.40%, Esperanto: 0.30%, Bokmal: 0.20%, Czech: 0.20%, Polish: 0.20%, Dutch: 0.10%, Vietnamese: 0.10%, Yoruba: 0.10% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 57.70% Erroneously classified as Unknown: 18.70%, Turkish: 8.30%, Indonesian: 2.20%, Italian: 1.70%, Tagalog: 1.60%, Somali: 1.40%, Swedish: 0.90%, Estonian: 0.70%, Spanish: 0.70%, Finnish: 0.60%, German: 0.50%, Latvian: 0.50%, Lithuanian: 0.50%, Portuguese: 0.50%, Croatian: 0.40%, English: 0.40%, Slovene: 0.40%, Esperanto: 0.30%, Nynorsk: 0.30%, Romanian: 0.30%, Zulu: 0.30%, Afrikaans: 0.20%, Dutch: 0.20%, Hungarian: 0.20%, Shona: 0.20%, Bokmal: 0.10%, Czech: 0.10%, French: 0.10% >> Detection of 1000 sentences (average length: 107 chars) -Accuracy: 91.20% -Erroneously classified as Turkish: 4.70%, Unknown: 3.20%, Italian: 0.20%, Somali: 0.20%, Croatian: 0.10%, Finnish: 0.10%, Indonesian: 0.10%, Romanian: 0.10%, Swedish: 0.10% +Accuracy: 91.30% +Erroneously classified as Turkish: 4.60%, Unknown: 3.20%, Italian: 0.20%, Somali: 0.20%, Croatian: 0.10%, Finnish: 0.10%, Indonesian: 0.10%, Romanian: 0.10%, Swedish: 0.10% diff --git a/cmd/accuracy-reports/whatlang/Bokmal.txt b/cmd/accuracy-reports/whatlang/Bokmal.txt index 940c5b2..0dead8e 100644 --- a/cmd/accuracy-reports/whatlang/Bokmal.txt +++ b/cmd/accuracy-reports/whatlang/Bokmal.txt @@ -1,14 +1,14 @@ ##### Bokmal ##### ->>> Accuracy on average: 34.47% +>>> Accuracy on average: 34.43% >> Detection of 1000 single words (average length: 9 chars) -Accuracy: 15.00% -Erroneously classified as Danish: 13.30%, Unknown: 12.80%, Nynorsk: 9.80%, Swedish: 6.90%, Dutch: 4.30%, German: 4.10%, Afrikaans: 3.30%, Estonian: 3.30%, French: 3.30%, Spanish: 2.30%, Esperanto: 2.20%, Italian: 2.20%, Romanian: 2.20%, Hungarian: 2.00%, Turkish: 2.00%, English: 1.60%, Portuguese: 1.50%, Indonesian: 1.40%, Croatian: 1.00%, Tagalog: 0.80%, Finnish: 0.70%, Latvian: 0.70%, Czech: 0.60%, Lithuanian: 0.50%, Polish: 0.50%, Slovene: 0.50%, Somali: 0.40%, Vietnamese: 0.30%, Zulu: 0.30%, Shona: 0.20% +Accuracy: 14.90% +Erroneously classified as Danish: 13.50%, Unknown: 12.70%, Nynorsk: 9.80%, Swedish: 6.90%, Dutch: 4.30%, German: 4.10%, Afrikaans: 3.30%, Estonian: 3.30%, French: 3.30%, Spanish: 2.30%, Esperanto: 2.20%, Italian: 2.20%, Romanian: 2.20%, Hungarian: 2.00%, Turkish: 2.00%, English: 1.60%, Portuguese: 1.50%, Indonesian: 1.40%, Croatian: 1.00%, Tagalog: 0.80%, Finnish: 0.70%, Latvian: 0.70%, Czech: 0.60%, Lithuanian: 0.50%, Polish: 0.50%, Slovene: 0.50%, Somali: 0.40%, Vietnamese: 0.30%, Zulu: 0.30%, Shona: 0.20% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 28.50% -Erroneously classified as Danish: 17.70%, Nynorsk: 16.90%, Unknown: 5.00%, Swedish: 4.90%, Afrikaans: 3.40%, French: 3.40%, Dutch: 2.70%, German: 2.30%, Estonian: 1.90%, English: 1.70%, Esperanto: 1.40%, Portuguese: 1.30%, Italian: 1.10%, Spanish: 1.10%, Turkish: 1.10%, Finnish: 0.90%, Hungarian: 0.90%, Tagalog: 0.60%, Czech: 0.50%, Romanian: 0.50%, Zulu: 0.50%, Indonesian: 0.40%, Croatian: 0.30%, Slovene: 0.30%, Latvian: 0.20%, Lithuanian: 0.20%, Polish: 0.20%, Vietnamese: 0.10% +Erroneously classified as Danish: 17.70%, Nynorsk: 16.90%, Swedish: 5.00%, Unknown: 5.00%, Afrikaans: 3.40%, French: 3.40%, Dutch: 2.60%, German: 2.30%, Estonian: 1.90%, English: 1.70%, Esperanto: 1.40%, Portuguese: 1.40%, Spanish: 1.10%, Turkish: 1.10%, Italian: 1.00%, Finnish: 0.90%, Hungarian: 0.90%, Tagalog: 0.60%, Czech: 0.50%, Romanian: 0.50%, Zulu: 0.50%, Indonesian: 0.40%, Croatian: 0.30%, Slovene: 0.30%, Latvian: 0.20%, Lithuanian: 0.20%, Polish: 0.20%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 98 chars) Accuracy: 59.90% diff --git a/cmd/accuracy-reports/whatlang/Bulgarian.txt b/cmd/accuracy-reports/whatlang/Bulgarian.txt index fe9878b..0bd3360 100644 --- a/cmd/accuracy-reports/whatlang/Bulgarian.txt +++ b/cmd/accuracy-reports/whatlang/Bulgarian.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 36.80% -Erroneously classified as Macedonian: 20.30%, Russian: 12.70%, Serbian: 8.40%, Unknown: 8.20%, Ukrainian: 7.30%, Belarusian: 4.10%, Azerbaijani: 2.20% +Erroneously classified as Macedonian: 20.30%, Russian: 12.60%, Serbian: 8.40%, Unknown: 8.20%, Ukrainian: 7.40%, Belarusian: 4.10%, Azerbaijani: 2.20% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 56.90% diff --git a/cmd/accuracy-reports/whatlang/Croatian.txt b/cmd/accuracy-reports/whatlang/Croatian.txt index 30897a6..398cfd7 100644 --- a/cmd/accuracy-reports/whatlang/Croatian.txt +++ b/cmd/accuracy-reports/whatlang/Croatian.txt @@ -1,10 +1,10 @@ ##### Croatian ##### ->>> Accuracy on average: 54.57% +>>> Accuracy on average: 54.60% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 28.30% -Erroneously classified as Unknown: 19.00%, Slovene: 13.70%, Czech: 3.70%, Romanian: 2.70%, Esperanto: 2.60%, Estonian: 2.40%, Lithuanian: 2.00%, Nynorsk: 1.80%, Polish: 1.80%, Portuguese: 1.80%, Swedish: 1.70%, Zulu: 1.70%, Spanish: 1.60%, Tagalog: 1.40%, Afrikaans: 1.30%, Bokmal: 1.30%, Dutch: 1.20%, Turkish: 1.10%, Italian: 1.00%, Latvian: 1.00%, Shona: 1.00%, Danish: 0.90%, English: 0.90%, Finnish: 0.90%, Indonesian: 0.80%, German: 0.70%, French: 0.60%, Hungarian: 0.50%, Somali: 0.40%, Azerbaijani: 0.10%, Vietnamese: 0.10% +Accuracy: 28.40% +Erroneously classified as Unknown: 19.10%, Slovene: 13.70%, Czech: 3.60%, Romanian: 2.70%, Esperanto: 2.60%, Estonian: 2.40%, Lithuanian: 2.00%, Nynorsk: 1.80%, Polish: 1.80%, Portuguese: 1.80%, Swedish: 1.70%, Zulu: 1.70%, Spanish: 1.60%, Afrikaans: 1.30%, Bokmal: 1.30%, Tagalog: 1.30%, Dutch: 1.20%, Turkish: 1.10%, Italian: 1.00%, Latvian: 1.00%, Shona: 1.00%, Danish: 0.90%, English: 0.90%, Finnish: 0.90%, Indonesian: 0.80%, German: 0.70%, French: 0.60%, Hungarian: 0.50%, Somali: 0.40%, Azerbaijani: 0.10%, Vietnamese: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 44.00% diff --git a/cmd/accuracy-reports/whatlang/Czech.txt b/cmd/accuracy-reports/whatlang/Czech.txt index ad14bf8..efb4dfc 100644 --- a/cmd/accuracy-reports/whatlang/Czech.txt +++ b/cmd/accuracy-reports/whatlang/Czech.txt @@ -1,14 +1,14 @@ ##### Czech ##### ->>> Accuracy on average: 49.57% +>>> Accuracy on average: 49.53% >> Detection of 1000 single words (average length: 8 chars) Accuracy: 31.40% -Erroneously classified as Unknown: 17.00%, Croatian: 7.30%, Slovene: 5.70%, Polish: 3.70%, Esperanto: 3.40%, Romanian: 2.80%, English: 2.60%, German: 2.00%, Portuguese: 2.00%, French: 1.90%, Shona: 1.80%, Zulu: 1.80%, Estonian: 1.70%, Nynorsk: 1.40%, Spanish: 1.40%, Italian: 1.20%, Afrikaans: 1.10%, Somali: 1.00%, Turkish: 1.00%, Hungarian: 0.90%, Lithuanian: 0.90%, Tagalog: 0.90%, Indonesian: 0.80%, Swedish: 0.80%, Bokmal: 0.70%, Finnish: 0.70%, Latvian: 0.60%, Yoruba: 0.50%, Danish: 0.40%, Dutch: 0.40%, Vietnamese: 0.20% +Erroneously classified as Unknown: 17.00%, Croatian: 7.30%, Slovene: 5.60%, Polish: 3.70%, Esperanto: 3.40%, Romanian: 2.80%, English: 2.60%, German: 2.00%, Portuguese: 2.00%, French: 1.90%, Shona: 1.80%, Zulu: 1.80%, Estonian: 1.70%, Nynorsk: 1.40%, Spanish: 1.40%, Italian: 1.20%, Afrikaans: 1.10%, Somali: 1.00%, Turkish: 1.00%, Hungarian: 0.90%, Lithuanian: 0.90%, Tagalog: 0.90%, Indonesian: 0.80%, Swedish: 0.80%, Bokmal: 0.70%, Finnish: 0.70%, Latvian: 0.60%, Yoruba: 0.60%, Danish: 0.40%, Dutch: 0.40%, Vietnamese: 0.20% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 46.30% -Erroneously classified as Unknown: 9.10%, Croatian: 8.70%, Slovene: 5.50%, Polish: 2.90%, Esperanto: 2.80%, Portuguese: 2.40%, Spanish: 2.20%, German: 2.10%, Romanian: 1.90%, French: 1.50%, Estonian: 1.40%, Tagalog: 1.30%, Danish: 1.20%, Dutch: 1.20%, Italian: 1.20%, Hungarian: 1.10%, English: 1.00%, Afrikaans: 0.80%, Bokmal: 0.80%, Latvian: 0.80%, Zulu: 0.80%, Indonesian: 0.70%, Finnish: 0.40%, Nynorsk: 0.40%, Shona: 0.40%, Lithuanian: 0.30%, Swedish: 0.30%, Somali: 0.20%, Turkish: 0.20%, Yoruba: 0.10% +Accuracy: 46.20% +Erroneously classified as Unknown: 9.10%, Croatian: 8.70%, Slovene: 5.50%, Polish: 2.90%, Esperanto: 2.80%, Portuguese: 2.40%, Spanish: 2.20%, German: 2.10%, Romanian: 1.90%, French: 1.50%, Estonian: 1.40%, Italian: 1.30%, Tagalog: 1.30%, Danish: 1.20%, Dutch: 1.20%, Hungarian: 1.10%, English: 1.00%, Afrikaans: 0.80%, Bokmal: 0.80%, Latvian: 0.80%, Zulu: 0.80%, Indonesian: 0.70%, Finnish: 0.40%, Nynorsk: 0.40%, Shona: 0.40%, Lithuanian: 0.30%, Swedish: 0.30%, Somali: 0.20%, Turkish: 0.20%, Yoruba: 0.10% >> Detection of 1000 sentences (average length: 93 chars) Accuracy: 71.00% diff --git a/cmd/accuracy-reports/whatlang/Danish.txt b/cmd/accuracy-reports/whatlang/Danish.txt index 66ae8a1..6829a6b 100644 --- a/cmd/accuracy-reports/whatlang/Danish.txt +++ b/cmd/accuracy-reports/whatlang/Danish.txt @@ -1,10 +1,10 @@ ##### Danish ##### ->>> Accuracy on average: 46.80% +>>> Accuracy on average: 46.87% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 23.80% -Erroneously classified as Unknown: 13.10%, Bokmal: 9.60%, Nynorsk: 6.10%, Swedish: 5.30%, Dutch: 5.20%, German: 4.00%, French: 3.90%, Estonian: 3.40%, Afrikaans: 3.20%, English: 2.80%, Turkish: 2.40%, Spanish: 2.30%, Hungarian: 2.10%, Italian: 1.80%, Esperanto: 1.30%, Slovene: 1.30%, Romanian: 1.10%, Czech: 1.00%, Lithuanian: 0.90%, Portuguese: 0.90%, Croatian: 0.80%, Indonesian: 0.70%, Latvian: 0.60%, Zulu: 0.60%, Finnish: 0.50%, Shona: 0.40%, Somali: 0.30%, Tagalog: 0.30%, Vietnamese: 0.20%, Polish: 0.10% +Accuracy: 24.00% +Erroneously classified as Unknown: 13.00%, Bokmal: 9.50%, Nynorsk: 6.20%, Swedish: 5.30%, Dutch: 5.20%, German: 4.00%, French: 3.90%, Estonian: 3.40%, Afrikaans: 3.20%, English: 2.80%, Turkish: 2.40%, Spanish: 2.30%, Hungarian: 2.10%, Italian: 1.80%, Esperanto: 1.30%, Slovene: 1.30%, Czech: 1.00%, Romanian: 1.00%, Lithuanian: 0.90%, Portuguese: 0.90%, Croatian: 0.80%, Indonesian: 0.70%, Latvian: 0.60%, Zulu: 0.60%, Finnish: 0.50%, Shona: 0.40%, Somali: 0.30%, Tagalog: 0.30%, Vietnamese: 0.20%, Polish: 0.10% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 37.70% diff --git a/cmd/accuracy-reports/whatlang/Dutch.txt b/cmd/accuracy-reports/whatlang/Dutch.txt index 77d847f..9525388 100644 --- a/cmd/accuracy-reports/whatlang/Dutch.txt +++ b/cmd/accuracy-reports/whatlang/Dutch.txt @@ -4,11 +4,11 @@ >> Detection of 1000 single words (average length: 9 chars) Accuracy: 22.40% -Erroneously classified as Unknown: 14.60%, German: 10.00%, Afrikaans: 9.60%, Danish: 3.70%, French: 3.70%, Estonian: 3.60%, English: 3.40%, Bokmal: 3.30%, Spanish: 3.00%, Finnish: 2.40%, Nynorsk: 2.40%, Swedish: 2.10%, Indonesian: 1.40%, Romanian: 1.40%, Hungarian: 1.30%, Portuguese: 1.30%, Slovene: 1.20%, Lithuanian: 1.10%, Turkish: 1.10%, Zulu: 1.10%, Italian: 1.00%, Polish: 0.90%, Esperanto: 0.80%, Czech: 0.70%, Latvian: 0.70%, Somali: 0.50%, Tagalog: 0.50%, Shona: 0.30%, Vietnamese: 0.30%, Croatian: 0.20% +Erroneously classified as Unknown: 14.60%, German: 10.00%, Afrikaans: 9.50%, Danish: 3.80%, French: 3.80%, Estonian: 3.60%, English: 3.40%, Bokmal: 3.30%, Spanish: 2.90%, Finnish: 2.40%, Nynorsk: 2.40%, Swedish: 2.00%, Indonesian: 1.40%, Romanian: 1.40%, Hungarian: 1.30%, Portuguese: 1.30%, Slovene: 1.20%, Lithuanian: 1.10%, Turkish: 1.10%, Zulu: 1.10%, Italian: 1.00%, Polish: 0.90%, Esperanto: 0.80%, Latvian: 0.80%, Czech: 0.70%, Somali: 0.50%, Tagalog: 0.50%, Shona: 0.30%, Vietnamese: 0.30%, Croatian: 0.20% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 35.70% -Erroneously classified as German: 13.00%, Afrikaans: 12.90%, Unknown: 7.00%, Danish: 3.90%, Bokmal: 3.50%, French: 3.40%, English: 3.10%, Spanish: 2.20%, Nynorsk: 2.10%, Swedish: 2.10%, Estonian: 1.60%, Romanian: 1.40%, Finnish: 1.30%, Italian: 0.90%, Indonesian: 0.80%, Portuguese: 0.80%, Turkish: 0.70%, Somali: 0.60%, Czech: 0.30%, Esperanto: 0.30%, Hungarian: 0.30%, Latvian: 0.30%, Polish: 0.30%, Tagalog: 0.30%, Croatian: 0.20%, Lithuanian: 0.20%, Shona: 0.20%, Slovene: 0.20%, Vietnamese: 0.20%, Zulu: 0.20% +Erroneously classified as Afrikaans: 12.90%, German: 12.90%, Unknown: 7.00%, Danish: 4.00%, Bokmal: 3.50%, French: 3.40%, English: 3.10%, Spanish: 2.20%, Nynorsk: 2.10%, Swedish: 2.10%, Estonian: 1.60%, Romanian: 1.40%, Finnish: 1.30%, Italian: 0.90%, Indonesian: 0.80%, Portuguese: 0.80%, Turkish: 0.70%, Somali: 0.60%, Czech: 0.30%, Esperanto: 0.30%, Hungarian: 0.30%, Latvian: 0.30%, Polish: 0.30%, Tagalog: 0.30%, Croatian: 0.20%, Lithuanian: 0.20%, Shona: 0.20%, Slovene: 0.20%, Vietnamese: 0.20%, Zulu: 0.20% >> Detection of 1000 sentences (average length: 107 chars) Accuracy: 82.50% diff --git a/cmd/accuracy-reports/whatlang/English.txt b/cmd/accuracy-reports/whatlang/English.txt index b67f3bd..af370d6 100644 --- a/cmd/accuracy-reports/whatlang/English.txt +++ b/cmd/accuracy-reports/whatlang/English.txt @@ -1,10 +1,10 @@ ##### English ##### ->>> Accuracy on average: 49.00% +>>> Accuracy on average: 49.03% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 17.40% -Erroneously classified as Unknown: 17.30%, French: 14.30%, Romanian: 5.00%, Danish: 4.20%, Estonian: 3.90%, German: 3.80%, Portuguese: 3.30%, Italian: 3.00%, Spanish: 3.00%, Dutch: 2.90%, Bokmal: 2.50%, Nynorsk: 2.20%, Swedish: 2.10%, Afrikaans: 1.90%, Esperanto: 1.30%, Latvian: 1.30%, Hungarian: 1.20%, Lithuanian: 1.10%, Slovene: 1.00%, Turkish: 1.00%, Vietnamese: 1.00%, Polish: 0.90%, Tagalog: 0.90%, Zulu: 0.90%, Indonesian: 0.80%, Finnish: 0.50%, Somali: 0.50%, Croatian: 0.30%, Czech: 0.30%, Shona: 0.20% +Accuracy: 17.50% +Erroneously classified as Unknown: 17.30%, French: 14.30%, Romanian: 5.00%, Danish: 4.20%, Estonian: 3.90%, German: 3.80%, Portuguese: 3.40%, Italian: 3.00%, Spanish: 2.90%, Dutch: 2.80%, Bokmal: 2.50%, Nynorsk: 2.20%, Swedish: 2.10%, Afrikaans: 1.90%, Esperanto: 1.30%, Latvian: 1.30%, Hungarian: 1.20%, Lithuanian: 1.10%, Slovene: 1.00%, Turkish: 1.00%, Vietnamese: 1.00%, Polish: 0.90%, Tagalog: 0.90%, Zulu: 0.90%, Indonesian: 0.80%, Finnish: 0.50%, Somali: 0.50%, Croatian: 0.30%, Czech: 0.30%, Shona: 0.20% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 35.40% diff --git a/cmd/accuracy-reports/whatlang/Esperanto.txt b/cmd/accuracy-reports/whatlang/Esperanto.txt index bc99e1e..d754587 100644 --- a/cmd/accuracy-reports/whatlang/Esperanto.txt +++ b/cmd/accuracy-reports/whatlang/Esperanto.txt @@ -1,14 +1,14 @@ ##### Esperanto ##### ->>> Accuracy on average: 52.37% +>>> Accuracy on average: 52.30% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 24.90% -Erroneously classified as Unknown: 19.40%, Portuguese: 5.40%, Italian: 4.10%, Spanish: 3.30%, Croatian: 3.10%, Lithuanian: 3.10%, Tagalog: 3.00%, English: 2.80%, Estonian: 2.70%, Shona: 2.30%, French: 2.20%, Indonesian: 2.20%, Finnish: 2.00%, Slovene: 2.00%, Romanian: 1.60%, Zulu: 1.60%, Afrikaans: 1.40%, Danish: 1.40%, Bokmal: 1.30%, Swedish: 1.30%, Turkish: 1.30%, German: 1.20%, Nynorsk: 1.00%, Dutch: 0.90%, Hungarian: 0.90%, Somali: 0.90%, Latvian: 0.80%, Azerbaijani: 0.70%, Polish: 0.50%, Czech: 0.30%, Vietnamese: 0.30%, Yoruba: 0.10% +Accuracy: 24.70% +Erroneously classified as Unknown: 19.40%, Portuguese: 5.40%, Italian: 4.10%, Spanish: 3.30%, Croatian: 3.10%, Lithuanian: 3.10%, Tagalog: 3.00%, English: 2.80%, Estonian: 2.70%, Shona: 2.30%, French: 2.20%, Indonesian: 2.20%, Finnish: 2.00%, Slovene: 2.00%, Zulu: 1.80%, Romanian: 1.60%, Afrikaans: 1.40%, Danish: 1.40%, Bokmal: 1.30%, Swedish: 1.30%, Turkish: 1.30%, Dutch: 1.10%, German: 1.10%, Nynorsk: 1.00%, Hungarian: 0.90%, Somali: 0.90%, Latvian: 0.80%, Azerbaijani: 0.70%, Polish: 0.50%, Czech: 0.30%, Vietnamese: 0.30% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 44.60% -Erroneously classified as Unknown: 14.70%, Portuguese: 8.00%, Spanish: 4.50%, English: 3.00%, Italian: 2.60%, Lithuanian: 2.30%, Tagalog: 2.00%, Estonian: 1.90%, Croatian: 1.80%, Afrikaans: 1.50%, Slovene: 1.50%, Swedish: 1.50%, Indonesian: 1.30%, Zulu: 1.10%, Romanian: 0.90%, German: 0.80%, Nynorsk: 0.80%, Finnish: 0.70%, Somali: 0.70%, Shona: 0.60%, Turkish: 0.60%, Bokmal: 0.50%, Danish: 0.50%, Dutch: 0.40%, Czech: 0.30%, French: 0.30%, Latvian: 0.30%, Polish: 0.20%, Hungarian: 0.10% +Erroneously classified as Unknown: 14.80%, Portuguese: 8.00%, Spanish: 4.50%, English: 3.00%, Italian: 2.60%, Lithuanian: 2.30%, Estonian: 1.90%, Tagalog: 1.90%, Croatian: 1.80%, Afrikaans: 1.50%, Slovene: 1.50%, Swedish: 1.50%, Indonesian: 1.30%, Zulu: 1.10%, Romanian: 0.90%, German: 0.80%, Nynorsk: 0.80%, Finnish: 0.70%, Somali: 0.70%, Shona: 0.60%, Turkish: 0.60%, Bokmal: 0.50%, Danish: 0.50%, Dutch: 0.40%, Czech: 0.30%, French: 0.30%, Latvian: 0.30%, Polish: 0.20%, Hungarian: 0.10% >> Detection of 1000 sentences (average length: 101 chars) Accuracy: 87.60% diff --git a/cmd/accuracy-reports/whatlang/Estonian.txt b/cmd/accuracy-reports/whatlang/Estonian.txt index 5aa866f..c430ec3 100644 --- a/cmd/accuracy-reports/whatlang/Estonian.txt +++ b/cmd/accuracy-reports/whatlang/Estonian.txt @@ -1,14 +1,14 @@ ##### Estonian ##### ->>> Accuracy on average: 60.77% +>>> Accuracy on average: 60.80% >> Detection of 1000 single words (average length: 8 chars) Accuracy: 35.70% -Erroneously classified as Unknown: 18.40%, Finnish: 3.60%, Lithuanian: 3.40%, Somali: 2.70%, Afrikaans: 2.50%, Spanish: 2.50%, Portuguese: 2.40%, Turkish: 2.20%, Esperanto: 1.90%, Croatian: 1.80%, Danish: 1.80%, French: 1.80%, Shona: 1.70%, Italian: 1.50%, Dutch: 1.40%, Indonesian: 1.30%, Romanian: 1.30%, Swedish: 1.30%, Tagalog: 1.30%, Azerbaijani: 1.20%, Nynorsk: 1.20%, Bokmal: 1.00%, Hungarian: 1.00%, Latvian: 1.00%, English: 0.90%, Slovene: 0.90%, Zulu: 0.80%, German: 0.70%, Czech: 0.50%, Polish: 0.30% +Erroneously classified as Unknown: 18.40%, Finnish: 3.60%, Lithuanian: 3.40%, Somali: 2.70%, Afrikaans: 2.50%, Spanish: 2.50%, Portuguese: 2.40%, Turkish: 2.20%, Esperanto: 1.90%, French: 1.90%, Croatian: 1.80%, Danish: 1.70%, Shona: 1.70%, Dutch: 1.40%, Italian: 1.40%, Indonesian: 1.30%, Romanian: 1.30%, Swedish: 1.30%, Tagalog: 1.30%, Azerbaijani: 1.20%, Nynorsk: 1.20%, Bokmal: 1.00%, English: 1.00%, Hungarian: 1.00%, Latvian: 1.00%, Slovene: 0.90%, Zulu: 0.80%, German: 0.70%, Czech: 0.50%, Polish: 0.30% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 52.80% -Erroneously classified as Unknown: 11.60%, Finnish: 4.70%, Lithuanian: 3.90%, Afrikaans: 2.30%, Portuguese: 2.20%, French: 1.80%, Romanian: 1.80%, Somali: 1.80%, Slovene: 1.60%, Turkish: 1.40%, Tagalog: 1.30%, Esperanto: 1.20%, Dutch: 1.10%, Italian: 1.10%, Spanish: 1.00%, English: 0.90%, Nynorsk: 0.90%, Shona: 0.80%, Croatian: 0.70%, Indonesian: 0.70%, Latvian: 0.70%, Swedish: 0.70%, Danish: 0.60%, Bokmal: 0.50%, German: 0.50%, Zulu: 0.50%, Hungarian: 0.40%, Czech: 0.30%, Polish: 0.20% +Accuracy: 52.90% +Erroneously classified as Unknown: 11.50%, Finnish: 4.80%, Lithuanian: 3.90%, Afrikaans: 2.30%, Portuguese: 2.20%, French: 1.80%, Romanian: 1.80%, Somali: 1.80%, Slovene: 1.60%, Turkish: 1.40%, Tagalog: 1.30%, Esperanto: 1.20%, Dutch: 1.10%, Italian: 1.10%, Spanish: 1.00%, English: 0.90%, Nynorsk: 0.80%, Shona: 0.80%, Croatian: 0.70%, Indonesian: 0.70%, Latvian: 0.70%, Swedish: 0.70%, Danish: 0.60%, Bokmal: 0.50%, German: 0.50%, Zulu: 0.50%, Hungarian: 0.40%, Czech: 0.30%, Polish: 0.20% >> Detection of 1000 sentences (average length: 101 chars) Accuracy: 93.80% diff --git a/cmd/accuracy-reports/whatlang/Finnish.txt b/cmd/accuracy-reports/whatlang/Finnish.txt index 2ab3796..9ac4de5 100644 --- a/cmd/accuracy-reports/whatlang/Finnish.txt +++ b/cmd/accuracy-reports/whatlang/Finnish.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 10 chars) Accuracy: 45.00% -Erroneously classified as Unknown: 18.70%, Estonian: 4.20%, Italian: 2.80%, Tagalog: 2.20%, Afrikaans: 2.10%, Lithuanian: 2.10%, Nynorsk: 2.00%, German: 1.90%, Indonesian: 1.80%, Somali: 1.60%, Esperanto: 1.50%, Bokmal: 1.40%, Latvian: 1.10%, French: 1.00%, Hungarian: 1.00%, Spanish: 1.00%, Shona: 0.90%, Swedish: 0.90%, Danish: 0.80%, Portuguese: 0.80%, Slovene: 0.80%, Turkish: 0.80%, Romanian: 0.60%, Zulu: 0.60%, Czech: 0.50%, English: 0.50%, Azerbaijani: 0.40%, Dutch: 0.40%, Croatian: 0.30%, Polish: 0.30% +Erroneously classified as Unknown: 18.80%, Estonian: 4.30%, Italian: 2.80%, Tagalog: 2.20%, Afrikaans: 2.10%, Lithuanian: 2.10%, Nynorsk: 2.00%, German: 1.90%, Indonesian: 1.80%, Somali: 1.60%, Esperanto: 1.50%, Bokmal: 1.40%, Latvian: 1.10%, Spanish: 1.00%, French: 0.90%, Hungarian: 0.90%, Shona: 0.90%, Swedish: 0.90%, Danish: 0.80%, Portuguese: 0.80%, Slovene: 0.80%, Turkish: 0.80%, Romanian: 0.60%, Zulu: 0.60%, Czech: 0.50%, English: 0.50%, Azerbaijani: 0.40%, Dutch: 0.40%, Croatian: 0.30%, Polish: 0.30% >> Detection of 1000 word pairs (average length: 19 chars) Accuracy: 70.50% diff --git a/cmd/accuracy-reports/whatlang/French.txt b/cmd/accuracy-reports/whatlang/French.txt index 5463975..639e882 100644 --- a/cmd/accuracy-reports/whatlang/French.txt +++ b/cmd/accuracy-reports/whatlang/French.txt @@ -1,10 +1,10 @@ ##### French ##### ->>> Accuracy on average: 64.47% +>>> Accuracy on average: 64.50% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 36.60% -Erroneously classified as Unknown: 11.20%, English: 7.70%, Spanish: 4.10%, Romanian: 4.00%, Portuguese: 3.30%, German: 3.10%, Italian: 3.10%, Estonian: 3.00%, Afrikaans: 2.40%, Danish: 2.40%, Nynorsk: 1.90%, Lithuanian: 1.60%, Esperanto: 1.50%, Turkish: 1.50%, Swedish: 1.40%, Bokmal: 1.20%, Hungarian: 1.20%, Latvian: 1.00%, Shona: 1.00%, Dutch: 0.90%, Indonesian: 0.90%, Slovene: 0.90%, Croatian: 0.80%, Zulu: 0.80%, Finnish: 0.70%, Tagalog: 0.50%, Czech: 0.30%, Polish: 0.30%, Vietnamese: 0.30%, Azerbaijani: 0.20%, Somali: 0.10%, Yoruba: 0.10% +Accuracy: 36.70% +Erroneously classified as Unknown: 11.00%, English: 7.70%, Romanian: 4.00%, Spanish: 4.00%, Portuguese: 3.30%, German: 3.10%, Italian: 3.10%, Estonian: 3.00%, Afrikaans: 2.50%, Danish: 2.40%, Nynorsk: 2.00%, Lithuanian: 1.60%, Esperanto: 1.50%, Turkish: 1.50%, Swedish: 1.40%, Hungarian: 1.20%, Bokmal: 1.10%, Latvian: 1.10%, Shona: 1.00%, Dutch: 0.90%, Indonesian: 0.90%, Slovene: 0.90%, Croatian: 0.80%, Zulu: 0.80%, Finnish: 0.70%, Tagalog: 0.50%, Czech: 0.30%, Polish: 0.30%, Vietnamese: 0.30%, Azerbaijani: 0.20%, Somali: 0.10%, Yoruba: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 59.40% diff --git a/cmd/accuracy-reports/whatlang/German.txt b/cmd/accuracy-reports/whatlang/German.txt index d2f4929..f49daf5 100644 --- a/cmd/accuracy-reports/whatlang/German.txt +++ b/cmd/accuracy-reports/whatlang/German.txt @@ -4,11 +4,11 @@ >> Detection of 1000 single words (average length: 9 chars) Accuracy: 38.50% -Erroneously classified as Unknown: 10.90%, Danish: 6.20%, Dutch: 5.50%, Afrikaans: 3.40%, French: 3.40%, Bokmal: 3.10%, Nynorsk: 3.00%, English: 2.90%, Spanish: 2.50%, Swedish: 2.50%, Estonian: 1.70%, Italian: 1.60%, Portuguese: 1.60%, Hungarian: 1.50%, Esperanto: 1.30%, Finnish: 1.30%, Indonesian: 1.30%, Romanian: 1.20%, Turkish: 1.20%, Lithuanian: 0.90%, Zulu: 0.90%, Shona: 0.60%, Croatian: 0.50%, Czech: 0.50%, Polish: 0.40%, Somali: 0.40%, Tagalog: 0.40%, Latvian: 0.30%, Slovene: 0.30%, Azerbaijani: 0.10%, Vietnamese: 0.10% +Erroneously classified as Unknown: 11.00%, Danish: 6.30%, Dutch: 5.50%, Afrikaans: 3.40%, French: 3.40%, Bokmal: 3.00%, Nynorsk: 3.00%, English: 2.90%, Spanish: 2.50%, Swedish: 2.50%, Estonian: 1.70%, Italian: 1.60%, Portuguese: 1.60%, Hungarian: 1.50%, Esperanto: 1.30%, Finnish: 1.30%, Indonesian: 1.30%, Romanian: 1.20%, Turkish: 1.20%, Lithuanian: 0.90%, Zulu: 0.90%, Shona: 0.60%, Croatian: 0.50%, Czech: 0.50%, Polish: 0.40%, Somali: 0.40%, Tagalog: 0.40%, Slovene: 0.30%, Latvian: 0.20%, Azerbaijani: 0.10%, Vietnamese: 0.10% >> Detection of 1000 word pairs (average length: 18 chars) Accuracy: 60.40% -Erroneously classified as Dutch: 6.80%, Unknown: 4.00%, Bokmal: 3.70%, Danish: 3.40%, French: 3.40%, Afrikaans: 3.00%, Swedish: 2.60%, Nynorsk: 2.20%, Estonian: 1.60%, English: 1.50%, Hungarian: 1.10%, Finnish: 0.90%, Italian: 0.90%, Turkish: 0.80%, Portuguese: 0.60%, Esperanto: 0.50%, Spanish: 0.50%, Romanian: 0.40%, Indonesian: 0.30%, Lithuanian: 0.30%, Polish: 0.30%, Shona: 0.30%, Croatian: 0.10%, Czech: 0.10%, Somali: 0.10%, Tagalog: 0.10%, Zulu: 0.10% +Erroneously classified as Dutch: 6.80%, Unknown: 4.00%, Bokmal: 3.70%, Danish: 3.40%, French: 3.30%, Afrikaans: 2.90%, Swedish: 2.60%, Nynorsk: 2.20%, Estonian: 1.60%, English: 1.50%, Hungarian: 1.10%, Finnish: 0.90%, Italian: 0.90%, Turkish: 0.80%, Portuguese: 0.60%, Esperanto: 0.50%, Spanish: 0.50%, Romanian: 0.40%, Indonesian: 0.30%, Lithuanian: 0.30%, Polish: 0.30%, Shona: 0.30%, Zulu: 0.30%, Croatian: 0.10%, Czech: 0.10%, Somali: 0.10%, Tagalog: 0.10% >> Detection of 1000 sentences (average length: 111 chars) Accuracy: 97.00% diff --git a/cmd/accuracy-reports/whatlang/Hindi.txt b/cmd/accuracy-reports/whatlang/Hindi.txt index 7d88c2a..0a80c8f 100644 --- a/cmd/accuracy-reports/whatlang/Hindi.txt +++ b/cmd/accuracy-reports/whatlang/Hindi.txt @@ -1,10 +1,10 @@ ##### Hindi ##### ->>> Accuracy on average: 51.73% +>>> Accuracy on average: 51.70% >> Detection of 1000 single words (average length: 6 chars) -Accuracy: 26.60% -Erroneously classified as Unknown: 58.00%, Marathi: 15.40% +Accuracy: 26.50% +Erroneously classified as Unknown: 58.00%, Marathi: 15.50% >> Detection of 1000 word pairs (average length: 14 chars) Accuracy: 40.30% diff --git a/cmd/accuracy-reports/whatlang/Hungarian.txt b/cmd/accuracy-reports/whatlang/Hungarian.txt index 8bea852..1401597 100644 --- a/cmd/accuracy-reports/whatlang/Hungarian.txt +++ b/cmd/accuracy-reports/whatlang/Hungarian.txt @@ -1,14 +1,14 @@ ##### Hungarian ##### ->>> Accuracy on average: 61.93% +>>> Accuracy on average: 61.90% >> Detection of 1000 single words (average length: 9 chars) Accuracy: 37.40% -Erroneously classified as Unknown: 19.00%, Estonian: 3.30%, Afrikaans: 3.10%, French: 3.10%, Danish: 2.50%, Indonesian: 2.50%, Tagalog: 2.30%, Nynorsk: 2.10%, Bokmal: 1.80%, Lithuanian: 1.70%, Portuguese: 1.70%, Slovene: 1.70%, Spanish: 1.50%, Czech: 1.40%, German: 1.40%, Swedish: 1.40%, Esperanto: 1.30%, Dutch: 1.20%, English: 1.20%, Romanian: 1.20%, Finnish: 1.10%, Italian: 1.10%, Turkish: 1.00%, Shona: 0.90%, Latvian: 0.80%, Yoruba: 0.60%, Croatian: 0.50%, Polish: 0.50%, Zulu: 0.50%, Azerbaijani: 0.20% +Erroneously classified as Unknown: 18.90%, Estonian: 3.30%, Afrikaans: 3.10%, French: 3.10%, Danish: 2.50%, Indonesian: 2.50%, Tagalog: 2.30%, Nynorsk: 2.10%, Bokmal: 1.80%, Lithuanian: 1.70%, Portuguese: 1.70%, Slovene: 1.70%, Spanish: 1.50%, Czech: 1.40%, German: 1.40%, Swedish: 1.40%, Esperanto: 1.30%, Dutch: 1.20%, English: 1.20%, Romanian: 1.20%, Finnish: 1.10%, Italian: 1.10%, Turkish: 1.10%, Shona: 0.90%, Latvian: 0.80%, Yoruba: 0.60%, Croatian: 0.50%, Polish: 0.50%, Zulu: 0.50%, Azerbaijani: 0.20% >> Detection of 1000 word pairs (average length: 17 chars) -Accuracy: 53.10% -Erroneously classified as Unknown: 14.10%, Indonesian: 2.60%, Afrikaans: 2.50%, Nynorsk: 2.40%, Bokmal: 2.10%, Estonian: 2.10%, Turkish: 1.70%, Danish: 1.60%, Esperanto: 1.60%, French: 1.60%, German: 1.40%, Lithuanian: 1.30%, Finnish: 1.10%, Swedish: 1.10%, English: 1.00%, Portuguese: 1.00%, Romanian: 0.90%, Tagalog: 0.90%, Dutch: 0.80%, Italian: 0.80%, Czech: 0.70%, Latvian: 0.70%, Spanish: 0.70%, Slovene: 0.60%, Croatian: 0.50%, Polish: 0.30%, Shona: 0.30%, Somali: 0.20%, Yoruba: 0.20%, Zulu: 0.10% +Accuracy: 53.00% +Erroneously classified as Unknown: 14.10%, Indonesian: 2.60%, Afrikaans: 2.50%, Nynorsk: 2.40%, Bokmal: 2.10%, Estonian: 2.10%, French: 1.70%, Turkish: 1.70%, Danish: 1.60%, Esperanto: 1.60%, German: 1.40%, Lithuanian: 1.30%, Finnish: 1.20%, Portuguese: 1.00%, Swedish: 1.00%, English: 0.90%, Romanian: 0.90%, Tagalog: 0.90%, Dutch: 0.80%, Italian: 0.80%, Czech: 0.70%, Latvian: 0.70%, Spanish: 0.70%, Slovene: 0.60%, Croatian: 0.50%, Polish: 0.40%, Shona: 0.30%, Somali: 0.20%, Yoruba: 0.20%, Zulu: 0.10% >> Detection of 1000 sentences (average length: 116 chars) Accuracy: 95.30% diff --git a/cmd/accuracy-reports/whatlang/Indonesian.txt b/cmd/accuracy-reports/whatlang/Indonesian.txt index 1306e5b..24df0dd 100644 --- a/cmd/accuracy-reports/whatlang/Indonesian.txt +++ b/cmd/accuracy-reports/whatlang/Indonesian.txt @@ -4,11 +4,11 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 39.20% -Erroneously classified as Unknown: 27.20%, Tagalog: 4.10%, Esperanto: 2.40%, Estonian: 2.40%, German: 1.80%, Afrikaans: 1.70%, Lithuanian: 1.70%, Portuguese: 1.60%, French: 1.50%, Italian: 1.40%, Nynorsk: 1.40%, Romanian: 1.40%, Hungarian: 1.30%, Finnish: 1.20%, Turkish: 1.20%, Danish: 0.90%, Bokmal: 0.80%, Dutch: 0.80%, Shona: 0.80%, Swedish: 0.80%, English: 0.70%, Zulu: 0.70%, Somali: 0.50%, Spanish: 0.50%, Croatian: 0.40%, Czech: 0.30%, Latvian: 0.30%, Polish: 0.30%, Slovene: 0.30%, Azerbaijani: 0.20%, Vietnamese: 0.20% +Erroneously classified as Unknown: 27.20%, Tagalog: 4.10%, Esperanto: 2.40%, Estonian: 2.40%, German: 1.80%, Afrikaans: 1.70%, Lithuanian: 1.70%, Portuguese: 1.60%, French: 1.50%, Italian: 1.40%, Romanian: 1.40%, Hungarian: 1.30%, Nynorsk: 1.30%, Finnish: 1.20%, Turkish: 1.20%, Danish: 0.90%, Bokmal: 0.80%, Dutch: 0.80%, Shona: 0.80%, Swedish: 0.80%, English: 0.70%, Zulu: 0.70%, Somali: 0.50%, Spanish: 0.50%, Croatian: 0.40%, Latvian: 0.40%, Czech: 0.30%, Polish: 0.30%, Slovene: 0.30%, Azerbaijani: 0.20%, Vietnamese: 0.20% >> Detection of 1000 word pairs (average length: 15 chars) Accuracy: 65.70% -Erroneously classified as Unknown: 17.90%, Tagalog: 4.00%, Esperanto: 1.10%, English: 0.90%, Estonian: 0.80%, Hungarian: 0.80%, Lithuanian: 0.80%, Finnish: 0.70%, Italian: 0.70%, Spanish: 0.70%, French: 0.60%, German: 0.50%, Portuguese: 0.50%, Slovene: 0.50%, Somali: 0.50%, Afrikaans: 0.40%, Nynorsk: 0.40%, Swedish: 0.40%, Bokmal: 0.30%, Danish: 0.30%, Romanian: 0.30%, Turkish: 0.30%, Croatian: 0.20%, Dutch: 0.20%, Latvian: 0.20%, Shona: 0.20%, Czech: 0.10% +Erroneously classified as Unknown: 17.90%, Tagalog: 4.00%, Esperanto: 1.10%, English: 0.80%, Estonian: 0.80%, Hungarian: 0.80%, Lithuanian: 0.80%, Finnish: 0.70%, Italian: 0.70%, Spanish: 0.70%, French: 0.60%, German: 0.50%, Portuguese: 0.50%, Slovene: 0.50%, Somali: 0.50%, Afrikaans: 0.40%, Nynorsk: 0.40%, Swedish: 0.40%, Bokmal: 0.30%, Danish: 0.30%, Romanian: 0.30%, Turkish: 0.30%, Croatian: 0.20%, Czech: 0.20%, Dutch: 0.20%, Latvian: 0.20%, Shona: 0.20% >> Detection of 1000 sentences (average length: 105 chars) Accuracy: 94.90% diff --git a/cmd/accuracy-reports/whatlang/Italian.txt b/cmd/accuracy-reports/whatlang/Italian.txt index 3ae0e15..8cb2272 100644 --- a/cmd/accuracy-reports/whatlang/Italian.txt +++ b/cmd/accuracy-reports/whatlang/Italian.txt @@ -1,14 +1,14 @@ ##### Italian ##### ->>> Accuracy on average: 56.00% +>>> Accuracy on average: 55.97% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 25.10% -Erroneously classified as Unknown: 16.70%, Portuguese: 7.50%, Romanian: 5.80%, French: 4.20%, Spanish: 3.80%, Esperanto: 3.70%, Croatian: 3.40%, Slovene: 3.00%, English: 2.60%, Lithuanian: 2.40%, Estonian: 2.10%, Shona: 1.90%, Finnish: 1.70%, Indonesian: 1.70%, Dutch: 1.60%, Tagalog: 1.40%, Bokmal: 1.10%, German: 1.10%, Zulu: 1.10%, Czech: 1.00%, Hungarian: 1.00%, Polish: 0.90%, Afrikaans: 0.80%, Swedish: 0.80%, Danish: 0.70%, Turkish: 0.70%, Nynorsk: 0.60%, Vietnamese: 0.60%, Azerbaijani: 0.40%, Latvian: 0.40%, Somali: 0.20% +Accuracy: 25.00% +Erroneously classified as Unknown: 16.70%, Portuguese: 7.70%, Romanian: 5.80%, French: 4.20%, Esperanto: 3.80%, Spanish: 3.60%, Croatian: 3.40%, Slovene: 3.00%, English: 2.60%, Lithuanian: 2.40%, Estonian: 2.10%, Shona: 1.90%, Finnish: 1.70%, Indonesian: 1.70%, Dutch: 1.60%, Tagalog: 1.40%, Bokmal: 1.10%, German: 1.10%, Zulu: 1.10%, Czech: 1.00%, Hungarian: 1.00%, Polish: 0.90%, Afrikaans: 0.80%, Swedish: 0.80%, Danish: 0.70%, Turkish: 0.70%, Nynorsk: 0.60%, Vietnamese: 0.60%, Azerbaijani: 0.40%, Latvian: 0.40%, Somali: 0.20% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 47.30% -Erroneously classified as Unknown: 11.20%, Portuguese: 7.30%, Romanian: 6.30%, Spanish: 3.30%, French: 3.00%, Esperanto: 2.90%, Croatian: 1.90%, English: 1.70%, Slovene: 1.70%, German: 1.40%, Indonesian: 1.30%, Afrikaans: 1.10%, Finnish: 1.10%, Lithuanian: 1.10%, Shona: 1.10%, Zulu: 1.00%, Danish: 0.90%, Bokmal: 0.70%, Estonian: 0.70%, Turkish: 0.60%, Nynorsk: 0.50%, Tagalog: 0.50%, Czech: 0.40%, Dutch: 0.30%, Swedish: 0.30%, Hungarian: 0.10%, Latvian: 0.10%, Somali: 0.10%, Vietnamese: 0.10% +Erroneously classified as Unknown: 11.10%, Portuguese: 7.30%, Romanian: 6.30%, Spanish: 3.30%, French: 3.00%, Esperanto: 2.90%, Croatian: 1.90%, English: 1.70%, Slovene: 1.70%, German: 1.40%, Indonesian: 1.40%, Afrikaans: 1.10%, Finnish: 1.10%, Lithuanian: 1.10%, Shona: 1.10%, Zulu: 1.00%, Danish: 0.90%, Bokmal: 0.70%, Estonian: 0.70%, Turkish: 0.60%, Nynorsk: 0.50%, Tagalog: 0.50%, Czech: 0.40%, Dutch: 0.30%, Swedish: 0.30%, Hungarian: 0.10%, Latvian: 0.10%, Somali: 0.10%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 123 chars) Accuracy: 95.60% diff --git a/cmd/accuracy-reports/whatlang/Latvian.txt b/cmd/accuracy-reports/whatlang/Latvian.txt index 6497b9a..5462dc7 100644 --- a/cmd/accuracy-reports/whatlang/Latvian.txt +++ b/cmd/accuracy-reports/whatlang/Latvian.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 36.50% -Erroneously classified as Unknown: 16.00%, Lithuanian: 6.40%, Esperanto: 3.40%, Estonian: 3.00%, French: 2.80%, Finnish: 2.40%, Shona: 2.40%, Tagalog: 2.30%, Portuguese: 2.00%, German: 1.80%, Indonesian: 1.60%, Spanish: 1.60%, Hungarian: 1.50%, Polish: 1.50%, Afrikaans: 1.40%, Croatian: 1.30%, Dutch: 1.30%, English: 1.30%, Swedish: 1.30%, Italian: 1.20%, Romanian: 1.20%, Slovene: 1.10%, Czech: 1.00%, Zulu: 1.00%, Nynorsk: 0.90%, Danish: 0.50%, Bokmal: 0.40%, Somali: 0.40%, Turkish: 0.20%, Yoruba: 0.20%, Azerbaijani: 0.10% +Erroneously classified as Unknown: 16.30%, Lithuanian: 6.40%, Esperanto: 3.40%, Estonian: 3.00%, French: 2.90%, Finnish: 2.40%, Shona: 2.40%, Tagalog: 2.30%, Portuguese: 2.00%, German: 1.70%, Indonesian: 1.70%, Hungarian: 1.50%, Polish: 1.50%, Spanish: 1.50%, Afrikaans: 1.40%, Croatian: 1.30%, Dutch: 1.30%, English: 1.30%, Swedish: 1.30%, Italian: 1.20%, Romanian: 1.20%, Slovene: 1.10%, Czech: 1.00%, Zulu: 1.00%, Nynorsk: 0.80%, Danish: 0.50%, Bokmal: 0.40%, Somali: 0.40%, Turkish: 0.20%, Azerbaijani: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 53.50% diff --git a/cmd/accuracy-reports/whatlang/Lithuanian.txt b/cmd/accuracy-reports/whatlang/Lithuanian.txt index ba22d7c..b55b3eb 100644 --- a/cmd/accuracy-reports/whatlang/Lithuanian.txt +++ b/cmd/accuracy-reports/whatlang/Lithuanian.txt @@ -1,16 +1,16 @@ ##### Lithuanian ##### ->>> Accuracy on average: 61.97% +>>> Accuracy on average: 61.90% >> Detection of 1000 single words (average length: 8 chars) Accuracy: 37.80% -Erroneously classified as Unknown: 16.60%, Estonian: 4.60%, Esperanto: 4.30%, Latvian: 2.80%, Indonesian: 2.60%, Slovene: 2.60%, Portuguese: 2.50%, Croatian: 2.30%, Tagalog: 2.10%, Shona: 2.00%, Finnish: 1.80%, Spanish: 1.80%, French: 1.60%, Zulu: 1.60%, Swedish: 1.30%, German: 1.20%, Nynorsk: 1.10%, Danish: 1.00%, Italian: 1.00%, Polish: 1.00%, English: 0.90%, Hungarian: 0.90%, Afrikaans: 0.80%, Romanian: 0.80%, Czech: 0.60%, Dutch: 0.60%, Turkish: 0.50%, Bokmal: 0.40%, Somali: 0.40%, Azerbaijani: 0.20%, Vietnamese: 0.20%, Yoruba: 0.10% +Erroneously classified as Unknown: 16.70%, Estonian: 4.40%, Esperanto: 4.30%, Latvian: 2.80%, Indonesian: 2.60%, Slovene: 2.60%, Portuguese: 2.50%, Croatian: 2.40%, Tagalog: 2.10%, Shona: 2.00%, Finnish: 1.80%, Spanish: 1.80%, French: 1.60%, Zulu: 1.60%, Swedish: 1.30%, Danish: 1.20%, German: 1.20%, Nynorsk: 1.10%, Italian: 1.00%, English: 0.90%, Hungarian: 0.90%, Polish: 0.90%, Afrikaans: 0.80%, Romanian: 0.80%, Dutch: 0.70%, Czech: 0.60%, Turkish: 0.50%, Somali: 0.40%, Bokmal: 0.30%, Azerbaijani: 0.20%, Vietnamese: 0.20% >> Detection of 1000 word pairs (average length: 17 chars) -Accuracy: 55.60% -Erroneously classified as Unknown: 12.90%, Esperanto: 3.80%, Portuguese: 3.20%, Estonian: 2.50%, Indonesian: 2.40%, Spanish: 2.40%, Croatian: 1.80%, Latvian: 1.80%, Slovene: 1.80%, French: 1.70%, Italian: 1.50%, Finnish: 1.20%, Shona: 1.20%, Afrikaans: 1.00%, Tagalog: 0.70%, Hungarian: 0.60%, Danish: 0.50%, Nynorsk: 0.50%, Swedish: 0.50%, Zulu: 0.50%, English: 0.40%, Turkish: 0.40%, Czech: 0.30%, German: 0.30%, Polish: 0.20%, Bokmal: 0.10%, Dutch: 0.10%, Somali: 0.10% +Accuracy: 55.50% +Erroneously classified as Unknown: 12.90%, Esperanto: 3.80%, Portuguese: 3.20%, Estonian: 2.50%, Indonesian: 2.40%, Spanish: 2.40%, Slovene: 1.90%, Croatian: 1.80%, Latvian: 1.80%, French: 1.70%, Italian: 1.50%, Finnish: 1.20%, Shona: 1.20%, Afrikaans: 1.00%, Tagalog: 0.70%, Hungarian: 0.60%, Danish: 0.50%, Nynorsk: 0.50%, Swedish: 0.50%, Zulu: 0.50%, English: 0.40%, Turkish: 0.40%, Czech: 0.30%, German: 0.30%, Polish: 0.20%, Bokmal: 0.10%, Dutch: 0.10%, Somali: 0.10% >> Detection of 1000 sentences (average length: 108 chars) -Accuracy: 92.50% -Erroneously classified as Esperanto: 1.10%, Unknown: 1.00%, Portuguese: 0.90%, Italian: 0.80%, Latvian: 0.80%, Spanish: 0.70%, Estonian: 0.60%, French: 0.40%, Croatian: 0.30%, Afrikaans: 0.10%, English: 0.10%, Finnish: 0.10%, Indonesian: 0.10%, Romanian: 0.10%, Slovene: 0.10%, Swedish: 0.10%, Tagalog: 0.10%, Turkish: 0.10% +Accuracy: 92.40% +Erroneously classified as Esperanto: 1.10%, Unknown: 1.00%, Portuguese: 0.90%, Italian: 0.80%, Latvian: 0.80%, Spanish: 0.70%, Estonian: 0.60%, French: 0.40%, Croatian: 0.30%, Afrikaans: 0.10%, English: 0.10%, Finnish: 0.10%, Hungarian: 0.10%, Indonesian: 0.10%, Romanian: 0.10%, Slovene: 0.10%, Swedish: 0.10%, Tagalog: 0.10%, Turkish: 0.10% diff --git a/cmd/accuracy-reports/whatlang/Macedonian.txt b/cmd/accuracy-reports/whatlang/Macedonian.txt index db5418b..d0d3b81 100644 --- a/cmd/accuracy-reports/whatlang/Macedonian.txt +++ b/cmd/accuracy-reports/whatlang/Macedonian.txt @@ -1,14 +1,14 @@ ##### Macedonian ##### ->>> Accuracy on average: 62.47% +>>> Accuracy on average: 62.40% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 38.90% -Erroneously classified as Bulgarian: 20.60%, Serbian: 11.50%, Ukrainian: 8.30%, Unknown: 6.60%, Russian: 5.60%, Azerbaijani: 4.50%, Belarusian: 4.00% +Accuracy: 38.80% +Erroneously classified as Bulgarian: 20.70%, Serbian: 11.40%, Ukrainian: 8.40%, Unknown: 6.60%, Russian: 5.60%, Azerbaijani: 4.50%, Belarusian: 4.00% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 54.50% -Erroneously classified as Bulgarian: 21.80%, Serbian: 8.80%, Ukrainian: 5.10%, Russian: 3.00%, Unknown: 2.80%, Belarusian: 2.10%, Azerbaijani: 1.90% +Accuracy: 54.40% +Erroneously classified as Bulgarian: 21.90%, Serbian: 8.80%, Ukrainian: 5.00%, Russian: 3.10%, Unknown: 2.80%, Belarusian: 2.10%, Azerbaijani: 1.90% >> Detection of 1000 sentences (average length: 120 chars) Accuracy: 94.00% diff --git a/cmd/accuracy-reports/whatlang/Malayalam.txt b/cmd/accuracy-reports/whatlang/Malayalam.txt new file mode 100644 index 0000000..a8c8e35 --- /dev/null +++ b/cmd/accuracy-reports/whatlang/Malayalam.txt @@ -0,0 +1,16 @@ +##### Malayalam ##### + +>>> Accuracy on average: 100.00% + +>> Detection of 1000 single words (average length: 10 chars) +Accuracy: 100.00% +Erroneously classified as + +>> Detection of 1000 word pairs (average length: 20 chars) +Accuracy: 100.00% +Erroneously classified as + +>> Detection of 1000 sentences (average length: 127 chars) +Accuracy: 100.00% +Erroneously classified as + diff --git a/cmd/accuracy-reports/whatlang/Nynorsk.txt b/cmd/accuracy-reports/whatlang/Nynorsk.txt index 3e49ecc..0f8e201 100644 --- a/cmd/accuracy-reports/whatlang/Nynorsk.txt +++ b/cmd/accuracy-reports/whatlang/Nynorsk.txt @@ -1,10 +1,10 @@ ##### Nynorsk ##### ->>> Accuracy on average: 34.33% +>>> Accuracy on average: 34.30% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 10.10% -Erroneously classified as Unknown: 19.20%, Bokmal: 11.00%, Danish: 6.70%, Swedish: 5.60%, Afrikaans: 4.30%, Estonian: 4.10%, German: 3.90%, Dutch: 3.30%, Finnish: 3.30%, Esperanto: 2.90%, French: 2.40%, English: 2.20%, Indonesian: 2.00%, Romanian: 2.00%, Turkish: 2.00%, Italian: 1.80%, Portuguese: 1.80%, Spanish: 1.80%, Slovene: 1.70%, Croatian: 1.10%, Hungarian: 1.00%, Tagalog: 0.90%, Latvian: 0.80%, Lithuanian: 0.70%, Czech: 0.60%, Zulu: 0.60%, Polish: 0.50%, Shona: 0.50%, Azerbaijani: 0.40%, Somali: 0.40%, Vietnamese: 0.40% +Accuracy: 10.00% +Erroneously classified as Unknown: 19.20%, Bokmal: 11.10%, Danish: 6.70%, Swedish: 5.80%, Afrikaans: 4.20%, Estonian: 4.10%, German: 3.90%, Dutch: 3.30%, Finnish: 3.30%, Esperanto: 2.90%, French: 2.50%, English: 2.20%, Indonesian: 2.00%, Romanian: 2.00%, Turkish: 2.00%, Italian: 1.80%, Portuguese: 1.80%, Slovene: 1.70%, Spanish: 1.70%, Croatian: 1.10%, Hungarian: 1.00%, Tagalog: 0.90%, Latvian: 0.80%, Lithuanian: 0.70%, Czech: 0.60%, Zulu: 0.60%, Polish: 0.50%, Azerbaijani: 0.40%, Shona: 0.40%, Somali: 0.40%, Vietnamese: 0.40% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 23.80% diff --git a/cmd/accuracy-reports/whatlang/Persian.txt b/cmd/accuracy-reports/whatlang/Persian.txt index a059e33..ea3ac94 100644 --- a/cmd/accuracy-reports/whatlang/Persian.txt +++ b/cmd/accuracy-reports/whatlang/Persian.txt @@ -1,14 +1,14 @@ ##### Persian ##### ->>> Accuracy on average: 70.27% +>>> Accuracy on average: 70.30% >> Detection of 1000 single words (average length: 6 chars) Accuracy: 45.60% -Erroneously classified as Unknown: 21.20%, Urdu: 17.40%, Arabic: 15.80% +Erroneously classified as Unknown: 21.30%, Urdu: 17.30%, Arabic: 15.80% >> Detection of 1000 word pairs (average length: 13 chars) -Accuracy: 66.00% -Erroneously classified as Urdu: 14.40%, Arabic: 11.30%, Unknown: 8.30% +Accuracy: 66.10% +Erroneously classified as Urdu: 14.30%, Arabic: 11.30%, Unknown: 8.30% >> Detection of 1000 sentences (average length: 107 chars) Accuracy: 99.20% diff --git a/cmd/accuracy-reports/whatlang/Polish.txt b/cmd/accuracy-reports/whatlang/Polish.txt index 11c679d..e556511 100644 --- a/cmd/accuracy-reports/whatlang/Polish.txt +++ b/cmd/accuracy-reports/whatlang/Polish.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 44.90% -Erroneously classified as Unknown: 16.50%, Esperanto: 3.50%, Shona: 3.10%, Croatian: 2.80%, Czech: 2.60%, Estonian: 2.10%, German: 2.10%, Romanian: 2.10%, Slovene: 2.10%, Portuguese: 1.80%, English: 1.40%, Nynorsk: 1.40%, Spanish: 1.30%, Indonesian: 1.20%, Latvian: 1.10%, Hungarian: 1.00%, Afrikaans: 0.90%, Italian: 0.90%, Turkish: 0.90%, Finnish: 0.80%, French: 0.80%, Bokmal: 0.70%, Zulu: 0.70%, Dutch: 0.60%, Lithuanian: 0.60%, Somali: 0.60%, Swedish: 0.40%, Tagalog: 0.40%, Vietnamese: 0.40%, Danish: 0.30% +Erroneously classified as Unknown: 16.40%, Esperanto: 3.50%, Shona: 3.10%, Croatian: 2.80%, Czech: 2.60%, Estonian: 2.10%, German: 2.10%, Romanian: 2.10%, Slovene: 2.10%, Portuguese: 1.80%, Nynorsk: 1.50%, English: 1.40%, Spanish: 1.30%, Indonesian: 1.20%, Latvian: 1.10%, Hungarian: 1.00%, Afrikaans: 0.90%, Italian: 0.90%, Turkish: 0.90%, Finnish: 0.80%, French: 0.80%, Bokmal: 0.70%, Zulu: 0.70%, Dutch: 0.60%, Lithuanian: 0.60%, Somali: 0.60%, Swedish: 0.40%, Tagalog: 0.40%, Vietnamese: 0.40%, Danish: 0.30% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 58.80% diff --git a/cmd/accuracy-reports/whatlang/Romanian.txt b/cmd/accuracy-reports/whatlang/Romanian.txt index 0b3298a..bf66858 100644 --- a/cmd/accuracy-reports/whatlang/Romanian.txt +++ b/cmd/accuracy-reports/whatlang/Romanian.txt @@ -1,10 +1,10 @@ ##### Romanian ##### ->>> Accuracy on average: 59.00% +>>> Accuracy on average: 59.03% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 34.50% -Erroneously classified as Unknown: 16.00%, French: 4.60%, English: 3.40%, Italian: 3.40%, German: 2.80%, Esperanto: 2.50%, Estonian: 2.50%, Portuguese: 2.40%, Spanish: 2.40%, Lithuanian: 1.90%, Afrikaans: 1.80%, Dutch: 1.80%, Croatian: 1.70%, Finnish: 1.70%, Indonesian: 1.60%, Turkish: 1.50%, Czech: 1.40%, Tagalog: 1.40%, Zulu: 1.40%, Danish: 1.30%, Shona: 1.30%, Polish: 1.20%, Hungarian: 1.00%, Bokmal: 0.90%, Nynorsk: 0.90%, Slovene: 0.80%, Latvian: 0.70%, Swedish: 0.50%, Azerbaijani: 0.40%, Somali: 0.20%, Vietnamese: 0.10% +Accuracy: 34.60% +Erroneously classified as Unknown: 16.00%, French: 4.60%, Italian: 3.40%, English: 3.30%, German: 2.80%, Esperanto: 2.50%, Estonian: 2.50%, Portuguese: 2.40%, Spanish: 2.40%, Lithuanian: 1.90%, Afrikaans: 1.80%, Croatian: 1.70%, Dutch: 1.70%, Finnish: 1.70%, Indonesian: 1.60%, Turkish: 1.50%, Zulu: 1.50%, Czech: 1.40%, Tagalog: 1.40%, Danish: 1.30%, Shona: 1.30%, Polish: 1.20%, Hungarian: 1.00%, Bokmal: 0.90%, Nynorsk: 0.90%, Slovene: 0.80%, Latvian: 0.70%, Swedish: 0.50%, Azerbaijani: 0.40%, Somali: 0.20%, Vietnamese: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 52.40% diff --git a/cmd/accuracy-reports/whatlang/Russian.txt b/cmd/accuracy-reports/whatlang/Russian.txt index ffca034..d965c4d 100644 --- a/cmd/accuracy-reports/whatlang/Russian.txt +++ b/cmd/accuracy-reports/whatlang/Russian.txt @@ -1,14 +1,14 @@ ##### Russian ##### ->>> Accuracy on average: 53.23% +>>> Accuracy on average: 53.27% >> Detection of 1000 single words (average length: 8 chars) Accuracy: 39.60% Erroneously classified as Serbian: 14.20%, Ukrainian: 12.80%, Bulgarian: 9.30%, Belarusian: 8.60%, Unknown: 6.30%, Macedonian: 5.70%, Azerbaijani: 3.50% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 51.60% -Erroneously classified as Serbian: 12.40%, Ukrainian: 10.70%, Bulgarian: 7.90%, Macedonian: 7.60%, Belarusian: 5.00%, Unknown: 3.60%, Azerbaijani: 1.20% +Accuracy: 51.70% +Erroneously classified as Serbian: 12.30%, Ukrainian: 10.70%, Bulgarian: 8.00%, Macedonian: 7.50%, Belarusian: 5.00%, Unknown: 3.60%, Azerbaijani: 1.20% >> Detection of 1000 sentences (average length: 65 chars) Accuracy: 68.50% diff --git a/cmd/accuracy-reports/whatlang/Serbian.txt b/cmd/accuracy-reports/whatlang/Serbian.txt index 756f962..cea411b 100644 --- a/cmd/accuracy-reports/whatlang/Serbian.txt +++ b/cmd/accuracy-reports/whatlang/Serbian.txt @@ -1,14 +1,14 @@ ##### Serbian ##### ->>> Accuracy on average: 56.83% +>>> Accuracy on average: 56.80% >> Detection of 1000 single words (average length: 8 chars) Accuracy: 33.70% Erroneously classified as Macedonian: 14.50%, Ukrainian: 12.80%, Bulgarian: 12.50%, Unknown: 7.90%, Belarusian: 6.90%, Russian: 6.90%, Azerbaijani: 4.80% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 50.90% -Erroneously classified as Macedonian: 16.50%, Bulgarian: 12.50%, Ukrainian: 7.10%, Russian: 4.20%, Belarusian: 3.60%, Unknown: 3.50%, Azerbaijani: 1.70% +Accuracy: 50.80% +Erroneously classified as Macedonian: 16.50%, Bulgarian: 12.50%, Ukrainian: 7.10%, Russian: 4.30%, Belarusian: 3.60%, Unknown: 3.50%, Azerbaijani: 1.70% >> Detection of 1000 sentences (average length: 100 chars) Accuracy: 85.90% diff --git a/cmd/accuracy-reports/whatlang/Shona.txt b/cmd/accuracy-reports/whatlang/Shona.txt index 2b1bab9..9e9c9c9 100644 --- a/cmd/accuracy-reports/whatlang/Shona.txt +++ b/cmd/accuracy-reports/whatlang/Shona.txt @@ -1,10 +1,10 @@ ##### Shona ##### ->>> Accuracy on average: 68.17% +>>> Accuracy on average: 68.20% >> Detection of 1000 single words (average length: 9 chars) -Accuracy: 44.30% -Erroneously classified as Unknown: 31.90%, Tagalog: 2.40%, Zulu: 2.30%, Indonesian: 2.00%, Italian: 1.40%, Croatian: 1.20%, English: 1.20%, Finnish: 1.10%, French: 1.00%, Somali: 1.00%, Dutch: 0.90%, Danish: 0.80%, Estonian: 0.80%, Lithuanian: 0.80%, Bokmal: 0.70%, Portuguese: 0.70%, Romanian: 0.70%, Spanish: 0.60%, Turkish: 0.60%, Czech: 0.50%, German: 0.50%, Latvian: 0.50%, Slovene: 0.50%, Swedish: 0.50%, Hungarian: 0.40%, Polish: 0.30%, Afrikaans: 0.20%, Esperanto: 0.10%, Nynorsk: 0.10% +Accuracy: 44.40% +Erroneously classified as Unknown: 31.80%, Tagalog: 2.40%, Zulu: 2.30%, Indonesian: 2.00%, Italian: 1.40%, Croatian: 1.20%, English: 1.20%, Finnish: 1.10%, French: 1.00%, Somali: 1.00%, Dutch: 0.90%, Danish: 0.80%, Estonian: 0.80%, Lithuanian: 0.80%, Bokmal: 0.70%, Portuguese: 0.70%, Romanian: 0.70%, Spanish: 0.60%, Turkish: 0.60%, Czech: 0.50%, German: 0.50%, Latvian: 0.50%, Slovene: 0.50%, Swedish: 0.50%, Hungarian: 0.40%, Polish: 0.30%, Afrikaans: 0.20%, Esperanto: 0.10%, Nynorsk: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 65.20% diff --git a/cmd/accuracy-reports/whatlang/Somali.txt b/cmd/accuracy-reports/whatlang/Somali.txt index 7b605a9..12d1ced 100644 --- a/cmd/accuracy-reports/whatlang/Somali.txt +++ b/cmd/accuracy-reports/whatlang/Somali.txt @@ -4,7 +4,7 @@ >> Detection of 1000 single words (average length: 8 chars) Accuracy: 38.00% -Erroneously classified as Unknown: 36.90%, Tagalog: 4.40%, Estonian: 1.90%, Indonesian: 1.60%, Finnish: 1.40%, Zulu: 1.20%, Portuguese: 1.10%, English: 1.00%, Dutch: 0.90%, Italian: 0.90%, Lithuanian: 0.90%, Latvian: 0.80%, Shona: 0.80%, Spanish: 0.80%, Afrikaans: 0.70%, Esperanto: 0.70%, Swedish: 0.70%, Azerbaijani: 0.60%, Czech: 0.50%, Romanian: 0.50%, Turkish: 0.50%, Croatian: 0.40%, Danish: 0.40%, Hungarian: 0.40%, Polish: 0.40%, Slovene: 0.40%, Vietnamese: 0.40%, Nynorsk: 0.30%, French: 0.20%, German: 0.20%, Bokmal: 0.10% +Erroneously classified as Unknown: 37.00%, Tagalog: 4.40%, Estonian: 1.90%, Indonesian: 1.60%, Finnish: 1.40%, Zulu: 1.20%, Portuguese: 1.10%, English: 1.00%, Dutch: 0.90%, Italian: 0.90%, Lithuanian: 0.90%, Latvian: 0.80%, Shona: 0.80%, Spanish: 0.80%, Afrikaans: 0.70%, Esperanto: 0.70%, Azerbaijani: 0.60%, Swedish: 0.60%, Czech: 0.50%, Romanian: 0.50%, Turkish: 0.50%, Croatian: 0.40%, Danish: 0.40%, Hungarian: 0.40%, Polish: 0.40%, Slovene: 0.40%, French: 0.30%, Nynorsk: 0.30%, Vietnamese: 0.30%, German: 0.20%, Bokmal: 0.10% >> Detection of 1000 word pairs (average length: 15 chars) Accuracy: 66.40% diff --git a/cmd/accuracy-reports/whatlang/Spanish.txt b/cmd/accuracy-reports/whatlang/Spanish.txt index a88e7e0..b29b074 100644 --- a/cmd/accuracy-reports/whatlang/Spanish.txt +++ b/cmd/accuracy-reports/whatlang/Spanish.txt @@ -1,14 +1,14 @@ ##### Spanish ##### ->>> Accuracy on average: 48.13% +>>> Accuracy on average: 48.10% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 18.80% -Erroneously classified as Unknown: 16.50%, Portuguese: 13.40%, French: 5.10%, Esperanto: 4.40%, Romanian: 3.70%, Italian: 3.40%, Lithuanian: 3.00%, English: 2.60%, Estonian: 2.60%, German: 2.30%, Indonesian: 2.20%, Finnish: 2.00%, Tagalog: 2.00%, Dutch: 1.90%, Nynorsk: 1.80%, Somali: 1.60%, Swedish: 1.60%, Latvian: 1.30%, Turkish: 1.30%, Hungarian: 1.20%, Slovene: 1.20%, Afrikaans: 0.90%, Danish: 0.90%, Shona: 0.90%, Czech: 0.70%, Bokmal: 0.60%, Croatian: 0.60%, Polish: 0.60%, Zulu: 0.60%, Azerbaijani: 0.20%, Vietnamese: 0.10% +Accuracy: 18.70% +Erroneously classified as Unknown: 16.50%, Portuguese: 13.50%, French: 5.10%, Esperanto: 4.40%, Romanian: 3.60%, Italian: 3.40%, Lithuanian: 3.00%, English: 2.70%, Estonian: 2.60%, German: 2.30%, Indonesian: 2.20%, Finnish: 2.10%, Tagalog: 2.00%, Dutch: 1.90%, Nynorsk: 1.70%, Somali: 1.60%, Swedish: 1.60%, Latvian: 1.30%, Turkish: 1.30%, Hungarian: 1.20%, Slovene: 1.20%, Afrikaans: 0.90%, Danish: 0.90%, Shona: 0.90%, Czech: 0.70%, Bokmal: 0.60%, Croatian: 0.60%, Polish: 0.60%, Zulu: 0.60%, Azerbaijani: 0.20%, Vietnamese: 0.10% >> Detection of 1000 word pairs (average length: 16 chars) Accuracy: 32.90% -Erroneously classified as Portuguese: 20.50%, Unknown: 10.50%, Esperanto: 4.50%, Italian: 3.80%, French: 3.60%, Lithuanian: 2.30%, Romanian: 2.10%, English: 1.80%, Swedish: 1.60%, Danish: 1.40%, German: 1.40%, Indonesian: 1.40%, Finnish: 1.30%, Estonian: 1.20%, Afrikaans: 1.10%, Bokmal: 1.10%, Nynorsk: 1.10%, Slovene: 1.10%, Dutch: 1.00%, Shona: 1.00%, Tagalog: 0.80%, Somali: 0.60%, Latvian: 0.40%, Polish: 0.40%, Turkish: 0.30%, Zulu: 0.30%, Hungarian: 0.20%, Azerbaijani: 0.10%, Croatian: 0.10%, Czech: 0.10% +Erroneously classified as Portuguese: 20.50%, Unknown: 10.40%, Esperanto: 4.50%, Italian: 3.80%, French: 3.60%, Lithuanian: 2.30%, Romanian: 2.10%, English: 1.80%, Swedish: 1.60%, Danish: 1.40%, Finnish: 1.40%, German: 1.40%, Indonesian: 1.40%, Estonian: 1.20%, Afrikaans: 1.10%, Bokmal: 1.10%, Nynorsk: 1.10%, Slovene: 1.10%, Dutch: 1.00%, Shona: 1.00%, Tagalog: 0.80%, Somali: 0.60%, Latvian: 0.40%, Polish: 0.40%, Turkish: 0.30%, Zulu: 0.30%, Hungarian: 0.20%, Azerbaijani: 0.10%, Croatian: 0.10%, Czech: 0.10% >> Detection of 1000 sentences (average length: 126 chars) Accuracy: 92.70% diff --git a/cmd/accuracy-reports/whatlang/Swedish.txt b/cmd/accuracy-reports/whatlang/Swedish.txt index fce4a1e..c64dd36 100644 --- a/cmd/accuracy-reports/whatlang/Swedish.txt +++ b/cmd/accuracy-reports/whatlang/Swedish.txt @@ -1,14 +1,14 @@ ##### Swedish ##### ->>> Accuracy on average: 48.73% +>>> Accuracy on average: 48.70% >> Detection of 1000 single words (average length: 9 chars) -Accuracy: 23.90% -Erroneously classified as Unknown: 18.70%, Bokmal: 7.00%, Danish: 5.50%, Nynorsk: 5.30%, German: 3.80%, Dutch: 3.40%, Finnish: 3.30%, French: 3.30%, English: 2.90%, Portuguese: 2.70%, Spanish: 2.30%, Afrikaans: 1.90%, Romanian: 1.70%, Indonesian: 1.60%, Estonian: 1.50%, Esperanto: 1.30%, Lithuanian: 1.20%, Hungarian: 1.10%, Italian: 1.10%, Latvian: 1.10%, Turkish: 0.90%, Somali: 0.80%, Slovene: 0.70%, Zulu: 0.70%, Czech: 0.60%, Tagalog: 0.60%, Shona: 0.40%, Croatian: 0.30%, Vietnamese: 0.20%, Azerbaijani: 0.10%, Polish: 0.10% +Accuracy: 23.80% +Erroneously classified as Unknown: 18.70%, Bokmal: 6.90%, Danish: 5.70%, Nynorsk: 5.30%, German: 3.80%, Dutch: 3.40%, Finnish: 3.30%, French: 3.30%, English: 2.90%, Portuguese: 2.70%, Spanish: 2.30%, Afrikaans: 1.90%, Romanian: 1.70%, Indonesian: 1.60%, Estonian: 1.50%, Esperanto: 1.30%, Lithuanian: 1.20%, Hungarian: 1.10%, Italian: 1.10%, Latvian: 1.10%, Turkish: 0.90%, Slovene: 0.80%, Somali: 0.80%, Zulu: 0.70%, Czech: 0.60%, Tagalog: 0.60%, Croatian: 0.30%, Shona: 0.30%, Vietnamese: 0.20%, Azerbaijani: 0.10%, Polish: 0.10% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 39.50% -Erroneously classified as Unknown: 11.30%, Bokmal: 8.10%, Danish: 7.90%, Nynorsk: 4.90%, French: 3.00%, Dutch: 2.70%, English: 2.60%, Finnish: 2.40%, Afrikaans: 2.30%, Portuguese: 2.30%, German: 2.10%, Spanish: 1.90%, Esperanto: 1.40%, Romanian: 1.10%, Estonian: 0.80%, Italian: 0.80%, Indonesian: 0.60%, Lithuanian: 0.60%, Tagalog: 0.60%, Turkish: 0.60%, Latvian: 0.50%, Hungarian: 0.40%, Croatian: 0.30%, Czech: 0.30%, Slovene: 0.30%, Zulu: 0.30%, Somali: 0.20%, Shona: 0.10%, Vietnamese: 0.10% +Erroneously classified as Unknown: 11.30%, Bokmal: 8.10%, Danish: 7.90%, Nynorsk: 4.90%, French: 3.00%, Dutch: 2.70%, English: 2.60%, Afrikaans: 2.40%, Finnish: 2.40%, Portuguese: 2.30%, German: 2.10%, Spanish: 1.90%, Esperanto: 1.40%, Romanian: 1.10%, Estonian: 0.80%, Italian: 0.80%, Lithuanian: 0.60%, Tagalog: 0.60%, Turkish: 0.60%, Indonesian: 0.50%, Latvian: 0.50%, Hungarian: 0.40%, Croatian: 0.30%, Czech: 0.30%, Slovene: 0.30%, Zulu: 0.30%, Somali: 0.20%, Shona: 0.10%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 90 chars) Accuracy: 82.80% diff --git a/cmd/accuracy-reports/whatlang/Tagalog.txt b/cmd/accuracy-reports/whatlang/Tagalog.txt index e6d4d64..3b09059 100644 --- a/cmd/accuracy-reports/whatlang/Tagalog.txt +++ b/cmd/accuracy-reports/whatlang/Tagalog.txt @@ -1,14 +1,14 @@ ##### Tagalog ##### ->>> Accuracy on average: 51.93% +>>> Accuracy on average: 51.87% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 23.20% -Erroneously classified as Unknown: 42.80%, Indonesian: 3.90%, English: 2.90%, Estonian: 2.40%, French: 2.20%, Esperanto: 2.10%, Spanish: 1.70%, Portuguese: 1.60%, German: 1.40%, Lithuanian: 1.40%, Afrikaans: 1.30%, Italian: 1.20%, Somali: 1.20%, Dutch: 1.10%, Nynorsk: 1.10%, Zulu: 1.10%, Shona: 0.90%, Romanian: 0.80%, Croatian: 0.70%, Turkish: 0.70%, Bokmal: 0.60%, Latvian: 0.50%, Slovene: 0.50%, Swedish: 0.50%, Danish: 0.40%, Finnish: 0.40%, Hungarian: 0.40%, Polish: 0.40%, Czech: 0.30%, Azerbaijani: 0.20%, Vietnamese: 0.10% +Accuracy: 23.10% +Erroneously classified as Unknown: 43.00%, Indonesian: 3.90%, English: 2.90%, Estonian: 2.40%, French: 2.20%, Esperanto: 2.00%, Spanish: 1.70%, Portuguese: 1.60%, German: 1.40%, Lithuanian: 1.40%, Afrikaans: 1.30%, Italian: 1.20%, Somali: 1.20%, Dutch: 1.10%, Nynorsk: 1.10%, Zulu: 1.10%, Shona: 0.90%, Romanian: 0.80%, Croatian: 0.70%, Turkish: 0.70%, Bokmal: 0.60%, Latvian: 0.50%, Slovene: 0.50%, Swedish: 0.50%, Danish: 0.40%, Finnish: 0.40%, Hungarian: 0.40%, Polish: 0.40%, Czech: 0.30%, Azerbaijani: 0.20%, Vietnamese: 0.10% >> Detection of 1000 word pairs (average length: 16 chars) -Accuracy: 42.90% -Erroneously classified as Unknown: 39.50%, Indonesian: 2.50%, English: 2.40%, Spanish: 1.70%, French: 1.40%, Estonian: 1.00%, Lithuanian: 0.90%, Esperanto: 0.80%, German: 0.80%, Afrikaans: 0.60%, Italian: 0.60%, Portuguese: 0.60%, Romanian: 0.60%, Zulu: 0.60%, Danish: 0.50%, Dutch: 0.40%, Shona: 0.40%, Somali: 0.40%, Nynorsk: 0.30%, Swedish: 0.20%, Turkish: 0.20%, Bokmal: 0.10%, Croatian: 0.10%, Czech: 0.10%, Finnish: 0.10%, Hungarian: 0.10%, Slovene: 0.10%, Vietnamese: 0.10% +Accuracy: 42.80% +Erroneously classified as Unknown: 39.60%, Indonesian: 2.50%, English: 2.40%, Spanish: 1.70%, French: 1.40%, Estonian: 1.00%, Lithuanian: 0.90%, Esperanto: 0.80%, German: 0.80%, Afrikaans: 0.60%, Italian: 0.60%, Portuguese: 0.60%, Romanian: 0.60%, Zulu: 0.60%, Danish: 0.50%, Dutch: 0.40%, Shona: 0.40%, Somali: 0.40%, Nynorsk: 0.30%, Swedish: 0.20%, Turkish: 0.20%, Bokmal: 0.10%, Croatian: 0.10%, Czech: 0.10%, Finnish: 0.10%, Hungarian: 0.10%, Slovene: 0.10%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 123 chars) Accuracy: 89.70% diff --git a/cmd/accuracy-reports/whatlang/Turkish.txt b/cmd/accuracy-reports/whatlang/Turkish.txt index b437293..ec303fc 100644 --- a/cmd/accuracy-reports/whatlang/Turkish.txt +++ b/cmd/accuracy-reports/whatlang/Turkish.txt @@ -1,14 +1,14 @@ ##### Turkish ##### ->>> Accuracy on average: 53.97% +>>> Accuracy on average: 54.00% >> Detection of 1000 single words (average length: 9 chars) Accuracy: 25.90% -Erroneously classified as Unknown: 31.00%, Azerbaijani: 5.30%, Tagalog: 4.20%, Indonesian: 3.90%, Estonian: 2.30%, Hungarian: 2.20%, Dutch: 1.90%, Danish: 1.80%, Finnish: 1.80%, Afrikaans: 1.60%, Zulu: 1.60%, Somali: 1.40%, Bokmal: 1.30%, Romanian: 1.30%, Esperanto: 1.20%, German: 1.20%, Nynorsk: 1.10%, Swedish: 1.10%, Croatian: 0.90%, Italian: 0.90%, Portuguese: 0.90%, Spanish: 0.90%, English: 0.80%, Shona: 0.80%, French: 0.70%, Czech: 0.40%, Latvian: 0.40%, Lithuanian: 0.40%, Polish: 0.40%, Slovene: 0.40% +Erroneously classified as Unknown: 31.10%, Azerbaijani: 5.30%, Tagalog: 4.10%, Indonesian: 3.90%, Estonian: 2.30%, Hungarian: 2.20%, Dutch: 1.90%, Danish: 1.80%, Finnish: 1.80%, Zulu: 1.60%, Afrikaans: 1.50%, Somali: 1.40%, Bokmal: 1.30%, German: 1.30%, Romanian: 1.30%, Esperanto: 1.20%, Nynorsk: 1.10%, Swedish: 1.10%, Croatian: 0.90%, Italian: 0.90%, Portuguese: 0.90%, Spanish: 0.90%, English: 0.80%, Shona: 0.80%, French: 0.70%, Czech: 0.40%, Latvian: 0.40%, Lithuanian: 0.40%, Polish: 0.40%, Slovene: 0.40% >> Detection of 1000 word pairs (average length: 17 chars) -Accuracy: 44.00% -Erroneously classified as Unknown: 22.80%, Indonesian: 4.20%, Azerbaijani: 3.60%, Estonian: 2.20%, Danish: 1.90%, Dutch: 1.60%, Zulu: 1.60%, Afrikaans: 1.50%, German: 1.50%, Lithuanian: 1.30%, Nynorsk: 1.20%, Tagalog: 1.20%, Hungarian: 1.10%, Portuguese: 1.10%, Spanish: 1.10%, Swedish: 1.10%, Italian: 0.90%, Bokmal: 0.80%, Croatian: 0.80%, Finnish: 0.80%, Romanian: 0.80%, Esperanto: 0.70%, Shona: 0.50%, Somali: 0.50%, French: 0.40%, English: 0.30%, Slovene: 0.30%, Czech: 0.10%, Latvian: 0.10% +Accuracy: 44.10% +Erroneously classified as Unknown: 22.70%, Indonesian: 4.20%, Azerbaijani: 3.60%, Estonian: 2.20%, Danish: 1.90%, Dutch: 1.60%, Zulu: 1.60%, Afrikaans: 1.50%, German: 1.50%, Lithuanian: 1.30%, Nynorsk: 1.20%, Tagalog: 1.20%, Hungarian: 1.10%, Portuguese: 1.10%, Spanish: 1.10%, Swedish: 1.10%, Italian: 0.90%, Bokmal: 0.80%, Croatian: 0.80%, Finnish: 0.80%, Romanian: 0.80%, Esperanto: 0.70%, Shona: 0.50%, Somali: 0.50%, French: 0.40%, English: 0.30%, Slovene: 0.30%, Czech: 0.10%, Latvian: 0.10% >> Detection of 1000 sentences (average length: 119 chars) Accuracy: 92.00% diff --git a/cmd/accuracy-reports/whatlang/Ukrainian.txt b/cmd/accuracy-reports/whatlang/Ukrainian.txt index 82335ef..586e172 100644 --- a/cmd/accuracy-reports/whatlang/Ukrainian.txt +++ b/cmd/accuracy-reports/whatlang/Ukrainian.txt @@ -1,14 +1,14 @@ ##### Ukrainian ##### ->>> Accuracy on average: 72.27% +>>> Accuracy on average: 72.30% >> Detection of 1000 single words (average length: 8 chars) -Accuracy: 52.70% -Erroneously classified as Russian: 12.90%, Serbian: 10.00%, Unknown: 6.70%, Bulgarian: 5.90%, Belarusian: 5.80%, Macedonian: 4.00%, Azerbaijani: 2.00% +Accuracy: 52.80% +Erroneously classified as Russian: 12.90%, Serbian: 9.90%, Unknown: 6.70%, Bulgarian: 5.90%, Belarusian: 5.70%, Macedonian: 4.10%, Azerbaijani: 2.00% >> Detection of 1000 word pairs (average length: 17 chars) Accuracy: 71.30% -Erroneously classified as Russian: 9.10%, Serbian: 6.70%, Bulgarian: 3.70%, Belarusian: 3.20%, Macedonian: 2.90%, Unknown: 2.00%, Azerbaijani: 1.10% +Erroneously classified as Russian: 9.20%, Serbian: 6.70%, Bulgarian: 3.60%, Belarusian: 3.20%, Macedonian: 2.90%, Unknown: 2.00%, Azerbaijani: 1.10% >> Detection of 1000 sentences (average length: 108 chars) Accuracy: 92.80% diff --git a/cmd/accuracy-reports/whatlang/Urdu.txt b/cmd/accuracy-reports/whatlang/Urdu.txt index 75d2be6..e3c41bd 100644 --- a/cmd/accuracy-reports/whatlang/Urdu.txt +++ b/cmd/accuracy-reports/whatlang/Urdu.txt @@ -1,10 +1,10 @@ ##### Urdu ##### ->>> Accuracy on average: 56.97% +>>> Accuracy on average: 56.93% >> Detection of 1000 single words (average length: 6 chars) -Accuracy: 30.90% -Erroneously classified as Unknown: 33.60%, Persian: 23.80%, Arabic: 11.70% +Accuracy: 30.80% +Erroneously classified as Unknown: 33.80%, Persian: 23.80%, Arabic: 11.60% >> Detection of 1000 word pairs (average length: 13 chars) Accuracy: 45.50% diff --git a/cmd/accuracy-reports/whatlang/Vietnamese.txt b/cmd/accuracy-reports/whatlang/Vietnamese.txt index b20a7d8..1fb0f89 100644 --- a/cmd/accuracy-reports/whatlang/Vietnamese.txt +++ b/cmd/accuracy-reports/whatlang/Vietnamese.txt @@ -1,10 +1,10 @@ ##### Vietnamese ##### ->>> Accuracy on average: 72.75% +>>> Accuracy on average: 72.64% >> Detection of 879 single words (average length: 4 chars) -Accuracy: 36.29% -Erroneously classified as Unknown: 43.46%, Tagalog: 2.05%, Zulu: 2.05%, English: 1.93%, Yoruba: 1.93%, Lithuanian: 1.37%, Indonesian: 1.25%, Romanian: 0.91%, Spanish: 0.91%, French: 0.80%, Latvian: 0.80%, Shona: 0.80%, Afrikaans: 0.68%, Swedish: 0.57%, Turkish: 0.57%, Croatian: 0.46%, German: 0.46%, Hungarian: 0.46%, Czech: 0.34%, Polish: 0.34%, Portuguese: 0.34%, Estonian: 0.23%, Italian: 0.23%, Somali: 0.23%, Bokmal: 0.11%, Dutch: 0.11%, Esperanto: 0.11%, Nynorsk: 0.11%, Slovene: 0.11% +Accuracy: 35.95% +Erroneously classified as Unknown: 44.25%, English: 2.39%, Zulu: 2.28%, Yoruba: 1.93%, Lithuanian: 1.37%, Indonesian: 1.25%, Romanian: 0.91%, Tagalog: 0.91%, French: 0.80%, Shona: 0.80%, Spanish: 0.80%, Afrikaans: 0.68%, Latvian: 0.68%, Swedish: 0.57%, Turkish: 0.57%, Croatian: 0.46%, German: 0.46%, Hungarian: 0.46%, Portuguese: 0.46%, Czech: 0.34%, Polish: 0.34%, Estonian: 0.23%, Italian: 0.23%, Nynorsk: 0.23%, Somali: 0.23%, Bokmal: 0.11%, Dutch: 0.11%, Esperanto: 0.11%, Slovene: 0.11% >> Detection of 957 word pairs (average length: 12 chars) Accuracy: 85.27% diff --git a/cmd/accuracy-reports/whatlang/Yoruba.txt b/cmd/accuracy-reports/whatlang/Yoruba.txt index 5bbdc53..92e8c44 100644 --- a/cmd/accuracy-reports/whatlang/Yoruba.txt +++ b/cmd/accuracy-reports/whatlang/Yoruba.txt @@ -4,11 +4,11 @@ >> Detection of 1000 single words (average length: 7 chars) Accuracy: 11.00% -Erroneously classified as Unknown: 36.60%, Romanian: 3.20%, Estonian: 2.90%, Indonesian: 2.80%, French: 2.40%, Tagalog: 2.30%, English: 2.20%, German: 2.20%, Afrikaans: 2.10%, Esperanto: 2.00%, Latvian: 2.00%, Zulu: 2.00%, Croatian: 1.90%, Shona: 1.90%, Finnish: 1.80%, Hungarian: 1.80%, Azerbaijani: 1.60%, Italian: 1.60%, Turkish: 1.50%, Lithuanian: 1.40%, Czech: 1.30%, Danish: 1.30%, Slovene: 1.30%, Spanish: 1.20%, Bokmal: 1.10%, Dutch: 1.10%, Polish: 1.10%, Portuguese: 1.10%, Somali: 1.10%, Swedish: 1.00%, Nynorsk: 0.80%, Vietnamese: 0.40% +Erroneously classified as Unknown: 36.60%, Romanian: 3.20%, Estonian: 2.90%, Indonesian: 2.80%, French: 2.40%, Tagalog: 2.30%, English: 2.20%, German: 2.20%, Afrikaans: 2.10%, Esperanto: 2.00%, Latvian: 2.00%, Zulu: 2.00%, Croatian: 1.90%, Shona: 1.90%, Finnish: 1.80%, Hungarian: 1.80%, Azerbaijani: 1.60%, Italian: 1.60%, Turkish: 1.50%, Lithuanian: 1.40%, Czech: 1.30%, Slovene: 1.30%, Danish: 1.20%, Dutch: 1.20%, Spanish: 1.20%, Bokmal: 1.10%, Polish: 1.10%, Portuguese: 1.10%, Somali: 1.10%, Swedish: 1.00%, Nynorsk: 0.80%, Vietnamese: 0.40% >> Detection of 1000 word pairs (average length: 15 chars) Accuracy: 14.20% -Erroneously classified as Unknown: 36.70%, Romanian: 5.70%, Indonesian: 3.00%, Italian: 3.00%, Tagalog: 2.50%, Estonian: 2.30%, Turkish: 2.30%, French: 2.20%, Somali: 2.20%, Afrikaans: 2.00%, English: 2.00%, Esperanto: 2.00%, Shona: 1.90%, German: 1.80%, Croatian: 1.50%, Finnish: 1.40%, Portuguese: 1.40%, Spanish: 1.30%, Lithuanian: 1.10%, Zulu: 1.10%, Danish: 1.00%, Nynorsk: 1.00%, Slovene: 1.00%, Swedish: 0.90%, Azerbaijani: 0.80%, Bokmal: 0.80%, Dutch: 0.70%, Hungarian: 0.70%, Latvian: 0.50%, Polish: 0.50%, Czech: 0.40%, Vietnamese: 0.10% +Erroneously classified as Unknown: 36.80%, Romanian: 5.70%, Indonesian: 3.00%, Italian: 3.00%, Tagalog: 2.50%, Turkish: 2.30%, Estonian: 2.20%, French: 2.20%, Somali: 2.20%, Afrikaans: 2.00%, English: 2.00%, Esperanto: 2.00%, Shona: 1.90%, German: 1.80%, Croatian: 1.50%, Finnish: 1.40%, Portuguese: 1.40%, Spanish: 1.30%, Lithuanian: 1.10%, Zulu: 1.10%, Danish: 1.00%, Nynorsk: 1.00%, Slovene: 1.00%, Swedish: 0.90%, Azerbaijani: 0.80%, Bokmal: 0.80%, Dutch: 0.70%, Hungarian: 0.70%, Latvian: 0.50%, Polish: 0.50%, Czech: 0.40%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 86 chars) Accuracy: 41.10% diff --git a/cmd/accuracy-reports/whatlang/Zulu.txt b/cmd/accuracy-reports/whatlang/Zulu.txt index 0270835..d0e1b4a 100644 --- a/cmd/accuracy-reports/whatlang/Zulu.txt +++ b/cmd/accuracy-reports/whatlang/Zulu.txt @@ -1,14 +1,14 @@ ##### Zulu ##### ->>> Accuracy on average: 70.07% +>>> Accuracy on average: 70.03% >> Detection of 1000 single words (average length: 9 chars) Accuracy: 44.40% -Erroneously classified as Unknown: 30.40%, Shona: 3.40%, Tagalog: 2.40%, Italian: 1.70%, Somali: 1.70%, Estonian: 1.40%, English: 1.30%, Danish: 1.20%, Indonesian: 1.00%, Portuguese: 1.00%, Turkish: 0.90%, Slovene: 0.80%, Croatian: 0.70%, French: 0.70%, German: 0.70%, Romanian: 0.70%, Azerbaijani: 0.60%, Czech: 0.60%, Dutch: 0.60%, Spanish: 0.60%, Swedish: 0.60%, Esperanto: 0.40%, Lithuanian: 0.40%, Afrikaans: 0.30%, Finnish: 0.30%, Hungarian: 0.30%, Nynorsk: 0.30%, Bokmal: 0.20%, Latvian: 0.20%, Polish: 0.20% +Erroneously classified as Unknown: 30.50%, Shona: 3.40%, Tagalog: 2.40%, Italian: 1.70%, Somali: 1.70%, Estonian: 1.40%, English: 1.30%, Danish: 1.20%, Indonesian: 1.00%, Portuguese: 1.00%, Turkish: 0.90%, Slovene: 0.80%, Croatian: 0.70%, French: 0.70%, German: 0.70%, Romanian: 0.70%, Azerbaijani: 0.60%, Czech: 0.60%, Dutch: 0.60%, Spanish: 0.60%, Swedish: 0.50%, Esperanto: 0.40%, Lithuanian: 0.40%, Afrikaans: 0.30%, Finnish: 0.30%, Hungarian: 0.30%, Nynorsk: 0.30%, Bokmal: 0.20%, Latvian: 0.20%, Polish: 0.20% >> Detection of 1000 word pairs (average length: 17 chars) -Accuracy: 67.70% -Erroneously classified as Unknown: 21.10%, Tagalog: 1.30%, Shona: 1.10%, Somali: 1.00%, Estonian: 0.80%, English: 0.70%, Italian: 0.70%, Dutch: 0.60%, Finnish: 0.50%, Spanish: 0.50%, Turkish: 0.50%, Croatian: 0.40%, Hungarian: 0.40%, Indonesian: 0.30%, Portuguese: 0.30%, Slovene: 0.30%, Bokmal: 0.20%, Danish: 0.20%, Esperanto: 0.20%, French: 0.20%, German: 0.20%, Nynorsk: 0.20%, Romanian: 0.20%, Swedish: 0.20%, Azerbaijani: 0.10%, Vietnamese: 0.10% +Accuracy: 67.60% +Erroneously classified as Unknown: 21.10%, Tagalog: 1.30%, Shona: 1.10%, Somali: 1.00%, English: 0.80%, Estonian: 0.80%, Italian: 0.70%, Dutch: 0.60%, Finnish: 0.50%, Spanish: 0.50%, Turkish: 0.50%, Croatian: 0.40%, Hungarian: 0.40%, Indonesian: 0.30%, Portuguese: 0.30%, Slovene: 0.30%, Bokmal: 0.20%, Danish: 0.20%, Esperanto: 0.20%, French: 0.20%, German: 0.20%, Nynorsk: 0.20%, Romanian: 0.20%, Swedish: 0.20%, Azerbaijani: 0.10%, Vietnamese: 0.10% >> Detection of 1000 sentences (average length: 115 chars) Accuracy: 98.10%