NOTE(review): this patch was recovered from a whitespace-collapsed copy that had
lost every "<...>" span. Line breaks follow the original "+" markers; the
indentation, the C++ template arguments and the two #include targets are
reconstructed and should be confirmed against the upstream change. The body of
the binding.gyp hunk and the diff header of the C++ file could not be recovered,
so the patch will not apply until they are restored. The test hunk header was
updated from +56,86 to +56,93 for the tolerance-based comparison.

diff --git a/binding.gyp b/binding.gyp
index 9426bcc..2215ff9 100644
--- a/binding.gyp
+++ b/binding.gyp
@@ -25,6 +25,19 @@
 "
+#include <libpostal/libpostal.h>
+#include <nan.h>
+
+/**
+ * JavaScript binding: language_classifier(text).
+ *
+ * Throws a TypeError when called without arguments, when the first argument
+ * is not a string, or when it cannot be converted to a UTF-8 string.
+ * Otherwise passes the text to libpostal_classify_language() and returns an
+ * array of { language, probability } objects, one per entry of the response
+ * and in the order the response lists them. When libpostal returns no
+ * response, the return value is left unset.
+ */
+void LanguageClassifier(const Nan::FunctionCallbackInfo<v8::Value>& info) {
+    if (info.Length() < 1) {
+        Nan::ThrowTypeError("Usage: language_classifier(text)");
+        return;
+    }
+
+    if (!info[0]->IsString()) {
+        Nan::ThrowTypeError("First argument must be a string");
+        return;
+    }
+
+    // text_utf8 owns the converted buffer; `text` is only valid while it lives.
+    Nan::Utf8String text_utf8(info[0]);
+    char *text = *text_utf8;
+
+    if (text == NULL) {
+        Nan::ThrowTypeError("Could not convert first argument to string");
+        return;
+    }
+
+    libpostal_language_classifier_response_t *response = libpostal_classify_language(text);
+
+    if (response != NULL) {
+        v8::Local<v8::Array> lang_array = Nan::New<v8::Array>(response->num_languages);
+
+        for (size_t i = 0; i < response->num_languages; ++i) {
+            const char *language = response->languages[i];
+            const double probability = response->probs[i];
+
+            v8::Local<v8::Object> lang_obj = Nan::New<v8::Object>();
+            Nan::Set(lang_obj, Nan::New("language").ToLocalChecked(), Nan::New(language).ToLocalChecked());
+            Nan::Set(lang_obj, Nan::New("probability").ToLocalChecked(), Nan::New(probability));
+
+            Nan::Set(lang_array, i, lang_obj);
+        }
+        // Everything needed has been copied into JS values above, so the
+        // native response can be released before returning.
+        libpostal_language_classifier_response_destroy(response);
+        info.GetReturnValue().Set(lang_array);
+    }
+}
+
+/** Exit hook registered by init(): tears down what init() set up. */
+void cleanup(void*) {
+    libpostal_teardown();
+    libpostal_teardown_language_classifier();
+}
+
+/**
+ * Module initializer: runs the libpostal and language-classifier setup calls,
+ * exports language_classifier(), and registers cleanup() as an exit hook.
+ * Throws "Could not load libpostal" and exports nothing if either setup call
+ * fails.
+ */
+void init(v8::Local<v8::Object> exports) {
+    if (!libpostal_setup() || !libpostal_setup_language_classifier()) {
+        Nan::ThrowError("Could not load libpostal");
+        return;
+    }
+
+    // Pick the creation-context accessor by Node.js major version: from 16 on
+    // GetCreationContext() is used (its result needs ToLocalChecked()), older
+    // versions use CreationContext().
+    #if NODE_MAJOR_VERSION >= 16
+    v8::Local<v8::Context> context = exports->GetCreationContext().ToLocalChecked();
+    #else
+    v8::Local<v8::Context> context = exports->CreationContext();
+    #endif
+
+    exports->Set(
+        context,
+        Nan::New("language_classifier").ToLocalChecked(),
+        Nan::New<v8::FunctionTemplate>(LanguageClassifier)->GetFunction(context).ToLocalChecked()
+    );
+
+    // From Node.js 12 on, AtExit is called with an explicit environment.
+    #if NODE_MAJOR_VERSION >= 12
+    node::Environment* env = node::GetCurrentEnvironment(Nan::GetCurrentContext());
+    node::AtExit(env, cleanup, NULL);
+    #else
+    node::AtExit(cleanup);
+    #endif
+}
+
+NODE_MODULE(language_classifier, init)
diff --git a/test/index.test.js b/test/index.test.js
index 5c0b722..0c2694f 100644
--- a/test/index.test.js
+++ b/test/index.test.js
@@ -56,3 +56,93 @@ describe('parser', function() {
         });
     })
 })
+
+describe('language_classifier', function() {
+    it('should classify phrases', function() {
+        const cases = [
+            {
+                text: 'street',
+                expected: [{ language: 'en', probability: 0.9975550392228959 }]
+            },
+            {
+                text: 'calle',
+                expected: [{ language: 'es', probability: 0.9948278315613933 }]
+            },
+            {
+                text: '200 santa monica pier santa monica coahuila 90401 usa',
+                expected: [{ language: 'es', probability: 0.9889375382113144 }]
+            },
+            {
+                text: '200 santa monica pier santa monica compania 90401 usa',
+                expected: [
+                    { language: 'es', probability: 0.6896583016342134 },
+                    { language: 'it', probability: 0.12355268595569942 },
+                    { language: 'en', probability: 0.09448842175870104 },
+                    { language: 'pt', probability: 0.09152280207660235 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica compania anonima 90401 usa',
+                expected: [
+                    { language: 'en', probability: 0.39249744852100377 },
+                    { language: 'pt', probability: 0.27690798537637573 },
+                    { language: 'es', probability: 0.2631088374468901 },
+                    { language: 'it', probability: 0.06642801798298495 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica calle 90401 usa',
+                expected: [
+                    { language: 'es', probability: 0.9332794851572307 },
+                    { language: 'it', probability: 0.0653229950496308 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica ca 90401 usa',
+                expected: [
+                    { language: 'es', probability: 0.6860753090491215 },
+                    { language: 'it', probability: 0.16420520901155986 },
+                    { language: 'en', probability: 0.08797427320205269 },
+                    { language: 'pt', probability: 0.061694606284459816 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica casa 90401 usa',
+                expected: [
+                    { language: 'it', probability: 0.7099331948983176 },
+                    { language: 'pt', probability: 0.1484441794525817 },
+                    { language: 'en', probability: 0.09171194632862785 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica cagliari 90401 usa',
+                expected: [{ language: 'it', probability: 0.9409304710772296 }]
+            },
+            {
+                text: '200 santa monica pier santa monica california 90401 usa',
+                expected: [
+                    { language: 'en', probability: 0.529208078531959 },
+                    { language: 'pt', probability: 0.26387306487348855 },
+                    { language: 'es', probability: 0.20130465470054235 }
+                ]
+            },
+            {
+                text: '200 santa monica pier santa monica companhia anonima 90401 usa',
+                expected: [
+                    { language: 'pt', probability: 0.8631314862441015 },
+                    { language: 'en', probability: 0.1090210811124842 }
+                ]
+            }
+        ];
+        // Probabilities are floating-point model outputs: check the language
+        // order exactly, but compare the probabilities within a tolerance.
+        const EPSILON = 1e-6;
+        for (const c of cases) {
+            const actual = postal.language_classifier(c.text);
+            assert.deepEqual(actual.map(r => r.language), c.expected.map(r => r.language), c.text);
+            actual.forEach((r, i) => {
+                assert.ok(Math.abs(r.probability - c.expected[i].probability) < EPSILON, c.text);
+            });
+        }
+    });
+})
\ No newline at end of file