Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@
"<!(node -e \"require('nan')\")",
"/usr/local/include"
]
},
{
"target_name": "language_classifier",
"sources": [
"src/language_classifier.cc"
],
"libraries": [
"-lpostal", "-L/usr/local/lib"
],
"include_dirs": [
"<!(node -e \"require('nan')\")",
"/usr/local/include"
]
}
]
}
3 changes: 2 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

module.exports = {
expand: require('bindings')('expand'),
parser: require('bindings')('parser')
parser: require('bindings')('parser'),
language_classifier: require('bindings')('language_classifier').language_classifier
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit unergonomic 🤷

}
78 changes: 78 additions & 0 deletions src/language_classifier.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#include <libpostal/libpostal.h>
#include <nan.h>
#include <string.h>

/**
 * JavaScript binding: language_classifier(text).
 *
 * Runs libpostal's language classifier over `text` and returns an array of
 * `{ language: <string>, probability: <number> }` objects, in the order
 * libpostal reports them.
 *
 * Throws a TypeError when called without arguments, when the first argument
 * is not a string, or when the string cannot be converted to UTF-8.
 * When libpostal returns no response the return value is left unset, so the
 * JavaScript caller receives `undefined`.
 */
void LanguageClassifier(const Nan::FunctionCallbackInfo<v8::Value>& info) {
    if (info.Length() < 1) {
        Nan::ThrowTypeError("Usage: language_classifier(text)");
        return;
    }

    if (!info[0]->IsString()) {
        Nan::ThrowTypeError("First argument must be a string");
        return;
    }

    // text_utf8 owns the converted buffer; `text` is only valid while it is alive.
    Nan::Utf8String text_utf8(info[0]);
    char *text = *text_utf8;

    if (text == nullptr) {
        Nan::ThrowTypeError("Could not convert first argument to string");
        return;
    }

    libpostal_language_classifier_response_t *response = libpostal_classify_language(text);

    if (response == nullptr) {
        // Nothing to report: leave the return value unset.
        return;
    }

    // V8 array lengths/indices are 32-bit; convert once, explicitly, instead of
    // relying on an implicit size_t narrowing at every call site.
    const uint32_t num_languages = static_cast<uint32_t>(response->num_languages);
    v8::Local<v8::Array> lang_array = Nan::New<v8::Array>(static_cast<int>(num_languages));

    // The property keys are identical for every entry, so build them once.
    v8::Local<v8::String> language_key = Nan::New("language").ToLocalChecked();
    v8::Local<v8::String> probability_key = Nan::New("probability").ToLocalChecked();

    for (uint32_t i = 0; i < num_languages; ++i) {
        const char *language = response->languages[i];
        const double probability = response->probs[i];

        v8::Local<v8::Object> lang_obj = Nan::New<v8::Object>();
        Nan::Set(lang_obj, language_key, Nan::New(language).ToLocalChecked());
        Nan::Set(lang_obj, probability_key, Nan::New(probability));

        Nan::Set(lang_array, i, lang_obj);
    }

    // All strings have been copied into V8 handles above, so the native
    // response can be released before handing the array back.
    libpostal_language_classifier_response_destroy(response);
    info.GetReturnValue().Set(lang_array);
}

/**
 * Exit hook handed to node::AtExit by init().
 *
 * Tears down the libpostal core first and the language classifier second.
 * The pointer argument is required by the hook signature and is ignored.
 */
void cleanup(void* /* unused */) {
    libpostal_teardown();
    libpostal_teardown_language_classifier();
}

/**
 * Module initializer.
 *
 * Sets up libpostal and its language classifier, exports the
 * `language_classifier` function on `exports`, and registers `cleanup` to run
 * at exit. Throws a JavaScript Error ("Could not load libpostal") and exports
 * nothing if either setup call fails.
 */
void init(v8::Local<v8::Object> exports) {
    if (!libpostal_setup() || !libpostal_setup_language_classifier()) {
        Nan::ThrowError("Could not load libpostal");
        return;
    }

    // Use the NAN helpers rather than calling v8::Object::Set directly: they
    // hide the CreationContext()/GetCreationContext() API split between Node
    // versions and avoid silently discarding a warn-unused Maybe<bool>.
    Nan::Set(
        exports,
        Nan::New("language_classifier").ToLocalChecked(),
        Nan::GetFunction(Nan::New<v8::FunctionTemplate>(LanguageClassifier)).ToLocalChecked()
    );

    // Newer Node versions take the environment explicitly when registering an
    // exit hook; older ones only offer the environment-less overload.
#if NODE_MAJOR_VERSION >= 12
    node::Environment* env = node::GetCurrentEnvironment(Nan::GetCurrentContext());
    node::AtExit(env, cleanup, nullptr);
#else
    node::AtExit(cleanup);
#endif
}

// Register `init` as the initializer of the `language_classifier` addon module.
NODE_MODULE(language_classifier, init)
83 changes: 83 additions & 0 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,86 @@ describe('parser', function() {
});
})
})

describe('language_classifier', function() {
  // Probabilities are floating-point outputs of the classifier. Comparing them
  // for exact equality is brittle, so they are checked within this tolerance
  // while the language codes and their order are still checked strictly.
  const PROBABILITY_TOLERANCE = 1e-6;

  it('should classify phrases', function() {
    const cases = [
      {
        text: 'street',
        expected: [{ language: 'en', probability: 0.9975550392228959 }]
      },
      {
        text: 'calle',
        expected: [{ language: 'es', probability: 0.9948278315613933 }]
      },
      {
        text: '200 santa monica pier santa monica coahuila 90401 usa',
        expected: [{ language: 'es', probability: 0.9889375382113144 }]
      },
      {
        text: '200 santa monica pier santa monica compania 90401 usa',
        expected: [
          { language: 'es', probability: 0.6896583016342134 },
          { language: 'it', probability: 0.12355268595569942 },
          { language: 'en', probability: 0.09448842175870104 },
          { language: 'pt', probability: 0.09152280207660235 }
        ]
      },
      {
        text: '200 santa monica pier santa monica compania anonima 90401 usa',
        expected: [
          { language: 'en', probability: 0.39249744852100377 },
          { language: 'pt', probability: 0.27690798537637573 },
          { language: 'es', probability: 0.2631088374468901 },
          { language: 'it', probability: 0.06642801798298495 }
        ]
      },
      {
        text: '200 santa monica pier santa monica calle 90401 usa',
        expected: [
          { language: 'es', probability: 0.9332794851572307 },
          { language: 'it', probability: 0.0653229950496308 }
        ]
      },
      {
        text: '200 santa monica pier santa monica ca 90401 usa',
        expected: [
          { language: 'es', probability: 0.6860753090491215 },
          { language: 'it', probability: 0.16420520901155986 },
          { language: 'en', probability: 0.08797427320205269 },
          { language: 'pt', probability: 0.061694606284459816 }
        ]
      },
      {
        text: '200 santa monica pier santa monica casa 90401 usa',
        expected: [
          { language: 'it', probability: 0.7099331948983176 },
          { language: 'pt', probability: 0.1484441794525817 },
          { language: 'en', probability: 0.09171194632862785 }
        ]
      },
      {
        text: '200 santa monica pier santa monica cagliari 90401 usa',
        expected: [{ language: 'it', probability: 0.9409304710772296 }]
      },
      {
        text: '200 santa monica pier santa monica california 90401 usa',
        expected: [
          { language: 'en', probability: 0.529208078531959 },
          { language: 'pt', probability: 0.26387306487348855 },
          { language: 'es', probability: 0.20130465470054235 }
        ]
      },
      {
        text: '200 santa monica pier santa monica companhia anonima 90401 usa',
        expected: [
          { language: 'pt', probability: 0.8631314862441015 },
          { language: 'en', probability: 0.1090210811124842 }
        ]
      }
    ];

    for (const c of cases) {
      const actual = postal.language_classifier(c.text);

      assert.ok(Array.isArray(actual), c.text + ': expected an array result');
      assert.strictEqual(actual.length, c.expected.length, c.text + ': number of languages');

      c.expected.forEach(function(want, i) {
        const got = actual[i];
        assert.strictEqual(got.language, want.language, c.text + ': language at index ' + i);
        assert.ok(
          Math.abs(got.probability - want.probability) <= PROBABILITY_TOLERANCE,
          c.text + ': probability for ' + want.language +
            ' expected ~' + want.probability + ' but got ' + got.probability
        );
      });
    }
  });
})