From 8f00a64547f64a33f8f2f0c1d8035c39a8f36aa7 Mon Sep 17 00:00:00 2001 From: James Taracevicz Date: Fri, 30 Jun 2023 19:41:41 -0700 Subject: [PATCH 1/2] feat: added language_classifier.cc --- binding.gyp | 11 ++++++ src/language_classifier.cc | 70 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 src/language_classifier.cc diff --git a/binding.gyp b/binding.gyp index 9426bcc..737675b 100644 --- a/binding.gyp +++ b/binding.gyp @@ -25,6 +25,17 @@ " +#include +#include + +void LanguageClassifier(const Nan::FunctionCallbackInfo& info) { + v8::Isolate *isolate = info.GetIsolate(); + + if (info.Length() < 1) { + Nan::ThrowTypeError("Usage: language_classifier(text)"); + return; + } + + if (!info[0]->IsString()) { + Nan::ThrowTypeError("First argument must be a string"); + return; + } + + Nan::Utf8String text_utf8(info[0]); + char *text = *text_utf8; + + if (text == NULL) { + Nan::ThrowTypeError("Could not convert first argument to string"); + return; + } + + libpostal_language_classifier_response_t *response = libpostal_classify_language(text); + + if (response != NULL) { + v8::Local lang_array = Nan::New(response->num_languages); + for (size_t i = 0; i < response->num_languages; i++) { + const char* language = response->languages[i]; // Directly access the array + + v8::Local lang_obj = Nan::New(); + Nan::Set(lang_obj, Nan::New("language").ToLocalChecked(), Nan::New(language).ToLocalChecked()); + + Nan::Set(lang_array, i, lang_obj); + } + libpostal_language_classifier_response_destroy(response); + info.GetReturnValue().Set(lang_array); + } +} + +void cleanup(void*) { + libpostal_teardown(); + libpostal_teardown_language_classifier(); +} + +void init(v8::Local exports) { + if (!libpostal_setup() || !libpostal_setup_language_classifier()) { + Nan::ThrowError("Could not load libpostal"); + return; + } + + v8::Local context = exports->CreationContext(); + + exports->Set( + context, + Nan::New("language_classifier").ToLocalChecked(), + Nan::New(LanguageClassifier)->GetFunction(context).ToLocalChecked() + ); + + #if NODE_MAJOR_VERSION >= 12 + node::Environment* env = node::GetCurrentEnvironment(Nan::GetCurrentContext()); + node::AtExit(env, cleanup, NULL); + #else + node::AtExit(cleanup); + #endif +} + +NODE_MODULE(language_classifier, init) From c4e4a4675b3125c210d0baebce0d09e8457e2272 Mon Sep 17 00:00:00 2001 From: James Taracevicz Date: Fri, 30 Jun 2023 19:51:06 -0700 Subject: [PATCH 2/2] feat: included probability in language_classifier --- src/language_classifier.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/language_classifier.cc b/src/language_classifier.cc index 22782e7..45082d1 100644 --- a/src/language_classifier.cc +++ b/src/language_classifier.cc @@ -27,11 +27,14 @@ void LanguageClassifier(const Nan::FunctionCallbackInfo& info) { if (response != NULL) { v8::Local lang_array = Nan::New(response->num_languages); - for (size_t i = 0; i < response->num_languages; i++) { - const char* language = response->languages[i]; // Directly access the array + + for (size_t i = 0; i < response->num_languages; ++i) { + const char *language = response->languages[i]; + const double probability = response->probs[i]; v8::Local lang_obj = Nan::New(); Nan::Set(lang_obj, Nan::New("language").ToLocalChecked(), Nan::New(language).ToLocalChecked()); + Nan::Set(lang_obj, Nan::New("probability").ToLocalChecked(), Nan::New(probability)); Nan::Set(lang_array, i, lang_obj); }