feat: add place extraction

gamemaker1 · gamemaker1 · commit 801d2928c20a · 2021-03-09T14:43:09.000+05:30
- still misses a lot of places
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "intel-api-server",
-  "version": "0.4.4",
+  "version": "0.5.0",
   "description": "An implementation of the Dabbu Intel API that helps in extraction of and one pagers/summaries of all knowledge around a certain topic, person or place based on information/data from multiple providers",
   "main": "src/server.js",
   "scripts": {
@@ -23,11 +23,12 @@
   },
   "homepage": "https://github.com/dabbu-knowledge-platform/intel-api-server#readme",
   "dependencies": {
+    "country-in-text-detector": "^1.0.10",
     "express": "^4.17.1",
     "fs-extra": "^9.1.0",
+    "lda": "0.2.0",
     "multer": "^1.4.2",
-    "office-text-extractor": "^1.0.2",
-    "lda": "0.2.0"
+    "office-text-extractor": "^1.0.2"
   },
   "devDependencies": {
     "pkg": "^4.4.9",
diff --git a/src/routes/extract_info.js b/src/routes/extract_info.js
@@ -26,7 +26,6 @@ const multer = require('multer')
 const extractText = require('office-text-extractor')
 // The library used to extract keywords
 const lda = require('lda')
-
 // Helper function to return keywords in an object format
 const extractTopics = (sentences) => {
   // Run the LDA library on it (10 clusters of five terms each)
@@ -60,6 +59,8 @@ const extractTopics = (sentences) => {
   // Return the final array of keywords
   return finalResult
 }
+// The library used to detect names of a country
+const detectPlace = require('country-in-text-detector').detect
 
 // Files library, used to do all file operations across platforms
 const fs = require('fs-extra')
@@ -172,6 +173,27 @@ async function extractEmails(name, text) {
   return matches
 }
 
+// Extract places mentioned in the text via country-in-text-detector
+// FLAW: Does NOT work for all places (the detector misses many of them)
+async function extractPlaces(name, text) {
+  // Return an empty array (not null) for empty text, as callers spread it
+  if (!text) return []
+  const detected = detectPlace(text)
+
+  // Convert it to an array of objects and add the file name
+  const matches = detected.map((match) => {
+    return {
+      place: match.name,
+      type: match.type,
+      country: match.iso3166,
+      file: name,
+    }
+  })
+
+  // Return successfully
+  return matches
+}
+
 // Put together the above functions and run them for each file
 async function processFiles(files) {
   // Check if there are any files
@@ -180,6 +202,7 @@ async function processFiles(files) {
     let results = {
       topics: [],
       people: [],
+      places: [],
     }
     for (const file of files) {
       // Extract the text from the file
@@ -189,6 +212,7 @@ async function processFiles(files) {
         ...(await extractCommonWords(file.name, text))
       )
       results.people.push(...(await extractEmails(file.name, text)))
+      results.places.push(...(await extractPlaces(file.name, text)))
     }
 
     // Return successfully