diff --git a/build.gradle b/build.gradle index 3f44f2d28..084592a6d 100644 --- a/build.gradle +++ b/build.gradle @@ -136,6 +136,7 @@ dependencies { compile group: 'xerces', name: 'xerces', version: '2.4.0' compile group: 'org.unbescape', name: 'unbescape', version: '1.0' compile 'org.eclipse.paho:org.eclipse.paho.client.mqttv3:1.2.0' + compile 'org.mongodb:mongodb-driver:3.4.3' } tasks.withType(Javadoc) { diff --git a/conf/config.properties b/conf/config.properties index 12d4a9cf6..b9ea5c500 100644 --- a/conf/config.properties +++ b/conf/config.properties @@ -230,14 +230,14 @@ elasticsearch.index.translog.flush_threshold_size=8mb # Please fill in synonyms and acronyms. Messages are NOT categorized by these words; # instead a bayesian filter is trained with tweets which contain these keywords. # The filter will learn from existing tweets with these keys to bootstrap. -classification.emotion.joy=ecstacy,joy,serenity,ekstase,freude,gelassenheit,axtasis,alegría,serenidad,extase,joie,sérénité,Amused,Calm,Encouraged,Friendly,Hopeful,Inspired,Jovial,Open,Peaceful,Cheerful,Contented,Delighted,Excited,Fulfilled,Glad,Gleeful,Gratified,Happy,Selfesteem,Joyful,Lively,Merry,Optimistic,Playful,Pleased,Proud,Rejuvenated,Satisfied,Awe-filled,Blissful,Ecstatic,Egocentric,Elated,Enthralled,Euphoric,Exhilarated,Giddy,Jubilant,Manic,Overconfident,Overjoyed,Radiant,Rapturous,Self-aggrandized,Thrilled -classification.emotion.trust=admiration,trust,acceptance,bewunderung,vertauen,akzeptanz,admiración,confianza,aprobación,admiration,confiance,acceptation 
-classification.emotion.fear=terror,fear,apprehension,schrecken,angst,besorgnis,terror,miedo,temor,terreur,peur,appréhension,Alert,Apprehensive,Cautious,Concerned,Confused,Curious,Disconcerted,Disoriented,Disquieted,Doubtful,Edgy,Fidgety,Hesitant,Indecisive,Insecure,Instinctive,Intuitive,Leery,Pensive,Shy,Timid,Uneasy,Watchful,Afraid,Alarmed,Anxious,Aversive,Distrustful,Fearful,Jumpy,Nervous,Perturbed,Rattled,Shaky,Startled,Suspicious,Unnerved,Unsettled,Wary,Worried,Dread,Horrified,Panicked,Paralyzed,Petrified,Phobic,Shocked,Terrorized,Scared,scare,scary -classification.emotion.surprise=amazement,surprise,distraction,erstauenen,überraschung,verstörtheit,asombro,sorpresa,distracción,étonnement,surprise,distraction -classification.emotion.sadness=grief,sadness,pensiveness,kummer,traurigkeit,schwermut,pena,tristeza,melancolía,chagrin,tristesse,songerie,Contemplative,Disappointed,Disconnected,Distracted,Grounded,Listless,Low,Regretful,Steady,Wistful,Dejected,Discouraged,Dispirited,Down,Downtrodden,Drained,Forlorn,Gloomy,Grieving,Heavy-hearted,Melancholy,Mournful,Sad,Sorrowful,Weepy,World-weary,Anguished,Bereaved,Bleak,Depressed,Despairing,Despondent,Grief-stricken,Heartbroken,Hopeless,Inconsolable,Morose -classification.emotion.disgust=loathing,disgust,boredom,abscheu,abneigung,langeweile,dio,aversión,tedio,aversion,dégoût,ennui -classification.emotion.anger=rage,anger,annoyance,wut,groll,verdruss,furia,ira,enfado,rage,colère,contrariété,Annoyed,Apathetic,Bored,Certain,Cold,Crabby,Cranky,Critical,Cross,Detached,Displeased,Frustrated,Impatient,Indifferent,Irritated,Peeved,Rankled,Affronted,Aggravated,Angry,Antagonized,Arrogant,Bristling,Exasperated,Incensed,Indignant,Inflamed,Mad,Offended,Resentful,Sarcastic,Aggressive,Appalled,Belligerent,Bitter,Contemptuous,Disgusted,Furious,Hateful,Hostile,Irate,Livid,Menacing,Outraged,Ranting,Raving,Seething,Spiteful,Vengeful,Vicious,Vindictive,Violent 
-classification.emotion.anticipation=vigilance,anticipation,interest,umsicht,erwartung,interesse,vigilancia,anticipación,interés,vigilance,anticipation,intérêt +classification.emotion.joy=ecstacy,joy,serenity,ekstase,freude,gelassenheit,axtasis,alegría,serenidad,extase,joie,sérénité,Amused,Calm,Encouraged,Friendly,Hopeful,Inspired,Jovial,Open,Peaceful,Cheerful,Contented,Delighted,Excited,Fulfilled,Glad,Gleeful,Gratified,Happy,Selfesteem,Joyful,Lively,Merry,Optimistic,Playful,Pleased,Proud,Rejuvenated,Satisfied,Awe-filled,Blissful,Ecstatic,Egocentric,Elated,Enthralled,Euphoric,Exhilarated,Giddy,Jubilant,Manic,Overconfident,Overjoyed,Radiant,Rapturous,Self-aggrandized,Thrilled +classification.emotion.trust=admiration,trust,acceptance,bewunderung,vertauen,akzeptanz,admiración,confianza,aprobación,admiration,confiance,acceptation +classification.emotion.fear=terror,fear,apprehension,schrecken,angst,besorgnis,terror,miedo,temor,terreur,peur,appréhension,Alert,Apprehensive,Cautious,Concerned,Confused,Curious,Disconcerted,Disoriented,Disquieted,Doubtful,Edgy,Fidgety,Hesitant,Indecisive,Insecure,Instinctive,Intuitive,Leery,Pensive,Shy,Timid,Uneasy,Watchful,Afraid,Alarmed,Anxious,Aversive,Distrustful,Fearful,Jumpy,Nervous,Perturbed,Rattled,Shaky,Startled,Suspicious,Unnerved,Unsettled,Wary,Worried,Dread,Horrified,Panicked,Paralyzed,Petrified,Phobic,Shocked,Terrorized,Scared,scare,scary +classification.emotion.surprise=amazement,surprise,distraction,erstauenen,überraschung,verstörtheit,asombro,sorpresa,distracción,étonnement,surprise,distraction 
+classification.emotion.sadness=grief,sadness,pensiveness,kummer,traurigkeit,schwermut,pena,tristeza,melancolía,chagrin,tristesse,songerie,Contemplative,Disappointed,Disconnected,Distracted,Grounded,Listless,Low,Regretful,Steady,Wistful,Dejected,Discouraged,Dispirited,Down,Downtrodden,Drained,Forlorn,Gloomy,Grieving,Heavy-hearted,Melancholy,Mournful,Sad,Sorrowful,Weepy,World-weary,Anguished,Bereaved,Bleak,Depressed,Despairing,Despondent,Grief-stricken,Heartbroken,Hopeless,Inconsolable,Morose +classification.emotion.disgust=loathing,disgust,boredom,abscheu,abneigung,langeweile,dio,aversión,tedio,aversion,dégoût,ennui +classification.emotion.anger=rage,anger,annoyance,wut,groll,verdruss,furia,ira,enfado,rage,colère,contrariété,Annoyed,Apathetic,Bored,Certain,Cold,Crabby,Cranky,Critical,Cross,Detached,Displeased,Frustrated,Impatient,Indifferent,Irritated,Peeved,Rankled,Affronted,Aggravated,Angry,Antagonized,Arrogant,Bristling,Exasperated,Incensed,Indignant,Inflamed,Mad,Offended,Resentful,Sarcastic,Aggressive,Appalled,Belligerent,Bitter,Contemptuous,Disgusted,Furious,Hateful,Hostile,Irate,Livid,Menacing,Outraged,Ranting,Raving,Seething,Spiteful,Vengeful,Vicious,Vindictive,Violent +classification.emotion.anticipation=vigilance,anticipation,interest,umsicht,erwartung,interesse,vigilancia,anticipación,interés,vigilance,anticipation,intérêt # profanity may have different facets,partly difficult to categorize. swear words and words in sexual content belong there. 
The following has been selected from various sources, including but not exclusive from https://encyclopediadramatica.se classification.profanity.swear=bastard,biatch,bitch,bloody,boner,bonk,bugger,crap,damn,dyke,excrement,goddamn,lmao,lmfao,nigga,nigger,omg,poop,prick,screw,shag,shit,shithead,shitting,spastic,spunk # the following sex-related word list is probably highly incomplete, please add more as you discover @@ -249,10 +249,10 @@ classification.profanity.troll=newbies,trollface,rage,fu,fuu,fuuu,fffuu,megusta, # languages: may be identified by the most common words in the corresponding language # the following words are taken mostly from https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists classification.language.english=a,about,after,all,an,and,any,are,as,at,be,been,before,but,by,can,could,did,do,down,first,for,from,good,great,had,has,have,he,her,him,his,I,if,in,into,is,it,its,know,like,little,made,man,may,me,men,more,Mr,much,must,my,no,not,now,of,on,one,only,or,other,our,out,over,said,see,she,should,so,some,such,than,that,the,their,them,then,there,these,they,this,time,to,two,up,upon,us,very,was,we,were,what,when,which,who,will,with,would,you,your -classification.language.german=der,und,die,in,ist,von,den,des,eine,im,ein,mit,das,zu,für,dem,sich,auf,als,auch,wird,oder,aus,wurde,werden,sind,an,einer,nicht,durch,nach,bei,es,war,zum,er,zur,am,einem,einen,sie,bis,man,über,um,dass,wie,hat,eines,nur,Stadt,kann,bezeichnet,noch,aber,siehe,vor,so,unter,Gemeinde,vom -classification.language.french=de,la,le,et,les,des,en,un,du,une,que,est,pour,qui,dans,a,par,plus,pas,au,sur,ne,se,Le,ce,il,sont,La,Les,ou,avec,son,Il,aux,d'un,En,cette,d'une,ont,ses,mais,comme,on,tout,nous,sa,Mais,fait,été,aussi,leur,bien,peut,ces,y,deux,A,ans,l,encore,n'est,marché,d,Pour,donc,cours,qu'il,moins,sans,C'est,Et,si,entre,Un,Ce,faire,elle,c'est,peu,vous,Une,prix,On,dont,lui,également,Dans,effet,pays,cas 
-classification.language.spanish=que,de,no,a,la,el,es,y,en,lo,un,por,qué,me,una,te,los,se,con,para,mi,está,si,bien,pero,yo,eso,las,sí,su,tu,aquí,del,al,como,le,más,esto,ya,todo,esta,vamos,muy,hay,ahora,algo,estoy,tengo,nos,tú,nada,cuando,ha,este,sé,estás,así,puedo,cómo,quiero,sólo,soy,tiene,gracias,o,él,bueno,fue,ser,hacer,son,todos,era,eres,vez,tienes,creo,ella,he,ese,voy,puede,sabes,hola,sus,porque,dios,quién,nunca,dónde,quieres,casa,favor,esa,dos,tan,señor,tiempo,verdad,estaba,mejor,están,va,hombre,usted,mucho,hace,entonces,siento,tenemos,puedes,ahí,ti,vida,ver,alguien,sr,hasta,sin,mí,solo,años,sobre,decir,uno,siempre,oh,ir,cosas,también,antes,has,ni,mis,día,estar,estamos,noche,nadie,otra,quiere,parece,nosotros,poco,padre,trabajo,gente,mira,vas,sea,les,donde,mismo,hecho,ellos,dijo,pasa,dinero,hijo,tal,otro,hablar,seguro,claro,estas,lugar,mundo,amigo,espera,mierda,han,tus,sabe,después,momento,desde,fuera,cosa,tipo,mañana,podemos,dije,gran,necesito,estado,podría,acuerdo,papá,tener,dice,mío,crees,buena,gusta,nuestro,nuevo,será,haciendo,días,nombre,buen,había,ven,tres,menos,debe,tenía,mal,conmigo,madre,hoy,quien,sido,mamá,tienen,luego,todas,allí,toda,hora,mujer,visto,haces,importa,contigo,ve,tarde,oye,parte,haber,hombres,problema,mas,saber,quería,aún,veces,nuestra,hacerlo,cada,hizo,veo,tanto,razón,ustedes,idea,esos,van,quizá,debo,alguna,cierto,ud,muerto,unos,estos,salir,policía,realmente,demasiado,familia,pueden,cabeza,hemos,amigos,chica,cariño,lado,allá,entre,minutos,digo,algún,serio,cuidado,pasó,buenas,somos,amor,puerta,ves,vaya,ah,suerte,eh,rápido,cuenta,quizás,io,esas,pues,pasado,pensé,todavía,hermano,debes,casi,forma,aqui,chico,ok,dicho,nueva,sabía,muchas,dentro,hice,contra,auto,camino,ayuda,primera,hacia,vi,miedo,adiós,primero,debería,poder,niños,sería,historia,hey,mientras,ciudad,dijiste,espero,cuánto,esposa,pronto,chicos,cualquier,viejo,debemos,deja,año,muerte,hablando,manos,da,loco,problemas,mano,guerra,semana,pasar,vale,cuál,viene,volver,toma,caso,agua,haré
,vete,entiendo,horas,personas,capitán,adelante,niño,listo,noches,buenos -classification.language.dutch=de,van,een,het,en,in,is,dat,op,te,De,zijn,voor,met,die,niet,aan,er,om,Het,ook,als,dan,maar,bij,of,uit,nog,worden,door,naar,heeft,tot,ze,wordt,over,hij,In,meer,jaar,was,ik,kan,je,zich,al,hebben,geen,hun,we,wat,Een,Maar,werd,moet,wel,kunnen,Dat,nu,dit,deze,zal,Ik,veel,zo,En,andere,nieuwe,zou,twee,moeten,onder,eerste,haar,Van,wil,tegen,men,mensen,gaat,tussen,grote,waar,goed,maken,dus,alleen,Hij,Op,frank,ons,u,daar,na,had,gaan,alle,Als,Er,één +classification.language.german=der,und,die,in,ist,von,den,des,eine,im,ein,mit,das,zu,für,dem,sich,auf,als,auch,wird,oder,aus,wurde,werden,sind,an,einer,nicht,durch,nach,bei,es,war,zum,er,zur,am,einem,einen,sie,bis,man,über,um,dass,wie,hat,eines,nur,Stadt,kann,bezeichnet,noch,aber,siehe,vor,so,unter,Gemeinde,vom +classification.language.french=de,la,le,et,les,des,en,un,du,une,que,est,pour,qui,dans,a,par,plus,pas,au,sur,ne,se,Le,ce,il,sont,La,Les,ou,avec,son,Il,aux,d'un,En,cette,d'une,ont,ses,mais,comme,on,tout,nous,sa,Mais,fait,été,aussi,leur,bien,peut,ces,y,deux,A,ans,l,encore,n'est,marché,d,Pour,donc,cours,qu'il,moins,sans,C'est,Et,si,entre,Un,Ce,faire,elle,c'est,peu,vous,Une,prix,On,dont,lui,également,Dans,effet,pays,cas 
+classification.language.spanish=que,de,no,a,la,el,es,y,en,lo,un,por,qué,me,una,te,los,se,con,para,mi,está,si,bien,pero,yo,eso,las,sí,su,tu,aquí,del,al,como,le,más,esto,ya,todo,esta,vamos,muy,hay,ahora,algo,estoy,tengo,nos,tú,nada,cuando,ha,este,sé,estás,así,puedo,cómo,quiero,sólo,soy,tiene,gracias,o,él,bueno,fue,ser,hacer,son,todos,era,eres,vez,tienes,creo,ella,he,ese,voy,puede,sabes,hola,sus,porque,dios,quién,nunca,dónde,quieres,casa,favor,esa,dos,tan,señor,tiempo,verdad,estaba,mejor,están,va,hombre,usted,mucho,hace,entonces,siento,tenemos,puedes,ahí,ti,vida,ver,alguien,sr,hasta,sin,mí,solo,años,sobre,decir,uno,siempre,oh,ir,cosas,también,antes,has,ni,mis,día,estar,estamos,noche,nadie,otra,quiere,parece,nosotros,poco,padre,trabajo,gente,mira,vas,sea,les,donde,mismo,hecho,ellos,dijo,pasa,dinero,hijo,tal,otro,hablar,seguro,claro,estas,lugar,mundo,amigo,espera,mierda,han,tus,sabe,después,momento,desde,fuera,cosa,tipo,mañana,podemos,dije,gran,necesito,estado,podría,acuerdo,papá,tener,dice,mío,crees,buena,gusta,nuestro,nuevo,será,haciendo,días,nombre,buen,había,ven,tres,menos,debe,tenía,mal,conmigo,madre,hoy,quien,sido,mamá,tienen,luego,todas,allí,toda,hora,mujer,visto,haces,importa,contigo,ve,tarde,oye,parte,haber,hombres,problema,mas,saber,quería,aún,veces,nuestra,hacerlo,cada,hizo,veo,tanto,razón,ustedes,idea,esos,van,quizá,debo,alguna,cierto,ud,muerto,unos,estos,salir,policía,realmente,demasiado,familia,pueden,cabeza,hemos,amigos,chica,cariño,lado,allá,entre,minutos,digo,algún,serio,cuidado,pasó,buenas,somos,amor,puerta,ves,vaya,ah,suerte,eh,rápido,cuenta,quizás,io,esas,pues,pasado,pensé,todavía,hermano,debes,casi,forma,aqui,chico,ok,dicho,nueva,sabía,muchas,dentro,hice,contra,auto,camino,ayuda,primera,hacia,vi,miedo,adiós,primero,debería,poder,niños,sería,historia,hey,mientras,ciudad,dijiste,espero,cuánto,esposa,pronto,chicos,cualquier,viejo,debemos,deja,año,muerte,hablando,manos,da,loco,problemas,mano,guerra,semana,pasar,vale,cuál,viene,volver,toma,caso,agua,haré
,vete,entiendo,horas,personas,capitán,adelante,niño,listo,noches,buenos +classification.language.dutch=de,van,een,het,en,in,is,dat,op,te,De,zijn,voor,met,die,niet,aan,er,om,Het,ook,als,dan,maar,bij,of,uit,nog,worden,door,naar,heeft,tot,ze,wordt,over,hij,In,meer,jaar,was,ik,kan,je,zich,al,hebben,geen,hun,we,wat,Een,Maar,werd,moet,wel,kunnen,Dat,nu,dit,deze,zal,Ik,veel,zo,En,andere,nieuwe,zou,twee,moeten,onder,eerste,haar,Van,wil,tegen,men,mensen,gaat,tussen,grote,waar,goed,maken,dus,alleen,Hij,Op,frank,ons,u,daar,na,had,gaan,alle,Als,Er,één # flags flag.log.dao = true @@ -302,5 +302,9 @@ dump.write_enabled = true stream.enabled = false stream.mqtt.address = tcp://127.0.0.1:1883 +# MongoDB +db.mongo.enabled = false +db.mongo.address = mongodb://127.0.0.1:27017 + # noio: if true do not create search indexes -noio = false +noio = false \ No newline at end of file diff --git a/src/org/loklak/data/DAO.java b/src/org/loklak/data/DAO.java index e12ef7b28..b07a5087d 100644 --- a/src/org/loklak/data/DAO.java +++ b/src/org/loklak/data/DAO.java @@ -182,7 +182,10 @@ public class DAO { public static TimelineCache timelineCache; public static MQTTPublisher mqttPublisher = null; + public static MongoDBManager mongoDBManager = null; public static boolean streamEnabled = false; + public static boolean mongoDBEnabled = false; + public static String twitterChannel = "twitter"; public static List randomTerms = new ArrayList<>(); public static enum IndexName { @@ -427,6 +430,13 @@ public static void init(Map configMap, Path dataPath) throws Exc mqttPublisher = new MQTTPublisher(mqttAddress); } + // Connect to mongoDB database + String mongoAddress = getConfig("db.mongo.address", "mongodb://127.0.0.1:27017"); + mongoDBEnabled = getConfig("db.mongo.enabled", false); + if (mongoDBEnabled) { + mongoDBManager = new MongoDBManager(mongoAddress); + } + // finally wait for healthy status of elasticsearch shards ClusterHealthStatus required_status = 
ClusterHealthStatus.fromString(config.get("elasticsearch_requiredClusterHealthStatus")); boolean ok; @@ -889,6 +899,8 @@ private static Set writeMessageBulkDump(Collection mws) for (MessageWrapper mw: mws) try { mw.t.publishToMQTT(); + //Store message string to mongoDB document + mw.t.saveToMongoDB(DAO.twitterChannel); if (!created.contains(mw.t.getPostId())) continue; synchronized (DAO.class) { diff --git a/src/org/loklak/data/MongoDBManager.java b/src/org/loklak/data/MongoDBManager.java new file mode 100644 index 000000000..cf2f547f1 --- /dev/null +++ b/src/org/loklak/data/MongoDBManager.java @@ -0,0 +1,42 @@ +package org.loklak.data; + +import com.mongodb.MongoClient; +import com.mongodb.MongoClientURI; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; +import org.bson.Document; + +/** + * @author dawei.ma + * @date 2018/9/6 19:45 + */ +public class MongoDBManager { + private MongoClient mongoClient; + private String clientId; + private MongoDatabase database; + + public MongoDBManager(String address, String clientId) { + this.mongoClient = new MongoClient(new MongoClientURI(address)); + this.clientId = clientId; + this.database = mongoClient.getDatabase(clientId); + } + + public MongoDBManager(String address) { + this(address, "loklak_server"); + } + + public MongoClient getMongoClient() { + return mongoClient; + } + + public String getClientId() { + return clientId; + } + + public void saveChannelMessage(String channel, String message) { + MongoCollection collection = database.getCollection(channel); + Document doc = Document.parse(message); + collection.insertOne(doc); + } +} + diff --git a/src/org/loklak/harvester/Post.java b/src/org/loklak/harvester/Post.java index 7670a3c3b..2f0e1a63a 100644 --- a/src/org/loklak/harvester/Post.java +++ b/src/org/loklak/harvester/Post.java @@ -164,7 +164,16 @@ public final void publishToMQTT() { DAO.mqttPublisher.publish(this.getStreamChannels(), this.toString()); } } - + + /** + * 
Save the string form of this post into the MongoDB collection named by channel; does nothing when DAO.mongoDBManager is null + */ + public final void saveToMongoDB(String channel) { + if (DAO.mongoDBManager != null) { + DAO.mongoDBManager.saveChannelMessage(channel, this.toString()); + } + } + public void setTimestamp(long timestamp) { this.put("timestamp_id", timestamp); this.timestamp = timestamp;