diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
new file mode 100644
index 0000000..b7741ed
--- /dev/null
+++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
@@ -0,0 +1,682 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import ast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Companies | \n",
+ " Positions | \n",
+ " ID | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " State Street | \n",
+ " Software Engineer I (EMS Trading Team) - Charl... | \n",
+ " e8a9b71f6c5126ae | \n",
+ " ['Opportunity to influence and impact the arch... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Google | \n",
+ " Software Engineering Intern, BS, Winter 2020 | \n",
+ " 1d99705341f0d46c | \n",
+ " ['1. In the “Resume Section:” attach an update... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " DataDog | \n",
+ " Software Engineer | \n",
+ " f47387c886f1b1dd | \n",
+ " ['Build distributed, high-throughput, real-tim... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Pluralsight | \n",
+ " Software Engineer | \n",
+ " ddc20246b79174f2 | \n",
+ " ['Provide architectural, strategic, and scale ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Achievement Network (ANet) | \n",
+ " Software Engineer | \n",
+ " a0303ea18678af0c | \n",
+ " ['Implement ANet’s next generation of technolo... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Companies \\\n",
+ "0 State Street \n",
+ "1 Google \n",
+ "2 DataDog \n",
+ "3 Pluralsight \n",
+ "4 Achievement Network (ANet) \n",
+ "\n",
+ " Positions ID \\\n",
+ "0 Software Engineer I (EMS Trading Team) - Charl... e8a9b71f6c5126ae \n",
+ "1 Software Engineering Intern, BS, Winter 2020 1d99705341f0d46c \n",
+ "2 Software Engineer f47387c886f1b1dd \n",
+ "3 Software Engineer ddc20246b79174f2 \n",
+ "4 Software Engineer a0303ea18678af0c \n",
+ "\n",
+ " Descriptions \n",
+ "0 ['Opportunity to influence and impact the arch... \n",
+ "1 ['1. In the “Resume Section:” attach an update... \n",
+ "2 ['Build distributed, high-throughput, real-tim... \n",
+ "3 ['Provide architectural, strategic, and scale ... \n",
+ "4 ['Implement ANet’s next generation of technolo... "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_directory = os.path.dirname(os.path.realpath('__file__')) + \"\\data\"\n",
+ "df_software = pd.read_csv(data_directory+\"\\software_engineer.csv\")\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ['Opportunity to influence and impact the arch... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ['1. In the “Resume Section:” attach an update... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ['Build distributed, high-throughput, real-tim... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ['Provide architectural, strategic, and scale ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ['Implement ANet’s next generation of technolo... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Descriptions\n",
+ "0 ['Opportunity to influence and impact the arch...\n",
+ "1 ['1. In the “Resume Section:” attach an update...\n",
+ "2 ['Build distributed, high-throughput, real-tim...\n",
+ "3 ['Provide architectural, strategic, and scale ...\n",
+ "4 ['Implement ANet’s next generation of technolo..."
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_software = df_software.dropna()\n",
+ "df_software = df_software[[\"Descriptions\"]]\n",
+ "print(type(df_software))\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " [Opportunity to influence and impact the archi... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " [1. In the “Resume Section:” attach an updated... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " [Build distributed, high-throughput, real-time... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " [Provide architectural, strategic, and scale r... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " [Implement ANet’s next generation of technolog... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Descriptions\n",
+ "0 [Opportunity to influence and impact the archi...\n",
+ "1 [1. In the “Resume Section:” attach an updated...\n",
+ "2 [Build distributed, high-throughput, real-time...\n",
+ "3 [Provide architectural, strategic, and scale r...\n",
+ "4 [Implement ANet’s next generation of technolog..."
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Turn all data from list literals to list\n",
+ "for i in range (0, len(df_software)):\n",
+ " if type(df_software.iloc[i,0]) is str:\n",
+ " df_software.iloc[i,0] = ast.literal_eval(df_software.iloc[i,0])\n",
+ "\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Testing\n",
+ "\n",
+ "df_software.iloc[0][0][2]\n",
+ "for data in df_software[\"Descriptions\"]:\n",
+ " print(type(data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "desc_se = df_software[\"Descriptions\"].tolist()\n",
+ "len(desc_se)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "664"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "desc_se_flat = [item for sublist in desc_se for item in sublist]\n",
+ "\n",
+ "# create the rank of documents – we will use it later\n",
+ "ranks = []\n",
+ "for i in range(1, len(desc_se_flat)+1):\n",
+ " ranks.append(i)\n",
+ "len(desc_se_flat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Opportunity influence impact architecture, standards, design key product initiatives applications Java services',\n",
+ " 'Contribute Sr. Individual contributor within team top engineers',\n",
+ " 'Work dynamic, fast-paced, Agile team environment',\n",
+ " 'BS/MS Computer Science equivalent field',\n",
+ " '3+ years commercial software development, proficient developing multi-tier solutions']"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Remove stop words\n",
+ "from nltk.corpus import stopwords\n",
+ "\n",
+ "df_stopwords = pd.DataFrame({'descs':desc_se_flat})\n",
+ "\n",
+ "stop = stopwords.words('english')\n",
+ "\n",
+ "df_stopwords['descs'] = df_stopwords['descs'].apply(lambda x: \" \".join(x for x in x.split() if x not in stop))\n",
+ "df_stopwords\n",
+ "desc_se_flat = df_stopwords[\"descs\"].tolist()\n",
+ "desc_se_flat[0:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import nltk\n",
+ "from nltk.stem.snowball import SnowballStemmer\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stop Words\n",
+ "stopwords = nltk.corpus.stopwords.words('english')\n",
+ "# Load 'stemmer'\n",
+ "stemmer = SnowballStemmer(\"english\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Functions for sentence tokenizer, to remove numeric tokens and raw punctuation\n",
+ "def tokenize_and_stem(text):\n",
+ " tokens = [word for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n",
+ " filtered_tokens = []\n",
+ " for token in tokens:\n",
+ " if re.search('[a-zA-Z]', token):\n",
+ " filtered_tokens.append(token)\n",
+ " stems = [stemmer.stem(t) for t in filtered_tokens]\n",
+ " return stems\n",
+ "\n",
+ "def tokenize_only(text):\n",
+ " tokens = [word.lower() for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n",
+ " filtered_tokens = []\n",
+ " for token in tokens:\n",
+ " if re.search('[a-zA-Z]', token):\n",
+ " filtered_tokens.append(token)\n",
+ " return filtered_tokens"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:301: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['abov', 'afterward', 'alon', 'alreadi', 'alway', 'ani', 'anoth', 'anyon', 'anyth', 'anywher', 'becam', 'becaus', 'becom', 'befor', 'besid', 'cri', 'describ', 'dure', 'els', 'elsewher', 'empti', 'everi', 'everyon', 'everyth', 'everywher', 'fifti', 'forti', 'henc', 'hereaft', 'herebi', 'howev', 'hundr', 'inde', 'mani', 'meanwhil', 'moreov', 'nobodi', 'noon', 'noth', 'nowher', 'onc', 'onli', 'otherwis', 'ourselv', 'perhap', 'pleas', 'sever', 'sinc', 'sincer', 'sixti', 'someon', 'someth', 'sometim', 'somewher', 'themselv', 'thenc', 'thereaft', 'therebi', 'therefor', 'togeth', 'twelv', 'twenti', 'veri', 'whatev', 'whenc', 'whenev', 'wherea', 'whereaft', 'wherebi', 'wherev', 'whi', 'yourselv'] not in stop_words.\n",
+ " 'stop_words.' % sorted(inconsistent))\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(664, 7850)\n",
+ " (0, 4929)\t0.17743746587167045\n",
+ " (0, 3727)\t0.17743746587167045\n",
+ " (0, 3567)\t0.17264559939088153\n",
+ " (0, 529)\t0.14895401805525083\n",
+ " (0, 6668)\t0.1534023937405399\n",
+ " (0, 1955)\t0.10971647619477819\n",
+ " (0, 3999)\t0.1684946958388015\n",
+ " (0, 5389)\t0.11621622033271585\n",
+ " (0, 3748)\t0.17743746587167045\n",
+ " (0, 458)\t0.11550157734889074\n",
+ " (0, 3901)\t0.12261525509304118\n",
+ " (0, 6244)\t0.13185549725911921\n",
+ " (0, 4932)\t0.19898436235309114\n",
+ " (0, 3730)\t0.1831050409852232\n",
+ " (0, 3568)\t0.1831050409852232\n",
+ " (0, 543)\t0.1831050409852232\n",
+ " (0, 6671)\t0.17743746587167045\n",
+ " (0, 1990)\t0.1831050409852232\n",
+ " (0, 4002)\t0.1831050409852232\n",
+ " (0, 5401)\t0.1831050409852232\n",
+ " (0, 3749)\t0.1831050409852232\n",
+ " (0, 491)\t0.19898436235309114\n",
+ " (0, 3930)\t0.17743746587167045\n",
+ " (0, 4933)\t0.19898436235309114\n",
+ " (0, 3731)\t0.1831050409852232\n",
+ " :\t:\n",
+ " (662, 4348)\t0.1671037662089728\n",
+ " (663, 2046)\t0.09036326564983015\n",
+ " (663, 6513)\t0.1354035771870305\n",
+ " (663, 5334)\t0.1463354649491617\n",
+ " (663, 5878)\t0.16033148000186215\n",
+ " (663, 5719)\t0.19862564732951413\n",
+ " (663, 5034)\t0.15358533579231382\n",
+ " (663, 1672)\t0.20797235646308396\n",
+ " (663, 2358)\t0.20797235646308396\n",
+ " (663, 1673)\t0.20797235646308396\n",
+ " (663, 4105)\t0.2211458030981143\n",
+ " (663, 5042)\t0.2211458030981143\n",
+ " (663, 2359)\t0.2211458030981143\n",
+ " (663, 5886)\t0.2211458030981143\n",
+ " (663, 2143)\t0.2211458030981143\n",
+ " (663, 6533)\t0.2211458030981143\n",
+ " (663, 5721)\t0.2211458030981143\n",
+ " (663, 4106)\t0.2211458030981143\n",
+ " (663, 5043)\t0.2211458030981143\n",
+ " (663, 1674)\t0.2211458030981143\n",
+ " (663, 2360)\t0.2211458030981143\n",
+ " (663, 5887)\t0.2211458030981143\n",
+ " (663, 2144)\t0.2211458030981143\n",
+ " (663, 6534)\t0.2211458030981143\n",
+ " (663, 5722)\t0.2211458030981143\n"
+ ]
+ }
+ ],
+ "source": [
+ "# tfidf vectorizer\n",
+ "tfidf_vectorizer = TfidfVectorizer(max_df=0.5, max_features=200000, min_df=1, stop_words='english', use_idf=True,\n",
+ " tokenizer=tokenize_and_stem, ngram_range=(1,3))\n",
+ "\n",
+ "#fit the vectorizer to data\n",
+ "tfidf_matrix = tfidf_vectorizer.fit_transform(desc_se_flat)\n",
+ "terms = tfidf_vectorizer.get_feature_names()\n",
+ "\n",
+ "print(tfidf_matrix.shape)\n",
+ "print(tfidf_matrix)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[4, 3, 3, 2, 4, 4, 3, 1, 4, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 2, 1, 4, 4, 3, 2, 4, 4, 1, 1, 2, 1, 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3, 3, 4, 3, 3, 1, 1, 1, 1, 2, 3, 0, 3, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 3, 4, 1, 1, 1, 0, 1, 3, 1, 3, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 2, 4, 3, 1, 1, 1, 1, 1, 1, 2, 2, 4, 2, 4, 1, 2, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, 2, 1, 0, 1, 1, 1, 2, 1, 1, 1, 3, 1, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 0, 1, 1, 1, 4, 1, 4, 1, 1, 1, 1, 2, 4, 3, 3, 2, 4, 4, 1, 4, 1, 1, 1, 1, 1, 0, 1, 1, 1, 3, 1, 0, 1, 1, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 1, 1, 1, 1, 0, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 3, 1, 0, 1, 3, 3, 2, 0, 1, 1, 3, 1, 1, 3, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 3, 2, 1, 3, 1, 1, 4, 3, 3, 4, 2, 4, 4, 3, 1, 4, 1, 1, 1, 1, 0, 1, 1, 1, 2, 3, 1, 1, 1, 1, 3, 3, 1, 0, 3, 1, 1, 1, 0, 1, 2, 4, 4, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 2, 3, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 4, 1, 1, 1, 1, 2, 2, 1, 4, 4, 3, 2, 4, 4, 1, 1, 2, 1, 4, 3, 4, 3, 3, 2, 4, 4, 1, 4, 1, 1, 1, 1, 1, 0, 1, 1, 1, 3, 1, 3, 1, 0, 1, 1, 1, 1, 1, 0, 3, 3, 1, 2, 1, 1, 3, 1, 1, 3, 1, 0, 3, 3, 3, 1, 3, 1, 1, 3, 1, 3, 1, 1, 4, 2, 3, 3, 0, 1, 1, 2, 4, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 4, 4, 0, 1, 0, 1, 3, 3, 1, 2, 1, 1, 1, 1, 3, 0, 0, 3, 1, 0, 1, 1, 1, 1, 2, 4, 4, 4, 4, 4, 4, 4, 1, 1, 0, 3, 3, 1, 1, 1, 1, 3, 3, 1, 0, 1, 4, 1, 3, 1, 1, 1, 0, 0, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 4, 1, 1, 0, 1, 1, 3, 0, 3, 1, 3, 1, 4, 3, 0, 1, 1, 1, 1, 1, 1, 2, 4, 0, 2, 4, 1, 0, 0, 1, 3, 3, 0, 1, 3, 0, 2, 4, 1, 1, 1, 4, 3, 0, 1, 1, 0, 1, 1, 1, 2, 3, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 3, 1, 1, 1, 3, 3, 1, 4, 3, 4, 4, 2, 4, 4, 4, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 0, 3, 1, 0, 1, 1, 1, 1, 3, 3, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 2, 2, 2, 4, 2, 3, 0, 1, 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Import Kmeans\n",
+ "from sklearn.cluster import KMeans\n",
+ "\n",
+ "# Define number of clusters\n",
+ "num_clusters = 5\n",
+ "\n",
+ "#Running clustering algorithm\n",
+ "km = KMeans(n_clusters=num_clusters, init=\"k-means++\")\n",
+ "km.fit(tfidf_matrix)\n",
+ "\n",
+ "#final clusters\n",
+ "clusters = km.labels_.tolist()\n",
+ "print(clusters)\n",
+ "description_data = {'rank': ranks, 'descriptions': desc_se_flat, 'cluster': clusters }\n",
+ "frame = pd.DataFrame(complaints_data, index = [clusters], columns = ['rank', 'cluster'])\n",
+ "\n",
+ "#number of docs per cluster\n",
+ "# frame['cluster'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cluster 0 words: b'design', b'design', b'development', b'familiar', b'strong', b'oo', b'oo', b'oo',\n",
+ "Cluster 1 words: b'experience', b'skills', b'communication', b'strong', b'software', b'testing', b'development', b'data',\n",
+ "Cluster 2 words: b'computer', b'degree', b'science', b'computer', b'field', b'degree', b'degree', b'related',\n",
+ "Cluster 3 words: b'work', b'team', b'environment', b'agile', b'experience', b'ability', b'fast-paced', b'ability',\n",
+ "Cluster 4 words: b'years', b'development', b'java', b'experience', b'development', b'software', b'software', b'years',\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:15: DeprecationWarning: \n",
+ ".ix is deprecated. Please use\n",
+ ".loc for label based indexing or\n",
+ ".iloc for positional indexing\n",
+ "\n",
+ "See the documentation here:\n",
+ "http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated\n",
+ " from ipykernel import kernelapp as app\n"
+ ]
+ }
+ ],
+ "source": [
+ "totalvocab_stemmed = []\n",
+ "totalvocab_tokenized = []\n",
+ "for i in desc_se_flat:\n",
+ " allwords_stemmed = tokenize_and_stem(i)\n",
+ " totalvocab_stemmed.extend(allwords_stemmed)\n",
+ " allwords_tokenized = tokenize_only(i)\n",
+ " totalvocab_tokenized.extend(allwords_tokenized)\n",
+ "vocab_frame = pd.DataFrame({'words': totalvocab_tokenized}, index = totalvocab_stemmed)\n",
+ "\n",
+ "#sort cluster centers by proximity to centroid\n",
+ "order_centroids = km.cluster_centers_.argsort()[:, ::-1]\n",
+ "for i in range(num_clusters):\n",
+ " print(\"Cluster %d words:\" % i, end=\" \")\n",
+ " for ind in order_centroids[i, :8]:\n",
+ " print(' %s' % vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8', 'ignore'), end=',')\n",
+ " print()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/.ipynb_checkpoints/extract-checkpoint.py b/.ipynb_checkpoints/extract-checkpoint.py
new file mode 100644
index 0000000..7795a13
--- /dev/null
+++ b/.ipynb_checkpoints/extract-checkpoint.py
@@ -0,0 +1,205 @@
+import time
+from datetime import datetime
+import os # for path control
+import sys
+
+# ! Get the package to create dialog boxes
+import easygui
+
+# ! Get the package to visually print results
+import matplotlib.pyplot as plt
+
+# ! Get the package to control the web
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
+
+# ! These packages to deal with tables and multi-dimensional array
+import numpy as np
+import pandas as pd
+
+
+master_job_link = "https://www.indeed.com/jobs?q=Software+Engineer&l=Boston"
+master_job_link2 = "https://www.indeed.com/jobs?q=data+scientist&l=Boston"
+master_job_link3 = "https://www.indeed.com/jobs?q=web+developer&l=Boston"
+link2 = "https://www.indeed.com/jobs?q=Software%20Engineer&l=Boston&vjk=2826852a029ff8f6" #Example of a job's link
+
+
+def headless_options():
+    """
+    Build the configuration used for the Chrome driver.
+
+    Note that, despite the function's name, no headless flag is passed here, so
+    the browser is not told to run in the background. The flags below request a
+    1366x768 maximized window and pass the extension, proxy, GPU, /dev/shm,
+    sandbox and certificate-error switches; an image content-setting pref of 2
+    is also set (presumably "block images" -- confirm against Chrome's prefs).
+
+    Returns:
+        options -- the options configurations to be used in the Google Chrome driver
+    """
+    chrome_flags = (
+        "--window-size=1366,768",
+        "--disable-extensions",
+        "--proxy-server='direct://'",
+        "--proxy-bypass-list=*",
+        "--start-maximized",
+        '--disable-gpu',
+        '--disable-dev-shm-usage',
+        '--no-sandbox',
+        '--ignore-certificate-errors',
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+    options.add_experimental_option(
+        "prefs", {"profile.managed_default_content_settings.images": 2})
+    return options
+
+
+def get_all_ids(driver_path, job_link, num_page):
+    """
+    Collect the job ids found on the first ``num_page`` result pages of ``job_link``.
+
+    :param driver_path: path of the Chrome driver executable
+    :param job_link: search-results URL; ``&start=<offset>`` is appended for each page
+    :param num_page: number of result pages to visit (the offset grows by 10 per page)
+    :return: a tuple ``(ids, driver)`` -- the ``data-jk`` attribute values of every
+        matching element (duplicates are kept) and the still-open WebDriver, which
+        the caller is responsible for quitting
+    """
+    driver = webdriver.Chrome(driver_path, options=headless_options())
+    ids = []
+    try:
+        for page in range(num_page):
+            driver.get(job_link + '&start=' + str(page * 10))
+            ids_elements = driver.find_elements_by_xpath('//*[@data-jk]')
+            ids.extend(link.get_attribute("data-jk") for link in ids_elements)
+    except BaseException:
+        # On failure the caller never receives the driver, so close the browser
+        # here instead of leaking the process, then let the error propagate.
+        driver.quit()
+        raise
+
+    return ids, driver
+
+
+def write_to_txt(all_ids):
+    """
+    Write all the ids to ``data/ids.txt`` (relative to the working directory).
+
+    Each id is written on its own line; the final line holds the current local
+    timestamp, which ``read_from_txt`` pops off again as the crawl time.
+
+    :param all_ids: all the job ids in a list
+    :return: None
+    """
+    # The data folder may not exist yet on a first run.
+    os.makedirs("data", exist_ok=True)
+    # os.path.join avoids the "\i" escape of the old "data\ids.txt" literal and
+    # also works on non-Windows systems; the with-block closes the file on error.
+    with open(os.path.join("data", "ids.txt"), "w") as output:
+        output.writelines("%s\n" % item for item in all_ids)
+        output.write("%s" % datetime.now(tz=None))
+
+
+def read_from_txt():
+    """
+    Read back the ids stored in ``data/ids.txt`` (relative to the working directory).
+
+    The file is split on newlines; the last line is treated as the crawl
+    timestamp and removed from the id list.
+
+    :return: a tuple ``(read_ids, crawl_time)`` -- the list of id strings and the
+        timestamp string taken from the last line of the file
+    """
+    # The with-block closes the handle, which the previous version never did.
+    with open(os.path.join("data", "ids.txt"), "r") as file:
+        read_ids = file.read().split("\n")
+    crawl_time = read_ids.pop()
+    return read_ids, crawl_time
+
+
+def test():
+    """
+    Manual timing check: print the current time, sleep for two seconds, print
+    the time again and finally print the elapsed difference.
+
+    An earlier no-op stub ``test(driver, job_link, job_ids)`` was dropped: this
+    definition rebound the name ``test`` right after it, so the stub could
+    never be called.
+    """
+    started = datetime.now(tz=None)
+    print(started)
+    time.sleep(2)
+    finished = datetime.now(tz=None)
+    print(finished)
+    print(finished - started)
+
+
+def get_desc(driver, job_link, job_ids):
+    """
+    Collect company, position, id and listed description items for every job.
+
+    For each id the page ``job_link + "&vjk=" + id`` is opened and the function
+    waits up to 10 seconds each for the ``<li>`` elements under ``#vjs-desc``,
+    for ``#vjs-cn`` (company) and for ``#vjs-jobtitle`` (position) to become
+    visible. If anything fails for a job, the error type is printed and that
+    job is recorded as a row of NaN values.
+
+    :param driver: the WebDriver
+    :param job_link: the master job link
+    :param job_ids: a list of all ids
+    :return: a Pandas DataFrame with the columns Companies, Positions, ID and
+        Descriptions, one row per entry of ``job_ids``; Descriptions holds the
+        list of item texts
+    """
+    rows = []
+
+    # ``job_id`` rather than ``id`` so the builtin is not shadowed
+    for job_id in job_ids:
+        try:
+            driver.get(job_link + "&vjk=" + job_id)
+
+            # wait for element to be visible then get it
+            desc_li = WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located(
+                (By.XPATH, '//div[@id="vjs-desc"]//li')))
+            desc_li = [el.text for el in desc_li]  # keep only the text of the WebElements
+            company = WebDriverWait(driver, 10).until(
+                EC.visibility_of_element_located((By.CSS_SELECTOR, '#vjs-cn'))).text
+            position = WebDriverWait(driver, 10).until(
+                EC.visibility_of_element_located((By.CSS_SELECTOR, '#vjs-jobtitle'))).text
+
+            # Only record the job once every field was fetched successfully
+            rows.append((company, position, job_id, desc_li))
+        except Exception:
+            # ``Exception`` instead of a bare except: Ctrl-C must still stop the crawl
+            print("Oops!", sys.exc_info()[0], "occurred.")
+            rows.append((np.nan, np.nan, np.nan, np.nan))
+
+    # Build the frame from the rows directly; routing the ragged description
+    # lists through np.array is fragile and fails on recent NumPy versions.
+    return pd.DataFrame(rows, columns=["Companies", "Positions", "ID", "Descriptions"])
+
+
+def write_to_csv(dframe):
+    """
+    Export the DataFrame to ``data/data_test.csv``, creating the ``data`` folder
+    first when it does not exist yet.
+
+    :param dframe: the DataFrame to export (written with a header, without the index)
+    :return: No return
+    """
+    # realpath('__file__') resolves the literal name "__file__" against the
+    # current working directory, so the target is the "data" folder inside the
+    # working directory. os.path.join replaces the old "\data" literals, which
+    # relied on invalid escape sequences and Windows-only separators.
+    directory = os.path.join(os.path.dirname(os.path.realpath('__file__')), "data")
+    try:
+        # Create target Directory
+        os.mkdir(directory)
+        print("Directory ", directory, " Created ")
+    except FileExistsError:
+        print("Directory ", directory, " already exists")
+
+    dframe.to_csv(os.path.join(directory, "data_test.csv"), index=False, header=True)
+
+# def get_desc_test(driver, job_link)
+
+
+def main():
+    """
+    Run everything: install/locate the Chrome driver, collect the job ids from
+    5 result pages of ``master_job_link2``, store them in the ids text file,
+    read them back, fetch every job description and export the result to CSV.
+
+    :return: nothing
+    """
+
+    driver_path = ChromeDriverManager().install()
+
+    # Collect the ids and persist them
+    num_pages = 5
+    all_ids, driver = get_all_ids(driver_path, master_job_link2, num_pages)
+    try:
+        write_to_txt(all_ids)
+
+        # Read the stored ids (and crawl timestamp) back, then fetch each job.
+        # ``crawl_time`` rather than ``time`` so the time module is not shadowed.
+        deez_ids, crawl_time = read_from_txt()
+        print("deez ids", deez_ids, "\n", "Time: ", crawl_time)
+        df = get_desc(driver, master_job_link2, deez_ids)
+        write_to_csv(df)
+    finally:
+        # Always close the browser, even when a step above fails. The former
+        # implicitly_wait(10) right before quitting had no effect and is gone.
+        driver.quit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.ipynb_checkpoints/extract_skills-checkpoint.py b/.ipynb_checkpoints/extract_skills-checkpoint.py
new file mode 100644
index 0000000..56c4a38
--- /dev/null
+++ b/.ipynb_checkpoints/extract_skills-checkpoint.py
@@ -0,0 +1,278 @@
+# scrape indeed job listings to rank given skills in order of most needed
+# takes about 1 minute per 10 job listings or 2 minutes per page
+# @author: Osamah Mandawi
+# @email: oamandawi@brandeis.edu
+
+"""
+This is an explanation of the structure of indeed.com
+"""
+# This is an example of what the first job listings page for software
+ # engineering in MA looks like: https://www.indeed.com/jobs?q=software+engineer&l=MA&sort=date
+# Now, if we look at a single job: https://www.indeed.com/jobs?q=software+engineer&l=MA&sort=date&vjk=3916106ade6d80b3
+# Note that this is the same URL as the one before, with only vjk=3916106ade6d80b3, the unique job id, added to it.
+# Overall, this means we can replace the text after q= to get results for a different job (with spaces converted to +),
+# and replace text after l= with state abbreviation
+
+# ? Must have the following:
+# 1. Have pip ready: https://stackoverflow.com/questions/4750806/how-do-i-install-pip-on-windows?rq=1
+# * Note, you may already have pip, so check by going to cmd, typing python, and then import pip and you should get no errors, if you have it
+# 2. Have selenium ready: https://pypi.org/project/selenium/
+# * use: 'pip install selenium' without quotes in cmd
+
+# ? Nice to have the following to get visual results:
+# 1. Have easygui ready: https://pypi.org/project/easygui/
+# * use: 'pip install easygui' without quotes in cmd
+# 2. Have matplotlib ready (this is quite heavy): https://pypi.org/project/matplotlib/
+# * use: 'pip install matplotlib' without quotes in cmd
+
+
+# ! Get the packages to count time program took to run
+import time
+import datetime
+
+# ! Get the package to create dialog boxes
+import easygui
+
+# ! Get the package to visually print results
+import matplotlib.pyplot as plt
+
+# ! Get the package to control the web
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
+
+
+def headless_options():
+    """
+    Build the configuration for the Chrome driver, including the headless flags
+    so that the program crawls in the background.
+
+    Returns:
+        options -- the options configurations to be used in the Google Chrome driver
+    """
+    chrome_flags = (
+        "headless",
+        "--window-size=1920,1080",
+        "--disable-extensions",
+        "--proxy-server='direct://'",
+        "--proxy-bypass-list=*",
+        "--start-maximized",
+        '--headless',
+        '--disable-gpu',
+        '--disable-dev-shm-usage',
+        '--no-sandbox',
+        '--ignore-certificate-errors',
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+    options.add_experimental_option(
+        "prefs", {"profile.managed_default_content_settings.images": 2})
+    return options
+
+
+def set_driver_path():
+    """
+    Ask ChromeDriverManager to install the Chrome driver and hand back the
+    path reported by that call.
+
+    Returns:
+        driver_path -- path of Chrome driver
+    """
+    return ChromeDriverManager().install()
+
+
+def set_driver(driver_path):
+    """Create a Chrome driver from the driver path, configured by headless_options()
+    Arguments:
+        driver_path {string address} -- path of Chrome driver
+    Returns:
+        driver -- the Chrome driver to be used for web crawling
+    """
+    return webdriver.Chrome(driver_path, options=headless_options())
+
+
+def start_gui():
+    """Introduce user to the program, and get some information: which field, which state, how many pages of indeed, and which skills.
+
+    Raises:
+        SystemExit -- if the user cancels one of the input dialogs
+        ValueError -- if the number of pages entered is not an integer
+
+    Returns:
+        field -- the job field entered by the user, encoded for use in the search url
+        state -- which U.S. state to find job listings in
+        pages_range -- how many pages of indeed job listings to search through
+        skills -- which skills to look for (lower-cased, surrounding whitespace removed)
+        counter_dict -- the skills dictionary with how many jobs each skill appears in (all 0 at this point)
+        search_url_master -- a generated url with the field and state chosen by the user sorted by date to reduce duplicates
+    """
+    # Local import keeps this block self-contained.
+    from urllib.parse import quote_plus
+
+    def ask(prompt):
+        # easygui.enterbox returns None when the dialog is cancelled; stop
+        # cleanly instead of failing later with an AttributeError.
+        answer = easygui.enterbox(prompt)
+        if answer is None:
+            raise SystemExit("Extract_Skills: input cancelled by user.")
+        return answer
+
+    easygui.msgbox("Welcome to Extract_Skills V.0.1\nWe will be using indeed.com to extract our data.\nNote that we will only be looking at job listings within the U.S.\n")
+    # quote_plus turns spaces into "+" as before and additionally escapes
+    # characters such as the "+" in "c++", which a raw url would read as spaces.
+    field = quote_plus(ask(
+        "What kind of job do you want data on?\n(e.g. software engineering)"))
+    state = ask(
+        "What state in the U.S. are you looking at?\n(e.g. CA)")
+    pages_range = int(ask(
+        "How many pages of indeed.com to scrape?"))
+    raw_skills = ask(
+        "Enter skills, such as programming languages, to look for, seperated by /\n(e.g. python/sas/sql/java/php/master's degree/bachelor's degree)")
+    # Strip each entry so "python / sql" yields "python" and "sql"; drop empty
+    # entries produced by a trailing or doubled "/".
+    skills = [skill.strip().lower() for skill in raw_skills.split("/")]
+    skills = [skill for skill in skills if skill]
+    counter_dict = {skill: 0 for skill in skills}
+    #! create the search url using the job type and location
+    search_url_master = ('https://www.indeed.com/jobs?q=' + field
+                         + '&l=' + quote_plus(state) + '&sort=date')
+    print("Skills looking for:", skills)
+    return field, state, pages_range, skills, counter_dict, search_url_master
+
+
+def gather_job_listings(pages_range, search_url_master, driver_path):
+    """Gather the urls of all the job listings on as many result pages as requested by the user.
+
+    A fresh driver is started for every result page and is always quit again,
+    even when loading or reading the page fails.
+
+    Arguments:
+        pages_range {integer} -- how many pages of indeed job listings to search through
+        search_url_master {string} -- a generated url with the field and state chosen by the user sorted by date to reduce duplicates
+        driver_path {string address} -- path of Chrome driver
+
+    Returns:
+        start_time -- time in seconds of when the searching process started
+        sites -- the urls of all the job listings in a list, without duplicates, in first-seen order
+    """
+    start_time = time.time()
+    print("SEARCH STARTS")
+    sites = []
+    for i in range(pages_range):
+        driver = set_driver(driver_path)
+        try:
+            #! crawl to the i-th page of the search (10 listings per page)
+            driver.get(search_url_master + '&start=' + str(i * 10))
+            #! get the ids of all the job listings
+            ids = driver.find_elements_by_xpath('//*[@data-jk]')
+            jdks = [element.get_attribute('data-jk') for element in ids]
+        finally:
+            # do not leak a browser process when the page could not be processed
+            driver.quit()
+        #! combine the ids with the url and save them in a list so then we can go over them job by job
+        sites.extend(search_url_master + '&vjk=' + jdk for jdk in jdks)
+    #! remove duplicates
+    sites = list(dict.fromkeys(sites))
+    print("Amount of job postings found:", len(sites))
+    return start_time, sites
+
+
def count_keywords(txt, counter_dict):
    """Count in how many job descriptions each skill appears; update counter_dict

    A skill is matched as a whole phrase, so multi-word skills such as
    "master's degree" are found and punctuation next to a skill ("python,")
    does not hide it. A match must not be glued to a letter, digit,
    underscore, "+" or "#", which keeps "c" from matching "c++" or "c#" and
    "java" from matching "javascript". Matching is case-sensitive: the caller
    is expected to pass text and skills in the same case.

    Arguments:
        txt {string} -- the text description of the job
        counter_dict {dictionary} -- the dictionary of skills and how often they appeared in different jobs

    Returns:
        counter_dict -- the same dictionary, updated in place with how many jobs each skill appears in
    """
    import re  # imported here because the file-level import block is not part of this block

    #! collapse whitespace runs so multi-word skills still match across line breaks or double spaces
    description = " ".join(txt.split())
    for skill in counter_dict:
        needle = " ".join(skill.split())
        if not needle:
            #! an empty skill (e.g. from a trailing "/") can never be a real match
            continue
        pattern = r"(?<![\w+#])" + re.escape(needle) + r"(?![\w+#])"
        #! Note that this only counts a skill once even if it appears multiple times in the SAME job description
        if re.search(pattern, description):
            counter_dict[skill] += 1
    return counter_dict
+
+
def print_results(counter_dict):
    """Print every keyword next to the number of job listings it has appeared in

    Arguments:
        counter_dict {dictionary} -- the dictionary of skills and how often they appeared in different jobs
    """
    for keyword, listings in counter_dict.items():
        print(keyword, listings)
+
+
def skill_count(sites, counter_dict, driver_path):
    """Go listing by listing using the urls of the listings and count in how many listings the skills appear

    Arguments:
        sites {list} -- the urls of all the job listings in a list
        counter_dict {dictionary} -- the dictionary of skills and how often they appeared in different jobs
        driver_path {string address} -- path of Chrome driver

    Returns:
        counter_dict -- fully updated skills dictionary
        end_time -- time in seconds of when all the processes finished
    """
    for job_number, site in enumerate(sites, start=1):
        driver = set_driver(driver_path)
        try:
            driver.get(site)
            print("url of curr page", site)
            print("job number", job_number)
            found = driver.find_elements_by_id("vjs-desc")
            #! listings without a description element are skipped, not counted
            if found:
                #! lower-case and flatten the text to one line before matching skills
                description = found[0].text.lower().replace("\n", " ")
                # print(description) if you want to see what the description says
                counter_dict = count_keywords(description, counter_dict)
                print_results(counter_dict)
        finally:
            #! always release the browser, even if loading or reading the listing failed
            driver.quit()
    end_time = time.time()
    return counter_dict, end_time
+
+
def end_gui(start_time, end_time, counter_dict, sites):
    """Report the findings once the program finishes: run time on the console, ranked skills in a message box

    Arguments:
        start_time -- time in seconds of when the searching process started
        end_time -- time in seconds of when all the processes finished
        counter_dict {dictionary} -- the dictionary of skills and how often they appeared in different jobs
        sites {list} -- the urls of all the job listings in a list
    """
    elapsed = str(datetime.timedelta(seconds=(end_time-start_time)))
    print("Time program took to run: "+elapsed)
    #! most mentioned skill first
    ranked = sorted(counter_dict, key=counter_dict.get, reverse=True)
    result = "".join(
        skill.capitalize()+" ("+str(counter_dict[skill])+")\n" for skill in ranked)
    easygui.msgbox("Our final list of skills across "+str(len(sites)) +
                   " jobs, sorted from most needed to least:\n"+result+"\nFinished in "+elapsed)
+
+
def bar_print(field,state,sites,counter_dict):
    """Draw the skill counts as a bar chart, save it as a .png, then show it

    The image is saved in the current working directory under the name
    "<number of listings><field><state>.png".

    Arguments:
        field {string} -- the job field that was searched ("+" is shown as a space in the title)
        state {string} -- the state that was searched
        sites {list} -- the urls of all the job listings in a list
        counter_dict {dictionary} -- the dictionary of skills and how often they appeared in different jobs
    """
    #! pass names and counts explicitly: unpacking zip(*items) gives plt.bar no arguments when the dictionary is empty
    skills = list(counter_dict.keys())
    mentions = list(counter_dict.values())
    plt.bar(skills, mentions)
    plt.ylabel("Amount of job listing mentions")
    plt.xlabel("Skill")
    plt.suptitle("For {} {} jobs in {}, U.S.".format(len(sites),field.replace("+"," "),state))
    #! save before show, while the figure still holds the chart
    plt.savefig(str(len(sites))+field+state+".png")
    plt.show()
+
+
def main():
    """Run everything: ask the user what to search, gather the listings, count the skills, report and plot
    """
    chrome_path = set_driver_path()
    #! start_gui also hands back the plain skills list, which is not used here
    gui_answers = start_gui()
    field, state, pages_range, _skills, counter_dict, search_url_master = gui_answers
    start_time, sites = gather_job_listings(
        pages_range, search_url_master, chrome_path)
    counter_dict, end_time = skill_count(sites, counter_dict, chrome_path)
    end_gui(start_time, end_time, counter_dict, sites)
    bar_print(field,state,sites,counter_dict)


if __name__ == "__main__":
    main()
+
+# ? Example result from 257 job listings of software engineering in CA
+# job number 257
+# Java 20
+# Python 18
+# Perl 0
+# C++ 15
+# C# 13
+# Rust 2
+# Ruby 3
+# VB 0
+# MATLAB 3
+# PHP 0
+# Scala 1
+# HTML 5
+# CSS 6
diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000..b311cab
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,695 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import ast"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Companies | \n",
+ " Positions | \n",
+ " ID | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " State Street | \n",
+ " Software Engineer I (EMS Trading Team) - Charl... | \n",
+ " e8a9b71f6c5126ae | \n",
+ " ['Opportunity to influence and impact the arch... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Google | \n",
+ " Software Engineering Intern, BS, Winter 2020 | \n",
+ " 1d99705341f0d46c | \n",
+ " ['1. In the “Resume Section:” attach an update... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " DataDog | \n",
+ " Software Engineer | \n",
+ " f47387c886f1b1dd | \n",
+ " ['Build distributed, high-throughput, real-tim... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Pluralsight | \n",
+ " Software Engineer | \n",
+ " ddc20246b79174f2 | \n",
+ " ['Provide architectural, strategic, and scale ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Achievement Network (ANet) | \n",
+ " Software Engineer | \n",
+ " a0303ea18678af0c | \n",
+ " ['Implement ANet’s next generation of technolo... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Companies \\\n",
+ "0 State Street \n",
+ "1 Google \n",
+ "2 DataDog \n",
+ "3 Pluralsight \n",
+ "4 Achievement Network (ANet) \n",
+ "\n",
+ " Positions ID \\\n",
+ "0 Software Engineer I (EMS Trading Team) - Charl... e8a9b71f6c5126ae \n",
+ "1 Software Engineering Intern, BS, Winter 2020 1d99705341f0d46c \n",
+ "2 Software Engineer f47387c886f1b1dd \n",
+ "3 Software Engineer ddc20246b79174f2 \n",
+ "4 Software Engineer a0303ea18678af0c \n",
+ "\n",
+ " Descriptions \n",
+ "0 ['Opportunity to influence and impact the arch... \n",
+ "1 ['1. In the “Resume Section:” attach an update... \n",
+ "2 ['Build distributed, high-throughput, real-tim... \n",
+ "3 ['Provide architectural, strategic, and scale ... \n",
+ "4 ['Implement ANet’s next generation of technolo... "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_directory = os.path.dirname(os.path.realpath('__file__')) + \"\\data\"\n",
+ "df_software = pd.read_csv(data_directory+\"\\software_engineer.csv\")\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " ['Opportunity to influence and impact the arch... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " ['1. In the “Resume Section:” attach an update... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " ['Build distributed, high-throughput, real-tim... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " ['Provide architectural, strategic, and scale ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " ['Implement ANet’s next generation of technolo... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Descriptions\n",
+ "0 ['Opportunity to influence and impact the arch...\n",
+ "1 ['1. In the “Resume Section:” attach an update...\n",
+ "2 ['Build distributed, high-throughput, real-tim...\n",
+ "3 ['Provide architectural, strategic, and scale ...\n",
+ "4 ['Implement ANet’s next generation of technolo..."
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_software = df_software.dropna()\n",
+ "df_software = df_software[[\"Descriptions\"]]\n",
+ "print(type(df_software))\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Descriptions | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " [Opportunity to influence and impact the archi... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " [1. In the “Resume Section:” attach an updated... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " [Build distributed, high-throughput, real-time... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " [Provide architectural, strategic, and scale r... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " [Implement ANet’s next generation of technolog... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Descriptions\n",
+ "0 [Opportunity to influence and impact the archi...\n",
+ "1 [1. In the “Resume Section:” attach an updated...\n",
+ "2 [Build distributed, high-throughput, real-time...\n",
+ "3 [Provide architectural, strategic, and scale r...\n",
+ "4 [Implement ANet’s next generation of technolog..."
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Turn all data from list literals to list\n",
+ "for i in range (0, len(df_software)):\n",
+ " if type(df_software.iloc[i,0]) is str:\n",
+ " df_software.iloc[i,0] = ast.literal_eval(df_software.iloc[i,0])\n",
+ "\n",
+ "df_software.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Testing\n",
+ "\n",
+ "df_software.iloc[0][0][2]\n",
+ "for data in df_software[\"Descriptions\"]:\n",
+ " print(type(data))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "desc_se = df_software[\"Descriptions\"].tolist()\n",
+ "len(desc_se)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "664"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "desc_se_flat = [item for sublist in desc_se for item in sublist]\n",
+ "\n",
+ "# create the rank of documents – we will use it later\n",
+ "ranks = []\n",
+ "for i in range(1, len(desc_se_flat)+1):\n",
+ " ranks.append(i)\n",
+ "len(desc_se_flat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package stopwords to\n",
+ "[nltk_data] C:\\Users\\tarom\\AppData\\Roaming\\nltk_data...\n",
+ "[nltk_data] Unzipping corpora\\stopwords.zip.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "['Opportunity influence impact architecture, standards, design key product initiatives applications Java services',\n",
+ " 'Contribute Sr. Individual contributor within team top engineers',\n",
+ " 'Work dynamic, fast-paced, Agile team environment',\n",
+ " 'BS/MS Computer Science equivalent field',\n",
+ " '3+ years commercial software development, proficient developing multi-tier solutions']"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Remove stopwords\n",
+ "import nltk\n",
+ "nltk.download('stopwords')\n",
+ "\n",
+ "df_stopwords = pd.DataFrame({'descs':desc_se_flat})\n",
+ "\n",
+ "stop = stopwords.words('english')\n",
+ "\n",
+ "df_stopwords['descs'] = df_stopwords['descs'].apply(lambda x: \" \".join(x for x in x.split() if x not in stop))\n",
+ "df_stopwords\n",
+ "desc_se_flat = df_stopwords[\"descs\"].tolist()\n",
+ "desc_se_flat[0:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from nltk.stem.snowball import SnowballStemmer\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stop Words\n",
+ "stopwords = nltk.corpus.stopwords.words('english')\n",
+ "# Load 'stemmer'\n",
+ "stemmer = SnowballStemmer(\"english\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Functions for sentence tokenizer, to remove numeric tokens and raw punctuation\n",
+ "def tokenize_and_stem(text):\n",
+ " tokens = [word for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n",
+ " filtered_tokens = []\n",
+ " for token in tokens:\n",
+ " if re.search('[a-zA-Z]', token):\n",
+ " filtered_tokens.append(token)\n",
+ " stems = [stemmer.stem(t) for t in filtered_tokens]\n",
+ " return stems\n",
+ "\n",
+ "def tokenize_only(text):\n",
+ " tokens = [word.lower() for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent)]\n",
+ " filtered_tokens = []\n",
+ " for token in tokens:\n",
+ " if re.search('[a-zA-Z]', token):\n",
+ " filtered_tokens.append(token)\n",
+ " return filtered_tokens"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.feature_extraction.text import TfidfVectorizer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to\n",
+ "[nltk_data] C:\\Users\\tarom\\AppData\\Roaming\\nltk_data...\n",
+ "[nltk_data] Unzipping tokenizers\\punkt.zip.\n",
+ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\feature_extraction\\text.py:300: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['abov', 'afterward', 'alon', 'alreadi', 'alway', 'ani', 'anoth', 'anyon', 'anyth', 'anywher', 'becam', 'becaus', 'becom', 'befor', 'besid', 'cri', 'describ', 'dure', 'els', 'elsewher', 'empti', 'everi', 'everyon', 'everyth', 'everywher', 'fifti', 'forti', 'henc', 'hereaft', 'herebi', 'howev', 'hundr', 'inde', 'mani', 'meanwhil', 'moreov', 'nobodi', 'noon', 'noth', 'nowher', 'onc', 'onli', 'otherwis', 'ourselv', 'perhap', 'pleas', 'sever', 'sinc', 'sincer', 'sixti', 'someon', 'someth', 'sometim', 'somewher', 'themselv', 'thenc', 'thereaft', 'therebi', 'therefor', 'togeth', 'twelv', 'twenti', 'veri', 'whatev', 'whenc', 'whenev', 'wherea', 'whereaft', 'wherebi', 'wherev', 'whi', 'yourselv'] not in stop_words.\n",
+ " 'stop_words.' % sorted(inconsistent))\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(664, 7850)\n",
+ " (0, 492)\t0.1989843623530912\n",
+ " (0, 3751)\t0.1989843623530912\n",
+ " (0, 5402)\t0.18310504098522326\n",
+ " (0, 4003)\t0.18310504098522326\n",
+ " (0, 1991)\t0.18310504098522326\n",
+ " (0, 6673)\t0.18310504098522326\n",
+ " (0, 544)\t0.18310504098522326\n",
+ " (0, 3569)\t0.18310504098522326\n",
+ " (0, 3731)\t0.18310504098522326\n",
+ " (0, 4933)\t0.1989843623530912\n",
+ " (0, 3930)\t0.1774374658716705\n",
+ " (0, 491)\t0.1989843623530912\n",
+ " (0, 3749)\t0.18310504098522326\n",
+ " (0, 5401)\t0.18310504098522326\n",
+ " (0, 4002)\t0.18310504098522326\n",
+ " (0, 1990)\t0.18310504098522326\n",
+ " (0, 6671)\t0.1774374658716705\n",
+ " (0, 543)\t0.18310504098522326\n",
+ " (0, 3568)\t0.18310504098522326\n",
+ " (0, 3730)\t0.18310504098522326\n",
+ " (0, 4932)\t0.1989843623530912\n",
+ " (0, 6244)\t0.13185549725911924\n",
+ " (0, 3901)\t0.1226152550930412\n",
+ " (0, 458)\t0.11550157734889076\n",
+ " (0, 3748)\t0.1774374658716705\n",
+ " :\t:\n",
+ " (662, 1492)\t0.12115081441669116\n",
+ " (663, 5722)\t0.2211458030981143\n",
+ " (663, 6534)\t0.2211458030981143\n",
+ " (663, 2144)\t0.2211458030981143\n",
+ " (663, 5887)\t0.2211458030981143\n",
+ " (663, 2360)\t0.2211458030981143\n",
+ " (663, 1674)\t0.2211458030981143\n",
+ " (663, 5043)\t0.2211458030981143\n",
+ " (663, 4106)\t0.2211458030981143\n",
+ " (663, 5721)\t0.2211458030981143\n",
+ " (663, 6533)\t0.2211458030981143\n",
+ " (663, 2143)\t0.2211458030981143\n",
+ " (663, 5886)\t0.2211458030981143\n",
+ " (663, 2359)\t0.2211458030981143\n",
+ " (663, 5042)\t0.2211458030981143\n",
+ " (663, 4105)\t0.2211458030981143\n",
+ " (663, 1673)\t0.20797235646308396\n",
+ " (663, 2358)\t0.20797235646308396\n",
+ " (663, 1672)\t0.20797235646308396\n",
+ " (663, 5034)\t0.15358533579231382\n",
+ " (663, 5719)\t0.19862564732951413\n",
+ " (663, 5878)\t0.16033148000186215\n",
+ " (663, 5334)\t0.1463354649491617\n",
+ " (663, 6513)\t0.1354035771870305\n",
+ " (663, 2046)\t0.09036326564983015\n"
+ ]
+ }
+ ],
+ "source": [
+ "nltk.download('punkt')\n",
+ "# tfidf vectorizer\n",
+ "tfidf_vectorizer = TfidfVectorizer(max_df=0.5, max_features=200000, min_df=1, stop_words='english', use_idf=True,\n",
+ " tokenizer=tokenize_and_stem, ngram_range=(1,3))\n",
+ "\n",
+ "#fit the vectorizer to data\n",
+ "tfidf_matrix = tfidf_vectorizer.fit_transform(desc_se_flat)\n",
+ "terms = tfidf_vectorizer.get_feature_names()\n",
+ "\n",
+ "print(tfidf_matrix.shape)\n",
+ "print(tfidf_matrix)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[4, 4, 4, 2, 1, 1, 0, 3, 0, 4, 0, 4, 4, 1, 3, 1, 1, 4, 4, 2, 2, 4, 1, 4, 0, 2, 4, 1, 4, 3, 2, 1, 1, 4, 0, 4, 4, 0, 2, 0, 4, 4, 4, 4, 4, 4, 4, 1, 1, 4, 4, 0, 1, 4, 0, 1, 4, 0, 1, 4, 4, 4, 0, 0, 1, 0, 4, 1, 4, 4, 0, 4, 4, 0, 2, 0, 4, 4, 0, 4, 1, 1, 0, 4, 4, 1, 4, 1, 4, 0, 1, 0, 4, 4, 0, 4, 0, 4, 4, 4, 0, 4, 1, 4, 1, 4, 1, 0, 4, 4, 1, 1, 0, 0, 1, 0, 4, 3, 0, 4, 0, 0, 4, 4, 0, 4, 0, 4, 4, 0, 4, 4, 0, 2, 1, 0, 3, 3, 4, 4, 4, 4, 2, 2, 0, 2, 4, 4, 2, 1, 4, 4, 4, 4, 4, 2, 1, 0, 0, 2, 4, 0, 4, 1, 4, 2, 0, 0, 4, 0, 0, 0, 4, 4, 4, 4, 3, 4, 4, 4, 4, 0, 1, 1, 1, 1, 1, 1, 4, 1, 0, 1, 4, 1, 3, 4, 2, 1, 4, 4, 2, 1, 1, 4, 4, 4, 4, 0, 4, 4, 1, 3, 1, 1, 0, 3, 1, 1, 1, 4, 1, 1, 1, 2, 1, 1, 1, 1, 4, 4, 0, 0, 0, 4, 4, 2, 2, 4, 4, 1, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 0, 1, 1, 1, 1, 1, 4, 4, 4, 4, 4, 1, 1, 0, 0, 1, 1, 4, 0, 2, 4, 4, 1, 0, 3, 4, 0, 1, 4, 0, 0, 4, 0, 0, 2, 4, 4, 4, 0, 0, 4, 1, 0, 4, 4, 4, 4, 0, 2, 4, 0, 0, 1, 4, 4, 4, 1, 2, 1, 1, 0, 3, 0, 4, 0, 4, 4, 1, 3, 1, 1, 2, 1, 0, 3, 0, 4, 0, 0, 1, 1, 0, 0, 1, 4, 1, 4, 2, 1, 4, 4, 1, 0, 4, 4, 4, 4, 0, 0, 4, 0, 4, 0, 0, 0, 4, 2, 1, 0, 1, 3, 1, 4, 4, 4, 0, 4, 0, 2, 4, 0, 0, 4, 4, 2, 2, 4, 1, 4, 0, 2, 4, 1, 4, 3, 2, 1, 1, 4, 1, 4, 4, 2, 1, 1, 4, 4, 4, 4, 0, 4, 4, 1, 3, 1, 1, 0, 3, 4, 1, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 2, 4, 4, 0, 1, 3, 0, 4, 4, 4, 0, 0, 0, 0, 0, 4, 0, 1, 0, 0, 0, 1, 2, 0, 1, 4, 4, 4, 2, 1, 4, 4, 4, 3, 4, 4, 0, 0, 4, 4, 4, 0, 4, 1, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 1, 4, 4, 1, 4, 4, 0, 4, 4, 2, 4, 3, 1, 4, 1, 1, 4, 4, 4, 0, 1, 4, 0, 4, 0, 1, 1, 1, 1, 1, 1, 0, 4, 0, 0, 0, 0, 4, 3, 4, 4, 4, 4, 0, 4, 4, 1, 3, 0, 4, 4, 1, 4, 1, 1, 4, 4, 4, 4, 0, 3, 4, 4, 4, 0, 2, 4, 1, 4, 0, 0, 3, 4, 1, 1, 4, 1, 1, 1, 0, 0, 3, 0, 4, 1, 4, 4, 0, 2, 1, 1, 2, 4, 4, 1, 1, 1, 1, 1, 1, 4, 1, 1, 2, 1, 4, 3, 1, 1, 1, 1, 4, 1, 1, 4, 4, 0, 2, 0, 4, 4, 4, 0, 0, 1, 4, 1, 4, 0, 4, 4, 4, 4, 0, 4, 4, 0, 1, 0, 4, 4, 2, 1, 4, 1, 0, 0, 4, 0, 0, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 0, 4, 0, 4, 0, 0, 0, 4, 2, 0, 1, 0, 0, 1, 3, 1, 4, 4, 2, 2, 2, 4, 2, 0, 0, 4, 1]\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Import Kmeans\n",
+ "from sklearn.cluster import KMeans\n",
+ "\n",
+ "# Define number of clusters\n",
+ "num_clusters = 5\n",
+ "\n",
+ "#Running clustering algorithm\n",
+ "km = KMeans(n_clusters=num_clusters, init=\"k-means++\")\n",
+ "km.fit(tfidf_matrix)\n",
+ "\n",
+ "#final clusters\n",
+ "clusters = km.labels_.tolist()\n",
+ "print(clusters)\n",
+ "description_data = {'rank': ranks, 'descriptions': desc_se_flat, 'cluster': clusters }\n",
+ "# frame = pd.DataFrame(complaints_data, index = [clusters], columns = ['rank', 'cluster'])\n",
+ "\n",
+ "#number of docs per cluster\n",
+ "# frame['cluster'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Cluster 0 words: b'work', b'experience', b'environment', b'sql', b'ability', b'tools', b'experience', b'ability',\n",
+ "Cluster 1 words: b'development', b'software', b'experience', b'testing', b'years', b'software', b'development', b'design',\n",
+ "Cluster 2 words: b'computer', b'science', b'degree', b'computer', b'field', b'related', b'degree', b'degree',\n",
+ "Cluster 3 words: b'communication', b'skills', b'communication', b'excellent', b'written', b'verbal', b'excellent', b'verbal',\n",
+ "Cluster 4 words: b'experience', b'team', b'engineers', b'c', b'services', b'data', b'skills', b'python',\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:15: FutureWarning: \n",
+ ".ix is deprecated. Please use\n",
+ ".loc for label based indexing or\n",
+ ".iloc for positional indexing\n",
+ "\n",
+ "See the documentation here:\n",
+ "http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated\n",
+ " from ipykernel import kernelapp as app\n"
+ ]
+ }
+ ],
+ "source": [
+ "totalvocab_stemmed = []\n",
+ "totalvocab_tokenized = []\n",
+ "for i in desc_se_flat:\n",
+ " allwords_stemmed = tokenize_and_stem(i)\n",
+ " totalvocab_stemmed.extend(allwords_stemmed)\n",
+ " allwords_tokenized = tokenize_only(i)\n",
+ " totalvocab_tokenized.extend(allwords_tokenized)\n",
+ "vocab_frame = pd.DataFrame({'words': totalvocab_tokenized}, index = totalvocab_stemmed)\n",
+ "\n",
+ "#sort cluster centers by proximity to centroid\n",
+ "order_centroids = km.cluster_centers_.argsort()[:, ::-1]\n",
+ "for i in range(num_clusters):\n",
+ " print(\"Cluster %d words:\" % i, end=\" \")\n",
+ " for ind in order_centroids[i, :8]:\n",
+ " print(' %s' % vocab_frame.ix[terms[ind].split(' ')].values.tolist()[0][0].encode('utf-8', 'ignore'), end=',')\n",
+ " print()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/data/data_scientist.csv b/data/data_scientist.csv
new file mode 100644
index 0000000..3321b2d
--- /dev/null
+++ b/data/data_scientist.csv
@@ -0,0 +1,51 @@
+Companies,Positions,ID,Descriptions
+Simon-Kucher & Partners,Data Scientist,b8f6c807172debe6,"['Data wrangling, cleaning, and pre-processing', 'Conducting exploratory data analysis and communicating insights through clear descriptions and visualizations', 'Developing, testing, and implementing Machine Learning models', 'Conducting research on recent developments in machine learning and AI, with a focus on topics related to pricing, sales, and marketing', 'Being a topic expert on Machine Learning and AI for your project team, enabling strong project planning and team performance', 'Degree in a quantitative field, such as computer science, engineering, statistics, operations research, data science, or equivalent experience', 'Experience with Machine Learning and statistical modelling techniques', 'Strong programming skills in R and/or Python', 'Experience applying advanced analytics and Machine Learning to solve business problems', 'Experience with data visualization software / libraries (Tableau, PowerBI, Matplotlib)', 'Comfort working both on a team and autonomously', 'Strong written and verbal communication skills, ability to simply and concisely explain complex analytical topics', 'Entrepreneurial spirit—we are a fast-growing team with vast opportunities for growth', 'Implementation experience with Machine Learning models and applications', 'Knowledge of cloud-based Machine Learning engines (AWS, Azure, Google, etc.)', 'Experience with large scale data processing tools (Spark, Hadoop, etc.)', 'Ability to query and program databases (SQL, No SQL)', 'Experience with distributed ML frameworks (TensorFlow, PyTorch, etc.)', 'Familiarity with collaborative software tools (Git, Jira, etc.)', 'Experience with user interface libraries / applications (Shiny, Django, etc.)', 'Domain expertise in pricing, demand forecasting, or time-series data', 'Why you are interested in a job at Simon-Kucher.', 'Specific reasons why your application differentiates you from other candidates.']"
+Fidelity Investments,Data Scientist,4bb0fbf03740b709,"['A Ph.D.or Master’s degree in Engineering, Computer Science, Applied Mathematics orother relevant quantitative discipline', 'Industry experience would be an asset', 'Your knowledge in fixed income and equity market, brokerage business and trading', 'Your proven experience with Python, PyTorch, Kafka or KDB/Q to see and understand large amount of data', 'Your in-depth data science, machine learning and analytical knowledge to let data build narratives', 'Your eagerness to learn new things, share ideas freely and build a positive work environment', 'Your leadership and presentation skill to influence product directions', 'Crafting predictive models to improve revenue for fixed income business', 'Building tools, prototypes and simulation engines to streamline processes and test new ideas', 'Directing deployment of machine learning models through production cycle', 'Promoting culture of innovation and thought leadership', 'Influencing product directions for the benefit of our clients']"
+Pluralsight,Data Scientist - Search and Explore,4c43beedb3153810,"['You are intellectually curious, collaborative, and highly driven with an entrepreneurial mindset. You have excellent critical thinking, problem solving, and analytical skills.', 'You can guide complex projects from ideation and planning to model design to deployment and independently make decisions about the technical approach', 'You have a strong foundation in Machine Learning and Natural Language Processing.', 'You have mastery of either Python or R. Regardless of your favorite scientific computing environment, you can flex between the two languages.', 'You have practical experience working with textual data and know how to build recommendation or NLP models using unstructured text.', 'You know your way around SQL-like databases (e.g. PostGres, Impala, Hive) and even better if have experience with Spark and other big data platforms.', 'You are able to perform rapid prototyping of experimental solutions, writing readable, scalable, and reproducible code.', 'You have a solid data science toolkit that you can leverage, with knowledge of and understanding of how and when to apply different algorithms and technical approaches.', 'You can describe and speak in an approachable way about complex analyses and concepts within a cross-functional team. 
You are a great “analytic translator”.', 'You will collaborate with the “Explore Team” to research, envision, and build machine learning solutions that power our next-generation search and personalized browse experience for Pluralsight learners.', 'You will create, iterate, and innovate on NLP and recommendation models and partner to implement them on the Pluralsight platform.', 'Champion the introduction of new technology by developing proof-of-concepts and prototypes and effectively communicating highly complex information to Experience team partners', 'You will propose and design experiments to test new product features and analyze their results.', 'You will also serve as a data science expert and consultant to Pluralsight’s product teams, leveraging your statistical and analytical skills to answer ad hoc questions.', 'M.S. or Ph.D in Computer Science, Statistics, Mathematics, Data Science, Linguistics, or related quantitative discipline', 'Minimum 5 years in a non-academic data science role, conducting analysis, developing algorithms and building prototypes', 'Experience working with product development teams and/or with developers']"
+DeepHealth,Data Scientist,1d8cf47f1019b24e,"['Advanced degree in a field related to data science', 'Strong programming skills in python and libraries such as pandas', 'Experience working with DICOM data or healthcare-related data', 'Knowledge and experience in cloud infrastructure (e.g., Google Cloud, AWS, etc)']"
+TripAdvisor,Data Scientist,c9169f5f8ed90ee1,"['Apply machine learning and statistical methods to solve large scale problems, including: full funnel revenue modeling; macro traveler trends; and user value models', 'Full funnel time series modeling using multi-model inputs to project revenue and understand changes to our business trends', 'Influence the team’s direction/strategy by identifying KPI for marketing initiatives, business issues, developing hypotheses and executing analysis—translating data into meaningful insights and presenting recommendations', 'Serve as technical expert and mentor to other analysts as well as a cross-functional data science consultant to other projects and teams.', 'Contribute to the development of the team’s tools, skills, standards, culture and impact.', 'Domain expertise in marketplace optimization/ design', 'Strong foundation in product analytics and statistics', 'Keen eye for detail and thoughtful investigation of data before relying upon it', 'Ability to think and execute at multiple altitudes: from strategy and vision to execution', '2+ years in industry with proven business impact', 'MS in quantitative field preferred', 'Experience with languages used for querying (e.g. Hive/Pig/SQL), preprocessing (e.g. unix/python), and statistical analysis (e.g. R/Matlab/Stata)', 'Excellent communication, organizational and project management skills. Ability to work between teams, functions and geographies, with high autonomy and little guidance']"
+eviCore healthcare,Senior Machine Learning Data Scientist,d99865eae70c9779,"['Bachelor’s degree in Computer Science or related field required.', 'Master’s degree in Computer Science a plus', '4+ years of industry experience involving machine learning for commercial production in a structured environment.', 'Machine Learning expertise', 'Ability to select appropriate models given a well defined problem / data set', 'Ability to conceptualize, validate, and improve upon features used within models', 'Ability to measure model performance appropriately and prevent overfitting and other common modeling pitfalls.', 'Familiarity with industry tools, libraries and coding quality', 'High proficiency in Python', 'High proficiency writing SQL', 'Experience with Deep Learning and/or Natural Language Processing algorithms and tools a strong plus.', 'Experience with Java/C++ a strong plus.', 'Experience with big data frameworks (Hadoop, Spark, etc.) a plus.', 'Experience with healthcare data a plus.', '“Can do” attitude: you are biased towards action and ready to speak your mind.', 'Excellent organizational and communication skills.', 'Flexible scheduling and work/life balance with remote and work from home opportunities', '3 weeks of PTO(starting) per year plus paid holidays', 'Education assistance, tuition reimbursement and professional certifications', 'Health, dental, vision, and life benefits with employer funded HSA', 'Comprehensive employee discount program, onsite fitness facilities, and smart casual dress code', 'Paid Volunteer Community Service Days', 'Ample opportunities for growth, advancement, and promotion', '401k retirement plan with company match of 50% employee contributions up to 6%']"
+Hopper,Senior Data Scientist,61f6469e434ce823,"['Frame and conduct complex exploratory analyses needed to deepen our understanding of our users. Partner with product, business and strategy teams to contribute to product improvements and initiatives', 'Use machine learning and big data tools on tremendously large and complex data to deepen our data-driven advice and personalized modeling', 'Conduct research into various aspects of our business and employ statistical and modeling techniques when appropriate to make recommendations to non-technical stakeholders', 'A degree in Math, Statistics, Computer Science, Engineering or other quantitative disciplines', 'Extremely strong analytical and problem-solving skills', 'Proven ability to communicate complex technical work to a non-technical audience', 'A strong passion for and extensive experience in conducting empirical research and answering hard questions with data', 'Experience with relational databases and SQL, especially Hive', 'Experience working with extremely large data sets', 'Experience in Pandas, R, SAS or other tools appropriate for large scale data preparation and analysis', 'Experience with data mining, machine learning, statistical modeling tools and underlying algorithms', 'Proficiency with Unix/Linux environments', 'Well-funded and proven startup with large ambitions, competitive salary and stock options', 'Dynamic and entrepreneurial team where pushing limits is everyday business', '100% employer paid medical, dental, vision, disability and life insurance plans', 'Access to a 401k (Boston) or Retirement Savings Plan (Montreal)', 'Easy commute with a paid-for public transportation or parking pass', 'IATA Travel Agent Card for discounts in the travel industry', 'Fully stocked kitchen with: coffee/tea, beer, bagels and snacks (both healthy and not-healthy)', 'In Cambridge, work in a historic factory building near Kendall Square; in Montreal, work in an artist’s loft in the Mile End', 'Team lunches, 
off-site activities and much more!']"
+Lincoln Financial,Data Scientist Analyst,3890f0d479f70f9c,"['Identify opportunities where data-driven solutions to complex business problems using predictive and statistical models can drive earnings while improving the customer experience.', 'Provide expertise and collaborate with stakeholders to define data collection and analysis requirements and establish adequate business understanding to provide effective analysis.', 'Compile and mine data from a wide variety of data repositories. Validate data accuracy, and confirm data quality and consistent use across the business.', 'Design and execute analytic plans. Generate insights and inform business decisions using advanced analytical techniques.', 'Translate quantitative analyses and findings into accessible visuals for non-technical audiences, clearly interpret the data and effectively communicate findings and insights to senior executives, managers, and key stakeholders.', 'Deploy and maintain predictive models. Monitor model performance and update models.', 'Work with key stakeholders in different functions to align and consolidate key operational performance metrics.\n\nEducation', ""4 Year Bachelor's degree in Mathematics, Natural Sciences, Economics, Computer Science, Engineering, Statistics, or Information Systems.\n\nExperience"", 'Proficiency with a major programming language appropriate for data science (e.g. R, Python) to manipulate data and draw insights.', '3 - 5+ years of solid experience working on different data sources and platforms.', 'Knowledge and experience applying statistical and machine learning techniques to real business data.', 'Ability to define problems, collect data, establish facts and draw valid conclusions.', 'Strong relationship management skills with internal clients (e.g. 
senior management, peers and colleagues); proven ability to develop creative and collaborative approaches.', 'Curiosity and demonstrated capability of quick learning on new technical and business knowledge.', 'Flexibility and fast adaption to the changing business environment and priorities.', 'Strong interpersonal skills with a collaborative style.', 'Strong project management leadership skills including, critical ability to coordinate and balance multiple projects in a time-sensitive environment, under pressure, and meeting deadlines.', 'Proficiency with Microsoft Office Suite (Word, Excel, PowerPoint, Outlook).', 'Actuarial working experience or knowledge and experience in the insurance industry is desirable.']"
+Devoted Health,Data scientist- Analytics,7bc714e40a6c3421,"['Ad-hoc analysis', 'Communication of analysis', 'Visualization/dashboarding', 'Metric definition', 'Data pipeline creation and management', 'Professional experience (1+ years) in a quantitative role', 'Experience in SQL', 'Experience in health care preferred, experience in analyzing Medicare is favored', 'Strong belief in data as a team sport', 'We are not able to provide work sponsorship at this time', 'This role will be located either in Waltham MA or Bay area, CA']"
+AETNA,Data Scientist,f026cf81c7f3717a,"['', '', '', '', '', '', '', '', '', '', '', '', '', '']"
+UnitedHealth Group,Principal Data Scientist,41f63fd1515b11be,"['Improve and extend the analytic methods in our products', 'Work alongside other data scientists, engineers, and project managers to design and implement models and experiments from end to end, including data ingestion and preparation, feature engineering, analysis and modeling, model deployment, performance tracking and documentation', 'Utilize machine learning methodologies', 'Conduct hands-on data analysis and predictive analytics on large datasets', 'Effectively communicate complex technical results to business partners', 'Support and drive analytic efforts around machine learning and innovation', 'Work with a great deal of autonomy to find solutions to complex problems', 'Bachelor’s Degree with Master’s Degree highly preferred in a quantitative field such as Statistics, Mathematics, Computer Science or related field', '2+ years experience in a big data environment integrating data and extracting insights from information across multiple platforms', '2+ years of business experience using Python, or similar software, for data management', 'Experience implementing models based on Artificial Intelligence and Machine Learning (machine learning techniques used need to be clearly defined in resume)', 'Experience with predictive and Prescriptive Modeling', 'Ability to communicate effectively across a variety of stakeholders and cross matrixed teams', 'Capability to manage own work and work as a ‘thought partner’ across the team', 'Supervisory experience', 'Tableau reporting system experience', 'Familiarity with Medicare, Medicaid, ACA concepts; understanding of plan types such as MAPD, DSNP, Group Retiree', 'Experience using R, SAS, and SQL Server for data management and analytics', 'Professional experience with modeling supporting marketing or behavioral science projects', 'Demonstrated ability to work in a fast-paced environment, handling multiple priorities', 'Strong knowledge of health care industry 
or experience with human clinical research studies in a business environment']"
+Wayfair,"Data Scientist, Developer Experience",fef4e0d6b58105e7,"['Work with large, complex data sets. Solve difficult, non-routine analysis problems, applying advanced analytical methods as needed.', 'Conduct analysis that includes data gathering and requirements specification, processing, analysis, ongoing deliverables, and presentations.', 'Build and prototype analysis pipelines iteratively to provide insights at scale.', 'Interact cross-functionally, making recommendations with effective presentations of findings at multiple levels of stakeholders through visual displays of quantitative information.', ""Research and develop analysis, forecasting, and optimization methods to improve the quality of Wayfair's developer-facing tools and processes."", 'Participate in cutting edge research in machine intelligence and machine learning applications.', 'Develop solutions for real-world, large scale problems.', 'Masters degree in a quantitative discipline (e.g., Statistics, Operations Research, Computer Science, Mathematics, Physics) or equivalent practical experience.', '4 years of relevant work experience.', 'Experience with statistical software (e.g., R, Python, MATLAB, pandas) and database languages (e.g., SQL, NOSQL)', 'Applied experience with machine learning on large datasets.', 'Experience articulating and translating business questions and using statistical techniques to arrive at an answer using available data.', 'Demonstrated leadership and self-direction. Willingness to both teach others and learn new techniques.', 'Effective written and verbal communication skills.']"
+NeuroSoph,Data Scientist,e93ea4e970bcf843,"['Translate product ideas into well-posed data science challenges, and solve them', 'Design and deploy Machine Learning (ML) systems using supervised, unsupervised learning, NLP, and NLU', 'Participate in the broader ML community to stay current with the latest techniques and software', 'Good knowledge of image processing and computer vision techniques is required', 'Flexibility to travel to clients throughout North America', 'Competencies typically acquired through a masters or bachelor’s degree program in Engineering, Computer Science, Mathematics, or a related field – especially an understanding of statistics, linear algebra, and mathematical analysis.', '3-5 years of relevant work experience in the area of machine learning and/or deep learning, including experience manipulating and analyzing unstructured data sets.', 'A strong programming background – preferably with Python, but also Java, C++, or similar language – along with knowledge of standard machine learning libraries (TensorFlow, PyTorch, Caffe2, SciKit-Learn, etc.).', 'A proven track record of rapid prototyping and development, leveraging open source or commercial frameworks.', 'Ability to thrive in a fast-paced environment with significant uncertainty.', 'Possess strong analytical and problem-solving skills with a positive mindset and attitude.', 'Excellent oral communication skills are necessary', 'Ability to travel to Boston to work with the team and client. Travel will be paid for by NeuroSoph.', 'Ability to engage in research and drive to become a lead AI research scientist.']"
+AETNA,Senior Data Scientist,618173f1c642b156,"['Develops and/or uses algorithms and statistical predictive models.', 'Brings clinical insights to inform predictive models: is comfortable reading academic clinically oriented literature and collaborating with clinical subject matter experts to inform predictive models.', 'Performs analyses of structured and unstructured data to solve multiple and/or complex business problems using advanced statistical techniques and mathematical analyses and broad knowledge of the organization and/or industry.', 'Collaborates with business partners to understand their problems and goals, develop predictive modeling, statistical analysis, data reports and performance metrics.', 'Develops and participates in presentations and consultations to existing and prospective constituents on analytics results and solutions.', 'Interacts with internal and external peers and managers to exchange complex information related to areas of specialization.', 'Use strong knowledge in algorithms and predictive models to investigate problems, detect patterns and recommend solutions.', 'Use strong programming skills to explore, examine and interpret large volumes of data in various forms.', '5 or more years of progressively complex related experience.', 'Demonstrates proficiency in most areas of mathematical analysis', 'Strong knowledge of advanced analytics tools and languages to analyze large data sets from multiple data sources.', 'Anticipates and prevents problems and roadblocks before they occur.', 'Demonstrates strong ability to communicate technical concepts and implications to business partners.', 'Clinically oriented background or professional experience is a plus.']"
+Facebook,"Data Scientist, Infrastructure Strategy Intern",8c5571efc86d4ba7,"['Leverage data and business principles to solve large scale web, mobile and data infrastructure problems.', 'Build and maintain data driven optimization models, experiments, forecasting algorithms, and machine learning models.', 'Leverage tools like R, Python, Hadoop & SQL to drive efficient analytics.', 'Pursuing a BS, MS, or PhD in Computer Science, Engineering, Statistics, Economics, Social Science, Mathematics, or Interdisciplinary programs including the above disciplines', 'Proficient in SQL and at least one programming language (PHP, Python, C, C++, Java)', 'Basic understanding of statistics', 'Basic understanding of Machine Learning techniques', 'Hands-on experience with medium to large datasets (i.e. data extraction, cleaning, analysis and presentation).', 'Proficient in SQL and at least one programming language (PHP,Python,Perl, C, C++, Java)', 'Basic understanding of statistics and optimization techniques.', 'Hands-on experience with medium to large datasets (i.e. data extraction, cleaning, analysis and presentation).', 'Basic experience with packages such as NumPy, SciPy, pandas, scikit-learn, dplyr, ggplot2', 'Must obtain work authorization in country of employment at the time of hire, and maintain ongoing work authorization during employment', 'Intent to return to degree-program after the completion of the internship/co-op', 'Proven ability to solve problems in nonstandard and innovative ways', 'Excellent communication skills']"
+,,,
+Beacon Health Options,Data Scientist,3979cbcc2268abb0,"['Lead development and implementation of models that help us understand and describe the impact of our healthcare programs. This includes literature searches, development and implementation of study methodology and documentation, data extraction, statistical analysis, coordinating with design and implementation of interventions based on findings and assessing intervention impacts to achieve and maintain improvement.', 'Extract, prepare, cleanse, and verify the integrity of data used for analysis', 'Create standard frameworks and processes for systematic statistical modeling.', 'Visualize and interpret the results of analyses to articulate recommendations to shape strategy.', 'Partner with key business partner teams to understand needs and conduct analyses that will help determine strategy and clinical and operational program enhancements.', 'Advocate the benefits of, and appropriate use of, advanced analytics across the organization.', 'Serve as mentor to team analysts should their individual analytics projects require more advanced statistical methods', 'At least 3 years’ experience in related healthcare analytics or value-based care research.', 'Strong knowledge and experience with analytic tools and applications in healthcare like episode analytics and risk adjustment.', 'Excellent technical – and intuitive - analytic skills.', 'Excellent written and verbal communication skills, as well as presentation skills. An ability to ‘tell the story’ from the data and models.', 'Strong technical skills to extract, transpose, and analyze Big Data using SAS, R, Python, SQL or similar methods in Oracle, Teradata, or Hadoop environments (or similar) and use of advanced visualization tools such as Tableau or PowerBI.', 'Direct experience building and productionalizing statistical and machine learning models (linear regression, generalized linear models, random forest, k-nearest neighbors, etc.) 
on realistic (imperfect) data sets. Understanding of various data structures and common methods in data cleansing and transformation to build the data sets for analyses.', 'Experience in presenting results from analyses and modeling to non-technical partners.', 'Experience with natural language processing a plus']"
+Klaviyo,Data Scientist,d7bf48644cb2042c,"['Have a strong fundamental understanding and deep experience with at least some machine learning algorithms (e.g. regressions, decision trees, k-means clustering, neural networks).', 'Understand Bayesian modeling techniques.', 'Are capable of analyzing data and making rigorous statements about what can or cannot be concluded.', 'Have experience designing and implementing model performance/validation assessments.', 'Have a background in statistics and understand different distributions and the conditions under which they’re valid.', 'Know how code and have used data science tools and packages.', 'Have demonstrated a measurable impact based on the models you’ve created. It’s not always easy getting a model correct and we love talking about places we got stuck and working as a team to think through ideas that could unblock us.', 'Have a desire to ship features powered by data science (in other words, you’re excited by both upfront research and actually getting models into production at cloud scale).', 'Bachelor’s or advanced degree in statistics, applied mathematics, computer science or other relevant quantitative discipline, or equivalent industry experience.', 'Have worked in a data science role for 2+ years professionally or academically and can talk about your projects and the techniques you used.']"
+Cardinal Health,Data Scientist,97f5cbd91543f63c,"['Drive digitizing and systematizing internal knowledge and integrate them into standard processes', 'Contribute internal innovation efforts to make decisions faster and more effectively', 'Remain up to date on emerging applicable technologies and analytical methods and recommend them as appropriate', 'Support change management efforts to help adopt new information assets (e.g., dashboards)', 'Keep existing algorithms and business logic up to date and review periodically to ensure they continue to meet the business needs', 'Responsible for development, validation and implementation of new predictive models, learning algorithms, and analytics', 'Assist others within the team, and cross-functionally, with their analysis. Help to aggregate and structure data that meets team member requirements.', 'Conduct research across the business functions to understand the Red Oak business and the generic pharmaceutical industry', 'Employ statistical and modeling techniques to uncover insights and draw meaningful conclusions about Red Oak’s portfolio and broader business', 'Work with stakeholders to clearly understand the implications of these trends and help develop actionable recommendations to address business problems', '3+ years of related experience', 'Advanced degree in quantitative or technical field preferred (e.g. 
data science, applied analytics, econometrics, mathematics, engineering, etc.)', 'Strong understanding of advanced statistical methods (sampling; regression methods; classification algorithms; distributions, etc.)', 'Strong understanding of SQL and advanced knowledge of other scripting/statistical programming languages (VBA, Python, R, etc.)', 'Ability to build and present a business case extending from proposed data initiatives and analysis', 'Experience in data analytics platforms such as Alteryx or Azure Data Factory, and visualization software such as Tableau or Qlik', 'Experience with innovation methods is a plus (e.g., human centered design, systems thinking)']"
+"Foundation Medicine, Inc.",Data Scientist,6191757120becbd1,"['Expertise in analyzing clinical health data, particularly survival/outcomes analyses.', 'Expertise in biostatistics, epidemiology, and/or statistical genetics.', 'Proficiency with statistical analysis software and/or scripting languages (R and/or Python preferred, but are amenable to others).', 'Lead our efforts to shape the evolving landscape of clinical oncology with real-world evidence.', 'Support commercial data science projects as part of a cross-functional team.', 'Co-author case studies and peer-reviewed publications.', 'Contribute to a learning culture with conference presentations, journal clubs, etc. to share what you learn and enrich collaborations across Foundation Medicine.', 'PhD in Biostatistics, Epidemiology, Bioengineering, or a related field; or MS with 3-5 years related work experience.', 'Experience with longitudinal electronic health/medical records is a plus but not required.', 'Experience in oncology (clinical oncology, cancer biology, and/or cancer bioinformatics) is a plus but not required.', 'Proficiency in a database query language (e.g. SQL) is a plus.', 'Those with publications or projects directly related to analyses of clinical and/or genomic data preferred. Please submit a publication list (Pubmed or Google Scholar profile accepted).']"
+Retail Business Services,Data Scientist (Co-op),d5f33dad6a30da09,"['Knowledge of analytics tools and platforms (preferably Microsoft Azure) but Tableau,', 'Knowledge of statistical and predictive modeling concepts and disciplines including', 'Knowledge of industry-leading practices', 'Ability to propose analytic strategies and solutions as a business need', 'Analyzing source system data and data flows, working with structured and unstructured data', 'Manipulating high-volume, high-dimensionality data from varying sources to highlight patterns, anomalies, relationships and trends', 'Must possess the ability to effectively communicate using strong verbal, written and listening skills', 'Must be able to adapt quickly to change without being afraid to take on new responsibilities in a fast paced team environment while being proactive and action-oriented', 'Ability to come up with creative technical ways to resolve application and system issues', 'Understanding agile development methodologies or concepts; experience with is a plus', 'Excellent diagnostic, debugging, and troubleshooting skills', 'Working knowledge of any of the following languages: Python, Java, R, SQL, HQL, Gremlin, Cypher']"
+Brigham & Women's Hospital(BWH),Clinical Data Scientist / 40 Hours / Day / BWH APSI,7baaacd0c63d2be6,"['Develops and/or uses statistical algorithms and modeling techniques to evaluate scenarios and predict future outcomes', 'Consults with clinical leaders to understand problems and goals, and translate these needs into advanced analytics solutions', 'Quickly develops subject-matter expertise by fully understanding the complex methodologies used for measurement of healthcare outcomes', 'Applies statistical rigor to analyze large amounts of data using advance data science techniques', 'Wrangles structured and unstructured data to solve complex business problems', 'Manages large and complex analytical projects through data exploration and modeling, testing and evaluation', 'Closely collaborates with analysts and data engineers at the Decision Support Systems (DSS) team at the Brigham, as well as the Partners eCare (PeC) reporting team at Partners HealthCare, to develop technical and business approaches and/or enhanced technical tools', 'Creates innovative visualizations of the data to effectively communicate analytical results', 'Translates the data artifacts into clinical and business insights, and delivers these insights through presentations prepared for audiences at various levels of leadership and technical expertise', 'Demonstrates superior communication skills, and ability to develop and participate in presentations and consultations on analytics results and solutions', 'Works closely with thought leaders in clinical, analytics and data science areas at the Brigham and Women’s Hospital, Center of Clinical Decision Support, and other entities affiliated with Partners Healthcare, to reuse and share knowledge, tools and methodologies', 'Mentors others and champions a culture of transparent feedback and continuous learning', 'Masters’ degree required in data science, biomedical informatics, statistics/biostatistics, epidemiology, economics/econometrics, or related 
disciplines (PhD is a plus)', '3-5 years of relevant, progressively complex experience in data management in an industry setting required', 'Demonstrated experience with observational and interventional study designs required', 'Demonstrated experience with data mining and predictive modeling techniques required; familiarity with neural networks and Bayesian modeling techniques is a plus.', 'Deep understanding of platforms and languages to analyze large data sets from multiple sources preferred', 'Experience with electronic health record (EHR) data required (experience with Epic Systems highly preferred)', 'Strong skills with SQL and one statistical programming language required (experience with R preferred). Broad experience in programming across different platforms and languages preferred.', 'Experience with managing several concurrent, complex projects required (2 years minimum)', 'Excellent organizational skills and attention to detail required', 'Excellent communication and writing skills required', 'Demonstrated analytical and problem-solving capabilities required', 'Clinically oriented background or professional experience is a plus', 'Familiarity and experience with Agile development principles is a plus']"
+Vertex Pharmaceuticals,Healthcare Data Scientist,3f06785cad4921c1,"['Be the primary technical lead focused on creative use of diverse data sources, such as clinical trial data, electronic health record data, insurance claims data, and/or other provider and patient level data.', 'Guide deployment of advanced analytical techniques to tackle problems across different functions.', 'Advise senior management on opportunities and gaps in the company’s current capabilities across data science technology and infrastructure, and recommend plans for growth through adoption of new methods and/or technologies.', 'Mentor and support the training of new and existing staff in data science and advanced analytics.', 'Support the translation and appropriately champion advanced analytics results and capabilities (e.g. machine learning and natural language processing) to non-technical audiences.', 'Participate in the broader data science community to follow the latest developments in data and technology.', 'Champion an ambitious mindset and collaboration with IT, business partners, and the internal analytics community.', 'PhD in Statistics, Epidemiology, Health Economics, Biostatistics, Computer Science, Clinical/Biomedical Informatics or a related computational and quantitative discipline, or a Master’s degree (scientific field of study) and 3+ years of relevant experience.', 'Domain knowledge in clinical data, real word data (e.g. 
Optum, Truven, IMS, Pharmetrics, Premier or others), and/or other related data sources such as providers and health systems.', 'Expertise with programming languages such as R, Python or others.', 'Extensive experience in applying data science to complex problems, along with a broad understanding of core statistical principles', 'Understanding and experience with supervised and unsupervised ML techniques.', 'Comfortable working with both technical and non-technical staff to translate concepts and algorithms into working prototypes.', 'Able to thrive in a fast-paced environment with uncertainty.', 'Ability to collaborate with the team and translate existing research into practical solutions and products ability to build and manage relationships with various collaborators across the company', 'Bring an entrepreneurial and ethical mindset, openness, transparency, and collegiality to your work']"
+Cambridge Mobile Telematics,Data Scientist - Machine Vision,11b26e66a4c612c6,"['PhD in Computer Science, Engineering, Mathematics or related technical discipline', '3+ years of work experience involving statistical modeling, machine learning theory and software development', 'Strong programming skills for constructing and understanding data pipelines', 'Knowledge of image segmentation, classification, temporal tracking, attention, alignment techniques.', 'Experience implementing deep learning frameworks (TensorFlow, PyTorch ) and architectures (CNN, RNN)', 'Familiarity with popular object detection and classification models', 'Familiarity with object tracking methods']"
+ENGIE Insight,Associate Data Scientist - INTERN - PART TIME,0e4152230eaa2a74,"['Mine industrial-size data sets in various states of cleanliness for insights', 'Refine and verify statistical and machine learning algorithms within the Analytics R&D group', 'Assist the Analytics R&D team in running energy and algorithmic models and interpreting the results', 'Evaluate business and technology tradeoffs and report them in a manner that product and business teams can make decisions on final specifications', 'BS required, advanced degree in Computer Science, Engineering, Statistics, or equivalent field desired', 'Strong mathematical background with teachable knowledge of various analytic methodologies such as signal processing, model selection, model verification, optimization, artificial neural networks, Bayesian, etc0-3 years of experience developing, testing, and evaluating algorithms including statistical and machine learning', 'Proficient in large-scale SQL/NoSQL database queries, with associated statistical analyses including the following areas: non-linear and multi-variate regressions, logistic regression, time series analyses, and probabilistic and classification models.', 'Intermediate level knowledge of statistical tools (e.g., R, MatLab, etc.)', 'Familiarity with scripting prototyping/scripting languages, e.g., Python', 'Experience with real-world commercial data analytics and visualization', 'Comfortable with working in a fast-paced multidisciplinary environment while maintaining strict customer data privacy requirements', 'Uncompromising integrity and ethical standards', 'Familiarity with cloud storage and processing technologies is desired']"
+Liberty Mutual Insurance,"Assistant Director, Data Science",8f1c1f9d16cf8352,"['Work extensively with key partners to understand business needs, vet opportunities, and deliver analytically-based solutions and tools to drive business results', 'Utilize advanced analytics techniques to solve business problems', 'Regularly utilize, update and adapt predictive modeling tools and/or focus on the application of the tools', 'Build and deliver presentations to communicate technical ideas and analytical findings to non-technical partners and senior leadership, including underwriters and IT professionals', 'Lead complex projects while providing technical and analytical assessments of issues facing the business', 'Lead discussions with senior leadership at the department or functional level', 'Advanced degree in Statistics, Mathematics, Economics, Actuarial Science or other quantitative field of study plus:', '2 years of experience in modeling or advanced analytics with a Ph.D.', ""4 years of experience in modeling or advanced analytics with a Master's degree"", ""5+ years of experience in modeling or advanced analytics with a Bachelor's degree"", 'Knowledge of property and casualty insurance principles, especially underwriting and ratemaking concepts is a plus.', 'Proven understanding of core statistical techniques with prior experience analyzing data and building models in a statistical programming language (e.g., SAS, R, Python, etc.)', 'Ability to quickly grasp concepts and technologies and adapt to changes and demands in fast-paced environment including drawing conclusions and making decisions from imperfect and disparate datasets', 'Strong analytical, strategic, project management, decision-making and problem-solving skills.', 'Good written and verbal communication skills, as well as confirmed ability to effectively present technical concepts to non-technical individuals within and outside the organization']"
+"Amazon.com Services, Inc.",Research Scientist,6981a9d7adfa0d90,"['Master’s or PhD in a relevant field', '2+ years experience with various data analysis and visualization tools', 'Experience in Perl, Python, or another scripting language; command line usage', 'Experience with various machine learning techniques and parameters that affect their performance', 'Ensure data quality throughout all stages of acquisition and processing, including such areas as data sourcing/collection, ground truth generation, normalization, transformation, cross-lingual alignment/mapping, etc.', 'Clean, analyze and select data to achieve goals', 'Build and release models that elevate the customer experience and track impact over time', 'Collaborate with colleagues from science, engineering and business backgrounds.', 'Present proposals and results in a clear manner backed by data and coupled with actionable conclusions', 'Work with engineers to develop efficient data querying infrastructure for both offline and online use cases', 'Track record of diving into data to discover hidden patterns and of conducting error/deviation analysis', 'Ability to develop experimental and analytic plans for data modeling processes, use of strong baselines, ability to accurately determine cause and effect relations', 'Understanding of relevant statistical measures such as confidence intervals, significance of error measurements, development and evaluation data sets, etc.', 'The motivation to achieve results in a fast-paced environment.', 'Experience with statistical modelling / machine learning', 'Strong attention to detail', 'Exceptional level of organization', 'Comfortable working in a fast paced, highly collaborative, dynamic work environment', 'Ability to think creatively and solve problems']"
+Fidelity Investments,Systematic Trader,c60809d50bce2dad,"['Manage the execution of the varied order flow handled by the systematic trading desk. Use knowledge of the markets and market structure, the available execution tools, understanding of trading costs and risk, and the underlying objectives of the portfolio managers to achieve efficient and effective execution. Utilize FMR’s proprietary systematic trading system, as well as direct execution leveraging a full suite of the desk’s execution channels. Exercise effective communication across the trading desk, including sector traders and international desks, as well as portfolio managers and analysts to achieve the highest quality execution results.', 'Work closely with portfolio managers and CIOs, senior management, and the trading desk regarding large/complex transitions and portfolio rebalances. Take the lead on developing effective execution strategies for trades given objectives and constraints. Perform pre, intra, and post-trade analysis to inform, guide, and evaluate trading approach and results. Manage the direct execution of the rebalances/transitions utilizing proprietary systematic trading system and full execution toolkit. Effectively communicate and partner with the rest of the desk to jointly produce the best trading outcomes.', 'Take a leading role in engaging with the broker-dealer community to further develop and refine the electronic trading suite using data and proprietary suite of analytics. Use knowledge of the markets and market structure, coupled with analytical and quantitative skills to drive broker evaluation and advancement of the electronic toolkit used on the desk. Use these skills and deep understanding of trading objectives to identify new strategies/tools to develop. 
Be a subject matter expert for the electronic toolkit for the desk.', 'Work closely with a team of traders, quants, data scientists, and developers to help with the evolution of the automated trading system and systematic strategies used by the Equity Trading Desk. Apply trading knowledge and analytical capabilities to help develop the underlying strategies, implementation of execution of full life cycle for varied types of order flow, and development of new liquidity interactions to further optimize the execution results through the trading system.', 'Liaise with Technology, Compliance, Legal, Operations, investment personnel and other internal Fidelity partners to provide the highest quality service and execution capabilities for shareholders.', '3 years of experience with at least 1.5 years on trading desk preferred', 'Experience with systematic/automated trading', 'Ability to multi-task in a fast-paced environment with a constant eye toward accuracy', 'Quantitative aptitude and sharp analytical skills demonstrated through academic or work experience', 'Ability to think creatively and independently while working in a collaborative environment', 'Self-starter, capable of developing new tools and capabilities to solve novel problems facing the trading desk', 'Track record of intellectual curiosity and a passion for problem solving', 'Team-first attitude with flexibility, willingness to pitch-in where needed, familiarity with front-to-back office operations, proficiency with technology, and skill towards mitigating risk', 'Proficiency in trading operations, terminology, market mechanics, and has demonstrated a strong interest in the market', 'Accountability', 'Enthusiasm', 'Integrity', 'Teamwork', 'Respect']"
+Analysis Group,Data Scientist,8202bd856d4b33c4,"['Working with project teams to address data science/computing challenges', 'Identifying opportunities for technology to enhance service offerings', 'Acting as a resource and participating in client engagements and research as part of the project team', 'Maintaining up-to-date knowledge of computing tools, providing technical training and helping to grow the in-house knowledge base, specifically in a Linux environment', 'Presenting research at selected conferences', 'Optimizing procedures for managing and accessing large databases (e.g., insurance claims, electronic health records, financial transactions)', 'Creating interactive analytics portals and data visualizations (e.g., using R/Shiny, Python/Flask, D3)', 'Building and maintaining high performance computing (HPC) tools on grid and cloud computing environments', 'Developing and reviewing software and packages in R, Python and other Object Oriented Languages', 'Establishing optimized procedures for repetitive or computationally intensive tasks (C, C++, Cuda-C)', 'Using natural language processing methodologies to work with EMR data, social media data and other unstructured data', 'Strong credentials and experience in database management and data visualization', 'Significant experience working within a Linux environment required', 'Background in Statistics/Econometrics or Biostatistics', 'Ideally PhD in Computer Science, Mathematics, Statistics, Economics or other relevant scientific degree with relevant experience. 
Other candidates with at least one year of experience in the field may also be considered', 'Excellent written and verbal communication skills', 'Project experience with R and/or Python', 'Familiar with online/cloud computing/storage (e.g., AWS)', 'Demonstrated experience working on project teams and collaborating with others', 'L’utilisation du genre masculin sert uniquement à alléger le texte et est utilisé ici en tant que genre neutre', 'Collaborer avec les consultants pour relever les défis de la science des données et de sciences informatiques', ""Agir à titre de ressource et participer aux mandats et à la recherche en tant que membre de l'équipe de projet"", 'Maintenir à jour les connaissances sur les outils informatiques, fournir une formation technique et aider à développer la base de connaissances interne, notamment dans un environnement Linux', 'Présenter la recherche à des conférences choisies', ""Optimiser les procédures de gestion et d'accès aux grandes bases de données (ex. 
réclamations d'assurance, dossiers de santé électroniques, transactions financières)"", ""Création de portails d'analyse interactifs et de visualisations de données (par exemple, en utilisant R/Shiny, Python/Flask, D3)"", 'Construire et maintenir des outils de calcul de haute performance (HPC).', 'Développement et révision de codes en R, Python et autres langages', 'Mise en place de procédures optimisées pour les tâches répétitives ou intensives en calcul (C, C++, Cuda-C)', ""Utiliser des méthodologies NLP pour travailler avec les données médicales électroniques, les données des médias sociaux et d'autres données non structurées"", 'Solides références et expérience dans la gestion de bases de données et de la visualisation de données', 'Expérience de travail significative dans un environnement Linux requise', 'Expérience antérieure en statistique/économétrie ou bio-statistique', ""Idéalement, être titulaire d'un doctorat en sciences informatiques, en mathématiques, en statistique, en économie ou d'un autre diplôme scientifique pertinent et posséder une expérience pertinente. Les candidats ayant au moins un an d'expérience dans le domaine peuvent également être considérés."", 'Excellentes aptitudes de communication écrite et verbale', 'Expérience de projet avec R et/ou Python', ""Familiarité avec l'informatique en ligne/info nuagique et le stockage (AWS)"", ""Expérience de travail démontrée au sein d'équipes de projet et de collaboration avec d'autres personnes"", 'Equal Opportunity Employer/Protected Veterans/Individuals with Disabilities.', 'Please view Equal Employment Opportunity Posters provided by OFCCP here.', ""The contractor will not discharge or in any other manner discriminate against employees or applicants because they have inquired about, discussed, or disclosed their own pay or the pay of another employee or applicant. 
However, employees who have access to the compensation information of other employees or applicants as a part of their essential job functions cannot disclose the pay of other employees or applicants to individuals who do not otherwise have access to compensation information, unless the disclosure is (a) in response to a formal complaint or charge, (b) in furtherance of an investigation, proceeding, hearing, or action, including an investigation conducted by the employer, or (c) consistent with the contractor's legal duty to furnish information. 41 CFR 60-1.35(c)""]"
+Keurig Dr Pepper Inc.,Data Scientist I,4feed7c6f3a4627f,"['Apply data exploration and analysis techniques to examine POS, clickstream, CRM, and complimentary online retail data from multiple disparate sources, with the goal of improving customer understanding and providing a competitive advantage for KDP’s ecommerce business teams.', 'Apply statistical and predictive modeling concepts, clustering and classification techniques, and recommendation algorithms to help optimize customer targeting, reduce churn, and increase customer life-time-value.', 'Navigate many multi-functional stakeholders, and communicate technical results effectively to non-technical audiences.', 'Implement strategic analyses with the know-how to balance getting the details right, while still moving at the speed of business.', 'You will engage and partner on critical projects to create and operationalize a harmonized ecommerce sales tracker across all of KDP’s products and retail channels.', 'You will receive mentor support for your professional development.', 'You will learn how to break apart day-to-day business challenges into manageable segments for business analytic application.', 'You will apply data collection, exploration, and analysis techniques to identify and interpret critical sales drivers across our Omni-channel partners.', 'You will learn how to apply predictive modeling concepts to help forecast sales and develop strategic insights to support incremental sales creation across eCommerce Omni-channel retail partners.', 'Bachelor’s degree and 1 year of experience or an MS and minimum 1 year of experience, in Ecommerce, data analytics, sales, consumer marketing, or another related field.', '1+ years of experience in analyzing data in a professional setting.', 'Knowledge of statistical and predictive modeling concepts, machine learning approaches, clustering and classification techniques, and recommendation and optimization algorithms.', 'Experience with one or more statistical 
analysis and scripting tools, including R, Python, and AzureML.', 'Experience with data extraction from SQL databases', 'Experience with Hadoop-based technologies (e.g. MapReduce, Hive and Pig)', 'Possess excellent oral and written communication skills', 'Possess a unique understanding of the internet/tech and media ecosystem', 'Highly detail–oriented and organized', 'Possess strong business and financial acumen']"
+Applause,Data Scientist,4010e39f2f78984c,"['Uses best practices to develop statistical machine learning techniques to build models that address business needs', 'Extract data from databases, write scripts to parse, clean, combine, and process them', 'Prepare graphs or visualizations of processed data, spot trends, anomalies', 'Draw inferences and conclusions, communicate results through reports, charts, or tables', 'Uses effective project planning techniques to break down basic and occasionally moderately complex projects into tasks and ensure deadlines are kept', 'Uses and learns a wide variety of tools and languages to achieve results (e.g., Python or R)', 'Collaborates with the team in order to improve the effectiveness of business decisions through the use of data and machine learning/predictive modeling', 'Quickly deploy out-of-the-box solutions where possible, innovate when necessary', 'Contributes on a wide variety of projects', 'Executes on modeling/machine learning projects effectively', 'Communicates findings to team and leadership to ensure models are well understood and incorporated into business processes', 'Works with leaders to ensure the project will meet their needs', 'Have completed at least a Bachelor’s degree in a quantitative field such as computer science, data science, mathematics, statistics, or physics', '2+ years of experience as a Data Scientist', 'Solid background in SQL, and Python (or R).', 'Experience in using statistical modeling and/or machine learning techniques to build models that have driven company decision making preferred', 'Experience in managing and manipulating large, complex datasets', 'Ability to code and develop prototypes and final product in languages such as Python, Java, Javascript', 'Familiarity with Unix shell scripting', 'Knowledge of advanced modeling techniques preferred', 'Ability to analyze and interpret moderate to complex concepts', 'Ability to provide written and oral interpretation of highly specialized 
terms and data, and ability to present this data to others with different levels of expertise']"
+"Amazon Web Services, Inc.",Data Scientist - Nationwide Opportunities,13ee178fbb97d6f6,"['A Bachelor or Masters Degree in a highly quantitative field (Computer Science, Machine Learning, Operational Research, Statistics, Mathematics, etc.) or equivalent experience', '4+ years of industry experience in predictive modeling, data science and analysis', 'Previous experience in a ML or data scientist role and a track record of building ML or DL models', 'Experience using Python and/or R', 'Experience using ML libraries, such as scikit-learn, caret, mlr, mllib', 'Experience in writing and tuning SQL', 'Experience with SparkML', 'Experience working with GPUs to develop model', 'Experience handling terabyte size dataset', 'Experience using data visualization tools', 'Understand the customer’s business need and guide them to a solution using our AWS AI Services, AWS AI Platforms, AWS AI Frameworks, and AWS AI EC2 Instances .', '', 'Assist customers by being able to deliver a ML / DL project from beginning to end, including understanding the business need, aggregating data, exploring data, building & validating predictive models, and deploying completed models to deliver business impact to the organization.', '', 'Use Deep Learning frameworks like MXNet, Caffe 2, Tensorflow, Theano, CNTK, and Keras to help our customers build DL models.', '', 'Use SparkML and Amazon Machine Learning (AML) to help our customers build ML models.', '', 'Work with our Professional Services Big Data consultants to analyze, extract, normalize, and label relevant data.', '', 'Work with our Professional Services DevOps consultants to help our customers operationalize models after they are built.', '', 'Assist customers with identifying model drift and retraining models.', '', 'Research and implement novel ML and DL approaches, including using FPGA.', '', 'PhD in a highly quantitative field (Computer Science, Machine Learning, Operational Research, Statistics, Mathematics, etc.)', '4+ years of 
industry experience in predictive modeling and analysis', 'Skills with programming languages, such as Java or C/C++', 'Ability to develop experimental and analytic plans for data modeling processes, use of strong baselines, ability to accurately determine cause and effect relations', 'Consulting experience and track record of helping customers with their AI needs', 'Publications or presentation in recognized Machine Learning, Deep Learning and Data Mining journals/conferences', 'Experience with AWS technologies like Redshift, S3, EC2, Data Pipeline, & EMR', 'Combination of deep technical skills and business savvy enough to interface with all levels and disciplines within our customer’s organization', 'Demonstrable track record of dealing well with ambiguity, prioritizing needs, and delivering results in a dynamic environment', 'Experience diving into data to discover hidden patterns', 'Able to write production level code, which is well-written and explainable']"
+UnitedHealth Group,"Data Scientist, Healthcare Methodology",ed2b2633019ed4de,"['Bachelor’s degree in mathematics, statistics, biostatistics, engineering, or related field or a degree in health studies with a strong emphasis in analytic methods', '2+ years’ work (or equivalent academic) experience in data analysis using tools like SQL/Spark SQL', 'Skilled in manipulating, analyzing, describing and enriching data assets', 'Ability to identify and document data anomalies and to collaborate with cross-functional teams for effective resolution', 'Strong organization skills, detail oriented, with careful attention to work processes', 'Ability to work creatively and flexibly, both independently and as part of a team', 'Enjoys a fast-paced environment and the opportunity to rapidly learn new content and skills', 'Analytical, intellectually curious and a problem solver', 'Location for this role is Boston, MA or telecommute considered for the right candidate', 'Master’s degree in fields described above', 'Strong preference for experience in Healthcare-focused analytics', 'Technical experience: Scala, Git, Jira (or similar ticketing systems), R or other statistical packages, Hadoop', 'Analytic experience: statistics, predictive and prescriptive modeling (traditional and more advanced machine learning approaches)', 'Content knowledge: Population health metrics, Center for Medicare and Medicaid Services (CMS) reporting requirements, other clinical guidelines and measures', 'Data experience: payer claims and member eligibility data, EMR/clinical data']"
+AETNA,Principal Data Scientist,2433f2863988385c,"['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']"
+Liberty Mutual Insurance,Data Scientist or Sr. Data Scientist - Solaria Labs,40f3e73956cd7498,"['Translate product ideas into well-posed data science challenges, and solve them', 'Find new markets and novel applications for our core internal data', 'Design and deploy Machine Learning (ML) systems backing our products', 'Translate and champion our ML capabilities for non-technical audiences', 'Participate in the broader ML community to stay current with the latest techniques and software', 'Engage with staff within and beyond the Innovation team, training and advocating for the use of ML techniques in other business areas', 'Competencies typically acquired through a Ph.D. degree (in Engineering, Computer Science, Mathematics, or other scientific field of study) and 0-4+ years of relevant experience, a Master’s degree (scientific field of study) and 3-5+ years of relevant experience or may be acquired through a Bachelor’s degree(scientific field of study) and 4-7+ years of relevant experience.', 'Extensive experience analyzing data and a broad understanding of core statistical and ML techniques', 'Expertise with Python, Java, C, or similar language, along with standard machine learning libraries', 'Proven track record of rapid prototyping and development, leveraging open source or commercial frameworks (e.g., TensorFlow, Hadoop, AWS)', 'Computer science background, comfortable working with both technical and non-technical staff to translate concepts and algorithms into working prototypes', 'Ability to thrive in a fast-paced environment with significant uncertainty']"
+Autodesk,"Intern, Experience Design for Machine Learning",c79f7d37bbe5e4c4,"['Conduct user research on customer needs', 'Work with internal subject matter experts in architecture, machine learning, experience design, and computational design to generate ideas for workflows and solutions', 'Sketch conceptual designs and workflows, share your ideas with our experience design team, get feedback and iterate', 'Design and work with your engineering peer to implement a prototype of a service that uses machine learning to create something meeting customer needs', 'Iteratively test the solution with customers to get feedback and improve', 'Full-time student pursuing Bachelor’s, Master’s or PhD in HCI, Interaction Design, Architecture, or a related field at an accredited program in the United States with at least one academic term to complete post internship to be eligible for internship participation', 'Experience in prototyping and iterative design work', 'Passionate about both understanding and solving customer problems', 'Highly collaborative', 'Experience with Autodesk tools, such as Revit, or in architecture or building engineering', 'Basic web UI creation skills for the purposes of interactive prototyping']"
+,,,
+Day Zero Diagnostics,Associate Data Scientist,3b4349a5a2beb0a3,"['Develop and implement algorithms that speed up data processing and machine learning model research.', 'Implement numerically optimized CPU/GPU kernels.', 'Build statistical and analytical tools to research and support machine learning models.', 'Develop, implement, and test machine learning models for predicting phenotypic traits from genomic sequences.', 'Maintain organized, tested code and corresponding documentation.', 'Present data within and outside of the company at meetings and symposia.', 'Write, edit, and submit manuscripts/abstracts/grants detailing the results of the project.', 'Work closely within the group and with outside collaborators.', 'Maintain close communications with the team regarding progress.', ""Bachelor's or Master’s Degree in Computer Science, Machine Learning, Computational Biology, Bioinformatics, or equiv."", 'Fluency in high performance computing (e.g., parallel computing, memory management, OpenMP, CUDA)', 'Fluency in Python, C/C++, Linux; Familiarity with TensorFlow, SQL and git', 'Familiarity with neural network models (CNN, RNN, etc.) and machine learning best practices', 'Familiarity with NGS data analysis, particularly ONT MinION data, helpful', 'Highly motivated and independent, with the ability to work in a dynamic team environment', 'Strong oral and written communications skills', 'Excellent organizational skills and attention to detail', 'Flexibility to occasionally work evenings or weekends']"
+Spotify,Data Scientist – Personalization,c069b1ea74cb0123,"['Develop research questions and undertake data exploration to understand who our listeners are, how they get value out of Spotify, and where we can develop our product to bring greater value', 'Work closely with the team including data engineering to build lasting solutions to our data modeling', 'Design, analyze, and interpret the results of experiments', 'Perform exploratory analyses to understand the performance of our machine learning systems', 'Define, evaluate, dashboard and monitor key metrics, understanding root causes of changes', 'Contribute to the development of the Product Insights function and the wider analytics community at Spotify', 'Mentor and coach other data scientists', 'Work from our office in Boston', 'You will have 2 to 5 years relevant applied experience with a degree or higher in economics, psychology, computer science, statistics, or mathematics or another quantitative discipline', 'Technical competence to perform advanced analytics:\nCoding skills (such as R, Python, or Scala)\nExperience with analytics & visualization tools (SQL, Tableau, ggplot/matplotlib or equivalent)\nExperience performing analysis with large datasets\nYou have strong statistical understanding of online testing methodologies and metric development', 'Coding skills (such as R, Python, or Scala)', 'Experience with analytics & visualization tools (SQL, Tableau, ggplot/matplotlib or equivalent)', 'Experience performing analysis with large datasets', 'You have strong statistical understanding of online testing methodologies and metric development', 'You are an independent thinker, able to work autonomously, capable of taking on loosely defined problems and translating complex thinking into practical application for diverse audiences', 'You are a communicative person who values building strong relationships with colleagues and partners and enjoys mentoring and teaching others']"
+,,,
+AETNA,Lead Data Scientist,fa89fb3f68872c72,"['Develops and/or uses algorithms and statistical predictive models and determines analytical approaches and modeling techniques to evaluate scenarios and potential future outcomes.', 'Performs analyses of structured and unstructured data to solve multiple and/or complex business problems utilizing advanced statistical techniques and mathematical analyses and broad knowledge of the organization and/or industry.', 'Collaborates with business partners to understand their problems and goals, develop predictive modeling, statistical analysis, data reports and performance metrics.', 'Develops and participates in presentations and consultations to exiting and prospective constituents on analytics results and solutions.', 'Interacts with internal and external peers and managers to exchange complex information related to areas of specialization.', 'Use strong knowledge in algorithms and predictive models to investigate problems, detect patterns and recommend solutions.', 'Use strong programming skills to explore, examine and interpret large volumes of data in various forms.', '7-10 or more years of progressively complex related experience.', 'Demonstrates proficiency in most areas of mathematical analysis methods, machine learning, statistical analyses, and predictive modeling and in-depth specialization in some areas.', 'Strong knowledge of advanced analytics tools and languages to analyze large data sets from multiple data sources.', 'Anticipates and prevents problems and roadblocks before they occur.', 'Demonstrates strong ability to communicate technical concepts and implications to business partners.']"
+Nift Networks,Senior Data Scientist / Machine Learning,9d5e29b62aa24032,"['Lead and guide data engineers in implementing our methodologies on different platforms, such as R, Spark, H2O, tensorflow, while evaluating models and making trade-off decisions on the best solutions.', 'Creative use of existing data, as well frequent integration of new types of data, for feature engineering that will results in more predictive, accurate, and insightful models.', 'Deep dive into the internal mechanics of machine learning algorithms, suggesting modifications to our current implementation and pipeline.', 'Lead and guide data engineers in comparing the performance of different machine learning methods, such as kernel methods, XGBoost, regularized regression, and deep nets.', 'Develop causal models.', 'Develop and implement practical approaches to deal with real life data challenges, such as incomplete data, high signal to noise ratio.', 'Accelerate research-to-production cycle by developing new and novel experimental frameworks and metrics.', 'Integrate learning-to-rank techniques and collaborative filtering methods.', 'Ability to write clean and concise code, especially in R or Python.', 'Solid understanding of statistics.', 'Keen eye for detail and thoughtful investigation of data before relying upon it.', 'Intuition for data science best practices, stemming from proven experience.', 'Steadfast focus on creating impactful change and ability to prioritize between many tasks to maximize the improvement of the business.', ""5+ years industry experience and a Master's degree in a quantitative field, Ph.D. is a plus."", 'Collaborative team player who values the contribution of others.']"
+Massachusetts General Hospital(MGH),Data Analyst I - Please Include a Cover Letter,2dbe5b67cd791fa6,"['Responsible for data management and statistical analysis for new and ongoing studies. Job duties require applicant to have strong working knowledge of regression modeling and epidemiological study design and knowledge of R and related data analysis pipelines.', 'Responsibilities include managing large-scale dataset and both developing and applying regression based analyses of epidemiologic and epigenetic data.', 'Collaborate with PI, Center staff, and site PIs in performance of the above duties.', 'Collaborate on the preparation of grant applications, manuscripts, and presentations.', 'Understanding of data management and data analysis workflows.', 'Advanced knowledge of statistical and epidemiological study design.', 'Proven ability to learn new computational tools and packages.', 'Excellent communications skills and ability to handle multiple projects simultaneously.']"
+Trial Courts of Massachusetts,Child Welfare Data Analyst - Department of Research and Planning,20748cd6498b3c2c,"['fair and impartial administration of justice;', 'protection of constitutional and statutory rights and liberties;', 'equal access to justice for all in a safe and dignified environment with policies and practices that strengthen and support diversity, equity, and inclusion;', 'efficient, effective, and accountable resolution of disputes;', 'prompt and courteous service to the public by committed and dedicated professionals utilizing best practices in a manner that inspires public trust and confidence.']"
+Twitter,Data Scientist,0ee3cb029370032f,"['Conduct analyses to learn from our vast amount of data, including exploratory data analysis, investigating A/B test results, observational and network data analysis, etc.', 'Analyze user behavior at scale, ranging from individual-level attributes to user populations.', 'Apply advanced statistical and machine learning techniques to model user behavior, identify causal impact and attribution, and build and benchmark metrics.', 'Use tools for interacting with large datasets such as Hadoop, Spark, and Presto.', 'Use Python or R (or similar) to conduct complex data analyses.', 'Use data visualization tools (e.g. Tableau or Zeppelin).', 'Communicate findings to executives and cross-functional product teams.', 'You communicate your findings clearly and empathetically to a wide audience of relevant partners, and are capable of building meaningful presentations and analyses that tell a “story”.', 'You’re capable of collaborating with our business, engineering, and product colleagues to discover new opportunities that benefit Twitter.', 'You’re a pragmatic problem solver with a strong quantitative background and passion for deriving insights from data.', 'You’re a self-starter who takes initiative and is capable of learning on the job.', 'You have experience owning complex projects from start to finish.', 'You’re excited to learn and apply new data analysis techniques and tools.', 'Masters or PhD dealing with quantitative analysis or commensurate work experience', 'Strong hands-on proficiency with at least one programming language such as Python, R, Java, Scala.', 'Experience using large scale quantitative customer data analysis to solve problems and answer questions', 'Experience with data visualization', 'Experience with social network data a plus']"
+Novartis,Data Scientist – Data Connector,f1443e26a94bee1e,"['Engage with computational peers across the research organization to identify recurrent problems that can be solved at scale, focusing on all data domains that are of practical use in drug discovery.', 'Design, implement, and maintain robust methods, algorithms, and packages (python, R) that help the computational community solve old and new problems with ease.', 'Define, refine and promote the computational glue that is between large-scale data processing (such as NGS pipelines) and insights at very detailed level.', 'Ideate and implement visualizations, dashboards & webservices for data dissemination to computational peers as well as to non-computational collaborators.', 'Excellent scripting skills in both R and python, demonstrated ability to develop & deploy packages/modules for end users; proficiency in Linux, git, SQL and relevant software packages are essential.', 'Algorithmic/computational understanding of efficiently dealing with small and large sets of heterogeneous data.', 'Familiarity with biological data of relevance for drug discovery and an ability to communicate it with broad audiences (English oral and written).', 'A desire to relentlessly improve the status quo in terms of efficiently linking data across domains.', 'Higher education degree in a relevant field or equivalent years of experience (mathematics, computer science, bioinformatics, physics, biology, chemistry, etc.)', 'Statistics; machine learning; bioinformatics/cheminformatics domain knowledge.', 'Familiarity with literate programming environments (such as jupyter, R Markdown, Databricks) and/or cloud computing.']"
+Systems & Technology Research,Data Scientist,9b45a0cc2353d2e9,"['A degree in a scientific field such as Statistics, Mathematics, or Computer Science', 'Experience in statistical modeling including performance evaluation and uncertainty quantification', 'Proficiency with a scientific programming language, preferably Python, and familiarity with Numpy, Pandas, and/or Scikit-learn packages', 'Experience in grooming sparse, incomplete, and noisy datasets', 'Motivated collaborator and an excellent communicator of ideas to both technical and non-technical audiences', 'US citizen and willing to obtain a U.S. Security Clearance', 'MS or PhD in a scientific field such as Statistics, Mathematics, Computer Science, or Data Science or 2+ years of relevant work experience', 'Familiarity with handling and analyzing data at scale, for example using Hadoop, Dask, Spark, and MapReduce', 'Working knowledge of data store tools such as SQL and Elasticsearch, and experience interacting with databases', 'Experience with deep learning and neural network training, testing, and evaluation with fluency in Tensorflow or PyTorch', 'Specialized expertise in a data-rich field such as time-series analysis, graph analytics, geospatial analysis, image processing, or Bayesian programming', 'Active U.S. Security Clearance', 'Competitive salary', 'Comprehensive benefits (Medical, Dental, Vision, Disability, Life)', '401k company match', 'Competitive and flexible paid time off', 'Continued higher education reimbursement', 'Profit sharing (Additional match to 401k)', 'Phone reimbursement plan', 'And more!']"
+,,,
+Applied Invention LLC,Summer Internship - 2020,05def312620fce4e,"['Software Programming/Development', 'Image Processing Development', 'Data Science', 'Electrical Engineering', 'Mechanical Engineering', 'Robotics', 'Actively support project managers and team members on a variety of projects', 'Specific day to day responsibilities will vary based upon skills, expertise and project needs', 'Intern candidates must be enrolled in a college or university working toward an advaced degree in any of the following: science, technology, engineering, math, or design', 'Resourceful, with a relentless drive for finding innovative and creative solutions', 'Passionate about STEM; has STEM hobbies', 'Creative individuals who approach problems with curiosity and enthusiasm', 'Ability to manage through ambiguity, comfort with grey space', 'Perseverance and tenacity', 'Multi-disciplinary engineering coursework with project experience in design and prototyping a plus', 'Intern positions at Applied Minds are paid and work full or part-time over the summer break', 'Positions are available in: Burbank, CA, Provo, UT, and Cambridge, MA. No relocation assistance will be offered.', 'The work environment is open and casual']"
+Abcam,Logistics Technician,185a94c6d077d8dc,"['Process incoming shipments from suppliers, ensuring accuracy of all product data.', 'Aliquot product into vials and assemble kits for customer orders and global stocking.', 'Work with colleagues across the business to resolve discrepancies quickly.', 'Maintain accurate inventory levels using both manual and automated systems.', 'Shipping', 'Fulfill customer orders using internal systems and processes, ensuring accuracy of all products in all orders.', 'Generate required shipping documentation.', 'Dispatch orders in line with daily goals and customer expectations.', 'Ship domestically and internationally to customers and other distribution hubs.', 'Work with teammates to improve the efficiency and accuracy of any processes implemented within the Logistics Department using creativity and resourcefulness.', 'Comply with all Health & Safety (H&S) regulations.', 'Other duties as required', 'High School Diploma or GED', 'Bachelor’s Degree', 'Proven attention to detail, with a focus on accuracy', 'Proven ability to meet tight deadlines.', 'Prior experience working in a team-oriented environment.', 'Ability to prioritize and multi-task in a fast-paced environment.', 'Display solid organizational skills and exhibit effective and proactive communication across all departments and levels', 'Experience solving problems both independently and with others.', 'Able to lift 35lbs']"
diff --git a/data/data_test.csv b/data/data_test.csv
new file mode 100644
index 0000000..57e6ae4
--- /dev/null
+++ b/data/data_test.csv
@@ -0,0 +1,51 @@
+Companies,Positions,ID,Descriptions
+Fidelity Investments,Data Scientist,4bb0fbf03740b709,"['A Ph.D.or Master’s degree in Engineering, Computer Science, Applied Mathematics orother relevant quantitative discipline', 'Industry experience would be an asset', 'Your knowledge in fixed income and equity market, brokerage business and trading', 'Your proven experience with Python, PyTorch, Kafka or KDB/Q to see and understand large amount of data', 'Your in-depth data science, machine learning and analytical knowledge to let data build narratives', 'Your eagerness to learn new things, share ideas freely and build a positive work environment', 'Your leadership and presentation skill to influence product directions', 'Crafting predictive models to improve revenue for fixed income business', 'Building tools, prototypes and simulation engines to streamline processes and test new ideas', 'Directing deployment of machine learning models through production cycle', 'Promoting culture of innovation and thought leadership', 'Influencing product directions for the benefit of our clients']"
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
+,,,
diff --git a/data/df.csv b/data/df.csv
new file mode 100644
index 0000000..6c6fbd6
--- /dev/null
+++ b/data/df.csv
@@ -0,0 +1,11 @@
+Companies,Positions,ID,Descriptions
+Liberty Mutual Insurance,Software Engineer,f747dbc4c6fa3e4e,"['Develop maintainable/scalable, effective, defect-free source code that meets business requirements and team standards', 'Conduct unit test case development and highly complex test scripts', 'Conduct impact analysis', 'Develop component design and system/application architecture', 'Support the production support transition and implementation phases responsible for overall success of the project.', 'Work within project team to lead iterative development that delivers a high-quality product.', 'Support interactions with customers and development team to gather and define requirements.', 'Develop and lead the production of support documentation.', 'Lead functional/system integration testing support.', 'Lead quality assurance architecture and design guide review defects or discrepancies in requirements identified and resolved with appropriate partners.', 'Plan work out appropriately, effectively communicating and responding to escalations/inquiries.', 'Develop Build and Deployment guide.', 'Identify and implement appropriate continuous improvement opportunities.', 'Complete other project work or tasks as assigned.', ""Bachelor's or Master's degree in Computer Science or related technical discipline"", '5+ years of professional experience. Knowledge of Salesforce concepts, strategies and methodologies preferred.', 'Highly proficient in multiple Object-Oriented programming languages and tools; ability to develop on multiple platforms.', 'Salesforce Developer Certifications helpful, development and integration, including Lightning, Visualforce and Apex', 'Strong negotiation, facilitation and consensus building skills.', 'Excellent oral and written communication skills; presentation skills', 'Extensive knowledge of the following: IT concepts, strategies, methodologies. 
Versed in diverse technologies and new technical architecture principles and concepts.', 'Experienced in layered systems architectures solutions, designs and shared software concepts.', 'Extensive understanding of agile software development concepts and processes.', 'Must be proactive, demonstrate initiative and be a logical thinker.', 'Extensive consultative skills, including the ability to understand and apply customer requirements, including drawing out unforeseen implications and making recommendations for design, the ability to define design reasoning, understanding potential impacts of design requirements.', 'Extensive understanding of backlog tracking, burndown metrics, and incremental delivery. Strong collaboration, prioritization, and adaptability skills required.']"
+ViaSat,Software Engineer Intern,a9f0ccf39692f70a,"['Web & mobile application engineering', 'Data analytics & cloud application engineering', 'Distributed enterprise software applications', 'System infrastructure development; scripting, automation, data visualization & dashboarding', 'Cybersecurity software & systems engineering', ""Currently pursuing a Bachelor's degree or higher in Computer Science &Engineering and/or a related field"", 'Exposure or desire to work with any of the following; Java, Python, JavaScript, Hadoop, MapReduce, SQL, Postrgres, C/C++ and many more', 'Able to commit to a 10-12 week internship', 'Interest in Cyber Security or Data Science', 'Experience with or desire to learn Linux']"
+Pluralsight,Software Engineer,ddc20246b79174f2,"['Provide architectural, strategic, and scale recommendations for both frontend and backend development', 'Collaborate with Product Manager and UX Designer to better understand the customer, provide valuable input into standards, layouts, navigational features, functional design, and usability', 'Help us assure absolute stability in our product through continuous integration, staged releases and feature toggles', 'Implement asynchronous messaging patterns to communicate between services', 'Apply your experience in making intelligent, forward-thinking, technical decisions to our development process, including implementing new standards, tools, APIs, and workflows', 'Prior experience building software for high traffic websites, using languages such as Javascript and Python', 'Full stack experience preferred', 'Experience with web frameworks and APIs (e.g. Flask)', 'Experience with frontend development, including Javascript and responsive layout across a wide number of devices. Experience with React/Redux is a bonus.', 'Deep understanding of relational databases, and other types of data stores', 'AWS DevOps and networking experience is a bonus', 'Experience with automated and unit testing', 'Experience with microservices architecture a plus']"
+Zoll Medical Corporation,Software Test Engineer - Manual,a31faafd9f83a6f6,"['Design, develop and support unit, integration and verification tests for an embedded software application', 'Interface and collaborate as part of the software development team to report and aid diagnosis of issues.', 'Review and provide input to software designs to aid manual testing.', 'Participate in the software development life cycle including maintaining test protocols and requirement tracing.', 'Follows standard operating procedures and software test engineering best practices', '', '3+ years of software development or Manual testing experience', 'Experience with SDLC tools including bug tracking and code repository', 'Experience with embedded software development or testing is preferred', 'Experience with Scrum is helpful, but not required', 'Experience with medical device development or testing is helpful, but not required', 'Strong written & verbal communication skills', 'Candidate need to come with positive attitude and affinity for teamwork, logic, innovation, imitative and self –sufficiency', 'BS in computer science or engineering discipline is required']"
+Accenture,"Quality Engineer - Charlotte & Raleigh, NC",820b6e2b9a5b2408,"['Adapts existing methods and procedures to create possible alternative solutions to moderately complex problems.', 'Understands the strategic direction set by senior management as it relates to team goals.', 'Uses considerable judgment to determine solution and seeks guidance on complex problems.', 'Primary upward interaction is with direct supervisor. May interact with peers and/or management levels at a client and/or within Accenture.', 'Determines methods and procedures on new assignments with guidance.', 'Decisions often impact the team in which they reside.', 'Manages small teams and/or work efforts (if in an individual contributor role) at a client or within Accenture.', 'Must be able and willing to travel 100% (Mon – Thursday every week)', 'A minimum of 3 years of experience in software testing', 'A minimum of 1 year of experience working with Agile Delivery', 'A minimum of 1 year of experience with one of the following: Jira, Rally, Microfocus ALM, Microsoft TFS, Tricentis qTest', 'A minimum of 2 years of demonstrated experience in one of the following: Java/J2EE, Groovy, Python, Ruby, JavaScript, C#', ""Bachelor's Degree or equivalent (minimum 12 years) work experience. If Associate’s Degree, must have minimum 6 years work experience."", 'A minimum of 2 year of experience performing API testing', 'A minimum of 1 year of healthcare payer experience with claims, enrollment, EDI, or encounters', 'A minimum of 1 year of experience in test automation', 'Demonstrated experience with Continuous Integration, including two or more of the following build/deploy/version control tools: Jenkins, Hudson, Bamboo, Git, Mercurial, Subversion, Bitbucket, GitHub, Grunt, Gradle, Gulp, Maven, MS TFS']"
+Google,"Software Engineering Intern, BS, Winter 2020",1d99705341f0d46c,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', ""Currently enrolled in a Bachelor's degree program in Computer Science, or related technical field."", 'Experience with Data Structures or Algorithms gathered from: completing a data structures or algorithms class, coursework, course projects, research, individual projects, internships, or other practical experience inside or outside of school or work (including open source hobby coding).', 'Experience in Software Development and coding in a general purpose programming language.', 'Examples of coding in one of the following programming languages including but not limited to: C, C++, Java, JavaScript, or Python.', 'Authorization to legally work in the United States.', 'Returning to a degree program after completion of the internship.', 'Experience programming in two or more of the languages including but not limited to: C, C++, C#, Java, JavaScript, Go or Python.', 'Experience working with some of the following: web application development, Unix/Linux environments, mobile application development, distributed and parallel systems, machine learning, information retrieval, natural language processing, networking, developing large software systems, and/or security software development.', 'Demonstrated interest and ability to learn other coding languages as needed.', 'Excellent communication skills, with the ability to speak and write in English.', 'Apply knowledge gained in computer science courses to real-world challenges.', 'Analyze information and evaluate results to choose the best solution to effectively solve challenges.', 'Develop scripts to automate routine tasks.', 'Create and support a productive and innovative team. 
This includes working with peers, managers, and teams.']"
+State Street,CRD Java Software Engineer,f0c15090579e0f84,"['CRD (Charles River Development) has a need for a Java Developer influences and impacts the architecture, standards, and design of key product initiatives for applications in C#/WPF and Java services', 'Contribute as an individual contributor within a team of top engineers', 'Work in a dynamic, fast-paced, Agile team environment', 'BS/MS in Computer Science or equivalent field is REQUIRED', '0 to 3+ years of commercial software development, proficient in developing multi-tier solutions, multithreading and core Java proficiency', 'No professional Java development experience required', 'Technical experience includes:', 'Extensive Java, J2EE n-tier application experience', 'Hands-on experience with web services', 'C#/.Net also highly desirable', 'Strong SQL skills with considerable experience in Oracle or SQL Server', 'Knowledge of XML/XSL', 'Experience with Tomcat, JBoss, Weblogic or WebSphere', 'Strong with OO design and development', 'Solid grasp of algorithms, solving difficult heuristic/optimization types of problems (Math and/or finance background highly desirable)', 'Experience with developing applications for the financial markets is highly desirable (Fixed income, Trading, FX, Risk, Portfolio/Wealth Management, Market Data, FIX, etc).', 'Product delivery experience includes developing and supporting commercial software products which were sold to non-technical customers in vertical markets', 'Ability to work independently, handle multiple tasks simultaneously and adapt quickly to changes', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector']"
+DeepHealth,Software Engineer,682a90083b60716a,"['Degree in Computer Science or related field', '2+ years experience in related field', 'Strong programming skills (e.g., python, javascript)', 'Excellent documentation, version control, and organization skills', 'Experience in web backend development, such as Flask, Django, Node.JS, etc', 'Experience with container technology, such as Docker, Amazon ECS, etc.', 'Experience with DICOM data, PACS infrastructure, FDA-regulated software development is a plus']"
+Massachusetts General Hospital(MGH),Software Engineer,e73760f123b76725,"['Develop software requirements specifications for new software releases,', 'Assign software development tasks to other members of the team and monitor their progress,', 'Participate in the coding and testing of the software,', 'Lead the deployment of new software version to the two test sites (Boston Children’s Hospital and University of Houston Health),', 'Organize the collection of feedback data from the test sites and compile summaries of the feedback for the next software development cycle, and', 'Participate in the creation of user documentation and training of users,', 'Keep the project web site up to date,', 'Participate in the weekly videoconferences with the three PIs of the project and report progress.', 'Strong C++ programming skills are required and experience in using the Qt toolkit is desirable.', 'Familiarity with the LINUX, Windows, and/or Mac OSX operating systems is required.', 'Candidates need to be able to work in a modern software development environment and be familiar with controlled software development processes for future FDA compliance.', 'Experience in leading a major software development project is desirable.', 'A successful candidate can work in a dynamic, international, and collaborative work environment.', 'He/She will gain experience in recording and analyzing electrophysiological data, working with state-of-the-art brain mapping technologies as well as engaging in top level research.', 'Familiarity with MEG/EEG and MRI analysis software packages, MNE and FreeSurfer is also beneficial.']"
+Worldpay,Associate Software Engineer,8a81d361f7a2fac4,"['Plans, designs, develops and tests software systems or applications for software enhancements and new products.', 'Works cross-functionally to address issues and emerging needs in software systems.', 'Rigorously tests software in preparation for deployment.', 'Works on projects from enhancements, new features and bug fixes to new and replacement products.', 'Follows standard practices and procedures in analyzing situations or data from which answers can be readily obtained.', 'Builds stable working relationships internally.', 'Typically requires no previous professional experience.']"
diff --git a/data/ids.txt b/data/ids.txt
new file mode 100644
index 0000000..96251ed
--- /dev/null
+++ b/data/ids.txt
@@ -0,0 +1,51 @@
+4bb0fbf03740b709
+b8f6c807172debe6
+4c905d0807fa2eed
+c9169f5f8ed90ee1
+1d8cf47f1019b24e
+602473536ae7e150
+3890f0d479f70f9c
+618173f1c642b156
+98dcd62a5d1aa652
+b09745676a324eff
+e2646d60162b5288
+e93ea4e970bcf843
+70421ac3295dea39
+d99865eae70c9779
+41f63fd1515b11be
+64c21291ed0707bc
+ba79fee39edaeec7
+61f6469e434ce823
+c27d8f34540e8c7d
+97f5cbd91543f63c
+7baaacd0c63d2be6
+40f3e73956cd7498
+fa89fb3f68872c72
+8202bd856d4b33c4
+8f1c1f9d16cf8352
+3979cbcc2268abb0
+d7bf48644cb2042c
+6191757120becbd1
+13ee178fbb97d6f6
+c069b1ea74cb0123
+fef4e0d6b58105e7
+3f06785cad4921c1
+4feed7c6f3a4627f
+3b4349a5a2beb0a3
+d5f33dad6a30da09
+c60809d50bce2dad
+a49c437b4c7a7b08
+2dbe5b67cd791fa6
+6981a9d7adfa0d90
+699342424ce92899
+4010e39f2f78984c
+f1443e26a94bee1e
+0c592e819dbc750a
+5a1615cd1dcdaf25
+6aa8e43af94649b1
+20748cd6498b3c2c
+591242e9d34727c1
+ccba71ecc7fb728d
+88c7a1719e6aabfb
+62a2dd85a3b24466
+2019-08-20 18:46:14.865162
\ No newline at end of file
diff --git a/data/software_engineer.csv b/data/software_engineer.csv
new file mode 100644
index 0000000..705e42a
--- /dev/null
+++ b/data/software_engineer.csv
@@ -0,0 +1,51 @@
+Companies,Positions,ID,Descriptions
+State Street,Software Engineer I (EMS Trading Team) - Charles River Development,e8a9b71f6c5126ae,"['Opportunity to influence and impact the architecture, standards, and design of key product initiatives for applications in Java services', 'Contribute as a Sr. Individual contributor within a team of top engineers', 'Work in a dynamic, fast-paced, Agile team environment', 'BS/MS in Computer Science or equivalent field', '3+ years of commercial software development, proficient in developing multi-tier solutions', '3+ years of professional Java development experience, core Java and Multi-threading', 'Ability to work independently, handle multiple tasks simultaneously and adapt quickly to changes', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector (Math background highly desirable)', 'Extensive Java n-tier application experience', 'Hands-on experience with Web Services (C#/.Net also highly desirable)', 'Strong SQL skills with considerable experience in Oracle or SQL Server', 'Knowledge of XML/XSL', 'Experience with Tomcat, JBoss, Weblogic or Websphere', 'Strong with OO design and development', 'Solid grasp of algorithms, solving difficult heuristic/optimization types of problems', 'Experience with developing applications for the financial markets is required (Fixed Income, Trading, FX, Risk, Portfolio/Wealth Management, Market Data, FIX, etc.)', 'Has played a key role in developing and supporting commercial software products which were sold to non-technical customers in vertical markets']"
+Google,"Software Engineering Intern, BS, Winter 2020",1d99705341f0d46c,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', ""Currently enrolled in a Bachelor's degree program in Computer Science, or related technical field."", 'Experience with Data Structures or Algorithms gathered from: completing a data structures or algorithms class, coursework, course projects, research, individual projects, internships, or other practical experience inside or outside of school or work (including open source hobby coding).', 'Experience in Software Development and coding in a general purpose programming language.', 'Examples of coding in one of the following programming languages including but not limited to: C, C++, Java, JavaScript, or Python.', 'Authorization to legally work in the United States.', 'Returning to a degree program after completion of the internship.', 'Experience programming in two or more of the languages including but not limited to: C, C++, C#, Java, JavaScript, Go or Python.', 'Experience working with some of the following: web application development, Unix/Linux environments, mobile application development, distributed and parallel systems, machine learning, information retrieval, natural language processing, networking, developing large software systems, and/or security software development.', 'Demonstrated interest and ability to learn other coding languages as needed.', 'Excellent communication skills, with the ability to speak and write in English.', 'Apply knowledge gained in computer science courses to real-world challenges.', 'Analyze information and evaluate results to choose the best solution to effectively solve challenges.', 'Develop scripts to automate routine tasks.', 'Create and support a productive and innovative team. 
This includes working with peers, managers, and teams.']"
+DataDog,Software Engineer,f47387c886f1b1dd,"['Build distributed, high-throughput, real-time data pipelines', 'Do it in Go and Python, with bits of C or other languages', 'Use Kafka, Redis, Cassandra, Elasticsearch and other open-source components', 'Own meaningful parts of our service, have an impact, grow with the company', 'You have a BS/MS/PhD in a scientific field or equivalent experience', 'You have significant backend programming experience in one or more languages', 'You can get down to the low-level when needed', 'You care about code simplicity and performance', 'You want to work in a fast, high-growth startup environment that respects its engineers and customers', ""You wrote your own data pipelines once or twice before (and know what you'd like to change)"", ""You've built high scale systems with Cassandra, Redis, Kafka or Numpy"", 'You have significant experience with Go, C, or Python', 'You have a strong background in stats']"
+Pluralsight,Software Engineer,ddc20246b79174f2,"['Provide architectural, strategic, and scale recommendations for both frontend and backend development', 'Collaborate with Product Manager and UX Designer to better understand the customer, provide valuable input into standards, layouts, navigational features, functional design, and usability', 'Help us assure absolute stability in our product through continuous integration, staged releases and feature toggles', 'Implement asynchronous messaging patterns to communicate between services', 'Apply your experience in making intelligent, forward-thinking, technical decisions to our development process, including implementing new standards, tools, APIs, and workflows', 'Prior experience building software for high traffic websites, using languages such as Javascript and Python', 'Full stack experience preferred', 'Experience with web frameworks and APIs (e.g. Flask)', 'Experience with frontend development, including Javascript and responsive layout across a wide number of devices. Experience with React/Redux is a bonus.', 'Deep understanding of relational databases, and other types of data stores', 'AWS DevOps and networking experience is a bonus', 'Experience with automated and unit testing', 'Experience with microservices architecture a plus']"
+Achievement Network (ANet),Software Engineer,a0303ea18678af0c,"['Implement ANet’s next generation of technology products that support data-driven educational practices and help to close the achievement gap in the underserved communities we partner with.', 'Create, monitor and deliver against the product road map and corresponding release cycles (as negotiated with the Product Management Team).', 'Help manage, improve, and automate our deployment frameworks.', 'Keep abreast of new technologies and propose adoption as appropriate.', 'Actively embrace and model best practices such as code reviews, unit testing, and planning/scoping/vetting larger features.', 'Commensurate with experience, provide strategic leadership in setting goals and measuring progress in RESTful server-side APIs and/or agile, componentized client-side code.', 'Commensurate with experience, proactively look for ways to build our team through individual contributions which help us improve code quality, team culture, and team velocity.', 'You have at least one year of experience delivering software that is deployed in production using:', 'You are a team player who motivates and educates colleagues; you enjoy working in a highly collaborative and agile culture.', 'You are skilled in working closely with business partners to build technology solutions that meet the usability and functional needs of both internal and external users.', 'You operate comfortably among a highly independent staff with diverse backgrounds.', 'You bring an innovative approach to analyze, evaluate and solve problems.', 'You have a passionate commitment to and a sense of urgency for the support of schools, along with a belief that all students can achieve at high levels.', 'You are motivated by working in an environment where we live out our core values daily and are eager to examine personal archetypes and biases while discussing topics related to race, class, and privilege which relate to ANet’s Advance Equity and People 
Matter values.', 'You have a Bachelor’s Degree in Computer Science, Math, Engineering or related degree', 'For candidates interested in working remotely, you bring demonstrated experience and previous success working remotely']"
+PatientPing,Back End Software Engineer,03a75bcd6a667f1f,"['Work closely with the Director of Engineering to ensure the highest standards of design, implementation, performance, and security are met', 'Work closely with the Product team to ensure all customer needs are met', 'Design and build new features', 'Maintain and extend existing features', 'Anticipate and start planning the next generation of software products', 'Become familiar with the details of our business domain, development process, and codebase', 'Ask questions and build healthcare industry knowledge', 'Build new capabilities and enhance existing ones based on the needs of our customers', 'Participate in the design process', 'Develop a new feature from concept to launch', 'Conduct code reviews for peers', 'Demonstrate consistently delivering on feature development expectations', 'Suggest ideas for technical enhancements', 'Work as a strong partner to product', '3+ years of Java development', 'Modern Java frameworks (Spring Boot preferred)', 'Relational database design and query via both SQL and ORM', 'ElasticSearch or other document-oriented database', 'Microservice architecture, including REST API design', 'Asynchronous data pipelines using Kafka or similar modern messaging framework', 'AWS or other public cloud environment', 'Join one of the fastest growing health tech companies in the country', 'Have the autonomy to build something with a brilliant and enthusiastically supportive team', 'Learn best practices from world class investors and advisors', 'Become an expert on healthcare delivery system transformation', 'Receive cash and equity compensation with health, dental, and other benefits']"
+eviCore healthcare,Software Engineer in Test,cc4c17c9b550e324,"['Testing new and existing features for every agile sprint cycle.', 'Document and report issues discovered during testing, follow up for resolution utilizing JIRA for defect logging', 'Perform manual as well as automated testing to support system testing to insure effectiveness and quality of the components and systems developed', 'Proactively identify and diagnose performance and slowness issues. Perform root cause analysis when issues are found', 'Participates in requirements reviews, design reviews and change control activities and present the strategy for testing with product management, engineering, and other quality assurance members', 'Learn and add to the QA processes for continuous improvement and efficiency.', 'Work closely with Customer Care and customers to clearly define and validate issues, assist with defect correction priorities and validation of software corrections.', 'Collaboration with the documentation and training staff to create value focused documentation, release notes and training materials', 'Experience in testing applications with healthcare IT vendor', 'Experience in testing multi-tiered applications developed using Java/J2EE related technologies', 'Experience with SQL, ability to write, execute and analyzes SQL queries.', 'Comfortable with working in a Linux/Unix environment.', 'Experience with scripting /programming languages/automation to be able to convert existing manual test cases into automated test suites.', 'Ability to work independently with general direction setting', 'Demonstrated ability to successfully handle multiple demands and short deadlines; experience in resolving conflicts and dealing with shifting priorities', 'Excellent verbal and written communication skills', 'Experience with API testing tools and Azure', 'Agile processes', 'Experience with Selenium and BDD', 'Flexible scheduling and work/life balance with remote and work from home opportunities', 
'3 weeks of PTO(starting) per year plus paid holidays', 'Education assistance, tuition reimbursement and professional certifications', 'Health, dental, vision, and life benefits with employer funded HSA', 'Comprehensive employee discount program, onsite fitness facilities, and smart casual dress code', 'Paid Volunteer Community Service Days', 'Ample opportunities for growth, advancement, and promotion', '401k retirement plan with company match of 50% employee contributions up to 6%']"
+Karen Clark & Company,Software Development Engineer,a4bd773d481af06c,"['Take ownership of maintaining and improving application functionality and performance', 'Develop/tune database queries to optimize application performance', 'Collaborate with other team members to ensure application enhancements are both backward compatible and extensible', 'Work on extending front-end, middle-layer, and back-end components of a Windows application using Microsoft technologies', 'MS/BS degree in computer science or equivalent', 'Strong (3 to 5 years) of C#/WinForms development experience', 'Experience working with large and complex databases (MS SQL Server)', 'Excellent problem solving, analytical, and people skills', 'Must be flexible, a good listener, and have excellent written and verbal communication skills', 'Quantitative background (math/science/engineering) a plus', 'Insurance industry experience and/or exposure to natural hazard science a plus']"
+Google,"Software Engineering Intern, PhD, Winter 2020",6a9e8246f9a80bd2,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', 'Currently pursuing a PhD degree in Computer Science or in a related technical field.', 'Programming experience in one or more of the following: C/C++, Java or Python.', 'Returning to a degree program after the completion of the internship.', 'Implementation skills with one or more general purpose programming languages, including but not limited to: Java, C/C++, C#, Objective C, Python, JavaScript and/or Go.', 'Research experience in the following areas; Algorithms, Architecture, Artificial Intelligence, Compilers, Database, Data Mining, Distributed Systems, Machine Learning, Networking and/or Systems.', 'Demonstrated background in computer science, with competencies in data structures, algorithms and software design.', ""Research, conceive and develop software applications to extend and improve on Google's product offering."", 'Contribute to a wide variety of projects utilizing natural language processing, artificial intelligence, data compression, machine learning, and search technologies.', 'Collaborate on scalability challenges involving access to massive amounts of data and information.']"
+XPO Logistics,Software Engineer,1024b4f07ddfadb3,"['Build industry-changing applications for the Third-Party Logistics (3PL) industry', 'Develop web, web services and back-office applications using the Microsoft stack of technologies, including ASP.NET MVC and WCF in C# and SQL Server', 'Continuously learn about new technologies and products', 'Background or education in Computer Science or Engineering', '4 years of software development experience', 'C# and .NET experience', 'HTML/CSS/JavaScript/JQuery experience', ""Bachelor's or master's degree in Computer Science or Electrical Engineering"", 'ASP.NET MVC, Entity Framework, WCF and WPF experience', 'SQL Server Guru designation']"
+Raytheon,Software Engineer 2020 New College Grad! (Start in 2020),70f36ea0f40aebb2,"['Integrated Air and Defense systems, which produces the world’s most sophisticated short-, medium- and long-range, low- to high-altitude, all-weather air and missile defense systems, such as the combat-proven Patriot Missile which protects US and allied forces around the globe', 'Large Ground and Naval Radar systems where Raytheon IDS is a world leader in large scale radar development, technology, and production such as exhibited in the AN/TPY-2, UEWR, AMDR, EASR radars', 'Naval Combat Systems and Electronics such as those being built for the DDG 1000 Zumwalt-class destroyer, America’s next generation, multi-mission, naval destroyer, serving as the vanguard of an entire new generation of advanced multi-mission surface combat ships', 'Candidates must be currently enrolled in an undergraduate (or completed a degree no more than 18 months prior to start date) with an emphasis in Electrical Engineering, Computer Engineering, Computer Science, Mathematics, Physics, Software Engineering, or related engineering field.', 'U.S. Citizenship status is required as this position will need a U.S. Security Clearance within 1 year of start date.', '3.0 cumulative GPA', 'Strong analytical skills', 'Ability to work in an agile, collaborative environment', 'MATLAB, Simulink, and/or other analysis tools', 'Working knowledge of Unix', 'C, C++, JAVA', 'Unified Modeling Language (UML)', 'Engineering Project or Internship experience', 'Customer focus and collaboration skills', 'Excellent written and oral communication skills; knowledge of MS Office or other presentation tools', 'Follow @Raytheon on Twitter', 'Follow Raytheon on YouTube', 'Follow Raytheon on LinkedIn', 'Follow Raytheon Company on Instagram', 'Thomas A. Kennedy']"
+Massachusetts General Hospital(MGH),Research Software Engineer,cd79a2b399aca747,['Minimum of 1 year of experience in building and debugging software projects']
+Zoll Medical Corporation,Software Test Engineer - Manual,a31faafd9f83a6f6,"['Design, develop and support unit, integration and verification tests for an embedded software application', 'Interface and collaborate as part of the software development team to report and aid diagnosis of issues.', 'Review and provide input to software designs to aid manual testing.', 'Participate in the software development life cycle including maintaining test protocols and requirement tracing.', 'Follows standard operating procedures and software test engineering best practices', '', '3+ years of software development or Manual testing experience', 'Experience with SDLC tools including bug tracking and code repository', 'Experience with embedded software development or testing is preferred', 'Experience with Scrum is helpful, but not required', 'Experience with medical device development or testing is helpful, but not required', 'Strong written & verbal communication skills', 'Candidate need to come with positive attitude and affinity for teamwork, logic, innovation, imitative and self –sufficiency', 'BS in computer science or engineering discipline is required']"
+State Street,CRD Java Software Engineer,f0c15090579e0f84,"['CRD (Charles River Development) has a need for a Java Developer influences and impacts the architecture, standards, and design of key product initiatives for applications in C#/WPF and Java services', 'Contribute as an individual contributor within a team of top engineers', 'Work in a dynamic, fast-paced, Agile team environment', 'BS/MS in Computer Science or equivalent field is REQUIRED', '0 to 3+ years of commercial software development, proficient in developing multi-tier solutions, multithreading and core Java proficiency', 'No professional Java development experience required', 'Technical experience includes:', 'Extensive Java, J2EE n-tier application experience', 'Hands-on experience with web services', 'C#/.Net also highly desirable', 'Strong SQL skills with considerable experience in Oracle or SQL Server', 'Knowledge of XML/XSL', 'Experience with Tomcat, JBoss, Weblogic or WebSphere', 'Strong with OO design and development', 'Solid grasp of algorithms, solving difficult heuristic/optimization types of problems (Math and/or finance background highly desirable)', 'Experience with developing applications for the financial markets is highly desirable (Fixed income, Trading, FX, Risk, Portfolio/Wealth Management, Market Data, FIX, etc).', 'Product delivery experience includes developing and supporting commercial software products which were sold to non-technical customers in vertical markets', 'Ability to work independently, handle multiple tasks simultaneously and adapt quickly to changes', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector']"
+Humatics,Associate Robotics Engineer,b97a8748d149f463,"['Design, implement, and maintain localization software included in our Spatial Intelligence Platform™ products.', 'Rigorously quantify localization software performance including optimization, analysis, testing, and verification to ensure software complies with requirements.', 'Develop software requirements at the system and subsystem levels from feature definitions level requirements as well as interactions between various subsystems (both software and hardware).', 'Apply scientific research and methods to identify algorithm limitations and provide solutions to management or cross-functional team leads.', 'Perform analysis, simulation, and testing of prototype localization systems. Participate in the building and maintaining of our realtime software ecosystem.', 'Create documentation, tests and participate in code reviews.', 'Experience developing software products using algorithms related to robotics, machine learning and controls.', 'A Bachelor’s or Master’s degree in Robotics, Electrical, or Computer Engineering.', 'Expert knowledge in localization and sensor fusion algorithms (e.g., Kalman or Particle filters) including rapidly developing and implementing.', 'Prototyping algorithms and developing simulations using C++, Python or Matlab.', 'Deploying and testing localization algorithms on hardware.', 'Develop data analysis and testing methods using hardware and simulation pipelines.', 'Fluency in C++/Python and a demonstrated ability to produce product ready code.', 'Experience with ROS, Docker, Git, iPython, Jupyter Notebooks.', 'Experience in inertial navigation, industrial automation, and/or controls.', 'Competitive salary', 'Meaningful equity ownership in a well-funded start-up', 'Excellent Medical, Dental, and Vision benefits', 'Paid holidays and unlimited paid time off']"
+DeepHealth,Software Engineer,682a90083b60716a,"['Degree in Computer Science or related field', '2+ years experience in related field', 'Strong programming skills (e.g., python, javascript)', 'Excellent documentation, version control, and organization skills', 'Experience in web backend development, such as Flask, Django, Node.JS, etc', 'Experience with container technology, such as Docker, Amazon ECS, etc.', 'Experience with DICOM data, PACS infrastructure, FDA-regulated software development is a plus']"
+Formlabs,Special Projects Engineer,55817f77a052d808,"['Design and build initial prototypes for the next generation of 3D printing', 'Work with a small team inside of a rapidly growing startup', 'Have hacker skills that would put MacGyver to shame', 'Are ready to dive into complex electromechanical systems', 'Thrive in a technical environment with rapidly evolving goals and priorities', 'Enjoy contributing to projects that are larger than you can build alone', 'Excited to learn whatever you don’t already know', 'Flexible vacation', 'Premium coverage for medical, dental, and vision plans', 'Paid parental leave', 'Commuter benefits', 'Unlimited 3D prints']"
+Massachusetts General Hospital(MGH),Software Engineer,e73760f123b76725,"['Develop software requirements specifications for new software releases,', 'Assign software development tasks to other members of the team and monitor their progress,', 'Participate in the coding and testing of the software,', 'Lead the deployment of new software version to the two test sites (Boston Children’s Hospital and University of Houston Health),', 'Organize the collection of feedback data from the test sites and compile summaries of the feedback for the next software development cycle, and', 'Participate in the creation of user documentation and training of users,', 'Keep the project web site up to date,', 'Participate in the weekly videoconferences with the three PIs of the project and report progress.', 'Strong C++ programming skills are required and experience in using the Qt toolkit is desirable.', 'Familiarity with the LINUX, Windows, and/or Mac OSX operating systems is required.', 'Candidates need to be able to work in a modern software development environment and be familiar with controlled software development processes for future FDA compliance.', 'Experience in leading a major software development project is desirable.', 'A successful candidate can work in a dynamic, international, and collaborative work environment.', 'He/She will gain experience in recording and analyzing electrophysiological data, working with state-of-the-art brain mapping technologies as well as engaging in top level research.', 'Familiarity with MEG/EEG and MRI analysis software packages, MNE and FreeSurfer is also beneficial.']"
+State Street,CRD Software Engineer I - Wealth Management,b22dc3e348cb175e,"['Understand and develop user interfaces and server side systems for complex securities and trading software', 'Work closely with internal product teams and be capable of spanning Java and/or C# technologies', 'Work in a dynamic, fast-paced (agile) environment similar to a start-up company', 'Bachelor’s or Master’s degree in Computer Science or related', 'Strong academic credentials (high GPA, internship experience ideal)', 'Some level of programming skills in Java, C#, SQL, Perl, XML, etc.', 'Exposure to concepts including: agile/scrum, object oriented programming, software engineering techniques, parallel programming, databases, unit testing, etc.', 'Strong work ethic with ability to ramp up quickly', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector', 'Flexibility', 'Ability to work in a very fast paced environment', 'Exposure to commercial software, developing multi-tier enterprise software solutions', 'Any exposure or knowledge of the financial industry is a strong plus, but not required']"
+Simon-Kucher & Partners,Data Engineer,655eee310d21f291,"['Validate data and ensure completeness, correctness, and relevance for analyses', 'Develop and automate analytical tools based on the generated cubes to provide marketing, sales and pricing insights', 'Detect, solve, and implement internal process and technology improvements, as well as create, optimize, and maintain new and existing data transfer channels', 'Advise and support clients on how to model, implement and automate robust ETL processes', 'Work with internal and external business and technology experts across the world', 'Degree in a quantitative field, such as computer science, engineering, statistics, operations research, data science, or equivalent experience', 'Experience in relational data management software, languages (esp. SQL), and ETL processes', 'Proven capabilities to build and maintain large and complex data sets', 'Extensive knowledge of analytical / Business Intelligence software, programming languages (e. g. Tableau, PowerBI, SAS)', 'Sharp analytical mindset with a pro-active and reliable attitude', 'Experience with large scale data processing tools (Spark, Hadoop, NoSQL, etc.)', 'Strong programming skills in R and/or Python', 'Data modeling (variable transformation & summarization, algorithm development)', 'Experience with cloud-based data infrastructure (AWS, Azure, Google, etc.)', 'Familiarity with machine learning models and data pipelines', 'Why you are interested in a job at Simon-Kucher.', 'Specific reasons how your application differentiates you from other candidates.']"
+Harvard University,Software Engineer,b017958bc83a96a2,"['Augment a new, high-profile application', 'Work with smart and creative people', 'Solve interesting software engineering and computer science problems', 'Use industry-leading technologies', 'Work closely with our designers and product owners to manifest evolving requirements into a working application', 'Design, code, deploy, maintain, and improve end-to-end web applications using Ruby-on-Rails, JavaScript, HTML, SASS/SCSS/CSS, ERB/HAML (or other templating languages), MySQL, Postgres, and related frameworks and tools', 'Develop appropriate unit tests, adhere to coding standards and best practices, and implement a cohesive end product']"
+State Street,Java Software Engineer - Multiple Levels - Charles River Development,9741730e400ce9ff,"['Opportunity to influence and impact the architecture, standards, and design of key product initiatives for applications in Java services', 'Contribute as a Sr. Individual contributor within a team of top engineers', 'Work in a dynamic, fast-paced, Agile team environment', '5 to 10+ years of commercial software development, proficient in developing multi-tier solutions', 'BS/MS in Computer Science or equivalent field', 'Knowledgeable of commercial software development, proficient in developing multi-tier solutions', 'Professional Java development experience, core Java and Multithreading', 'Ability to work independently, handle multiple tasks simultaneously and adapt quickly to changes', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector (Math background highly desirable)', 'Extensive Java n-tier application experience', 'Hands-on experience with Web Services (C#/.Net also highly desirable)', 'Strong SQL skills with considerable experience in Oracle or SQL Server', 'Knowledge of XML/XSL', 'Experience with Tomcat, JBoss, Weblogic or Websphere', 'Strong with OO design and development', 'Solid grasp of algorithms, solving difficult heuristic/optimization types of problems', 'Experience with developing applications for the financial markets is required (Fixed Income, Trading, FX, Risk, Portfolio/Wealth Management, Market Data, FIX, etc.)', 'Has played a key role in developing and supporting commercial software products which were sold to non-technical customers in vertical markets']"
+Frontier Technology Inc.,Software Engineer,6e09a52f7281c54a,"['BS in Computer Science, Electrical Engineering or related study.', 'Minimum of 5+ years’ experience working in a commercial agile software engineering environment', 'Must be a U.S. Citizen', 'Strong verbal and written communication skills.', 'Comfortable supporting and presenting to customers', 'Ability to obtain and maintain a U.S. government security clearance.', 'Current working experience in C\\C++ and Python at an Intermediate to Advanced level', 'Experience working under both Agile and Incremental development methodologies', 'Strong understanding and use of formal software process and standards', 'Experience designing, developing and maintaining application user interfaces', 'Experience with scientific programming and working on large, complex code-bases', 'Experience in distributed and multithreaded processing', 'Experience in Qt and cross platform development under both Windows and Linux', 'Strong background in mathematics, statistics or physical sciences', 'Experience designing, developing, and maintaining APIs', 'Experience in some or all: Jira, continuous integration, Java, MySQL, RESTful Web Services, Big Data algorithms and structure, Machine learning or statistical inference']"
+Casa Systems Inc,Software Engineer (entry level) BOSTON,2ed07422dd39f9e0,"['A minimum of an undergraduate degree in Computer Science, Electrical Engineering or related technical discipline. If recent graduate, must have a 3.0 GPA or higher.', 'Up to 5 years of related software development engineering experience (preferably within the telecom network industry).', '2+ to 5 years of experience programming in ""C"".', 'Experience programming in Python and GO a plus.', 'Knowledge and experience with cloud application development a plus.', 'Knowledge and experience with 3GPP protocols in EPC/5G a plus.', 'Good troubleshooting and diagnostic skills.', 'Able to quickly learn new technologies.', 'Able to work in a team oriented, fast-paced environment.', 'Problem solver – Able to be resourceful and “figure things out.”', 'Self-confident – Able to express opinions clearly and effectively.', 'Professional demeanor.']"
+BookBub,Software Engineering Intern (Summer 2020),96b98add8e9cd7af,"['Receive one-on-one mentorship and training from experienced engineers who are dedicated to helping you grow quickly', 'Learn the tools we use, and deepen your understanding if you already know them: Rails, React.js, Clojure, Postgres, Redis, AWS, Docker, and more', 'Engage in many deep technical discussions with friendly and talented engineers', 'Ship code to production almost every day via our continuous deployment pipeline', 'Use automated tests and monitoring to ensure code works as expected, and runs correctly in production', 'Work on high-scale systems used by millions of people', 'Complete your internship with the skills and experience to be a professional software engineer at a top-tier tech company', ""You're pursuing an undergraduate degree in computer science or a related field, with anticipated graduation in 2021 or 2022"", ""You've had at least one previous coop or internship developing software with a team"", ""You're comfortable working in at least one general-purpose programming language"", 'You enjoy collaborating with others on shared goals', ""You're an effective communicator, both in writing and in person"", ""You're able to break complex problems into manageable pieces and efficiently assemble solutions""]"
+DataDog,Open-Source Software Engineer,70bfd2ba63deaa7a,"['Write open source code that instruments thousands of distributed applications written in Go around the world.', 'Drive our open source Go projects and engage with the community to find and address the most important challenges.', 'Join a great team building software the right way.', 'You’re a master Go programmer. You’ve written high-performance and concurrent applications, know your way around `go tool pprof`. You don’t reinvent the wheel but you prefer keeping your code concise and efficient.', 'You are a great community ambassador and can drive hard technical conversations towards a good solution.', 'You want to work in a fast, high growth startup environment.', 'You have a BS/MS/PhD in a scientific field.', 'You have significant experience with Python, Java, JavaScript, Ruby or PHP.', 'You have have experience with code telemetry and introspection.', 'You have experience with distributed systems.']"
+Google,"Software Engineering Intern, MS, Winter 2020",d4224e440fee2b72,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', ""Currently enrolled in a Master's degree program in Computer Science or related technical field."", 'Experience with Data Structures or Algorithms gathered from: completing a data structures or algorithms class, coursework, course projects, research, individual projects, internships, or other practical experience inside or outside of school or work (including open source hobby coding).', 'Experience in Software Development and coding in a general purpose programming language.', 'Examples of coding in one of the following programming languages including but not limited to: C, C++, Java, JavaScript, or Python.', 'Authorization to legally work in the United States.', ""Returning to or enrolling in a Master's degree program after completion of the internship."", 'Experience programming in two or more of the languages including but not limited to: C, C++, C#, Java, JavaScript, Go or Python.', 'Experience working with some of the following: web application development, Unix/Linux environments, mobile application development, distributed and parallel systems, machine learning, information retrieval, natural language processing, networking, developing large software systems, and/or security software development.', 'Demonstrated interest and ability to learn other coding languages as needed.', 'Excellent communication skills, with the ability to speak and write in English.', 'Apply knowledge gained in computer science courses to real-world challenges.', 'Analyze information and evaluate results to choose the best solution to effectively solve challenges.', 'Develop scripts to automate routine tasks.', 'Create and support a productive and innovative team. 
This includes working with peers, managers, and teams.']"
+State Street,Software Engineer II - Charles River Development,46473bc85001af2b,"['CRD (Charles River Development) has a need for a Java Developer influences and impacts the architecture, standards, and design of key product initiatives for applications in C#/WPF and Java services', 'Contribute as an individual contributor within a team of top engineers', 'Work in a dynamic, fast-paced, Agile team environment', 'BS/MS in Computer Science or equivalent field is REQUIRED', '0 to 3+ years of commercial software development, proficient in developing multi-tier solutions, multithreading and core Java proficiency', 'No professional Java development experience required', 'Technical experience includes:', 'Extensive Java, J2EE n-tier application experience', 'Hands-on experience with web services', 'C#/.Net also highly desirable', 'Strong SQL skills with considerable experience in Oracle or SQL Server', 'Knowledge of XML/XSL', 'Experience with Tomcat, JBoss, Weblogic or WebSphere', 'Strong with OO design and development', 'Solid grasp of algorithms, solving difficult heuristic/optimization types of problems (Math and/or finance background highly desirable)', 'Experience with developing applications for the financial markets is highly desirable (Fixed income, Trading, FX, Risk, Portfolio/Wealth Management, Market Data, FIX, etc).', 'Product delivery experience includes developing and supporting commercial software products which were sold to non-technical customers in vertical markets', 'Ability to work independently, handle multiple tasks simultaneously and adapt quickly to changes', 'Excellent communication skills (verbal and written), good interpersonal skills, ability to gather and understand requirements in the financial sector']"
+Ignite Mental Health,"Software Engineer, Mental Health Incubator",da57d9b97a79152c,"['Able to work and deliver results in a cross-functional team, with people who are skilled in Computer Science, Engineering and have some familiarity with mental health support', 'Proficient in mobile app development, especially in Javascript and React Native', 'Good data and system design skills', 'Deeply passionate about mental health and improving others lives', 'Experience with AWS cloud services', 'Some experience with Data Science and Machine Learning. Specifically, some knowledge/experience with collecting labeled data for further Natural Language Processing analysis', 'Desire to learn and contribute to mental health related work, and to learn new code bases']"
+Direct Digital LLC,"Full Stack Developer, Software Engineer",0e96d393d54646fa,"['Build and maintain websites/applications related to our marketed products.', 'Perform system analysis, design, programming, testing, and debugging of backend technologies including Adaptive Health custom CRM, ad serving platform, and analytics dashboard.', 'Work with team to identify areas of opportunity, provide input on new design features, and advocate for continuous improvement', '3+ years of experience working in a LAMP Stack environment', 'Laravel and/or VUE.js', 'BS Degree in Computer Science or equivalent experience', 'Proficiency with HTML, CSS, and Javascript', 'Strong understanding of object-oriented programming with PHP', 'Experience working in ecommerce/marketing environment a plus', 'Collaborative and solution driven', 'Excellent Communications Skills (written and verbal)', 'Ability to work effectively as part of a team', 'Able to solve complex problems.']"
+"Foundation Medicine, Inc.",Software Engineer,adb7b61b9e80fff5,"['Work with technical leaders to help drive sound engineering design, process, quality practices and ultimately production-ready software.', 'Work with technical leads and the rest of the team to understand and hold everyone accountable to our coding and engineering practices.', 'Work closely with product owners, key business and technical stakeholders to ensure FMI is building the best products.', 'Work in an agile/scrum driven environment to deliver new innovative products including participation in daily scrum, code review and knowledge sharing sessions.', 'Strong understanding of code versioning tools (Git/Bitbucket).', 'Demonstrated ability working on modern web applications (JS/NodeJS/DB technologies/cloud-hosted).', 'Strong understanding of RESTFul API development (especially in Node/ExpressJS or similar).', 'Strong understanding of database technologies (PostgreSQL, Oracle).', 'Experience working with cloud hosted environments (AWS/Azure).', 'Demonstrated understanding of testing practices and differences in test types and practices.', 'Experience working with a CI/CD pipeline (Jenkins/Travis/etc.).', 'Experience and knowledge of logging & monitoring tools (New Relic, Logz.io).', 'Demonstrable affinity for pinball.', '2+ years of experience on a software development team in a collaborative environment.', ""Bachelor's Degree in Computer Science, IT or related technology field or demonstrable relevant experience in a technical field."", 'Experience working on an open-source, modern web technology stack.', 'Experience working on large scale applications as part of a software development team.']"
+Wellframe,Associate Software Engineer,82d93b8dfa5650ca,"['Develop a dashboard for care managers that prioritizes patient follow-up based on rich data analysis of user activity patterns, clinical protocols and behavior anomalies, use rich data analysis of user activity patterns, clinical protocols and behavior anomalies, Combine clinical medicine, machine learning, and mobile technology (50%)', 'Deliver personalized tasks and content to patients based on clinical disease treatment protocols, as well as user engagement / adherence, to maximize clinical outcomes (25%)', 'Abstract away integrations with customer enterprise back-office systems, electronic medical records, pharmacy data, patient subscriber and claims information (25%)', 'A B.S. or M.S. degree in Computer Science, Computer Engineering, or a closely related field of study.', 'Development experience building SaaS or cloud-based commercial software products.', 'Experienced with backend web frameworks such as Ruby on Rails (or Django, Play, etc.).', 'Proficient with a modern scripting language such as Ruby or Python.', 'Proficient with front end frameworks such as React, Angular, etc.', 'Excellent communicator, comfortable explaining technical problems and plans in person and in writing.', 'Thrives on diverse technical challenges — our system integrates a wide variety of healthcare and other technologies.', 'Passionate about leveraging their technical skills to help improve patient care.', 'Works effectively in fast-paced, agile startup environment, and finds fulfillment delivering innovative solutions.']"
+,,,
+Booz Allen Hamilton,Boston Event Profile,8058ac59eeb320d6,"['Program Managers/Schedulers', 'Platform Solutions Architects', 'Cybersecurity Engineers', 'Cyber Business Leads', 'Cloud Architects', 'Configuration Managers', 'Software Developers/Engineers', 'Information Assurance Engineers', 'Penetration Testers', 'Network Security Architects and Engineers', 'Software Engineers', 'Systems Architects and Engineers', 'Executive Assistants', 'DevOps Architects', 'Agile Coaches', 'Site Reliability Engineers', 'and more!!!!']"
+"Amazon.com Services, Inc.",Software Development Engineer,52a8fb4ae988e8b3,"['2+ years of non-internship professional software development experience', 'Programming experience with at least one modern language such as Java, C++, or C# including object-oriented design', '1+ years of experience contributing to the architecture and design (architecture, design patterns, reliability and scaling) of new and current systems.', 'Develop performant cutting edge speech technology software', 'Design, prototype and evaluate new machine learning inference algorithms', 'Participate in deep-dive analysis and profiling of production code', 'Work in an Agile/Scrum environment to deliver high quality software against aggressive schedules.', 'Hold a high bar for technical excellence within the team and across the organization', 'Experience in speech recognition or related technology', ""Bachelor's degree or higher in related fields (CS, Speech, NLU, Computer Vision, etc.)"", 'Solid s/w engineer & advanced C++ experience', 'Excellence in technical communication with peers and non-technical audiences', 'Demonstrated ability to mentor other software developers and provide technical guidance and vision']"
+Staples,Software Engineer I,e0156e75dbb3c100,"['Participate and gather deep technical expertise in all mobile areas as they relate to Android platform including but not limited to Android and Android Internals.', 'Develop in-depth knowledge of several Staples business processes and systems environment', 'Act in the capacity of a software engineer to actively participate in the definition, design, development, testing and implementation of software functionality using the Agile methodology', 'Create or modify complex programs, modules, routines, scripts and data objects from a detailed design where efficiency, performance and reliability are critical', 'Work with and advise both business and technology teams (internal and external) on how best to implement desired features and enhancements', 'Ensure that technical solutions follow best practices, are reliable, are easily maintainable and are scalable under sustained load', 'Participate in design and code reviews', 'Collaborate to identify, isolate and resolve malfunctions related to software', 'Conduct knowledge transition on solutions to the designated application support team(s)', 'Create and evolve application documentation as required', 'Operate in compliance with IT standards, policies, procedures, and best practices', 'Bachelors degree (CS preferred) or equivalent work experience', '0-2 years of related software Development experience.', '0-2 years of full software development lifecycle experience including the ability to perform independent analysis', '0-2 years of development experience in Java 8.', '0-2 years of experience developing applications in Android Studio, Retrofit, Gradle and Android OS.', '0-2 years of experience in SQL and developing applications with relational database technologies', '0-2 years of experience with development of Native Android apps on the platform', '0-2 years of experience working with web services (SOAP/HTTP, REST), Retrofit', 'Experience with any of the following: 
Agile/Scrum, open source technologies, responsive application design, continuous integration using Jenkins', 'Experience with Service Oriented Architecture', 'Familiarity with Quality Assurance methodologies and automated testing using tools such as Selenium', 'Exposure to working in a retail, manufacturing, or print solutions', 'Experience working directly with non-IT business partners', 'Strong analytical abilities and a strong intellectual curiosity', 'Strong verbal and written communication skills', 'Strong time management and organization skills', 'Ability to balance multiple priorities at a given time', 'Must be team oriented and have a customer service mindset', 'Self-driven, able to work independently or within a team setting', 'Results oriented and adaptable']"
+,,,
+Delphix,Software Engineer - Boston,8af989c7be76eae5,"['Design and implement large components of the product', 'Deliver complex projects by defining structure in areas of uncertainty and identifying discrete deliverable', '1–2 years experience in software engineer', 'Excellent analytical and problem-solving skills', 'Ability and desire to work in a fast-paced, test-driven, agile, collaborative and iterative programming environment', 'Ability to clearly articulate information with the appropriate technical depth', 'A desire to build great products, learn new technical areas, and dive in wherever there is a need']"
+,,,
+IQVIA,Software Engineer 2,03848a9b515f3457,"['Independently lead the development of small to medium size system components', 'Provide peer support to Software Engineers in the design of new small system components', 'Contribute to the design and development of new features and applications.', 'Unit testing and functional testing of new functionality.', 'Support customers and professional services as required to address any questions or resolve any issues related to the DrugDev Data Solutions platform.', 'Help on-board new Software Engineers to the team', 'Review of functional specifications and other validation deliverables as assigned', 'Other duties as assigned', 'Technical/functional expertise', 'Communication', 'Decision making', 'Customer focus', 'Conflict management', 'Energy, motivation and work rate', 'BS in a related field (CS, Math) or comparable field', 'Graduate degree desirable', '3 to 5 years prior related experience in developing web applications, graduate experience, or demonstrated success in development with DrugDev Spark or equivalent system', 'Experience with Ruby on Rails, Python, React, Ember or similar web framework.', 'Ability to write clean code', 'History of successfully designing small to medium system components', 'Good interpersonal communication and presentation skills', 'Attention to detail']"
+Ascensus,Software Engineer,b939233da6916c8d,"['Work in an agile development environment by collaborating with the team to meet shared objectives.', 'Design and develop web applications and batch jobs.', 'Participate in meetings and/or brainstorming sessions with team members within and across scrum teams.', 'Deliver unit tests for the software components being built.', 'Work with quality assurance team to make sure software artifacts are tested in time for the sprint launch', 'Good OOA/OOD skills and strong understanding of Software architecture concepts.', '5+ years of work experience in Java, SQL and relational databases', 'Ability to work with the team and handle a fast-paced work environment, deadlines and new challenges', 'Great communication, critical thinking, problem solving, and time-management skills', 'Agile/Scrum experience', 'REST Web service', 'Mobile app development using Angular and Ionic', 'Spring Batch framework exposure', 'Amazon Web Services (AWS) knowledge', 'Financial Services experience']"
+Google,"Software Engineer, Engineering Productivity",ab135ef72d4060eb,"[""Bachelor's in Computer Science or related technical field or equivalent practical experience."", 'Software development experience in one or more general purpose programming languages.', 'Experience in one or more of the following: test automation, refactoring code, test-driven development, build infrastructure, optimizing software, debugging, building tools and testing frameworks.', ""Master's or PhD degree in Computer Science or related technical field."", 'Experience with one or more general purpose programming languages including but not limited to: Java, C/C++, C#, Objective-C, Python, JavaScript, or Go.', 'Scripting skills in Python, Perl, Shell or another common language.', 'Lead/contribute to engineering efforts from design to implementation, solving complex technical challenges around developer and engineering productivity and velocity.', 'Design and build advanced automated build, test, and release infrastructure.', 'Drive adoption of best practices in code health, testing, and maintainability.', 'Analyze and decompose complex software systems and collaborate with cross-functional teams to influence design for testability.']"
+Liberty Mutual Insurance,Software Engineer,a21ae949be68d190,"['You will collaborate closely with a team of technologists, analysts, developers and test engineers to deliver complex software solutions.', 'Drive full cycle end to end development from design through implementation.', 'The role requires hands-on development, problem resolution and knowledge of various horizontal and vertical packages - spanning across all layers of our technical stack.', 'In an Agile environment, work with business team members, product owners, and other software engineers to review and qualify business requirements, functional specifications, use-cases, and test plans.', 'Design, prototype and author code for software components and applications based on functional specification and optimize them for system performance.', ""Bachelor's Degree in technical discipline preferably computer science or software development."", 'Generally 3+ years of professional development experience.', 'Moderate knowledge of IT concepts, strategies, methodologies, architectures and technical standards.', 'Excellent analytical, problem solving, and communication skills.', 'Experience with layered system architectures and layered solutions; understanding of shared software concepts.', 'Java development experience', 'Experience working with agile methodologies (Scrum, Kanban, XP) and cross-functional teams (Product Owners, Scrum Masters, Developers, Designers, Test Engineers)', 'Ideally familiar with Design Thinking, Behavior and Test-Driven Development', 'Experience with HTML, JavaScript, XML/XSD, Web Services.', 'Experience with Unit Testing frameworks (e.g., Junit, Mockito).', 'Experience with IBM Integration Designer, WebSphere Integration Developer (WID) is a plus.', 'Experience with Spring Boot and/or Docker is a plus.', 'Experience with cloud services such as Amazon Web Services is a strong plus.', 'Experience integrating with and managing microservices exposed via SOAP and/or REST APIs including 
development and support of Java services a strong plus.']"
+Physical Sciences Inc.,Laser Development Scientist/Engineer,ad05511ec8e4cf27,"['Must have a bachelor’s degree in a relevant field is strongly preferred but an associate degree with >2 years of relevant experience in optical systems will be considered.', 'A background or coursework in laser technology as well as a willingness to work with lasers of all classifications.', 'Prior experience in a physics or engineering laboratory setting with the background or willingness to learn common optical assembly methods as well as how to monitor and test laser operation and performance using laser power and energy meters.', 'Assist staff scientists in the design and set-up of experiments, including mounting of optical elements and alignment.', 'Run experiments, capture, organize, and analyze data,', 'Maintain documents/manuals of optical, mechanical and electronic equipment including calibration and repair.', 'Maintain the laboratory equipment including laser safety goggles and tool box.', 'The ability to use multimeters, oscilloscopes, cameras, and other electronic test equipment.', 'Excellent soldering, wiring and assembly skills.', 'Familiarity with National Instruments hardware and software.', 'Ability to read and understand mechanical assembly drawings.', 'Familiar and comfortable with skilled and safe use of hand and power tools, including compact milling machine and lathe.']"
+Accenture,"Quality Engineer - Charlotte & Raleigh, NC",820b6e2b9a5b2408,"['Adapts existing methods and procedures to create possible alternative solutions to moderately complex problems.', 'Understands the strategic direction set by senior management as it relates to team goals.', 'Uses considerable judgment to determine solution and seeks guidance on complex problems.', 'Primary upward interaction is with direct supervisor. May interact with peers and/or management levels at a client and/or within Accenture.', 'Determines methods and procedures on new assignments with guidance.', 'Decisions often impact the team in which they reside.', 'Manages small teams and/or work efforts (if in an individual contributor role) at a client or within Accenture.', 'Must be able and willing to travel 100% (Mon – Thursday every week)', 'A minimum of 3 years of experience in software testing', 'A minimum of 1 year of experience working with Agile Delivery', 'A minimum of 1 year of experience with one of the following: Jira, Rally, Microfocus ALM, Microsoft TFS, Tricentis qTest', 'A minimum of 2 years of demonstrated experience in one of the following: Java/J2EE, Groovy, Python, Ruby, JavaScript, C#', ""Bachelor's degree or equivalent (minimum 12 years) work experience. (If Associate’s Degree, must have minimum 6 years work experience)"", 'A minimum of 2 year of experience performing API testing', 'A minimum of 1 year of healthcare payer experience with claims, enrollment, EDI, or encounters', 'A minimum of 1 year of experience in test automation', 'Demonstrated experience with Continuous Integration, including two or more of the following build/deploy/version control tools: Jenkins, Hudson, Bamboo, Git, Mercurial, Subversion, Bitbucket, GitHub, Grunt, Gradle, Gulp, Maven, MS TFS']"
+LogRocket,Growth Engineer,cf1c6c51cf9ae395,"[""You're a strong collaborator. You're transparent about progress on tasks, seek feedback early and often, enjoy reviewing code and getting your code reviewed, and work effectively with the whole team."", 'You consistently deliver on your engineering estimates.', ""You're comfortable with JavaScript, CSS, HTML, and React or another modern front end framework"", ""You're comfortable working with APIs and marketing/CRM tools"", 'Build a system that lets customers try LogRocket on their sites with a chrome extension', 'Build a React hooks plugin for LogRocket', 'Enrich Salesforce data with customer usage data', 'Build a system that automatically recommends integrations for our customers based on their toolset', 'Update our blog to use Gatsby as the front-end layer', 'Publish a blog post that analyzes front-end performance across our customer base']"
+Voya Financial,Software Developer,69fa3acb8bedaa39,"['Customer Focused: Passionate drive to delight our customers and offer unique solutions that deliver on their expectations.', 'Critical Thinking: Thoughtful process of analyzing data and problem solving data to reach a well-reasoned solution.', 'Team Mentality: Partnering effectively to drive our culture and execute on our common goals.', 'Business Acumen: Appreciation and understanding of the financial services industry in order to make sound business decisions.', 'Learning Agility: Openness to new ways of thinking and acquiring new skills to retain a competitive advantage.']"
+BookBub,Software Engineering Co-op (Spring 2020),0b8b7d8cb5594253,"['Receive one-on-one mentorship and training from experienced engineers who are dedicated to helping you grow quickly', 'Learn the tools we use, and deepen your understanding if you already know them: Rails, React.js, Clojure, Postgres, Redis, AWS, Docker, and more', 'Engage in many deep technical discussions with friendly and talented engineers', 'Ship code to production almost every day via our continuous deployment pipeline', 'Use automated tests and monitoring to ensure code works as expected and runs correctly in production', 'Work on high-scale systems used by millions of people', 'Complete your co-op with the skills and experience to be a professional software engineer at a top-tier tech company', ""You're pursuing an undergraduate degree in computer science or a related field, with anticipated graduation in 2021 or 2022"", ""You're available to work in person at our office in Cambridge, MA for six months at the beginning of 2020 (typically January-June timeline)"", ""You've had at least one previous coop or internship developing software with a team"", ""You're comfortable working in at least one general-purpose programming language"", ""You've had experience working in at least one general-purpose programming language"", 'You enjoy collaborating with others on shared goals', ""You're an effective communicator, both in writing and in person"", ""You're able to break complex problems into manageable pieces and efficiently assemble solutions""]"
+Google,Research Intern,7b314cbda5939778,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', 'Must be currently enrolled in a BS, MS or PhD degree program in Computer Science, Linguistics, Computational Linguistics, Statistics, Biostatistics, Applied Mathematics, Operations Research, Economics, Natural Sciences including Biomedical, Chemistry, Materials Science, Physical Modeling, Physics and Scientific Computing or a related technical field or equivalent practical experience.', 'Experience (classroom or work related) in one or more areas of computer science, such as Natural Language Understanding, Neural Networks, Computer Vision, Machine Learning, Deep Learning, Algorithmic Foundations of Optimization, Data Science, Privacy, Trust & Safety, Software Engineering, Programming Languages, Distributed Systems, Human Computer Interaction, Networking,Operating Systems, Computer Architecture, Data Mining and/or Machine Intelligence (Artificial Intelligence).', 'Experience with one or more general purpose programming languages including: C/C++, Java, MATLAB, Go or Python.', 'Returning to a degree program after completion of the internship.', 'Relevant work experience, including internships, full time industry experience or as a researcher in a lab.', 'Ability to design and execute a research agenda.', 'Contribution to research communities and/or efforts, including publishing papers (i.e. being listed as author) in major conferences or journals.', 'Participate in cutting edge research to develop solutions for real-world, large-scale problems.']"
+,,,
diff --git a/data/web_developer.csv b/data/web_developer.csv
new file mode 100644
index 0000000..929abbb
--- /dev/null
+++ b/data/web_developer.csv
@@ -0,0 +1,51 @@
+Companies,Positions,ID,Descriptions
+,,,
+Ignite Mental Health,"Front End Web Developer, Mental Health Incubator",0fd383f30f8d049e,"['Building kick ass responsive websites for organizations and movements working to transform mental health', 'Actively learning on the job and diving into new areas where you may have little expertise in', ""Learning from mistakes and failed projects (you'll be wearing a lot of hats in a lot of different functional areas)"", 'Own the mission of our organization and wear whatever ""hat"" is necessary to achieve OKR\'s!', 'Team specific responsibilities will depend on the team you are placed on and can be discussed more during the interview process', 'Deeply passionate about improving mental health', 'Growth mindset, hungry to learn and apply new skills', 'Able to perform well with a high performance team working towards ambitious goals', 'Proficient in HTML5, CSS3, jQuery, Javascript Frameworks', 'Multi-Browser/Device Programming Ability (Mobile, Tablet, Chrome, IE, Firefox or Opera if you’re feeling fancy)', 'Documentation + Version Control experience', 'Link to your portfolio / links to websites etc', 'Experience building Analyzable and Secure websites', 'Experience With Social Media and Third Party APIs', 'Proficient in Web Application Development Frameworks', 'Understanding of Object-Oriented Programming (OOP)', 'Ability to work well with difficult clients!', 'Appreciation of puns :-)']"
+Formlabs,Web Developer,1991368a570db682,"['Create, update, and maintain web content in a content management system (Django-CMS), monitor (quality control) content on live site and troubleshoot', 'Develop world-class e-commerce experiences to make it easy for our customers to buy', 'Work closely with the marketing and design teams to keep our website looking fresh and serving our customers', 'Well-versed in building and shipping high-traffic websites in a production environment - have a portfolio of public facing company websites you can share', 'Excited to work with an interdisciplinary marketing and web engineering team', 'Experienced with HTML/CSS and frontend development using libraries such as React', 'Work with and have designed and built RESTful APIs', 'Comfortable being the primary owner of a codebase', 'Able to write clear code, as well as communicate and document your work', 'Guide projects to completion, on time and to plan', 'Django-cms and other content management systems', 'Front-end frameworks such as Foundation, React, Redux', 'Python, PHP, Node.js in production environments', 'Magento or comparable e-commerce platforms', 'Git experience preferred', 'Flexible vacation', 'Premium coverage for medical, dental, and vision plans', 'Paid parental leave', 'Commuter benefits', 'Unlimited 3D prints']"
+Fenix Outdoor Import LLC,Front-end Web Developer,db037aa56f984c97,"['2+ years of front-end web development and design in a corporate environment', 'CMS experience: Wordpress, Drupal, Episerver, Sitecore,Ektron or equivalent', 'Proven experience and samples of work in web design and development', 'Build website wireframes and mock-ups', 'Translate our customer needs into functional and appealing interactive, intuitive landing pages', 'Knowledge of responsive web design practices', 'Participate in the on-going development of new features and enhancements of all our websites', 'Develop and test user interface designs across multiple browsers and mobile devices', 'Ability to design CSS classes and maintain good organizational structure for CSS libraries', 'Demonstrate clean coding practices in HTML/HTML5, CSS/CSS3 and JavaScript', 'Collaborate with other team members and departments to ensure understanding of business requirements', 'Define a standard user experience that captures the look-and-feel and the navigation model across teams', 'Develops comprehensive testing procedures to ensure websites display correctly cross-browser, operating systems and different digital devices', 'Develop web graphics and slideshows for online and email-based marketing initiatives', 'Designing and developing landing pages for different marketing initiatives', 'Updating and maintaining website content', 'A/B testing, Web optimization experience', 'Has a strong eye for design and a desire to create user-centered, content-driven websites.', 'Loves web design and typography', 'Understands the connection between good design and user goals', 'Enjoys working on and with a team', 'Experienced in grid systems/CSS frameworks (Bootstrap is our preference)', 'Experienced in CSS pre-processing (SASS is our preference)', 'Some familiarity with javascript/jQuery', 'Experienced in working with content management systems', 'Additional Preferred Qualifications:', 'SEO experience', 'Strong aptitude for 
digital marketing and marketing metrics', 'Web and Market integration Experience executing e-mail marketing campaigns']"
+Perkins School for the Blind,Applications/Web Developer,fa97cc31008a6902,"['Provides website development and support across all Perkins’ programs.', 'Work closely with Business Systems Analysts and the Applications Manager to understand requirements and priorities as it relates to applications.', 'Works closely with Perkins staff to understand requirements, works to develop websites using Drupal and other open source technologies.', 'Formats new site pages/sections in a web content management system', 'Create, re-create or optimize programming, configuration, integrations and customizations to maximize the benefit to the organization of the technologies we utilize and help Perkins achieve efficiencies by eliminating inefficient processes, redundant systems and duplicate data-entry.', 'Recommend improvements to business processes to improve efficiency and reduce operational costs or to provide new operational capabilities.', 'Provides support of organizational initiatives related to the web.', 'Recommends tactics to support Perkins mission, including rationalizing and justifying enhancements to functionality and experience or adoption of new technologies.', 'Assist in software selection, business process re-engineering, application testing and solution implementation.', 'Implements best practices and Perkins standards for web accessibility.', 'Maintains eCommerce stores in Shopify, Amazon and other eCommerce initiatives.', 'Work closely with the Applications Manager, Consultants, and IT management to validate that the proposed and developed solutions meet end user needs.', 'Provide status reports for end users and internal management', 'Provide application and process training to end users', 'Perform systems administration/maintenance tasks on a variety of systems as necessary.Salesforce system administration/development experience strongly preferred.', 'Acts as the project coordinator on a diverse group of digital projects.', 'Tests new pages and functionality to 
uphold Perkins high accessibility standards.', 'Troubleshoots and helps resolve problems associated with Perkins websites.', 'Performs other related duties as assigned or requested.']"
+Deloitte,Custom Web Developer,c2d9e91468dbfcb3,"['A Bachelor’s degree and 3+ years of front end design experience are required.', 'Available portfolio of in-market examples of successful user interface design, focused on web or mobile.', 'Expertise in designing interactive user interfaces and web interface details for various mobile and interactive platforms.', 'Demonstrated experience using HTML5, CSS, and Adobe CS tools.', 'Ability to work well in teams while independently managing all of the design components related to a given project.', 'Experience leading portions of design work.', 'Communicates clearly and concisely, both written and verbal.', 'Strong problem solving and troubleshooting skills with the ability to exercise mature judgment', 'Proficiency in D3.js.', 'Experience designing interfaces for wearable technology.', 'Experience in animation and videography.', 'Experience in data visualization using Tableau or a comparable BI tool.', 'Experience with Spotfire, Qlikview, D3, R, SPSS, SAS.', 'Graduate degree with a major in Human Computer Interaction or a closely-related design field.']"
+Grubhub,UI Developer,bbe2cf04ad8e27aa,"['Build merchant applications utilizing our Web SDK', 'Ensure the technical feasibility of UI/UX designs', 'Translate designs and wireframes into high quality code', 'Build and maintain large-scale, high-availability web applications with an active and growing user base', 'Build reusable code and libraries for future use', 'Optimize components for maximum performance across a vast array of web-capable devices and browsers', 'Collaborate with other team members and stakeholders', 'Professional experience (2+ years) in front-end web development', 'Proficient understanding of web markup, including HTML5, CSS3 and a basic understanding of JavaScript', 'Experience with CSS pre-processing platforms, such as LESS and SASS', 'Exposure to a client side framework - React prefered', 'Basic experience with RESTful APIs', 'Distributed version control experience with Git', 'Proficient understanding of cross-browser compatibility issues and ways to work around them.', 'Familiarity with software like Adobe Creative Suite and Sketch a plus', 'Experience with responsive design', 'Ability to understand business requirements and translate them into technical requirements', 'Familiar with Agile software development methodologies', 'A knack for benchmarking and optimization', 'Team player with excellent written and spoken English communication skills', 'Ability to work creatively and analytically in a problem-solving environment', 'Flexible PTO. It’s true, no strings attached and all the time you need to recharge.', 'Better Benefits. Get quality insurance, flex-spending accounts, retirement options and commuter perks.', 'Free Food. Kitchens are stocked and free Grubhub each week.', 'Stock Up. All of our employees are owners, in fact, they’re granted Restricted Stock Units, which means we’re all in it to win it.', 'Casual Culture. 
Catch rays on the rooftop or get comfy on a couch and get to know your coworkers — because work, should be a place you want to be.']"
+,,,
+,,,
+Massachusetts Medical Society,Junior Software Developer,9b5f35b423018794,"['Write code using a variety of programming languages and platforms such as Angular 7, NodeJS, HTML, CSS and JavaScript.', 'Build expertise in creating high-quality customer-centric user experiences (Responsive) from design deliverables and functional specifications.', 'Learn different Agile methodologies which are practiced in the organization.', '4-year degree (preferably in Computer Science, Information Technology, Engineering, Mathematics, or similar) is preferred.', 'A strong aptitude in Information technology (IT) and the interest in expanding IT skill set with strong exposure to JavaScript frameworks including NodeJS, REACT, Angular, etc.', 'A problem solver with a keen eye for detail.', 'Excellent communication, interpersonal and time management skills.', 'Experience with HTML5, CSS.', 'An ability to engage yourself in a project and enjoy the challenges and processes involved.']"
+Intersections Inc.,Front End Developer,151988405a483227,"['Build the fast, modern websites that will gow our business', 'Build frameworks that will make future development easier', 'Drive improvements to website performance, conversions, engagement, and user satisfaction', 'Turn mockups into reality', 'Experience building sites that handle traffic at scale', 'Experience working on e-commerce sites', 'Hands-on experience with HTML, CSS, and JavaScript', 'Hands-on experience with the React framework', 'Experience working in a team environment', 'Creativity and flexibility—we’re still scrappy and growing, and our team is going to act like a startup', 'Experience working with distributed teams', 'Strong work ethic and self-motivated', 'Knowledge of a modern server-side language (Node, Ruby, Python)', 'Experience working with localized websites', 'Experience working with web APIs']"
+Haven Life,Front End Developer,b651cf274fb0162c,"['2+ years experience developing web-based applications in production environments', 'Thorough understanding of CSS/HTML and modern page layout techniques', 'Experience working with designers cutting up layouts to implement pixel perfect pages', 'Experience with responsive layout and design, preferably with Bootstrap framework', 'Familiarity with website optimization techniques', 'Experience with Javascript as it pertains to enhancing web pages and managing 3rd party scripts', 'Strong visual design sense and appreciation for development with a client experience focus', 'Clear communication skills (written and oral) and an ability to work effectively with marketing teams', 'Willingness to learn and grow', 'BA/BS', 'Authorized to work in the US with or without sponsorship', 'Programming experience in Javascript or other languages', 'Single page app experience (e.g. Angular, Ember etc).', 'Some experience with an MVC framework like Angular, Backbone, Ember etc.', 'Experience with web analytics (Google Analytics, Mixpanel, etc.)', 'Experience with social media interfaces and related analytical tools', 'Familiarity with SEO and how it affects implementation']"
+Adventive,Adventive Flash Designer,0cf30846c5d34647,"['Analyze customer needs and design core ad units to achieve results', 'Estimate, design, deploy ad units via the Adventive Platform', 'Collaborate and share knowledge with Team engineers and Designers', 'Individual Contributor position with growth potential', 'Excellence and passion for web technologies and personal drive to make a meaningful change.', 'Minimum 3-5 years of Flash experience with AS2/AS3 Scripting experience', 'Experience with Flex/Flash Builder is not required but would be a plus.', 'Familiarity with HTML/HTML5 would be a great plus.', 'Online Advertising experience would be a plus', 'Self starter who has an ability to work with minimum direction', 'Team player with a passion to collaborate with others', 'A willingness to own a project from inception to solution']"
+TUFTS University,Associate Web Developer- Marketing and Communication,418a46a537343641,"['Bachelor’s Degree in related field, specialized training or equivalent experience', '1 -3 years of experience with WordPress and Drupal content management systems.', 'Proficiency with HTML, Javascript/JQuery, CSS, and git version control', 'Experience working in a linux command line interface', 'Strong written and verbal communication skills', 'Ability to brainstorm creative solutions to problems', 'Experience collaborating with a diverse, multidisciplinary team', 'Attention to detail', 'Desire to learn and apply new skills', 'Friendly, creative, collaborative, and independent', 'Cares about writing code that is clean and maintainable', 'Experience working in higher education a plus', 'Experience with continuous integration systems and modern dev-ops practices a plus']"
+Velir,Front End Developer,dc4999e690e301df,"['Collaborate with a cross-functional team to understand requirements, communicate designs, and implement specifications', 'Build content and data driven enterprise level web applications, implementing dynamic user interfaces emphasizing responsive web design and accessibility', 'Integrate with third-party systems and providers by consuming web services and collaborate with back-end developers to design APIs and service interfaces', 'Write clean, well-organized code, following industry standard coding practices (SASS, SMACCS, BEM)', 'Create documentation around processes including front end design and service architecture', 'Work in both Agile and Waterfall project methodologies, meeting delivery milestones and quality expectations.', 'Apply quality measures around performance, security, and scalability', 'Deep knowledge in HTML5 and CSS3', 'Experience in JavaScript including frameworks such as React', 'Able to communicate technical concepts effectively, orally and in written documentation, to members of cross-functional teams, clients, and stakeholders', 'Ability to identify roadblocks, raise these risks, and provide leadership with resolutions', 'Ability to create or extend digital designs', 'Ability to maintain and foster a high level of communication and collaboration, an aptitude for building consensus and compromise with your approach, and enthusiasm for introducing new concepts and processes', 'Ability to learn and master new technologies; technologies used at Velir include: ReactJS, AngularJS, jQuery, Gulp, Sass, GIT, MVC, NPM, Avocode, PatternLab', 'BS in Computer Science or equivalent', '2+ years of developing dynamic web applications', 'Demonstrated experience building responsive web designs', 'Demonstrated experience working on collaborative teams in a dynamic environment', 'Frequent sitting at a desk performing work on a computer', 'Reasonable accommodations may be made to enable individuals with 
disabilities to perform the essential functions', 'Take the Long View - Ensure the company is built to last', ""Be Courageous - Make the right decisions even when they aren't the easiest decisions"", 'Be Genuine - Bring honesty and authenticity to all that you do', 'Work with Focus + Passion - Display purpose and pride in your work and never stop learning']"
+JD Softtech,Urgent Hiring for User Experience Tester,bd0c38e15fb76359,"['Test website form functionality for optimal user experience.', 'View, analyze and make recommendations to improve the online user experience based on user testing data.', 'Review, analyze and make recommendations to improve the online user experience based on internal feedback collection mechanisms', 'Troubleshoot and fix front end website issues', 'Assist in reporting and documenting issues for our development team.', 'Test and validate features and fixes from our development team.', 'Take the initiative to learn about our technology and platforms.', 'Be innovative, objectively providing alternatives and solutions to problems and projects.', 'Work closely with ecommerce marketers, developers, business analysts and web/graphic designers.']"
+Echo & Co.,Web Developer,3857d318e153ba5f,"['BS in computer science or a related field, or significant equivalent experience', '3 years minimum experience with HTML and CSS', '2 years minimum Web programming experience with server side programming languages such as PHP and client-side scripting using JavaScript/jQuery', '1 year minimum experience working with relational database systems such as MySQL & MSSQL and a good working knowledge of SQL', 'Self-starter with strong self-management skills', 'Ability to organize and manage multiple priorities', 'Strong communication and team skills', 'Experience developing on and extending popular web authoring tools and/or implementing open source software', 'Linux shell experience and a working knowledge of Git', 'Experience using build tools such as Gulp, Grunt, or Webpack', 'Knowledge of the Drupal and Wordpress content management systems', 'Experience with Composer for managing PHP dependencies', 'Experience setting up DevOps workflows on a Platform as a Service like Heroku, OpenShift, or Platform.sh', 'Principles of responsive design', 'Ability and willingness to work in a fast-paced, demanding agency environment', 'A social good orientation with an interest in working for mission-driven clients']"
+ENGIE North America Inc.,Asset Management Intern,6b9520569697b65b,"['Work with the Genbright CTO on back-end and front-end software projects (development, testing, and production) used to construct and operate the Genbright Asset Management Platform and surrounding infrastructure.', 'Work on a suitable project chosen from a pre-defined list created by the CTO commensurate with the abilities of the intern.', 'Work to implement, unit test, and document as appropriate for the selected project in the following areas:\nData acquisition software used to capture market and other asset management related data provided by customers and third parties.\nGenbright’s asset valuation model and underlying optimization engine framework used to house asset optimizations.\nReporting and comparison tools for analysis associated with asset optimization outputs.\nAsset telemetry systems used to communicate with various ISOs, manage, and monitor the state of electrical assets as part of an ISO compliant network operations center.\nAPIs intended to communicate with third parties and Genbright partners.\nCommunications with data warehousing and data marts used Genbright’s asset management system on the backend.\nWeb based user interfaces presented as part of the asset management platform.\nSettlements against wholesale and retail power market participation.', 'Data acquisition software used to capture market and other asset management related data provided by customers and third parties.', 'Genbright’s asset valuation model and underlying optimization engine framework used to house asset optimizations.', 'Reporting and comparison tools for analysis associated with asset optimization outputs.', 'Asset telemetry systems used to communicate with various ISOs, manage, and monitor the state of electrical assets as part of an ISO compliant network operations center.', 'APIs intended to communicate with third parties and Genbright partners.', 'Communications with data warehousing and data marts 
used Genbright’s asset management system on the backend.', 'Web based user interfaces presented as part of the asset management platform.', 'Settlements against wholesale and retail power market participation.', 'The Genbright git repositories as flat text files and images that are checked in on a regular basis, or', 'The wiki within the Genbright Trac instance.', 'Must be at least a junior or senior (by end of spring semester) working toward a Bachelor’s degree in TBD, or Master’s/MBA student working towards a master’s in engineering, or closely-related degree', 'Must possess a 3.0 GPA or higher & be able to work up to 40 hours per week', 'Must be legally authorized to work in the United States, and should not require, now or in the future sponsorship for employment visa status', 'Must have excellent oral and written communication skills', 'Must have the ability to think analytically, present ideas professionally, and work in project teams', 'Must have exposure to at least two of the following in coursework towards an accredited university program:\nC\nC++\nPython\nJava\nC#\nSQL\nR\nStatistics\nLinear algebra\nMulti-variable Calculus\nAccounting', 'C', 'C++', 'Python', 'Java', 'C#', 'SQL', 'R', 'Statistics', 'Linear algebra', 'Multi-variable Calculus', 'Accounting']"
+Mathematica Policy Research,Junior Full Stack Developer,4fec247423fee28d,"['Build applications and web platforms that can be used to manage, clean, and analyze datasets', 'Create data presentation-layers in the form of static reports and interactive dashboards', 'Manage and ensure the quality of large datasets', 'Work with policy researchers to understand requirements for web-based products', 'Work both independently and on a team', 'Be self-driven to learn, push new ideas, and teach others', 'Gain fluency in a designated specialty such as:\nData Visualization Design\nUI/UX Design\nQA Testing\nDeployment/Architecture\nSecurity', 'Data Visualization Design', 'UI/UX Design', 'QA Testing', 'Deployment/Architecture', 'Security', 'A Bachelor’s degree in Computer Science or other relevant field or equivalent experience', '0-5 years of professional experience as a Software Developer or similar role', 'An understanding of how web applications work', 'Intermediate knowledge of JavaScript (D3 experience preferable)', 'Proficiency in any Server Side language (C#, Java, Ruby, Python)', 'Familiarity with Relational Databases', 'A strong desire to improve society through evidence-based research']"
+Harvard University,Web and Applications Developer,d430bae3da798887,"[""Planning, writing, and debugging code in multiple languages (PHP, Java, Python, SQL) to add new/enhanced features to the Center's core information systems (website and CRM), while maintaining best practices in object-oriented coding techniques."", 'Creating complex data queries and reports to assist the department. Identify and implement, as directed, opportunities to make best use of our business database.', 'Assist in growth and maintenance of our Amazon Web Services infrastructure, including deploying new system elements, implementing security policies, and optimizing system resource utilization.', 'Assist the Senior Technology and CRM Lead in the administration of our CRM system, including document and process automation, security permissions, and integration with external systems', 'Participate in or lead system improvement team projects as assigned, including translating business needs into functional requirements.', 'Participate fully in software development life cycle', 'Support technical solutions to deliver business requirements', 'Troubleshoot problems and suggest improvements to coding practices', 'Act as technical liaison to internal clients', 'Abide by and follow the Harvard University IT technical standards, policies and Code of Conduct', ""Minimum of two years' post-secondary education and/or relevant work experience"", 'Knowledge of information technology applications, processes, software and equipment', 'Demonstrated team performance skills, service mindset approach, and the ability to act as a trusted advisor', ""Bachelor's degree in Computer Science or related field preferred\nExperience programming in Java or other high level languages, ideally in a database environment, preferred"", 'Website development and administration preferred', 'Programming in Java, PHP, Javascript, knowledge of Python preferred', 'Working knowledge of relational databases and SQL preferred', 
'Exposure to cloud based systems architectures, such as AWS, preferred', 'Experience effectively communicating and partnering with non-technical colleagues preferred', 'Experience balancing multiple projects simultaneously preferred', 'Experience and enjoyment working on teams preferred', 'Inquisitive, good listener, practical problem solver preferred', 'Completion of Harvard IT Academy specified foundational courses (or external equivalent) preferred', 'Work is performed in an office setting']"
+Granite Telecommunications,Front End Web Developer I,84b70cf59acf3619,"['Experience with HTML, CSS, SCSS, Javascript, Jquery, Bootstrap', 'Experience with Adobe Photoshop', 'Exceptional knowledge of graphic design principles, layout, typography, color and branding is required', 'Experience with image optimization, web accessibility standards, debugging, design and development standards, and cross-platform testing', 'Deep understanding of interactive user experience (UX) disciplines including user interface (UI) in web', 'Ability to create graphics, icons, animations and other visual assets for UI development', 'Ability to create UX design documents including wireframes, sitemaps, design concepts, mockups and prototypes.', 'Experience with responsive design and working for mobile devices', 'Knowledge of Angular JS and AJAX', 'Experience with .NET applications', 'Experience using Team Foundation Server for source control', 'Experience with raster and vector graphics']"
+Mendix,Sales Development Representative,5b0a28887fc6f4a1,"['Achieving daily/weekly call metrics, Sales Opportunity creation and progression to Qualified Opportunity', 'Following up on the hottest Marketing Qualified Leads, who have interacted with Mendix in a specific way, warranting outreach', 'Working closely with other SDRs, as well as the Marketing, and Sales teams', 'Partnering with Account Executives within your assigned Sales Unit, to focus on target accounts through developing outreach strategy focusing on persona, industry and company', 'Setting up outbound calling campaigns on target accounts, that focus on call cadences and outbound email best practices', 'Finding the right decision makers and/or influencers within accounts, and engaging in conversations to educate them on low-code and the Mendix Platform, while uncovering their company challenges in app development', 'Establishing customer relationships with the ""decision making unit"" through conversations, such as C-levels, IT Managers, and other influencers, and scheduling and hosting discovery calls', 'Hosting discovery calls to continue the conversation and perform adequate discovery to determine if an account is a Mendix Fit, along a specific set of criteria', 'Progress the right accounts through the sales process and work with Account Executives to hand off the customer', 'Staffing our booth during industry events to drive awareness and pipeline generation', '1-3 years of experience in enterprise software sales as a BDR/SDR or Inside Sales', 'Excellent phone and written communications skills', 'The desire to master this role and take on new challenges', 'Natural curiosity to continue personal learning of technology and uncovering customer needs and challenges']"
+Brigham & Women's Hospital(BWH),Applications Analyst II / 40 Hours / Day / BWH Department Of Medicine Channing Lab,a4862689f4db8e31,"['Create program user codes using vendor provided applications, SQL, PHP, Java and JavaSript', 'Performing quality control and testing of applications and reports', 'Coordinating implementation of applications including developing new work flows and reports', 'Troubleshooting production issues and implementing solutions accordingly', 'Updating and maintaining documents related to implementation of applications and reports', 'Analyze current laboratory operations and make recommendations on how the system could be implemented or improved.', 'Provide training in the use of the application either individually or in a classroom environment during implementation and as new employees are hired.', 'Configure Information Management systems as required either as part of an implementation or on on-going basis. Programming changes to integrate with existing systems.', 'Develop and maintain all training and support documentation.', '3 years experience with Web Based Information Management applications.', 'Experience testing web based applications; Knowledge of SQL and Java required.', 'Excellent written and verbal communication skills are essential; Must be able to collect and translate requirements into a set of system requirements to create functional documents and code', 'Experience with LabVantage Suite, LIMESurvey, RedCap is preferred.', 'Experience with Continuous Integration (CI); SDLC tools Maven and GIT is preferred.', 'Background and / or LIMS software development is an asset to this position.', 'A sense of accountability for all project work assigned to the individual and team.', '3+ year experience working with or supporting computer systems and related processes in a laboratory environment or chemist / laboratory technician with excellent computer skills.', 'Must have the ability to work independently and adapt to an ever-changing 
environment.', 'Strong communication skills (verbal/written) with the ability to articulate themselves succinctly to a variety of audiences (Project Manager, Technical specialists, Developers, End-users, etc)']"
+AB Sciex Pte,Web Developer,792cb50e6aca7cce,"['Apply deep understanding of Adobe Experience Manager (AEM) and other Adobe solutions (i.e. Target, Analytics, etc.) to translate business requirements into well-architected technical solutions.', 'Build and maintain web applications using Adobe Experience Manager (AEM) including site architecture, custom components, workflow development, Digital Asset Management (DAM) implementation, object model design (Java APIs), implementation, and unit testing.', 'Integrate Adobe Experience Manager (AEM) with sales, service, and marketing automation platforms.', 'Support multi-site and multi-channel delivery, personalization / targeting, content aggregation & syndication, multi-lingual, automated workflow management, etc.', 'Diagnose and solve technical problems involving multiple environment and application integrations.', 'Responsible for Adobe Experience Manager (AEM) site maintenance, enhancements, bug fixes and new feature releases.', 'Conduct code reviews to ensure optimal code quality and appropriate design patterns.', 'What We’re looking for:', 'An experienced Adobe Experience Manager (AEM) Developer responsible for the design and development of components, templates, workflows, dialogs, providing review of various development tasks and documentation.', 'Developer who will mentor more junior developers and serves as a Subject Matter Expert (SME) on Adobe Experience Manager (AEM).', 'Thrive in a fast-paced work environment that requires strong problem-solving skills and independent self-direction, coupled with an aptitude for team collaboration, open communication, and building consensus.', '3+ years of Adobe AEM experience implementing full cycle Adobe AEM projects (AEM 6.0 and later required; AEM 6.4 is a plus)', 'Solid experience with AEM building blocks including templates, components, dialogs, widgets and bundles', 'Develop, implement and configure web content management solutions using Adobe Experience Manager and 
technologies such as Java, HTML/DHTML, XML, JavaScript, and web services', 'Familiar with Multi Site Manager (MSM), Language Copy, Dispatcher', 'Experience with Continuous Integration and related tools (Jenkins / GitHub, etc.).', 'Experience with automation testing platforms a plus (Selenium, etc.)', 'Experience with Adobe Target, Adobe Analytics, Magento and/or other eCommerce platforms a plus.']"
+Sovos Compliance,Database Developer,5259b9873a0e7d88,"['Engineer systems with Oracle Exadata technology', 'Utilize Jaspersoft and other analytical reporting tools to analyze and troubleshoot data discrepancies', 'Collaborate with engineers and technology leaders to engineer a dimensional model for a data warehouse capable of working cross-functionally across our product suite', 'Experience with Oracle Exadata platform', 'PL/SQL', 'Jaspersoft and other BI tools', 'Experience with Tableau, preferred', 'Experience with Java frameworks, a plus', 'Mentoring Programs', 'Management Bootcamps and Development Trainings', 'Bi-yearly performance reviews where compensation and performance are assessed and rewarded generously', 'Clear paths for growth within our roles (but we know that career development can be like a spider web, not a ladder - we give you the tools to move outside of your career as well!)', 'MyTime flexible time off (no worrying about accruing time or running out of sick days - If you need time off, take It!)', 'Paid family leave', 'Tuition reimbursement', 'Generous 401k match', 'Progressive Wellness Program', 'Company seeded and matched Health Savings Account (HSA)']"
+,,,
+"Digital Management, LLC",Front End Developer,4775e675f49bc34f,"['Build and improve software and tools by translating functional requirements into robust, interesting, supportable, and exciting Web-based applications that work within the overall system architecture', 'Participate in the full lifecycle site development, including client-side browser development, implementation, browser compatibility validation, and bug-fixing.', 'Actively troubleshoot and support applications in production-critical environments', 'Have a demonstrated ability to roll up his/her sleeves to develop and implement new features and functionality, especially related to client browser-side capabilities, including JavaScript, HTML5, CSS3, and responsive web design', 'Be passionate about writing reliable, efficient, and maintainable pages and code', 'Have an entrepreneurial spirit with a drive to contribute new ideas for future development', 'Deep understanding of HTML5, CSS, and JavaScript a must', 'Experience with responsive frameworks, such as Bootstrap', 'Experience with AJAX, JQuery, JSON, XML, Node.js', 'Familiarity with browser capabilities and debugging', 'Ability to write code that meets web accessibility guidelines (WCAG or section 508)', 'Self-starter with the ability to ramp up quickly and work independently', 'Ability to manage workflow, priorities, and deadlines, while handling multiple projects', 'Experience providing accurate task breakdowns and estimates', 'Ability to track and monitor requests from inception to completion', 'Flexibility to shift priorities quickly to ensure deadlines are met', 'Strong organizational skills', '5+ years of experience as a UX Developer or Front-End Developer', 'Familiarity with PHP, jQuery, Java', 'Experience with Magento, Hybris, and/or Oracle Commerce (formerly ATG)', 'Experience with JSP and JSTL', 'Bachelor’s degree or equivalent experience', 'Community – Blood drives, volunteering opportunities, Holiday parties, summer picnics, Tech Chef, 
Octoberfest just to name a few ways DMI comes together as a community.', 'Convenience/Concierge - Virtual visits through health insurance, pet insurance, commuter benefits, discount tickets for movies, travel and many other items to provide convenience.', 'Development – Annual performance management, continuing education and tuition assistance, internal job opportunities along with career enrichment and advancement to help each employee with their professional and personal development.', 'Financial – Generous 401k match for both pre-tax and post-tax (ROTH) contributions along with financial wellness education, EAP, Life Insurance and Disability help provide financial stability for each DMI employee.', 'Recognition – Great achievements do not go unnoticed by DMI through Annual Awards ceremony, service anniversaries, peer-to-peer acknowledgement through Give-A-Wow, employee referral bonuses.', 'Wellness – Healthcare benefits, Wellness programs, Flu Shots, Biometric screenings, on-site lactation rooms provide employees with several wellness options.']"
+Klaviyo,Front-End Web Developer,474ac232a3ac049f,"['Understood Klaviyo product, brand and audience', 'Reviewed current site structure and have an indepth understanding our web properties and architecture.', 'Familiarized yourself with processes for deployment, bugs, reporting, site management, and project management.', 'Have an in depth understanding of how our CMS and techstack works and have identified areas to make updates, improvements, and changes.', 'Worked on optimizing and improving at least 2 content pieces', 'Are proactively making ongoing improvements to klaviyo.com’s user experience and functionality', 'Reviewed, recommended, and executed at least 3 improvements to the blog ui and functionality to improve usability, experience, and/or performance.', 'Contributed to improved and revised standards for coding, deployment, code review, and QA', 'Update all web properties’ architecture, CMS templates for code architecture, CMS performance, UI performance and branding', 'Contribute to improved site UI and experience', 'Collaborate on user experience experiments to improve actions toward marketing goals', 'Developed at least 3 guides and/or reports that stand out for their interaction experience and ui as much as they do their content.', 'At least 3 years of experience developing and maintaining one (or more) website(s)', 'Experience developing/maintaining CMS-based templates/pages/architecture', 'Experience using SCSS and modular JS systems', 'Experience using a build system, such as Grunt/Gulp/Yarn', 'Comfortable on day 0 running WordPress locally, logging into a production server, and using GitHub and AWS']"
+LevelUp,React Developer,bcb8f6985771dcd9,"['Contribute to open source softwareBuild and maintain large-scale, high-availability web applications with an active and growing user base', 'Develop new user-facing features for our growing Web SDK', 'Construct reusable components and front-end libraries for future use', 'Manage stages of the continuous development and release process', 'Translate designs and wireframes into high quality code', 'Optimize components for maximum performance across a vast array of web-capable devices and browsers', 'Mentor experience for other engineers in a professional setting', 'Professional experience (2+ years) in front-end web development, including command of a modern framework - React required', 'Experience with common front-end development tools such as Babel, Webpack, NPM, etc.', 'Exposure to other programming languages that compiles into JavaScript - TypeScript preferred', 'Familiar with Agile software development methodologies', 'Distributed version control experience with GitFamiliarity with RESTful APIs', 'Experience with responsive design', 'Ability to understand business requirements and translate them into technical requirements', 'A knack for benchmarking and optimization', 'Team player with excellent written and spoken English communication skills']"
+,,,
+State Street,Java/JavaScript Web-Stack Developer - CRD,7f2478771eb0700b,"['Work under minimal supervision to analyze, design, develop, test, and debug software enhancements and solutions within Charles River’s Wealth business including modifications to core frameworks', 'Collaborate with UI/UX Designers, Business Analysts and Product Managers to turn complex business requirements into working and sustainable software', 'Develop, test, debug, and implement software programs, applications and projects using Javascript, Java, and other browser-based technologies, include Sencha ExtJS.', 'Provide informed guidance and direction in code reviews', 'Write unit and automation tests to ensure a high-quality product using a JavaScript/Jasmine/Selenium platform', 'Contribute to written design and API documentation, and participate in customer documentation process', 'Assist in improving development test methodologies and contribute to related test methodology frameworks', 'Conduct manual tests to ensure a high-quality product', 'Provide expert level troubleshooting on large, mission critical client implementations', 'Actively participate in the agile software development process by adhering to and advancing the CRD scrum methodology, including attending all daily standups, sprint planning, backlog grooming, and retrospectives', 'Participate in cross-team group activities to complete assignments\nProvide mentoring to junior staff', 'B.S. degree (or foreign education equivalent) in Computer Science, Engineering, Mathematics, and Physics or other technical course of study required. 
MS degree strongly preferred.', '6 to 10 years of progressively responsible professional software engineering experience preferably in a financial services product delivery setting', 'Authoritative experience with Javascript and Java is essential', 'Able to contribute to complex design specs in consultation with senior staff', 'Able to work on medium to large projects with no supervision and on more complex tasks with minimal oversight', 'Excellent written and verbal communication skills', 'Able to work well with peers in a collaborative team environment', 'Experience with Agile development methodology strongly desired']"
+JD Softtech,PHP Framework Developer,74841ae117d41295,"['Advanced MVC framework knowledge: CodeIgniter & Laravel', 'Write ""clean"", well designed code', 'Knowledge of Javascript — jQuery & AJAX', 'Produce detailed specifications', 'Troubleshoot, test and maintain the core product software and databases to ensure strong optimization and functionality', 'Contribute in all phases of the development lifecycle', 'Follow industry best practices', 'Develop and deploy new features to facilitate related procedures and tools if necessary']"
+,,,
+University of Massachusetts Medical School,Intake Coordinator,40fe1c25c677d996,"['Responsible for understanding Intake policies and procedures as they relate to the Unit and updating as necessary', 'Coordinate, monitor, and measure work distribution/productivity/quality levels of Intake staff responsible for opening new cases and submit results to Manager through available communication channels.', 'Conduct thorough analysis of case documentation and history, and if necessary apprise relevant parties on Unit policies and procedures including need for case documentation required by law.', 'Use large web based databases (MMIS,Cognos & Case Tracking) to acquire member eligibility reports, claims history, and case-head information to aid in case coordination.', 'Maintain an understanding and knowledge of member aid categories to determine which, if any, benefits were received', 'Train Intake staff or temporary staff on procedural changes; and provide input to senior staff and software developers on methods to improve or streamline the Intake process.', 'Oversee and assist Intake staff with processing incoming and outgoing mail, closed case filing and reporting, and other Unit related duties as requested or required', 'Research relevant case information through the use of web based applications/tools such as Accurint, MMIS, Cognos, Lexis/Nexis, Registry of Deeds.', 'Participate in weekly/monthly Unit management team meetings process.', 'Conduct regular meetings with staff to review essential Unit and operational updates and ensure consistent implementation of Intake policies and procedures.', 'Oversee the unit’s paperless scanning environment process as well as, but not limited to, software knowledge, staff training and equipment maintenance and purchases.', 'Perform other duties as assigned or required.', 'Bachelor’s Degree or equivalent experience', '1 year related customer service or business experience', 'Strong analytical, oral and written communication skills', 
'Proficient in Office and Web-based applications', 'Ability to read, interpret, apply and explain the provisions of the laws, rules, regulations, policies, procedures, specifications, standards and guidelines governing agency operations and assigned unit activities.', 'Working knowledge of Microsoft Office', 'The applicant will demonstrate the ability to successfully navigate a heavy telephone work environment.', 'Ensure a high level of customer service is maintained at all times.', 'Bi-Lingual highly preferred']"
+,,,
+Source Code,Web Developer,b8a3dc307b7541cb,"['Demonstrated web development skills (e.g., through a portfolio of work, storyboards, technical diagrams, etc.)', 'Proficiency in PHP 7.x', 'Proficiency in CakePHP 3.x', 'Proficiency in the following languages: HTML5, CSS3, JavaScript', 'Proficiency with Git and basic command line functions', 'Understanding of OOP', 'Understanding of MVC', 'Strong understanding of performance optimization best practices and tools', 'Strong understanding with process flowcharting, wireframes, mock-ups, site maps', 'A firm grasp of SEO / SEM best practices', 'Proficiency in creating basic test plans', 'Love of learning', 'Ability to work independently', 'Dedicated team player', '5+ years’ experience in application development and testing', 'Permission to work in the US without sponsorship', 'MySQL, MS SQL Server, AJAX, XML, DevOps/Agile and MVC', 'Linux environment', 'Data Mapping, Web server/network/IT experience', 'Google Analytics', 'Microsoft Office Suite (specifically Outlook, PowerPoint, Excel, and OneDrive)', 'PHPstorm or Eclipse IDE', 'website hosting and server technology', 'Browser testing and debugging', 'The primary office will be at our HQ located in Waltham, MA', 'Some travel to other office locations required (Seattle, WA, and Los Angeles, CA)', 'This is a fulltime position / No contractors or other outsourcing (please)', 'Competitive salary & performance-based bonus', 'Annual paid vacation & sick days', 'Health & Dental Insurance', 'Short term and Long term disability Insurance', '401(k) retirement plan', 'Bachelor’s Degree in Computer Science or related field']"
+Neptune Web,Web Developer,6840903ac20c98f9,"['5-7 years of web development experience', 'Exposure to current trends in web development in a cross-platform environment', 'Experience developing clean, compact, cross-browser and cross-platform web software', 'Experience with programming in PHP, ASP.net (Perl, VBScript, Java, C or C++ will be a plus)', 'Experience with frontend technologies including HTML, CSS, Javascript and frameworks eg. jQuery, Backbone', 'Excellent communication skills - the ability to analyze and identify clients problems and effectively solve them']"
+TakeTours.com,Web Developer,9209ec8020eed52e,"['Design and develop scalable web solutions with Java and other open source techniques', 'Work with and assist front end developers with HTML5, CSS, JavaScript, JQuery', 'Work with remote and local developers to implement web, cloud, and mobile products', 'Develop web services for 3rd party integration', '2+ years of experience in developing software applications', 'Fluent in one or a few objected oriented languages', 'Clear understanding of best practice on software development', 'Ability to work with HTML5, CSS, JavaScript including JQuery', 'Ability to develop web services and scalable web applications', 'CS or engineering degree with preference in advanced degree', 'Experience on iPhone/Android applications is a plus', 'Experience on database development is a plus', 'Experience with Linux servers is a plus', 'Competitive base salary', 'Benefits include health, dental and 401(k)', 'Travel Credits and employee discount']"
+Google,"Software Engineering Intern, BS, Winter 2020",1d99705341f0d46c,"['1. In the “Resume Section:” attach an updated CV or resume.', '2. In the “Education Section:” attach a current or recent unofficial or official transcript in English.', 'Under “Degree Status,” select “Now attending” to upload a transcript.', ""Currently enrolled in a Bachelor's degree program in Computer Science, or related technical field."", 'Experience with Data Structures or Algorithms gathered from: completing a data structures or algorithms class, coursework, course projects, research, individual projects, internships, or other practical experience inside or outside of school or work (including open source hobby coding).', 'Experience in Software Development and coding in a general purpose programming language.', 'Examples of coding in one of the following programming languages including but not limited to: C, C++, Java, JavaScript, or Python.', 'Authorization to legally work in the United States.', 'Returning to a degree program after completion of the internship.', 'Experience programming in two or more of the languages including but not limited to: C, C++, C#, Java, JavaScript, Go or Python.', 'Experience working with some of the following: web application development, Unix/Linux environments, mobile application development, distributed and parallel systems, machine learning, information retrieval, natural language processing, networking, developing large software systems, and/or security software development.', 'Demonstrated interest and ability to learn other coding languages as needed.', 'Excellent communication skills, with the ability to speak and write in English.', 'Apply knowledge gained in computer science courses to real-world challenges.', 'Analyze information and evaluate results to choose the best solution to effectively solve challenges.', 'Develop scripts to automate routine tasks.', 'Create and support a productive and innovative team. 
This includes working with peers, managers, and teams.']"
+LogicManager,Database Developer,53281f7186d8b62d,"['Build, test, and deliver software using Agile Methodologies', 'Design and implement database solutions within architectural, security and code quality guidelines considering performance, scalability and sustainability', 'Troubleshoot and tuning complex SQL statements and making recommendations on indexing strategies', 'Work closely with application developers to ensure proper design and implementation of database systems', 'Participate in database architectural review and design discussions', 'Analyze complex data requirements - Must be strong in working with complex queries;', 'Perform code reviews, learning from your peers, and sharing your knowledge with them in a positive, friendly way', 'Actively participate in promoting company culture', 'Bachelors degree in a technical or quantitative field preferred', '3-5 years of technical experience as database or reporting developer', 'Demonstrated ability to understand various data structures and common methods in data extraction and transformation', 'Experience writing SQL code to query databases', 'Experience with Oracle database 11g/12c', 'Proficiency with OpenText iHub reporting platform or similar product', 'Proficiency with data preparation software such Tableau Data Prep, Alteryx, TOAD, or Veera', 'Excellent organizational and time management skills', 'Ability to work collaboratively and effective customer service skills', 'Ability to manage multiple concurrent projects and activities yet maintain attention to detail', 'Award-Winning Culture: We’re named Best Place to Work 2019 by Built In, one of the Best Tech Work Cultures by the Timmy Awards, and certified as a Great Place to Work® since 2017', 'Collaborative Offices: We have a modern, open floor layout designed for cross functional collaboration and employee growth. 
We also have conference rooms for brainstorming sessions and team meetings.', 'Downtown Location: We are located in Boston’s growing Seaport and Innovation District with great views of the channel and within walking distance to many restaurants, cafes, bars, and shops. Our offices are easily accessible by public transportation and a quick walk from South Station.', 'Great Benefits & Perks: We believe in supporting our employees with benefits that matter, which is why we offer competitive salaries, benefits, company stock options, paid vacation, community service days, and more. We also celebrate our successes with frequent company-sponsored lunches, happy hours, events, and other perks.', 'Leadership with a Vision: LogicManager is a workforce of thought leaders and dynamic innovators coming together with a vision to dramatically improve the way companies protect their reputations.']"
+MassMutual,Web Developer,668c34e39fb99a71,"['Drive toward the effective utilization of the latest technologies, partnering with business stakeholders across the team, as well as experts in other areas including data science to achieve goals', 'Learn and master new and emerging technologies and take initiative to offer technical direction and creative solutions', 'Build responsive web applications that provide best-in-class support for multiple browsers and mobile devices', 'Mentor other developers on technologies and techniques in JavaScript application development', 'Engage in collaborative environment with agile squads across the organization to develop highly interactive digital experiences on exciting technology directives.', 'Develop complex software programming applications', 'Responsible for components of an application or sub-systems, such as sophisticated file management routines, large telecommunications networks, administrative and statistical software packages, performance evaluation and enhancements', 'Perform related duties as assigned or requested', 'Receives general supervision and is competent to work at a high level of all technical phases of software programming while working on own most of the time', 'HTML5: expertise in cross-browser design and implementations; expertise crafting pages with semantic markup; expertise in DOM manipulation; familiarity with W3C standards', 'JavaScript: expertise building single-page applications or progressive web applications; expertise building applications in one of: React, Vue, Angular, underscore, require; proficiency in ES6; proficiency with web bundlers, such as webpack or parcel; proficiency with package managers; proficiency with templating syntax, like ejs', 'Progressive Web App/Graceful Degradation: experience in implementations across devices, including: desktop, tablet, and mobile browsers', 'Git: expertise with workflow steps, including: branching, merging, rebasing, pull requests; working knowledge 
of reversion and alternative git flows', 'Docker: experience building images, Dockerfile syntax, secrets, volumes, and port binding; working knowledge of docker-compose', 'Virtualization: experience with virtual machines; one of: Vagrant, VirtualBox, Parallels, VMWare; experience mounting volumes and port binding', 'Agile: experience working in an Agile development environment; experience with an online planning system, such as JIRA, Trello, Pivotal Tracker, etc.', 'Authorized to work in the United States with or without sponsorship', 'Bachelor’s degree', '5 years Web Development experience', 'Cloud Architecture: understanding of stateless/â€\x9d12 factorâ€\x9d application principles', 'APIs: experience with RESTful APIs; understanding of API specifications, such as Swagger docs', 'Deployments: experience with “blue/greenâ€\x9d (or “red/blackâ€\x9d), rolling, or canary deployment strategies', 'Advanced React: experience with higher order components, and composition of components', 'Redux or Flux: experience with application data architecture', 'Webpack: experience customizing webpack configurations for multi target builds.', 'Testing: experience writing unit, integration, integration and load tests; expertise with one of jest, mocha, chai, enzyme, and sinon; proficiency with cross-browser testing tools and/or platforms', 'Code Quality: experience with eslint, csslint', 'Continuous Integration: experience using pipelines to support continuous delivery; understanding of the value stream', 'Documentation: proficiency with technical writing; proficiency with markdown syntax; proficiency writing wiki pages for technical documentation including troubleshooting, tutorials, and reference material']"
+Hyperlink Web International,Senior Web Developer,d6dd58d06d4be827,"['Highly familiar with front-end programming in HTML5 and CSS 3.0', 'Paypal, Google, Facebook and Paypal APIs: You should be familiar with the server- and client-side SDKs of these common web services. We use these for payment processing, analytics, and user authentication.', 'WordPress: A majority of our websites are powered by WP, meaning you will design themes, author plugins, and set up certain endpoints (REST, XML-RPC).', 'jQuery/Prototype/Pure JavaScript: You should be very comfortable using native JavaScript and creating reusable jQuery plugins from scratch.', 'PHPDoc/JavaDoc: Just as important as writing code is letting other developers know how your classes and methods work.', 'PHPUnit / JUnit / Selenium: Apart from clean code, you should make production-grade, brute-tested code before shipping it to the team.', 'Performance tuning: Given our rather large user base, you are expected to be familiar with at least one caching service (e.g. basic object/file caching libraries, Redis, Memcached).', 'REST/XML services: You should be able to create endpoints to our web services so they can be accessed by apps and extensions.', 'GIT: Above average proficiency collaborating code with teams, setting up hooks, and introducing workflows that improve efficiency.', 'JIRA / Redmine / Basecamp: Experience with at least one of these project collaboration platforms.', 'Linux: BASH savvy is important, as a lot of deployment and debugging work involves working with virtual private server terminals.', 'MySQL or MariaDB: You will architect, optimize, and reengineer database schemas. A degree of fluency with advanced functions is preferred.', 'Attention to detail: Telling us about spelling or grammar errors in this job post is welcome an the interview.', 'Java J2EE 7 & Spring Framework: Our flagship project runs on the Spring Framework, meaning we’ll devote a lot more time on it than other projects. 
You should also be comfortable deploying and configuring Tomcat / Glassfish / Jetty containers.', 'Jenkins: Knowing GIT is half the story. Strong knowledge of continuous integration is highly preferred.', 'Drupal: We will start using Drupal on a large scale very soon, and experience in developing themes and extending community modules certainly piques our interest.', 'Other JavaScript libraries/frameworks/compilers: node.js, CoffeeScript, Dojo, Knockout, AngularJS, Mootools.', 'Chrome extensions/GreaseMonkey scripting.', 'PostgreSQL: Not as important as MySQL at this point, but considered an added advantage.', 'PHP MVC frameworks\nSymfony2: A Symfony certificate is deemed the highest qualification, but if you have a suite of repositories or production websites powered by Symfony, feel free to brandish.\nLaravel: Just as relevant as Symfony2 (if you already know it).\nCodeIgniter: Some of our current projects use CodeIgniter. We do not actively use CI anymore, but experience in its considered an advantage.', 'Symfony2: A Symfony certificate is deemed the highest qualification, but if you have a suite of repositories or production websites powered by Symfony, feel free to brandish.', 'Laravel: Just as relevant as Symfony2 (if you already know it).', 'CodeIgniter: Some of our current projects use CodeIgniter. We do not actively use CI anymore, but experience in its considered an advantage.', 'UX\nLESS/SASS: A definite advantage is if you can recompile and extend Bootstrap / Foundation themes using tools like Node.JS and/or Ruby.\nAdobe Photoshop: For asset creation and photo manipulation.\nAdobe Fireworks: For prototyping projects and creating GIF/PNG assets.\nAngularJS: For creating fluid back-ends. This goes hand-in-hand with Symfony. 
Not required but a major plus.', 'LESS/SASS: A definite advantage is if you can recompile and extend Bootstrap / Foundation themes using tools like Node.JS and/or Ruby.', 'Adobe Photoshop: For asset creation and photo manipulation.', 'Adobe Fireworks: For prototyping projects and creating GIF/PNG assets.', 'AngularJS: For creating fluid back-ends. This goes hand-in-hand with Symfony. Not required but a major plus.', 'SEO\nSchema.org / Microformats: We prefer a developer who understands the value of the semantic web.\nW3C-compliant code: Syntactically correct HTML5-compliant code is the foundation of better search results.', 'Schema.org / Microformats: We prefer a developer who understands the value of the semantic web.', 'W3C-compliant code: Syntactically correct HTML5-compliant code is the foundation of better search results.']"
+TripAdvisor,Senior Web Developer - CoreX,0e6f614c2a4c9aa5,"['Write code.', 'Work with your team, and other groups such as product management, design, and others, to help bring our projects to fruition.', 'Drive user facing features for our team, and also means being able to touch and have a hand in whatever level of code is required in order to complete your project.', ""Be creative and pragmatic when solving the problems you're working on."", 'Be a mentor for others on the team.', ""Move quickly. We have a daily release so you'll get to build it and ship it!"", '6 or more years of experience as a professional software engineer.', 'An open mind', 'Willingness to learn', 'A desire to move quickly (one of our mottos is ""Speed Wins!"")', 'The mindset that done is often the better option over perfection.', 'Source: Jumpshot for TripAdvisor Sites, worldwide, February 2019']"
+Warren for President,Designer,9008beab883f0af2,"['Quickly, professionally, and patiently complete design tasks that may be well above or below your skill level.', 'Support (and sometimes lead) the visual and experience design for a wide range of projects – including web/email/social fundraising graphics, one-off initiatives, rapid response, merchandise, placards and flyers, photography and photo research, as well as collaborating with the video team to ensure our static and motion graphics are aligned.', 'Collaborate with the team to develop ideas and creative solutions; present and articulate design rationale to the team; collaborate and iterate based on feedback and testing outcomes', 'Flexible skill set and versatile design aesthetic: a range of examples of how to inspire and activate a range of audiences across platforms. Expertise in Adobe Creative Suite (PS/AI) & Sketchapp are required. Extra credit for knowledge of prototyping tools like Marvel or InVision, After Effects, and in pushing Google Slides and Google Docs to their absolute graphic limits.', 'Have a capacity to be resourceful and self-directed while handling multiple projects in a fast-paced environment.', 'Strong conceptual and strategic thinking: an advanced understanding of how design fits into the broader vision of a project, and how to respond to creative challenges within the constraints of budgets, timing, and resources.', 'Agility: an ability to adapt to evolving project needs and communicate effectively with both colleagues and clients to solve problems on the fly. Teamwork and scrappiness required.', 'Drive: a shared curiosity and commitment to making change. This includes comfort with ambiguity, a restlessness that resists the status quo, and a dedication to creative integrity that ensures we’re always making progress in moving our work forward.', 'Resilience: Empathy, humility, respect, and the ability to act as a role model at campaign headquarters']"
+,,,
+Financial Recovery Technologies,Web Application Developer,f6e5356d32b64553,"['Take pride in delivering quality software while adhering to industry best practice\nProduce good, well-written code - code that is stable, secure, and maintainble\nFocus on front-end technologies, including Angular7 (TypeScript), Angular Material, Sass, and Webpack', 'Produce good, well-written code - code that is stable, secure, and maintainble', 'Focus on front-end technologies, including Angular7 (TypeScript), Angular Material, Sass, and Webpack', 'Interface with business-side counterparts to convert requirements into well-designed systems, creating responsive, accessible, and performant User Interfaces in partnership with stakeholders and designers\nWork closely with BAs to understand business needs\nBuild reasonable systems, appropriately designed to satisfy those needs\nIntroduce complexity only to the extent necessary', 'Work closely with BAs to understand business needs', 'Build reasonable systems, appropriately designed to satisfy those needs', 'Introduce complexity only to the extent necessary', 'Be an active part of an Agile team\nParticipate in code review\nContribute to design discussions\nBe unafraid to admit ignorance in a complex business - ask any question necessary to ensure you get it\nWork share and pick up tasks as needed', 'Participate in code review', 'Contribute to design discussions', 'Be unafraid to admit ignorance in a complex business - ask any question necessary to ensure you get it', 'Work share and pick up tasks as needed', 'Deliver on critical business and technical initiatives - what is on the docket for the rest of this year?\nredesigns of our client portal and our client notification engine\nupgrade of our Angular framework\ncontinued automation efforts', 'redesigns of our client portal and our client notification engine', 'upgrade of our Angular framework', 'continued automation efforts', 'An experienced engineer who is excited about working with 
Angular and has multiple years of experience writing client-side Javascript applications using Angular', 'Focused on front-end technologies, including Angular7 (TypeScript), Angular Material, Sass, and Webpack, with experience with Node/js, Mongo, and Linux', 'Highly proficient with and interest in modern web technologies and techniques such as HTML5, CSS3, ECMAScript 2016, CSS pre-processors (SASS, Less, Stylus), TypeScript, and JavaScript build tools (Webpack, Gulp) - experience developing JSON RESTful HTTP services is a plus', 'Committed to learning and keeping up to date with new technologies and industry best practices', 'An excellent engineer and problem solver who also has phenomenal communication and collaboration skills;', 'Intellectually curious with a demonstrated commitment to continuous growth; and', 'Of course, interested in joining a growing company with a vibrant, entrepreneurial culture, dedicated to being the top provider in the class action recovery space.', 'Fun and diverse colleagues in a relaxed environment', 'Free drinks and snacks and free access to onsite gym', 'Health, dental, vision', '401k (with company match)', 'Income protection plans (life, AD&D, short- and long-term disability) and access to a suite of voluntary benefit programs', 'Close to public transit (walking distance to Wellington T on Orange Line)', 'Free parking onsite']"
+XL Catlin,Assistant Scientist – Quality Engineering,1e71dc003663cb42,"['Drive the strategic vision for System and User Acceptance testing\nAutomation of tests including reconciliations, user interfaces such as Business Intelligence solutions and validation of calculations\nDocumentation of test plan, cases, results\nTriage and Prioritization of defects with all stakeholders\nCommunications for Business Partners, Business Product Owners and testing teams\nSupport onshore/offshore resource strategy with all testing team stakeholders', 'Automation of tests including reconciliations, user interfaces such as Business Intelligence solutions and validation of calculations', 'Documentation of test plan, cases, results', 'Triage and Prioritization of defects with all stakeholders', 'Communications for Business Partners, Business Product Owners and testing teams', 'Support onshore/offshore resource strategy with all testing team stakeholders', 'The overall plan, management, and execution of the entire User Acceptance Testing (UAT) flow, including:\nUAT strategy\nUAT plan\nUAT scenarios and test cases\nIntegration of UAT feedback into the agile sprint process', 'UAT strategy', 'UAT plan', 'UAT scenarios and test cases', 'Integration of UAT feedback into the agile sprint process', 'System Integration Testing background\nDevelop and execute test plan, scripts, and procedures for functional (IT), System Integration Testing (SIT).\nReview testing documentation, develop testing standards, define criteria for testing, analyze test results, and present results to developers and end users.\nKnowledge on Agile-SCRUM methodology.\nHands-on Experience in automation testing tools such as QTP /Selenium and development\nof automation framework.\nCollaborate with developers and Test leads to improve testability.\nFamiliar with setting up of test environments', 'Develop and execute test plan, scripts, and procedures for functional (IT), System Integration Testing (SIT).', 'Review testing 
documentation, develop testing standards, define criteria for testing, analyze test results, and present results to developers and end users.', 'Knowledge on Agile-SCRUM methodology.', 'Hands-on Experience in automation testing tools such as QTP /Selenium and development', 'of automation framework.', 'Collaborate with developers and Test leads to improve testability.', 'Familiar with setting up of test environments', 'Leadership accountability for ensuring that every release to customers is fit for purpose.', '8 to 10 years of strong testing background, including knowledge/experience in automation', 'Excellent written and speaking communication skills with experience of successfully influencing others', 'Experience in Setting up the automated frameworks for Testing', 'Experience with SQL, HIVE, ADLS, CosmosDB, SQL DW Analytics is required', 'Understanding of Web Service Architecture, System Integration, Agile development methodology', 'Having experience doing UAT/System Integration testing in the insurance industry is a plus', 'Proven track record of value addition done to the Product / Solutions through strong QA processes', 'Someone who can be a path setting leader through individual contribution in Testing', 'Strong expertise in tools such as Lightning, Bravo and any other Testing Automation Tools', 'Strong experience in Manual and Automated testing methodologies', 'Experience in non-functional testing types API testing, UI Testing is big plus', 'Knowledge of different types of Software Architectures and how they work', 'Ability to interpret complex data requirements and architect solutions', 'Distinctive problem-solving and analytical skills combined with strong business acumen', 'Knowledge of Commercial Insurance business', 'Knowledge/Experience of Testing in cloud-based systems in different layers']"
+MassMutual,Front End Developer,097298c2cc2a879a,"['Leveraging a solid knowledge of HTML5, CSS3, JavaScript, and JavaScript frameworks, work in a collaborative environment to develop highly interactive digital experiences.', 'Drive toward the effective utilization of the latest technologies, partnering with business stakeholders across the team, as well as experts in other areas including data science to achieve these goals.', 'Learn and master new and emerging technologies and take initiative to offer technical direction and creative solutions.', 'Manage execution and timely completion of front-end engineering projects by analyzing scope and determining project specifications, establishing testing methods for product investigations, analyzing test programs for adequacy and sequence, examining samples for compliance with requirements, and preparing reports', 'Formulate plans with detailed calculations, drawings, diagrams, project specifications and budget estimates, and read and interpret precast reinforcement shop drawings, civil grading plans and structural drawings.', 'Maintain system functionality through rigorous testing, identifying ways to improve efficiency and productivity, and providing maintenance and support activities for all existing solutions', 'Monitor industry trends and implement appropriate methods to keep production on the cutting edge, while meeting financial goals and providing cost savings', 'Utilize software programming for the preparation of technical specifications, user documentation, presentations, and progress and results tracking', 'Build professional skills through continued training, education, and networking events', '2 years’ experience in front-end web development proven by an outstanding online portfolio or online code repository', 'HTML5: cross-browser design and implementations; proficiency crafting pages with semantic markup; knowledge of W3C standards', 'CSS3: proficiency building libraries that use transitions and media 
queries; expertise in one of: BEM, Atomic, DRYCSS, OOCSS, SMACSS', 'JavaScript: expert knowledge of element selection, property inspection; proficiency with branching logic; proficiency with package managers', 'Docker: experience building images, Dockerfile syntax, volumes, and port binding; working knowledge of docker-compose', 'Authorized to work in the United States with or without sponsorship', 'Bachelor’s degree is required', 'Accessibility: knowledge of ARI', 'Experience writing unit tests, browser automation tests; understanding of cross-browser testing tools or platforms', 'Experience with ESLint, CSS Lint', 'Experience with data-driven content sites; experience mapping user flows through content sites', 'Experience with theme development across multiple domains', 'Experience building modular code', 'Experience with continuous deployment', 'Progressive Web App/Graceful Degradation: experience in implementations across devices, including: desktop, tablet, and mobile browsers', 'Git: knowledge with workflow steps, including: branching, merging, rebasing, pull requests; working knowledge of reversion and alternative Git flows']"
+Integrated IT Solutions,Web Developer,ac5403791413d4ed,"['Demonstrated web development skills (e.g., through a portfolio of work, storyboards, technical diagrams, etc.)', 'Proficiency in PHP 7.x', 'Proficiency in CakePHP 3.x', 'Proficiency in the following languages: HTML5, CSS3, JavaScript', 'Proficiency with Git and basic command line functions', 'Understanding of OOP', 'Understanding of MVC', 'Strong understanding of performance optimization best practices and tools', 'Strong understanding with process flowcharting, wireframes, mock-ups, site maps', 'A firm grasp of SEO / SEM best practices', 'Proficiency in creating basic test plans', 'Love of learning', 'Ability to work independently', 'Dedicated team player', '5+ years’ experience in application development and testing', 'Permission to work in the US without sponsorship', 'MySQL, MS SQL Server, AJAX, XML, DevOps/Agile and MVC', 'Linux environment', 'Data Mapping, Web server/network/IT experience', 'Google Analytics', 'Microsoft Office Suite (specifically Outlook, PowerPoint, Excel, and OneDrive)', 'PHPstorm or Eclipse IDE', 'website hosting and server technology', 'Browser testing and debugging', 'The primary office will be at our HQ located in Waltham, MA', 'Some travel to other office locations required (Seattle, WA, and Los Angeles, CA)', 'This is a fulltime position / No contractors or other outsourcing (please)', 'Competitive salary & performance-based bonus', 'Annual paid vacation & sick days', 'Health & Dental Insurance', 'Short term and Long term disability Insurance', '401(k) retirement plan', 'Bachelor’s Degree in Computer Science or related field']"
+DIGITALiBIZ,.Net Web Developer,610fc8dc357d7338,"['Bachelor’s degree in Computer Science with progressive work experience', 'Experience working directly with clients, designers, and writers', 'Excellent communication skills, both written and oral', 'Ability to effectively identify technical issues during the design process and suggest effective solutions', 'Must be authorized to work in the United States', 'Must have the following technical requirements\nASP .Net 4.0+\njQuery\nCSS3\nSQL', 'ASP .Net 4.0+', 'jQuery', 'CSS3', 'SQL', 'Experience with the following:\nPL/SQL\nNet\nD3\nHTML5\nMVC 4+', 'PL/SQL', 'Net', 'D3', 'HTML5', 'MVC 4+']"
diff --git a/debug.log b/debug.log
new file mode 100644
index 0000000..7cfafdc
--- /dev/null
+++ b/debug.log
@@ -0,0 +1,9 @@
+[0809/115542.150:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0809/115542.288:ERROR:exception_snapshot_win.cc(98)] thread ID 12528 not found in process
+[0809/115542.869:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0809/115542.873:ERROR:exception_snapshot_win.cc(98)] thread ID 12708 not found in process
+[0809/122525.210:ERROR:process_reader_win.cc(151)] SuspendThread: Access is denied. (0x5)
+[0809/122525.213:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0809/122525.335:ERROR:exception_snapshot_win.cc(98)] thread ID 6984 not found in process
+[0809/122525.231:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0809/122525.337:ERROR:exception_snapshot_win.cc(98)] thread ID 13088 not found in process
diff --git a/extract.py b/extract.py
new file mode 100644
index 0000000..7795a13
--- /dev/null
+++ b/extract.py
@@ -0,0 +1,205 @@
+import time
+from datetime import datetime
+import os # for path control
+import sys
+
+# ! Get the package to create dialog boxes
+import easygui
+
+# ! Get the package to visually print results
+import matplotlib.pyplot as plt
+
+# ! Get the package to control the web
+from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from webdriver_manager.chrome import ChromeDriverManager
+
+# ! These packages to deal with tables and multi-dimensional array
+import numpy as np
+import pandas as pd
+
+
+master_job_link = "https://www.indeed.com/jobs?q=Software+Engineer&l=Boston"  # Indeed search results: "Software Engineer" in Boston
+master_job_link2 = "https://www.indeed.com/jobs?q=data+scientist&l=Boston"  # Indeed search results: "data scientist" in Boston (the search main() crawls)
+master_job_link3 = "https://www.indeed.com/jobs?q=web+developer&l=Boston"  # Indeed search results: "web developer" in Boston
+link2 = "https://www.indeed.com/jobs?q=Software%20Engineer&l=Boston&vjk=2826852a029ff8f6"  # Example of a single job's link: a search URL plus "&vjk=<job id>"
+
+
+def headless_options():
+    """
+    Build the Chrome options shared by every driver this script starts.
+
+    Despite the function's name, no headless flag is passed here: only window,
+    extension, proxy, GPU, sandbox and certificate switches, plus one
+    content-settings preference.
+
+    Returns:
+        options -- the configured webdriver.ChromeOptions object
+    """
+    chrome_flags = (
+        "--window-size=1366,768",
+        "--disable-extensions",
+        "--proxy-server='direct://'",
+        "--proxy-bypass-list=*",
+        "--start-maximized",
+        "--disable-gpu",
+        "--disable-dev-shm-usage",
+        "--no-sandbox",
+        "--ignore-certificate-errors",
+    )
+    options = webdriver.ChromeOptions()
+    for flag in chrome_flags:
+        options.add_argument(flag)
+    # NOTE(review): the value 2 presumably blocks image loading -- confirm against Chrome's prefs.
+    options.add_experimental_option(
+        "prefs", {"profile.managed_default_content_settings.images": 2})
+    return options
+
+
+def get_all_ids(driver_path, job_link, num_page):
+    """
+    Crawl num_page result pages of job_link and collect every job id found on them.
+
+    :param driver_path: path of the chromedriver executable used to start Chrome
+    :param job_link: the search results URL; "&start=<offset>" is appended for each page
+    :param num_page: number of result pages to visit (the offset advances by 10 per page)
+    :return: a tuple (list of ids, the WebDriver). The driver is returned still open so the
+        caller can keep using it; the caller is responsible for quitting it.
+    """
+    ids = []
+    driver = webdriver.Chrome(driver_path, options=headless_options())
+    try:
+        for page in range(num_page):
+            driver.get(job_link + '&start=' + str(page * 10))
+            # Every element that carries a data-jk attribute contributes one id.
+            ids_elements = driver.find_elements(By.XPATH, '//*[@data-jk]')
+            ids.extend(link.get_attribute("data-jk") for link in ids_elements)
+    except Exception:
+        # The caller never receives the driver when the crawl fails, so close Chrome here
+        # instead of leaving the browser process behind, then let the error propagate.
+        driver.quit()
+        raise
+
+    return ids, driver
+
+
+# Location of the ids file, relative to the current working directory. Built with
+# os.path.join so the separator is right on every platform (a literal "data\ids.txt"
+# contains the invalid escape sequence "\i" and only works on Windows).
+_IDS_FILE = os.path.join("data", "ids.txt")
+
+
+def write_to_txt(all_ids):
+    """
+    Write all the ids to the ids file, one per line, followed by the current time.
+
+    The data folder is created when it does not exist yet. The last line of the file
+    (without a trailing newline) is the timestamp of the write.
+
+    :param all_ids: all the job ids in a list
+    :return: No return
+    """
+    os.makedirs(os.path.dirname(_IDS_FILE), exist_ok=True)
+    # The with-block closes the file even when a write fails.
+    with open(_IDS_FILE, "w", encoding="utf-8") as output:
+        output.writelines("%s\n" % item for item in all_ids)
+        output.write("%s" % datetime.now(tz=None))
+
+
+def read_from_txt():
+    """
+    Read back the file produced by write_to_txt.
+
+    :return: a tuple (list of ids, crawl time); the crawl time is the last line of the
+        file, returned as the string that was written
+    """
+    # Read-only mode is enough here; no write permission is needed.
+    with open(_IDS_FILE, "r", encoding="utf-8") as file:
+        # split() always yields at least one item, so the last one can be peeled off.
+        *read_ids, crawl_time = file.read().split("\n")
+    return read_ids, crawl_time
+
+
+def test(driver, job_link, job_ids):
+ """
+ Placeholder for a manual check of a single job page; it currently does nothing.
+
+ NOTE(review): a zero-argument function that is also named ``test`` is defined right
+ after this one, so this definition is replaced as soon as the module is loaded.
+ """
+ pass
+ # Disabled experiment: open one job and print the company shown on its page.
+ # driver.get(job_link + "&vjk=" + job_ids)
+ # company = driver.find_element_by_xpath('//*[@id="vjs-cn"]').text
+ # print("Companies' ids:", company, sep="\n", end="\n\n")
+
+
+def test():
+    """
+    Manual smoke check: print a timestamp, pause for two seconds, print a second
+    timestamp and finally the difference between the two.
+    """
+    started = datetime.now(tz=None)
+    print(started)
+    time.sleep(2)
+    finished = datetime.now(tz=None)
+    print(finished)
+    print(finished - started)
+
+
+def get_desc(driver, job_link, job_ids):
+    """
+    Visit every job in job_ids and collect its company, position and description items.
+
+    A job whose page cannot be loaded or read is reported on stdout and kept as a row of
+    NaN values, so the result always has exactly one row per entry of job_ids.
+
+    :param driver: the WebDriver
+    :param job_link: the master job link; "&vjk=<id>" is appended to it for each job
+    :param job_ids: a list of all ids
+    :return: a Pandas DataFrame with the columns Companies, Positions, ID and Descriptions;
+        each Descriptions cell holds the list of <li> texts found in the job description
+    """
+    columns = ["Companies", "Positions", "ID", "Descriptions"]
+    rows = []
+
+    # for each job (the loop variable is not called `id`, which would shadow the builtin)
+    for job_id in job_ids:
+        try:
+            driver.get(job_link + "&vjk=" + job_id)
+
+            # wait (up to 10 seconds per lookup) for each element to be visible, then get it
+            wait = WebDriverWait(driver, 10)
+            desc_elements = wait.until(EC.visibility_of_all_elements_located(
+                (By.XPATH, '//div[@id="vjs-desc"]//li')))
+            desc_li = [el.text for el in desc_elements]  # keep only the text of the WebElements
+            company = wait.until(
+                EC.visibility_of_element_located((By.CSS_SELECTOR, '#vjs-cn'))).text
+            position = wait.until(
+                EC.visibility_of_element_located((By.CSS_SELECTOR, '#vjs-jobtitle'))).text
+        except Exception:
+            # Exception rather than a bare except, so Ctrl-C and SystemExit still stop the crawl.
+            print("Oops!", sys.exc_info()[0], "occurred.")
+            rows.append((np.nan, np.nan, np.nan, np.nan))
+        else:
+            rows.append((company, position, job_id, desc_li))
+
+    # Build the frame straight from the rows: the description lists have different lengths,
+    # and a ragged nested list cannot be turned into a regular numpy array first.
+    return pd.DataFrame(rows, columns=columns)
+
+
+def write_to_csv(dframe):
+    """
+    Export the DataFrame to data_test.csv inside the data folder of the current working
+    directory, creating that folder first when it does not exist yet.
+
+    :param dframe: the DataFrame to export; it is written with a header row and without
+        the index column
+    :return: No return
+    """
+    # The base is the (symlink-resolved) working directory, not the directory of this script.
+    # Paths are joined with os.path.join instead of hard-coded backslashes, which were
+    # Windows-only and formed invalid escape sequences such as "\d".
+    directory = os.path.join(os.path.realpath(os.curdir), "data")
+    try:
+        # Create target Directory
+        os.mkdir(directory)
+        print("Directory ", directory, " Created ")
+    except FileExistsError:
+        print("Directory ", directory, " already exists")
+
+    dframe.to_csv(os.path.join(directory, "data_test.csv"), index=False, header=True)
+
+# def get_desc_test(driver, job_link)
+
+
+def main():
+    """
+    Run everything: collect the job ids of the data scientist search, store them in the
+    ids file, then scrape each job and export the result to a .csv file.
+
+    :return: nothing
+    """
+
+    driver_path = ChromeDriverManager().install()
+
+    # Collect the ids found on the first result pages
+    num_pages = 5
+    all_ids, driver = get_all_ids(driver_path, master_job_link2, num_pages)
+    try:
+        write_to_txt(all_ids)
+
+        # Read the ids back together with the time they were stored, then scrape every job.
+        # The time is kept in crawl_time so the imported time module is not shadowed.
+        deez_ids, crawl_time = read_from_txt()
+        print("deez ids", deez_ids, "\n", "Time: ", crawl_time)
+        df = get_desc(driver, master_job_link2, deez_ids)
+        write_to_csv(df)
+    finally:
+        # Always close Chrome, even when storing, scraping or exporting fails.
+        driver.quit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/extract_skills.py b/extract_skills.py
index 5cb7ca7..996ffbc 100644
--- a/extract_skills.py
+++ b/extract_skills.py
@@ -3,10 +3,11 @@
# @author: Osamah Mandawi
# @email: oamandawi@brandeis.edu
-"""This is an explanation of the structure of indeed.com
+"""
+This is an explanation of the structure of indeed.com
"""
# This is an example of what the first job listings page for software
-# engineering in MA looks like: https://www.indeed.com/jobs?q=software+engineer&l=MA&sort=date
+ # engineering in MA looks like: https://www.indeed.com/jobs?q=software+engineer&l=MA&sort=date
# Now, if we look at a single job: https://www.indeed.com/jobs?q=software+engineer&l=MA&sort=date&vjk=3916106ade6d80b3
# Note that this is the same URL as the one before, with only vjk=3916106ade6d80b3, the unique job id, added to it.
# Overall, this means we can replace the text after q= to get results for a different job (with spaces converted to +),
@@ -41,7 +42,8 @@
def headless_options():
- """Sets the configurations for the driver. In our case, we add the headless settings because we want the program to crawl in the background
+ """
+ Sets the configurations for the driver. In our case, we add the headless settings because we want the program to crawl in the background
Returns:
options -- the options configurations to be used in the Google Chrome driver
@@ -64,7 +66,8 @@ def headless_options():
def set_driver_path():
- """Sometimes your driver path is not installed. Other times, you don't know where it is. This installs it, if it's not there, and returns
+ """
+ Sometimes your driver path is not installed. Other times, you don't know where it is. This installs it, if it's not there, and returns
where it is, when it's there.
Returns:
@@ -132,10 +135,10 @@ def gather_job_listings(pages_range, search_url_master, driver_path):
sites = []
for i in range(0, pages_range):
driver = set_driver(driver_path)
- #! crawl to the first page of the search
+ # crawl to the first page of the search
driver.get(
search_url_master+'&start='+str(i*10))
- #! get the ids of all the job listings
+ # get the ids of all the job listings
ids = driver.find_elements_by_xpath('//*[@data-jk]')
jdks = []
for ii in ids:
@@ -222,6 +225,7 @@ def end_gui(start_time, end_time, counter_dict, sites):
" jobs, sorted from most needed to least:\n"+result+"\nFinished in "+str(datetime.timedelta(seconds=(end_time-start_time))))
+
def bar_print(field, state, sites, counter_dict):
"""Prints in a nice graph then saves it
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..48e341a
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,3 @@
+{
+ "lockfileVersion": 1
+}