From 9126e4d01d7a64bdd86f466a094bd72fb3343a25 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Sat, 30 Dec 2017 14:04:22 -0600
Subject: [PATCH 01/12] check for pyQt5 version 5.9.3 or above

---
 notebooks/Sampling and Labeling.ipynb | 10 ----------
 py_labeler/labeler/labeler.py         |  5 ++++-
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/notebooks/Sampling and Labeling.ipynb b/notebooks/Sampling and Labeling.ipynb
index 4d04403..e0a99d9 100644
--- a/notebooks/Sampling and Labeling.ipynb	
+++ b/notebooks/Sampling and Labeling.ipynb	
@@ -15,16 +15,6 @@
     "First, we need to import py_entitymatching package and other libraries as follows:"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys\n",
-    "sys.path.append('/Users/pradap/Documents/Research/Python-Package/anhaid/py_labeler')"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 2,
diff --git a/py_labeler/labeler/labeler.py b/py_labeler/labeler/labeler.py
index 438a0fd..6ecd7c5 100644
--- a/py_labeler/labeler/labeler.py
+++ b/py_labeler/labeler/labeler.py
@@ -10,9 +10,10 @@
     from PyQt5.QtWebChannel import QWebChannel
     from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineScript
     from PyQt5.QtWidgets import QApplication
+    from PyQt5.QtCore import QT_VERSION_STR
 except ImportError:
     raise ImportError('PyQt5 is not installed. Please install PyQt5 to use '
-                      'GUI related functions in py_entitymatching.')
+                      'GUI related functions in py_labeler.')
 
 from py_labeler.labeler.controller.FilterController import FilterController
 from py_labeler.labeler.controller.LabelUpdateController import LabelUpdateController
@@ -175,6 +176,8 @@ def label_table(df, label_column_name):
     """
     if sys.version_info < (3, 5):
         raise ImportError("Python 3.3 or greater is required")
+    if tuple(int(p) for p in QT_VERSION_STR.split('.')[:3]) < (5, 9, 3):
+        raise ImportError("PyQt 5.9.3 or greater is required")
     _validate_inputs(df, label_column_name)
     df = df.copy(deep=True)
 

From 75a7d5940686fdf2ea6be10cb60dd89814d89896 Mon Sep 17 00:00:00 2001
From: kvpradap <pradap@cs.wisc.edu>
Date: Sat, 30 Dec 2017 14:49:46 -0600
Subject: [PATCH 02/12] [CLN] Removed unnecessary files.

---
 conda.recipe/meta.yaml                |    4 +-
 docs/contributing.rst                 |    8 +-
 docs/make_copy.sh                     |    9 -
 notebooks/Sampling and Labeling.ipynb |   64 +-
 py_labeler/catalog/__init__.py        |    0
 py_labeler/catalog/catalog.py         |  127 ---
 py_labeler/catalog/catalog_manager.py | 1437 -------------------------
 py_labeler/io/__init__.py             |    0
 py_labeler/io/parsers.py              |  465 --------
 requirements.yml                      |    2 +-
 10 files changed, 60 insertions(+), 2056 deletions(-)
 delete mode 100644 docs/make_copy.sh
 delete mode 100644 py_labeler/catalog/__init__.py
 delete mode 100644 py_labeler/catalog/catalog.py
 delete mode 100644 py_labeler/catalog/catalog_manager.py
 delete mode 100644 py_labeler/io/__init__.py
 delete mode 100644 py_labeler/io/parsers.py

diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
index c826073..95cadec 100644
--- a/conda.recipe/meta.yaml
+++ b/conda.recipe/meta.yaml
@@ -31,7 +31,7 @@ requirements:
 test:
   # Python imports
   imports:
-    - py_entitymatching
+    - py_labeler
 
   # commands:
     # You can put test commands to be run here.  Use this to test that the
@@ -46,7 +46,7 @@ test:
     # - nose
 
 about:
-  home: https://sites.google.com/site/anhaidgroup/projects/magellan/py_entitymatching
+  home: https://sites.google.com/site/anhaidgroup/projects/magellan/py_labeler
   license: BSD License
   summary: 'Python library for entity matching.'
 
diff --git a/docs/contributing.rst b/docs/contributing.rst
index 572815e..4660aaa 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -26,7 +26,7 @@ in the documentation and thinking 'this can be improved'...you can do something
 about it!
 
 Feel free to ask questions on the `mailing list
-<https://groups.google.com/forum/#!forum/py_entitymatching>`_
+<https://groups.google.com/forum/#!forum/py_labeler>`_
 
 Bug reports and enhancement requests
 ====================================
@@ -51,10 +51,10 @@ Bug reports must:
       ...
       ```
 
-#. Include the full version string of *magellan_labeler*. You can find the version as follows::
+#. Include the full version string of *py_labeler*. You can find the version as follows::
 
-      >>> import magellan_labeler as em
-      >>> em.__version__
+      >>> import py_labeler as pl
+      >>> pl.__version__
 
 #. Explain why the current behavior is wrong/not desired and what you expect instead.
 
diff --git a/docs/make_copy.sh b/docs/make_copy.sh
deleted file mode 100644
index 4ed1002..0000000
--- a/docs/make_copy.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-cd /Users/pradap/Documents/Research/Python-Package/anhaid/py_entitymatching/docs
-make clean html
-cd _build/html
-scp -r * pradap@trinity.cs.wisc.edu:~/public/html-www/magellan/user_manual/multi_page
-cd /Users/pradap/Documents/Research/Python-Package/anhaid/py_entitymatching/docs
-make clean singlehtml
-cd _build/singlehtml
-scp -r * pradap@trinity.cs.wisc.edu:~/public/html-www/magellan/user_manual/single_page
-cd /Users/pradap/Documents/Research/Python-Package/anhaid/py_entitymatching/docs
diff --git a/notebooks/Sampling and Labeling.ipynb b/notebooks/Sampling and Labeling.ipynb
index e0a99d9..18d7631 100644
--- a/notebooks/Sampling and Labeling.ipynb	
+++ b/notebooks/Sampling and Labeling.ipynb	
@@ -17,7 +17,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2017-12-30T18:12:19.100155Z",
@@ -29,24 +29,24 @@
     "# Import py_labeler package\n",
     "import py_labeler as labeler\n",
     "import os\n",
-    "import pandas as pd\n"
+    "import pandas as pd"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Get the datasets directory\n",
     "datasets_dir = labeler.get_install_path() + os.sep + 'tests' + os.sep + 'test_datasets'\n",
     "\n",
-    "path_C = datasets_dir + os.sep + 'C1.csv'\n"
+    "path_C = datasets_dir + os.sep + 'C1.csv'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -171,7 +171,7 @@
        "4               1988     Joseph Kuan           94122               1982  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -182,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -191,7 +191,7 @@
        "14"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -209,9 +209,51 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import PyQt5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'5.6.2'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "PyQt5.QtCore.QT_VERSION_STR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ImportError",
+     "evalue": "PyQt 5.9.3 or greater is required",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-6-0f3149945f8c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Label the data set\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# Specify the name for the label column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mG\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabeler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabel_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'gold_label'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/miniconda3/lib/python3.5/site-packages/py_labeler-0.1.0-py3.5.egg/py_labeler/labeler/labeler.py\u001b[0m in \u001b[0;36mlabel_table\u001b[0;34m(df, label_column_name)\u001b[0m\n\u001b[1;32m    178\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Python 3.3 or greater is required\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    179\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mQT_VERSION_STR\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m'5.9.3'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"PyQt 5.9.3 or greater is required\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    181\u001b[0m     \u001b[0m_validate_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel_column_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    182\u001b[0m     \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mImportError\u001b[0m: PyQt 5.9.3 or greater is required"
+     ]
+    }
+   ],
    "source": [
     "# Label the data set\n",
     "# Specify the name for the label column\n",
@@ -503,7 +545,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.4"
+   "version": "3.5.2"
   }
  },
  "nbformat": 4,
diff --git a/py_labeler/catalog/__init__.py b/py_labeler/catalog/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/py_labeler/catalog/catalog.py b/py_labeler/catalog/catalog.py
deleted file mode 100644
index 0a220a9..0000000
--- a/py_labeler/catalog/catalog.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# coding=utf-8
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class Singleton(object):
-    """
-    A non-thread-safe helper class to ease implementing singletons.
-    This should be used as a decorator -- not a metaclass -- to the
-    class that should be a singleton.
-    The decorated class can define one `__init__` function that
-    takes only the `self` argument. Other than that, there are
-    no restrictions that apply to the decorated class.
-    To get the singleton instance, use the `Instance` method. Trying
-    to use `__call__` will result in a `TypeError` being raised.
-    Limitations: The decorated class cannot be inherited from.
-    """
-
-    def __init__(self, decorated):
-        self._decorated = decorated
-
-    # noinspection PyPep8Naming
-    def Instance(self):
-        """
-        Returns the singleton instance. Upon its first call, it creates a
-        new instance of the decorated class and calls its `__init__` method.
-        On all subsequent calls, the already created instance is returned.
-        """
-        try:
-            return self._instance
-        except AttributeError:
-            # noinspection PyAttributeOutsideInit
-            self._instance = self._decorated()
-            return self._instance
-
-    def __call__(self):
-        raise TypeError('Singletons must be accessed through `Instance()`.')
-
-    def __instancecheck__(self, inst):
-        return isinstance(inst, self._decorated)
-
-
-@Singleton
-class Catalog(object):
-    """
-    Class to store and retrieve catalog information
-    """
-
-    def __init__(self):
-        self.properties_catalog = {}
-
-    def init_properties_for_id(self, obj_id):
-        self.properties_catalog[obj_id] = {}
-        return True
-
-    def init_properties(self, df):
-        df_id = id(df)
-        self.init_properties_for_id(df_id)
-
-    def get_property_for_id(self, obj_id, name):
-        d = self.properties_catalog[obj_id]
-        return d[name]
-
-    def get_property(self, df, name):
-        df_id = id(df)
-        return self.get_property_for_id(df_id, name)
-
-    def set_property_for_id(self, obj_id, name, value):
-        d = self.properties_catalog[obj_id]
-        d[name] = value
-        self.properties_catalog[obj_id] = d
-        return True
-
-    def set_property(self, df, name, value):
-        df_id = id(df)
-        return self.set_property_for_id(df_id, name, value)
-
-    def get_all_properties_for_id(self, obj_id):
-        d = self.properties_catalog[obj_id]
-        return d
-
-    def get_all_properties(self, df):
-        df_id = id(df)
-        return self.get_all_properties_for_id(df_id)
-
-    def del_property_for_id(self, obj_id, name):
-        d = self.properties_catalog[obj_id]
-        del d[name]
-        self.properties_catalog[obj_id] = d
-        return True
-
-    def del_property(self, df, name):
-        df_id = id(df)
-        return self.del_property_for_id(df_id, name)
-
-    def del_all_properties_for_id(self, obj_id):
-        del self.properties_catalog[obj_id]
-        return True
-
-    def del_all_properties(self, df):
-        df_id = id(df)
-        return self.del_all_properties_for_id(df_id)
-
-    def get_catalog(self):
-        return self.properties_catalog
-
-    def del_catalog(self):
-        self.properties_catalog = {}
-        return True
-
-    def get_catalog_len(self):
-        return len(self.properties_catalog)
-
-    def is_catalog_empty(self):
-        return len(self.properties_catalog) == 0
-
-    def is_df_info_present_in_catalog(self, df):
-        return id(df) in self.properties_catalog
-
-    def is_property_present_for_id(self, obj_id, name):
-        d = self.properties_catalog[obj_id]
-        return name in d
-
-    def is_property_present_for_df(self, df, name):
-        df_id = id(df)
-        return self.is_property_present_for_id(df_id, name)
diff --git a/py_labeler/catalog/catalog_manager.py b/py_labeler/catalog/catalog_manager.py
deleted file mode 100644
index d344a5d..0000000
--- a/py_labeler/catalog/catalog_manager.py
+++ /dev/null
@@ -1,1437 +0,0 @@
-# coding=utf-8
-"""
-This module contains wrapper functions for the catalog.
-"""
-import logging
-
-import pandas as pd
-import six
-
-import py_labeler.utils.catalog_helper as ch
-from py_labeler.catalog.catalog import Catalog
-from py_labeler.utils.validation_helper import validate_object_type
-
-logger = logging.getLogger(__name__)
-
-
-def get_property(data_frame, property_name):
-    """
-    Gets the value of a property (with the given property name) for a pandas
-    DataFrame from the catalog.
-
-    Args:
-        data_frame (DataFrame): The DataFrame for which the property should be
-            retrieved.
-        property_name (string): The name of the property that should be
-            retrieved.
-
-    Returns:
-        A Python object (typically a string or a pandas DataFrame depending
-        on the property name) is returned.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type pandas
-         DataFrame.
-        AssertionError: If `property_name` is not of type string.
-        KeyError: If `data_frame` information is not present in the catalog.
-        KeyError: If requested property for the `data_frame` is not present
-            in the catalog.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.get_property(A, 'key')
-         # id
-    """
-    # Validate input parameters
-
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # The property name should be of type string
-    validate_object_type(property_name, six.string_types, error_prefix='Property name')
-
-    # Get the catalog instance, this is imported here because this object
-    # used to validate the presence of a DataFrame in the catalog, and the
-    # presence of requested metadata in the catalog.
-    catalog = Catalog.Instance()
-
-    # Check for the present of input DataFrame in the catalog.
-    if not catalog.is_df_info_present_in_catalog(data_frame):
-        logger.error('DataFrame information is not present in the catalog')
-        raise KeyError('DataFrame information is not present in the catalog')
-
-    # Check if the requested property is present in the catalog.
-    if not catalog.is_property_present_for_df(data_frame, property_name):
-        logger.error(
-            'Requested metadata ( %s ) for the given DataFrame is not '
-            'present in the catalog' % property_name)
-        raise KeyError(
-            'Requested metadata ( %s ) for the given DataFrame is not '
-            'present in the catalog' % property_name)
-
-    # Return the requested property for the input DataFrame
-    return catalog.get_property(data_frame, property_name)
-
-
-def set_property(data_frame, property_name, property_value):
-    """
-    Sets the value of a property (with the given property name) for a pandas
-    DataFrame in the catalog.
-
-    Args:
-        data_frame (DataFrame): The DataFrame for which the property must  be
-            set.
-        property_name (string): The name of the property to be set.
-        property_value (object): The value of the property to be set. This is
-            typically a string (such as key) or pandas DataFrame (such as
-            ltable, rtable).
-
-    Returns:
-        A Boolean value of True is returned if the update was successful.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type pandas
-         DataFrame.
-        AssertionError: If `property_name` is not of type string.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_property(A, 'key', 'id')
-        >>> pl.get_property(A, 'key')
-         # id
-        >>> pl.get_key(A)
-         # id
-
-
-    Note:
-        If the input DataFrame is not present in the catalog, this function
-        will create an entry in the catalog and set the given property.
-
-    """
-    # Validate input parameters
-
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # The property name should be of type string
-    validate_object_type(property_name, six.string_types, error_prefix='Property name')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the DataFrame information is present in the catalog. If the
-    # information is not present, then initialize an entry for that DataFrame
-    #  in the catalog.
-    if not catalog.is_df_info_present_in_catalog(data_frame):
-        catalog.init_properties(data_frame)
-
-    # Set the property in the catalog, and relay the return value from the
-    # underlying catalog object's function. The return value is typically
-    # True if the update was successful.
-    return catalog.set_property(data_frame, property_name, property_value)
-
-
-def init_properties(data_frame):
-    """
-    Initializes properties for a pandas DataFrame in the catalog.
-
-    Specifically, this function creates an entry in the catalog and sets its
-    properties to empty.
-
-    Args:
-        data_frame (DataFrame): DataFrame for which the properties must be
-            initialized.
-
-    Returns:
-        A Boolean value of True is returned if the initialization was
-        successful.
-
-    """
-    # Validate input parameters
-
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Initialize the property in the catalog.
-    # Relay the return value from the underlying catalog object's function.
-    # The return value is typically True if the initialization was successful
-    return catalog.init_properties(data_frame)
-
-
-def get_all_properties(data_frame):
-    """
-    Gets all the properties for a pandas DataFrame object from the catalog.
-
-    Args:
-        data_frame (DataFrame): DataFrame for which the properties must be
-            retrieved.
-
-    Returns:
-        A dictionary containing properties for the input pandas DataFrame.
-
-    Raises:
-        AttributeError: If the input object is not of type pandas DataFrame.
-        KeyError: If the information about DataFrame is not present in the
-            catalog.
-
-
-    """
-    # Validate input parameters
-    # # The input object is expected to be of type DataFrame
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the DataFrame information is present in the catalog. If not
-    # raise an error.
-    if not catalog.is_df_info_present_in_catalog(data_frame):
-        logger.error('DataFrame information is not present in the catalog')
-        raise KeyError('DataFrame information is not present in the catalog')
-
-    # Retrieve the properties for the DataFrame from the catalog and return
-    # it back to the user.
-    return catalog.get_all_properties(data_frame)
-
-
-def del_property(data_frame, property_name):
-    """
-    Deletes a property for a pandas DataFrame from the catalog.
-
-    Args:
-        data_frame (DataFrame): The input DataFrame for which a property must be
-            deleted from the catalog.
-
-        property_name (string): The name of the property that should be deleted.
-
-    Returns:
-        A Boolean value of True is returned if the deletion was successful.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type pandas DataFrame.
-        AssertionError: If `property_name` is not of type string.
-        KeyError: If `data_frame` information is not present in the catalog.
-        KeyError: If requested property for the DataFrame is not present
-            in the catalog.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_property(A, 'key', 'id')
-        >>> pl.get_property(A, 'key')
-        # id
-        >>> pl.del_property(A, 'key')
-        >>> pl.is_property_present_for_df(A, 'key')
-        # False
-
-    """
-    # Validate input parameters
-
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # The property name should be of type string
-    validate_object_type(property_name, six.string_types, error_prefix='Property name')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the DataFrame information is present in the catalog, if not
-    # raise an error.
-    if not catalog.is_df_info_present_in_catalog(data_frame):
-        logger.error('DataFrame information is not present in the catalog')
-        raise KeyError('DataFrame information is not present in the catalog')
-
-    # Check if the requested property name to be deleted  is present for the
-    # DataFrame in the catalog, if not raise an error.
-    if not catalog.is_property_present_for_df(data_frame, property_name):
-        logger.error('Requested metadata ( %s ) for the given DataFrame is '
-                     'not present in the catalog' % property_name)
-        raise KeyError('Requested metadata ( %s ) for the given DataFrame is '
-                       'not present in the catalog' % property_name)
-
-    # Delete the property using the underlying catalog object and relay the
-    # return value. Typically the return value is True if the deletion was
-    # successful
-    return catalog.del_property(data_frame, property_name)
-
-
-def del_all_properties(data_frame):
-    """
-    Deletes all properties for a DataFrame from the catalog.
-
-    Args:
-        data_frame (DataFrame): Input DataFrame for which all the properties
-            must be deleted from the catalog.
-
-    Returns:
-        A boolean of True is returned if the deletion was successful
-        from the catalog.
-
-    Raises:
-        AssertionError: If the `data_frame` is not of type pandas DataFrame.
-        KeyError: If the DataFrame information is not present in the catalog.
-
-    Note:
-        This method's functionality is not as same as init_properties. Here
-        the DataFrame's entry will be removed from the catalog,
-        but init_properties will add (if the DataFrame is not present in the
-        catalog) and initialize its properties to an empty object (
-        specifically, an empty Python dictionary).
-    """
-    # Validations of input parameters
-    # # The input object is expected to be of type pandas DataFrame
-    if not isinstance(data_frame, pd.DataFrame):
-        logger.error('Input object is not of type pandas data frame')
-        raise AssertionError('Input object is not of type pandas data frame')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the DataFrame is present in the catalog. If not, raise an error
-    if not catalog.is_df_info_present_in_catalog(data_frame):
-        logger.error('DataFrame information is not present in the catalog')
-        raise KeyError('DataFrame information is not present in the catalog')
-
-    # Call the underlying catalog object's function to delete the properties
-    # and relay its return value
-    return catalog.del_all_properties(data_frame)
-
-
-def get_catalog():
-    """
-    Gets the catalog information for the current session.
-
-    Returns:
-        A Python dictionary containing the catalog information.
-
-        Specifically, the dictionary contains the Python identifier of a
-        DataFrame (obtained by id(DataFrame object)) as the key
-        and their properties as value.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> catalog = pl.get_catalog()
-
-    """
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-    # Call the underlying catalog object's function to get the catalog. Relay
-    # the return value from the delegated function.
-    return catalog.get_catalog()
-
-
-def del_catalog():
-    """
-    Deletes the catalog for the current session.
-
-    Returns:
-        A Boolean value of True is returned if the deletion was successful.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> pl.del_catalog()
-    """
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-    # Call the underlying catalog object's function to delete the catalog (a
-    # dict).  Relay the return value from the delegated function.
-    return catalog.del_catalog()
-
-
-def is_catalog_empty():
-    """
-    Checks if the catalog is empty.
-
-    Returns:
-        A Boolean value of True is returned if the catalog is empty,
-        else returns False.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.is_catalog_empty()
-         # False
-
-    """
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Call the underlying catalog object's function to check if the catalog
-    # is empty.  Relay the return value from the delegated function.
-    return catalog.is_catalog_empty()
-
-
-def is_dfinfo_present(data_frame):
-    """
-    Checks whether the DataFrame information is present in the catalog.
-
-    Args:
-        data_frame (DataFrame): The DataFrame that should be checked for its
-            presence in the catalog.
-
-    Returns:
-        A Boolean value of True is returned if the DataFrame is present in
-        the catalog, else False is returned.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type pandas
-         DataFrame.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.is_dfinfo_present(A)
-         # True
-
-    """
-    # Validate inputs
-    # We expect the input object to be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Call the underlying catalog object's function to check if the
-    # DataFrame information is present in the catalog.
-    # Relay the return value from the delegated function.
-    return catalog.is_df_info_present_in_catalog(data_frame)
-
-
-def is_property_present_for_df(data_frame, property_name):
-    """
-    Checks if the given property is present for the given DataFrame in the
-    catalog.
-
-    Args:
-        data_frame (DataFrame): The DataFrame for which the property must be
-            checked for.
-        property_name (string): The name of the property that should be
-        checked for its presence for the DataFrame, in the catalog.
-
-    Returns:
-        A Boolean value of True is returned if the property is present for
-        the given DataFrame.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type pandas
-         DataFrame.
-        AssertionError: If `property_name` is not of type string.
-        KeyError: If `data_frame` is not present in the catalog.
-
-    Examples:
-
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.is_property_present_for_df(A, 'id')
-         # True
-        >>> pl.is_property_present_for_df(A, 'fk_ltable')
-         # False
-
-    """
-    # Input validations
-
-    # # The input object should be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # The property name should be of type string
-    validate_object_type(property_name, six.string_types, error_prefix='Property name')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the given DataFrame information is present in the catalog. If
-    # not, raise an error.
-    if catalog.is_df_info_present_in_catalog(data_frame) is False:
-        logger.error('DataFrame information is not present in the catalog')
-        raise KeyError('DataFrame information is not present in the catalog')
-
-    # Call the underlying catalog object's function to check if the property
-    # is present for the given DataFrame. Relay the return value from that
-    # function.
-    return catalog.is_property_present_for_df(data_frame, property_name)
-
-
-def get_catalog_len():
-    """
-    Get the length (i.e the number of entries) in the catalog.
-
-    Returns:
-        The number of entries in the catalog as an integer.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> len = pl.get_catalog_len()
-
-    """
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-    # Call the underlying catalog object's function to get the catalog length.
-    # Relay the return value from that function.
-    return catalog.get_catalog_len()
-
-
-def set_properties(data_frame, properties, replace=True):
-    """
-    Sets the  properties for a DataFrame in the catalog.
-
-    Args:
-        data_frame (DataFrame): DataFrame for which the properties must be set.
-        properties (dict): A Python dictionary with keys as property names and
-            values as Python objects (typically strings or DataFrames)
-        replace (Optional[bool]): Flag to indicate whether the  input
-            properties can replace the properties in the catalog. The default
-            value for the flag is True.
-            Specifically, if the DataFrame information is already present in
-            the catalog then the function will check if the replace flag is
-            True. If the flag is set to True, then the function will first
-            delete the existing properties, set it with the given properties.
-            If the flag is False, the function will just return without
-            modifying the existing properties.
-
-
-    Returns:
-        A Boolean value of True is returned if the properties were set for
-        the given DataFrame, else returns False.
-
-    Raises:
-        AssertionError: If the input data_frame object is not of type pandas
-            DataFrame.
-        AssertionError: If the input properties object is not of type Python
-            dictionary.
-
-    """
-    # Validate input parameters
-    # # Input object is expected to be a pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # Input properties is expected to be of type Python dictionary
-    validate_object_type(properties, dict, error_prefix='The properties')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-    # Check if the the DataFrame information is present in the catalog. If
-    # present, we expect the replace flag to be True. If the flag was set to
-    # False, then warn the user and return False.
-    if catalog.is_df_info_present_in_catalog(data_frame):
-        if not replace:
-            logger.warning(
-                'Properties already exists for df ( %s ). Not replacing it'
-                % str(id(data_frame)))
-            return False
-        else:
-            # DataFrame information is present and replace flag is True. We
-            # now reset the properties dictionary for this DataFrame.
-            catalog.init_properties(data_frame)
-    else:
-        # The DataFrame information is not present in the catalog. so
-        # initialize the properties
-        catalog.init_properties(data_frame)
-
-    # Now iterate through the given properties and set for the DataFrame.
-    # Note: Here we don't check the correctness of the input properties (i.e
-    # we do not check if a property 'key' is indeed a key)
-    for property_name, property_value in six.iteritems(properties):
-        catalog.set_property(data_frame, property_name, property_value)
-
-    # Finally return True, if everything was successful
-    return True
-
-
-def copy_properties(source_data_frame, target_data_frame, replace=True):
-    """
-    Copies properties from a source DataFrame to target DataFrame in the
-    catalog.
-
-    Args:
-        source_data_frame (DataFrame): The DataFrame from which the properties
-            to be copied from, in the catalog.
-        target_data_frame (DataFrame): The DataFrame to which the properties
-            to be copied to, in the catalog.
-        replace (boolean): A flag to indicate whether the source
-            DataFrame's  properties can replace the target
-            DataFrame's properties in the catalog. The default value for the
-            flag is True.
-            Specifically, if the target DataFrame's information is already
-            present in the catalog then the function will check if the
-            replace flag is True. If the flag is set to True, then the
-            function will first delete the existing properties and then set
-            it with the source DataFrame properties.
-            If the flag is False, the function will just return without
-            modifying the existing properties.
-
-    Returns:
-        A Boolean value of True is returned if the copying was successful.
-
-    Raises:
-        AssertionError: If `source_data_frame` is not of
-            type pandas DataFrame.
-        AssertionError: If `target_data_frame` is not of
-            type pandas DataFrame.
-        KeyError: If source DataFrame is not present in the
-            catalog.
-
-    Examples:
-
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.copy_properties(A, B)
-        >>> pl.get_key(B)
-        # 'id'
-    """
-    # Validate input parameters
-
-    # # The source_data_frame is expected to be of type pandas DataFrame
-    validate_object_type(source_data_frame, pd.DataFrame, error_prefix='Input object (source_data_frame)')
-
-    # # The target_data_frame is expected to be of type pandas DataFrame
-    validate_object_type(target_data_frame, pd.DataFrame, error_prefix='Input object (target_data_frame)')
-
-    # Get the catalog instance
-    catalog = Catalog.Instance()
-
-    # Check if the source DataFrame information is present in the catalog. If
-    #  not raise an error.
-    if catalog.is_df_info_present_in_catalog(source_data_frame) is False:
-        logger.error(
-            'DataFrame information (source_data_frame) is not present in the '
-            'catalog')
-        raise KeyError(
-            'DataFrame information (source_data_frame) is not present in the '
-            'catalog')
-
-    # Get all properties for the source DataFrame
-    metadata = catalog.get_all_properties(source_data_frame)
-
-    # Set the properties to the target DataFrame. Specifically, call the set
-    # properties function and relay its return value.
-
-    # Note: There is a redundancy in validating the input parameters. This
-    # might have a slight performance impact, but we don't expect that this
-    # function gets called so often.
-    return set_properties(target_data_frame, metadata,
-                          replace)  # this initializes tar in the catalog.
-
-
-# key related methods
-def get_key(data_frame):
-    """
-    Gets the value of 'key' property for a DataFrame from the catalog.
-
-    Args:
-        data_frame (DataFrame): The DataFrame for which the key must be
-            retrieved from the catalog.
-
-    Returns:
-        A string value containing the key column name is returned (if present).
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.get_key(A)
-        # 'id'
-
-
-    See Also:
-        :meth:`~magellan_labeler.get_property`
-
-    """
-    # This function is just a sugar to get the 'key' property for a DataFrame
-    return get_property(data_frame, 'key')
-
-
-def set_key(data_frame, key_attribute):
-    """
-    Sets the value of 'key' property for a DataFrame in the catalog with the
-    given attribute (i.e column name).
-
-    Specifically, this function set the the key attribute for the DataFrame
-    if the given attribute satisfies the following two properties:
-
-        The key attribute should have unique values.
-
-        The key attribute should not have missing values. A missing value
-        is represented as np.NaN.
-
-    Args:
-        data_frame (DataFrame): The DataFrame for which the key must be set in
-            the catalog.
-        key_attribute (string): The key attribute (column name) in the
-            DataFrame.
-
-    Returns:
-        A Boolean value of True is returned, if the given attribute
-        satisfies the conditions for a key and the update was successful.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type
-            pandas DataFrame.
-        AssertionError: If `key_attribute` is not of type string.
-        KeyError: If given `key_attribute` is not in the DataFrame columns.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.get_key(A)
-        # 'id'
-
-
-    See Also:
-        :meth:`~magellan_labeler.set_property`
-
-
-    """
-    # Validate input parameters
-
-    # # We expect the input object (data_frame) to be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # We expect input key attribute to be of type string
-    validate_object_type(key_attribute, six.string_types, error_prefix='Input key attribute')
-
-    # Check if the key attribute is present as one of the columns in the
-    # DataFrame
-    if not ch.check_attrs_present(data_frame, key_attribute):
-        logger.error('Input key ( %s ) not in the DataFrame' % key_attribute)
-        raise KeyError('Input key ( %s ) not in the DataFrame' % key_attribute)
-
-    # Check if the key attribute satisfies the conditions to be a key. If
-    # not, just return False.
-    # Note: Currently it is not clear, whether we should return False from
-    # here or raise an exception. As of now resorting to just returning
-    # False, because this function is used by other computation
-    # intensive commands in magellan_labeler and raising an exception might make all
-    # the work done in those commands go in vain (or those commands should
-    # catch the exception correctly, which may be complicated and require
-    # changes to the current code). We need to revisit this
-    # later.
-    if ch.is_key_attribute(data_frame, key_attribute) is False:
-        logger.warning('Attribute (%s ) does not qualify  to be a key; Not '
-                       'setting/replacing the key' % key_attribute)
-        return False
-    else:
-        # Set the key property for the input DataFrame
-        return set_property(data_frame, 'key', key_attribute)
-
-
-def get_fk_ltable(data_frame):
-    """
-    Gets the foreign key to left table for a DataFrame from the
-    catalog.
-
-    Specifically this function is a sugar function that will get the foreign
-    key to left table using underlying :meth:`~magellan_labeler.get_property` function.
-    This function is typically called on a DataFrame which contains metadata
-    such as fk_ltable, fk_rtable, ltable, rtable.
-
-
-    Args:
-        data_frame (DataFrame): The input DataFrame for which the foreign key
-            ltable property must be retrieved.
-
-    Returns:
-        A Python object, typically a string is returned.
-
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_fk_ltable(C, 'ltable_id')
-        >>> pl.get_fk_ltable(C)
-        # 'ltable_id'
-
-    See Also:
-        :meth:`~magellan_labeler.get_property`
-
-    """
-    # Call the get_property function and relay the result.
-    return get_property(data_frame, 'fk_ltable')
-
-
-def get_fk_rtable(data_frame):
-    """
-    Gets the foreign key to right table for a DataFrame from the catalog.
-
-    Specifically this function is a sugar function that will get the foreign
-    key to right table using :meth:`magellan_labeler.get_property` function. This
-    function is typically called on a DataFrame which contains metadata such as
-    fk_ltable, fk_rtable, ltable, rtable.
-
-    Args:
-        data_frame (DataFrame): The input DataFrame for which the foreign key
-            rtable property must be retrieved.
-
-    Returns:
-        A Python object, (typically a string) is returned.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_fk_rtable(C, 'rtable_id')
-        >>> pl.get_fk_rtable(C)
-        # 'rtable_id'
-
-
-    See Also:
-        :meth:`~magellan_labeler.get_property`
-    """
-    # Call the get_property function and relay the result.
-    return get_property(data_frame, 'fk_rtable')
-
-
-def set_fk_ltable(data_frame, fk_ltable):
-    """
-    Sets the foreign key to ltable for a DataFrame in the catalog.
-
-    Specifically this function is a sugar function that will set the foreign
-    key to the left table using :meth:`magellan_labeler.set_property` function. This
-    function is typically called on a DataFrame which contains metadata such as
-    fk_ltable, fk_rtable, ltable, rtable.
-
-    Args:
-        data_frame (DataFrame): The input DataFrame for which the foreign key
-            ltable property must be set.
-        fk_ltable (string): The attribute that must ne set as the foreign key
-            to the ltable in the catalog.
-
-    Returns:
-        A Boolean value of True is returned if the foreign key to ltable was
-        set successfully.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type
-            pandas DataFrame.
-        AssertionError: If `fk_ltable` is not of type
-            string.
-        AssertionError: If `fk_ltable` is not in the input
-            DataFrame.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_fk_ltable(C, 'ltable_id')
-        >>> pl.get_fk_ltable(C)
-        # 'ltable_id'
-
-
-
-    See Also:
-        :meth:`~magellan_labeler.set_property`
-
-    """
-    # Validate the input parameters
-    # # We expect the input object to be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    # # We expect the input fk_ltable to be of type string
-    validate_object_type(fk_ltable, six.string_types, error_prefix='The input (fk_ltable)')
-
-    # # The fk_ltable attribute should be one of the columns in the input
-    # DataFrame
-    if not ch.check_attrs_present(data_frame, fk_ltable):
-        logger.error('Input attr. ( %s ) not in the DataFrame' % fk_ltable)
-        raise KeyError('Input attr. ( %s ) not in the DataFrame' % fk_ltable)
-
-    # Call the set_property function and relay the result.
-    return set_property(data_frame, 'fk_ltable', fk_ltable)
-
-
-def validate_and_set_fk_ltable(foreign_data_frame, foreign_key_ltable, ltable,
-                               ltable_key):
-    """
-    Validates and set the foreign key ltable for a DataFrame in the the catalog.
-
-    Specifically, given a DataFrame and a foreign key attribute it checks
-    for the following conditions to be satisfied for the attribute. First it
-    checks that foreign key ltable attribute does not have any missing
-    values. Second it checks that the subset of foreign key values,
-    have unique values in the primary (base) table.
-
-    Args:
-        foreign_data_frame (DataFrame): DataFrame containing the foreign key
-            (typically a candidate set, for example output from blocking two
-            tables).
-        foreign_key_ltable (string): An attribute in the foreign DataFrame
-        ltable (DataFrame): Base DataFrame, in which the foreign key
-            attribute would form the primary key.
-        ltable_key (string): An attribute in the base table
-            (typically a primary key attribute).
-
-    Returns:
-        A Boolean value of True will be returned if the validation was
-        successful and the update was successful in the catalog.
-    Raises:
-        AssertionError: If the input foreign DataFrame (foreign_data_frame)
-            is not of type pandas DataFrame.
-        AssertionError: If the foreign key ltable (foreign_key_ltable) is not
-            of type string.
-        AssertionError: If the input ltable (ltable) is not of type pandas
-            DataFrame.
-        AssertionError: If the ltable key (ltable_key) is not of type string.
-
-
-    """
-
-    # check the foreign key constraint
-    # # Note all the validations are done inside the function
-    # check_fk_constraint
-    status = ch.check_fk_constraint(foreign_data_frame, foreign_key_ltable,
-                                    ltable, ltable_key)
-
-    # If the validation is successful then set the property
-    if status:
-        return set_property(foreign_data_frame, 'fk_ltable', foreign_key_ltable)
-    else:
-        # else report the error and just return False.
-        logger.warning(
-            'FK constraint for fk_ltable is not satisfied; '
-            'Not setting the fk_ltable')
-        return False
-
-
-def validate_and_set_fk_rtable(foreign_data_frame, foreign_key_rtable,
-                               rtable, rtable_key):
-    """
-    Validates and set the foreign key ltable for a DataFrame in the the catalog.
-
-    Specifically, given a DataFrame and a foreign key attribute it checks
-    for the following conditions to be satisfied for the attribute. First it
-    checks that foreign key rtable attribute does not have any missing
-    values. Second it checks that the subset of foreign key values,
-    have unique values in the primary (base) table.
-
-    Args:
-        foreign_data_frame (DataFrame): DataFrame containing the foreign key
-            (typically a candidate set, for example output from blocking two
-            tables).
-        foreign_key_rtable (string): An attribute in the foreign DataFrame
-        rtable (DataFrame): Base DataFrame, in which the foreign key
-            attribute would form the primary key.
-        rtable_key (string): An attribute in the base table
-            (typically a primary key attribute).
-
-    Returns:
-        A Boolean value of True will be returned if the validation was
-        successful and the update was successful in the catalog.
-    Raises:
-        AssertionError: If the input foreign DataFrame (foreign_data_frame)
-            is not of type pandas DataFrame.
-        AssertionError: If the foreign key ltable (foreign_key_ltable) is not
-            of type string.
-        AssertionError: If the input ltable (ltable) is not of type pandas
-            DataFrame.
-        AssertionError: If the ltable key (ltable_key) is not of type string.
-
-
-    """
-
-    # Validate the foreign key constraint
-    # Note: All the basic input validations are done inside the
-    # check_fk_constraint function.
-    status = ch.check_fk_constraint(foreign_data_frame, foreign_key_rtable,
-                                    rtable, rtable_key)
-
-    # If the validation was successful, then set the property
-    if status:
-        return set_property(foreign_data_frame, 'fk_rtable', foreign_key_rtable)
-    # else just warn and return False
-    else:
-        logger.warning(
-            'FK constraint for fk_rtable is not satisfied; Not '
-            'setting the fk_rtable and rtable')
-        return False
-
-
-def set_fk_rtable(data_frame, foreign_key_rtable):
-    """
-    Sets the foreign key to rtable for a DataFrame in the catalog.
-
-    Specifically this function is a sugar function that will set the foreign
-    key to right table using set_property function. This function
-    is typically called on a DataFrame which contains metadata such as
-    fk_ltable, fk_rtable, ltable, rtable.
-
-
-
-    Args:
-        data_frame (DataFrame): The input DataFrame for which the foreign key
-            rtable property must be set.
-        foreign_key_rtable (string): The attribute that must be set as
-            foreign key to rtable in the catalog.
-
-    Returns:
-        A Boolean value of True is returned if the foreign key to rtable was
-            set successfully.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type
-          pandas DataFrame.
-        AssertionError: If `foreign_key_rtable` is not of
-            type string.
-        AssertionError: If `fk_rtable` is not in the input
-            DataFrame.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_fk_rtable(C, 'rtable_id')
-        >>> pl.get_fk_rtable(C)
-        # 'rtable_id'
-
-
-    See Also:
-        :meth:`~magellan_labeler.set_property`
-
-    """
-    # Validate the input parameters
-    # # The input object is expected to be of type pandas DataFrame
-    validate_object_type(data_frame, pd.DataFrame)
-
-    validate_object_type(foreign_key_rtable, six.string_types, error_prefix='Input (foreign key ltable)')
-
-    # Check if the given attribute is present in the DataFrame
-    if not ch.check_attrs_present(data_frame, foreign_key_rtable):
-        logger.error('Input attr. ( %s ) not in the DataFrame'
-                     % foreign_key_rtable)
-        raise KeyError('Input attr. ( %s ) not in the DataFrame'
-                       % foreign_key_rtable)
-
-    # Finally set the property and relay the result
-    return set_property(data_frame, 'fk_rtable', foreign_key_rtable)
-
-
-def show_properties(data_frame):
-    """
-    Prints the properties for a DataFrame that is present in the catalog.
-
-    Args:
-        data_frame (DataFrame): The input pandas DataFrame for which the
-            properties must be displayed.
-
-    Examples:
-        >>> A = pd.DataFrame({'key_attr' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'key_attr')
-        >>> pl.show_properties(A)
-        # id: 4572922488  # This will change dynamically
-        # key: key_attr
-
-
-
-    """
-    # Check if the DataFrame information is present in the catalog. If not
-    # return
-    if not is_dfinfo_present(data_frame):
-        logger.error('DataFrame information is not present in the catalog')
-        return
-
-    # Delegate it to show properties for the id if an object in the catalog
-    show_properties_for_id(id(data_frame))
-    # # Get the properties for the DataFrame from the catalog
-    # metadata = get_all_properties(data_frame)
-    #
-    # # First print the id for the DataFrame
-    # print('id: ' + str(id(data_frame)))
-    # # For each property name anf value, print the contents to the user
-    # for property_name, property_value in six.iteritems(metadata):
-    #     # If the property value is string print it out
-    #     if isinstance(property_value, six.string_types):
-    #         print(property_name + ": " + property_value)
-    #     # else, print just the id.
-    #     else:
-    #         print(property_name + "(obj.id): " + str(id(property_value)))
-
-
-def show_properties_for_id(object_id):
-    """
-    Shows the properties for an object id present in the catalog.
-
-    Specifically, given an object id got from typically executing id(
-    <object>), where the object could be a DataFrame, this function will
-    display the properties present for that object id in the catalog.
-
-    Args:
-        object_id (int): The Python identifier of an object (typically a
-         pandas DataFrame).
-
-    Examples:
-        >>> A = pd.DataFrame({'key_attr' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'key_attr')
-        >>> pl.show_properties_for_id(id(A))
-        # id: 4572922488  # This will change dynamically
-        # key: key_attr
-
-
-    """
-    catalog = Catalog.Instance()
-    metadata = catalog.get_all_properties_for_id(object_id)
-    # First print the id for the DataFrame
-    print('id: ' + str(object_id))
-    # For each property name anf value, print the contents to the user
-    for property_name, property_value in six.iteritems(metadata):
-        # If the property value is string print it out
-        if isinstance(property_value, six.string_types):
-            print(property_name + ": " + property_value)
-        # else, print just the id.
-        else:
-            print(property_name + "(obj.id): " + str(id(property_value)))
-
-
-def set_candset_properties(candset, key, foreign_key_ltable,
-                           foreign_key_rtable, ltable, rtable):
-    """
-    Sets candidate set properties.
-
-    Specifically, this is a sugar function that sets all the properties for a
-    candidate set such as key, foreign key ltable, foreign key rtable,
-    ltable and rtable. Further, this function does not check the integrity of
-    input properties.
-
-
-
-    Args:
-        candset (DataFrame): Input DataFrame for which the properties must be
-            set.
-        key (string): Key attribute that must be set for the DataFrame in the
-            catalog.
-        foreign_key_ltable (string): Foreign key ltable attribute that must be
-            set for the DataFrame in the catalog.
-        foreign_key_rtable (string): Foreign key rtable attribute that must be
-            set for the DataFrame in the catalog.
-        ltable (DataFrame): DataFrame that must be set as ltable.
-        rtable (DataFrame): DataFrame that must be set as rtable.
-
-    Returns:
-        A Boolean value of True is returned if the updates were successful.
-
-    """
-    # set the key
-    set_property(candset, 'key', key)
-    # set the foreign key attributes
-    set_fk_ltable(candset, foreign_key_ltable)
-    set_fk_rtable(candset, foreign_key_rtable)
-    # set the ltable and rtables
-    set_property(candset, 'ltable', ltable)
-    set_property(candset, 'rtable', rtable)
-    return True
-
-
-def _validate_metadata_for_table(table, key, output_string, lgr, verbose):
-    """
-    Validates metadata for table (DataFrame)
-
-    """
-    # Validate input parameters
-    # # We expect the input table to be of type pandas DataFrame
-    validate_object_type(table, pd.DataFrame)
-
-    # Check the key column is present in the table
-    if not ch.check_attrs_present(table, key):
-        raise KeyError('Input key ( %s ) not in the DataFrame' % key)
-
-    # Validate the key
-    ch.log_info(lgr, 'Validating ' + output_string + ' key: ' + str(key),
-                verbose)
-    # We expect the key to be of type string
-    validate_object_type(key, six.string_types, error_prefix='Key attribute')
-
-    if not ch.is_key_attribute(table, key, verbose):
-        raise AssertionError('Attribute %s in the %s table does not '
-                             'qualify to be the key' % (
-                                 str(key), output_string))
-    ch.log_info(lgr, '..... Done', verbose)
-    return True
-
-
-def _validate_metadata_for_candset(candset, key, foreign_key_ltable,
-                                   foreign_key_rtable,
-                                   ltable, rtable,
-                                   ltable_key, rtable_key,
-                                   lgr, verbose):
-    """
-    Validates metadata for a candidate set.
-
-    """
-    # Validate input parameters
-    # # We expect candset to be of type pandas DataFrame
-    validate_object_type(candset, pd.DataFrame, error_prefix='Input candset')
-
-    # Check if the key column is present in the candset
-    if not ch.check_attrs_present(candset, key):
-        raise KeyError('Input key ( %s ) not in the DataFrame' % key)
-
-    # Check if the foreign key ltable column is present in the candset
-    if not ch.check_attrs_present(candset, foreign_key_ltable):
-        raise KeyError(
-            'Input foreign_key_ltable ( %s ) not in the DataFrame'
-            % foreign_key_ltable)
-
-    # Check if the foreign key rtable column is present in the candset
-    if not ch.check_attrs_present(candset, foreign_key_rtable):
-        raise KeyError(
-            'Input fk_rtable ( %s ) not in the DataFrame' % foreign_key_rtable)
-
-    # We expect the ltable to be of type pandas DataFrame
-    validate_object_type(ltable, pd.DataFrame, error_prefix='Input ltable')
-
-    # We expect the rtable to be of type pandas DataFrame
-    validate_object_type(rtable, pd.DataFrame, error_prefix='Input rtable')
-
-    # We expect the ltable key to be present in the ltable
-    if not ch.check_attrs_present(ltable, ltable_key):
-        raise KeyError('ltable key ( %s ) not in ltable' % ltable_key)
-
-    # We expect the rtable key to be present in the rtable
-    if not ch.check_attrs_present(rtable, rtable_key):
-        raise KeyError('rtable key ( %s ) not in rtable' % rtable_key)
-
-    # First validate metadata for the candidate set (as a table)
-    _validate_metadata_for_table(candset, key, 'candset', lgr, verbose)
-
-    ch.log_info(lgr, 'Validating foreign key constraint for left table',
-                verbose)
-    # Second check foreign key constraints
-    if not ch.check_fk_constraint(candset, foreign_key_ltable,
-                                  ltable, ltable_key):
-        raise AssertionError(
-            'Candset does not satisfy foreign key constraint with '
-            'the left table')
-
-    if not ch.check_fk_constraint(candset, foreign_key_rtable,
-                                  rtable, rtable_key):
-        raise AssertionError(
-            'Candset does not satisfy foreign key constraint with '
-            'the right table')
-
-    ch.log_info(lgr, '..... Done', verbose)
-    ch.log_info(lgr, 'Validating foreign key constraint for right table',
-                verbose)
-    ch.log_info(lgr, '..... Done', verbose)
-
-    return True
-
-
-# noinspection PyIncorrectDocstring
-def get_keys_for_ltable_rtable(ltable, rtable, lgr, verbose):
-    """
-    Gets keys for the ltable and rtable.
-    """
-    # We expect the ltable to be of type pandas DataFrame
-    if not isinstance(ltable, pd.DataFrame):
-        logger.error('Input ltable is not of type pandas data frame')
-        raise AssertionError('Input ltable is not of type pandas data frame')
-
-    # We expect the rtable to be of type pandas DataFrame
-    if not isinstance(rtable, pd.DataFrame):
-        logger.error('Input rtable is not of type pandas data frame')
-        raise AssertionError('Input rtable is not of type pandas data frame')
-
-    ch.log_info(lgr, 'Required metadata: ltable key, rtable key', verbose)
-    ch.log_info(lgr, 'Getting metadata from the catalog', verbose)
-    # Get the ltable key and rtable key from the catalog
-    ltable_key = get_key(ltable)
-    rtable_key = get_key(rtable)
-    ch.log_info(lgr, '..... Done', verbose)
-    # return the ltable and rtable keys
-    return ltable_key, rtable_key
-
-
-# noinspection PyIncorrectDocstring
-def get_metadata_for_candset(candset, lgr, verbose):
-    """
-    Gets metadata for the candset
-
-    """
-    # Validate input parameters
-    validate_object_type(candset, pd.DataFrame, error_prefix='Input candset')
-
-    ch.log_info(lgr, 'Getting metadata from the catalog', verbose)
-    # Get the key, foreign keys, ltable, rtable and their keys
-    # # Get key
-    key = get_key(candset)
-    # # Get the foreign keys
-    fk_ltable = get_fk_ltable(candset)
-    fk_rtable = get_fk_rtable(candset)
-    # # Get the base tables
-    ltable = get_ltable(candset)
-    rtable = get_rtable(candset)
-    # Get the base table keys
-    l_key = get_key(ltable)
-    r_key = get_key(rtable)
-    ch.log_info(lgr, '..... Done', verbose)
-    # Return the metadata
-    return key, fk_ltable, fk_rtable, ltable, rtable, l_key, r_key
-
-
-# noinspection PyIncorrectDocstring
-def get_ltable(candset):
-    """
-    Gets the ltable for a DataFrame from the catalog.
-
-    Args:
-        candset (DataFrame): The input table for which the ltable must be
-            returned.
-
-    Returns:
-        A pandas DataFrame that is pointed by 'ltable' property of the input
-        table.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_ltable(C, A)
-        >>> id(pl.get_ltable(A) == id(A)
-        # True
-
-
-    See Also:
-        :meth:`~magellan_labeler.get_property`
-    """
-    # Return the ltable for a candidate set. This function is just a sugar
-    return get_property(candset, 'ltable')
-
-
-# noinspection PyIncorrectDocstring
-def get_rtable(candset):
-    """
-    Gets the rtable for a DataFrame from the catalog.
-
-    Args:
-        candset (DataFrame): Input table for which the rtable must be returned.
-
-    Returns:
-        A pandas DataFrame that is pointed by 'rtable' property of the input
-        table.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_rtable(C, B)
-        >>> id(pl.get_rtable(B) == id(B)
-        # True
-
-
-    See Also:
-        :meth:`~magellan_labeler.get_property`
-    """
-    # Return the rtable for a candidate set. This function is just a sugar
-
-    return get_property(candset, 'rtable')
-
-
-def set_ltable(candset, table):
-    """
-    Sets the ltable for a DataFrame in the catalog.
-
-    Args:
-        candset (DataFrame): The input table for which the ltable must be set.
-        table (DataFrame): The table (typically a pandas DataFrame) that must
-            be set as ltable for the input DataFrame.
-
-    Returns:
-        A Boolean value of True is returned, if the update was successful.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_ltable(C, A)
-        >>> id(pl.get_ltable(A) == id(A)
-        # True
-
-
-    See Also:
-        :meth:`~magellan_labeler.set_property`
-    """
-    # Return the ltable for a candidate set. This function is just a sugar
-    return set_property(candset, 'ltable', table)
-
-
-# noinspection PyIncorrectDocstring
-def set_rtable(candset, table):
-    """
-    Sets the rtable for a DataFrame in the catalog.
-
-    Args:
-        candset (DataFrame): The input table for which the rtable must be set.
-        table (DataFrame): The table that must be set as rtable for the input
-            DataFrame.
-
-    Returns:
-        A Boolean value of True is returned, if the update was successful.
-
-    Examples:
-        >>> import magellan_labeler as pl
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> B = pd.DataFrame({'id' : [1, 2], 'colA':['c', 'd'], 'colB' : [30, 40]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.set_key(B, 'id')
-        >>> C = pd.DataFrame({'id':[1, 2], 'ltable_id':[1, 2], 'rtable_id':[2, 1]})
-        >>> pl.set_key(C, 'id')
-        >>> pl.set_rtable(C, B)
-        >>> id(pl.get_rtable(B) == id(B)
-        # True
-
-
-    See Also:
-        :meth:`~magellan_labeler.set_property`
-    """
-    # Return the rtable for a candidate set. This function is just a sugar
-
-    return set_property(candset, 'rtable', table)
diff --git a/py_labeler/io/__init__.py b/py_labeler/io/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/py_labeler/io/parsers.py b/py_labeler/io/parsers.py
deleted file mode 100644
index 5d849b1..0000000
--- a/py_labeler/io/parsers.py
+++ /dev/null
@@ -1,465 +0,0 @@
-# coding=utf-8
-"""This module defines functions to read and write CSV files"""
-import collections
-import logging
-import os
-import pandas as pd
-import six
-
-import py_labeler.catalog.catalog_manager as cm
-from py_labeler.utils.validation_helper import validate_object_type
-
-logger = logging.getLogger(__name__)
-
-
-def read_csv_metadata(file_path, **kwargs):
-    """
-    Reads a CSV (comma-separated values) file into a pandas DataFrame
-    and update the catalog with the metadata. The CSV files typically contain
-    data for the input tables or a candidate set.
-
-    Specifically, this function first reads the CSV file from the given file
-    path into a pandas DataFrame, by using pandas' in-built 'read_csv'
-    method. Then, it updates the catalog with the metadata. There are three
-    ways to update the metadata: (1) using a metadata file, (2) using the
-    key-value parameters supplied in the function, and (3) using both
-    metadata file and key-value parameters.
-
-    To update the metadata in the catalog using the metadata file,
-    the function will look for a file in the same directory with  same file name
-    but with a  specific extension. This extension can be optionally given by
-    the user (defaults to '.metadata'). If the metadata  file is  present,
-    the function will read and update the catalog appropriately. If  the
-    metadata file is not present, the function will issue a warning that the
-    metadata file is not present.
-
-    The metadata information can also be given as parameters to the function
-    (see description of arguments for more details). If given, the function
-    will update the catalog with the given information.
-
-    Further, the metadata can partly reside in the metdata file and partly as
-    supplied parameters. The function will take a union of the two and
-    update the catalog appropriately.
-    If the same metadata is given in both the metadata file
-    and the function, then the metadata in the function takes precedence over
-    the metadata given in the file.
-
-    Args:
-        file_path(string): The CSV file path
-
-        kwargs(dictionary): A Python dictionary containing key-value arguments.
-            There are a few key-value pairs that are specific to
-            read_csv_metadata and  all the other key-value pairs are passed
-            to pandas read_csv method
-
-    Returns:
-        A pandas DataFrame read from the input CSV file.
-    Raises:
-        AssertionError: If `file_path` is not of type string.
-        AssertionError: If a file does not exist in the
-            given `file_path`.
-
-    Examples:
-        *Example 1:* Read from CSV file and set metadata
-
-        >>> A = pl.read_csv_metadata('path_to_csv_file', key='id')
-        >>> pl.get_key(A)
-         # 'id'
-
-        *Example 2:*  Read from CSV file (with metadata file in the same directory
-
-         Let the metadata file contain the following contents:
-
-          #key = id
-
-        >>> A = pl.read_csv_metadata('path_to_csv_file')
-        >>> pl.get_key(A)
-         # 'id'
-
-    See Also:
-        :meth:`~py_entitymatching.to_csv_metadata`
-    """
-    # Validate the input parameters.
-
-    validate_object_type(file_path, six.string_types, error_prefix='Input file path')
-
-    # # Check if the given path is valid.
-    if not os.path.exists(file_path):
-        logger.error('File does not exist at path %s' % file_path)
-        raise AssertionError('File does not exist at path %s' % file_path)
-
-    # Check if the user has specified the metadata file's extension.
-    extension = kwargs.pop('metadata_extn', None)
-
-    # If the extension is not specified then set the extension to .metadata'.
-    if extension is None:
-        extension = '.metadata'
-
-    # Format the extension to include a '.' in front if the user has not
-    # given one.
-    if not extension.startswith('.'):
-        extension = '.' + extension
-
-    # If the file is present, then update metadata from file.
-    if _is_metadata_file_present(file_path, extension=extension):
-        file_name, _ = os.path.splitext(file_path)
-        file_name = ''.join([file_name, extension])
-        metadata, _ = _get_metadata_from_file(file_name)
-
-    # Else issue a warning that the metadata file is not present
-    else:
-        logger.warning('Metadata file is not present in the given path; '
-                       'proceeding to read the csv file.')
-        metadata = {}
-
-    # Update the metadata with the key-value pairs given in the command. The
-    # function _update_metadata_for_read_cmd takes care of updating the
-    # metadata with only the key-value pairs specific to read_csv_metadata
-    # method
-    metadata, kwargs = _update_metadata_for_read_cmd(metadata, **kwargs)
-
-    # Validate the metadata.
-    _check_metadata_for_read_cmd(metadata)
-
-    # Read the csv file using pandas read_csv method.
-    data_frame = pd.read_csv(file_path, **kwargs)
-
-    # Get the value for 'key' property and update the catalog.
-    key = metadata.pop('key', None)
-    if key is not None:
-        cm.set_key(data_frame, key)
-
-    fk_ltable = metadata.pop('fk_ltable', None)
-    if fk_ltable is not None:
-        cm.set_fk_ltable(data_frame, fk_ltable)
-
-    fk_rtable = metadata.pop('fk_rtable', None)
-    if fk_ltable is not None:
-        cm.set_fk_rtable(data_frame, fk_rtable)
-
-    # Update the catalog with other properties.
-    for property_name, property_value in six.iteritems(metadata):
-        cm.set_property(data_frame, property_name, property_value)
-    if not cm.is_dfinfo_present(data_frame):
-        cm.init_properties(data_frame)
-
-    # Return the DataFrame
-    return data_frame
-
-
-def to_csv_metadata(data_frame, file_path, **kwargs):
-    """
-    Writes the DataFrame contents to a CSV file and the DataFrame's metadata
-    (to a separate text file).
-
-    This function writes the DataFrame contents to a CSV file in
-    the given file path. It uses 'to_csv' method from pandas to write
-    the CSV file. The metadata contents are written to the same directory
-    derived from the file path but with the different extension. This
-    extension can be optionally given by the user (with the default value
-    set to .metadata).
-
-    Args:
-        data_frame (DataFrame): The DataFrame that should be written to disk.
-        file_path (string):  The file path to which the DataFrame contents
-            should be written. Metadata is written with the same file name
-            with the extension given by the user (defaults to '.metadata').
-        kwargs (dictionary):  A Python dictionary containing key-value pairs.
-            There is one key-value pair that is specific to
-            to_csv_metadata: metadata_extn. All the other key-value pairs
-            are passed to pandas to_csv function.
-            Here the metadata_extn is the metadata
-            extension (defaults to '.metadata'), with which
-            the metadata file must be written.
-    Returns:
-        A Boolean value of True is returned if the files were written
-        successfully.
-
-    Raises:
-        AssertionError: If `data_frame` is not of type  pandas
-            DataFrame.
-        AssertionError: If `file_path` is not of type string.
-        AssertionError: If DataFrame cannot be written to the given
-         `file_path`.
-
-    Examples:
-
-        >>> import pandas as pd
-        >>> A = pd.DataFrame({'id' : [1, 2], 'colA':['a', 'b'], 'colB' : [10, 20]})
-        >>> pl.set_key(A, 'id')
-        >>> pl.to_csv_metadata(A, 'path_to_csv_file')
-
-
-    See Also:
-        :meth:`~py_entitymatching.read_csv_metadata`
-
-    """
-    # Validate input parameters
-
-    validate_object_type(data_frame, pd.DataFrame)
-
-    validate_object_type(file_path, six.string_types, error_prefix='Input file path')
-
-    # Check if the user has specified the metadata file's extension.
-    extension = kwargs.pop('metadata_extn', None)
-    if extension is None:
-        extension = '.metadata'
-    if not extension.startswith('.'):
-        extension = '.' + extension
-
-    # If the user has not specified whether the index should be written,
-    # we explicitly set it to be false. The reason is writing the index
-    # along makes the CSV file cumbersome to view and later read back into a
-    # DataFrame.
-    index = kwargs.pop('index', None)
-    if index is None:
-        kwargs['index'] = False
-
-    # retrieve the file name and the extension from the given file path.
-    file_name, _ = os.path.splitext(file_path)
-    metadata_filename = file_name + extension
-
-    # check if we access privileges to write a file in the given file path,
-    # and also check if a file already exists in the file path.
-    can_write, file_exists = _check_file_path(file_path)
-
-    if can_write:
-        # check if the file already exists. If so issue a warning and
-        # overwrite the file.
-        if file_exists:
-            logger.warning('File already exists at %s; Overwriting it',
-                           file_path)
-            data_frame.to_csv(file_path, **kwargs)
-        else:
-            data_frame.to_csv(file_path, **kwargs)
-    else:
-        # If we cannot write in the given file path, raise an exception.
-        logger.error('Cannot write in the file path %s; Exiting' % file_path)
-        raise AssertionError('Cannot write in the file path %s' % file_path)
-
-    # repeat the process (as writing the DataFrame) to write the metadata.
-
-    # check for access privileges and file existence.
-    can_write, file_exists = _check_file_path(metadata_filename)
-    if can_write:
-        if file_exists:
-            logger.warning('Metadata file already exists at %s. Overwriting '
-                           'it', metadata_filename)
-            _write_metadata(data_frame, metadata_filename)
-        else:
-            _write_metadata(data_frame, metadata_filename)
-    else:
-        # If we cannot write in the given file path, raise an exception.
-        logger.error('Cannot write in the file path %s; Exiting' % file_path)
-        raise AssertionError('Cannot write in the file path %s' % file_path)
-
-    return True
-
-
-def _write_metadata(data_frame, file_path):
-    """
-    Write metadata contents to disk.
-    """
-    # Initialize a metadata dictionary to store the metadata.
-    metadata_dict = collections.OrderedDict()
-
-    # Get all the properties for the input data frame
-    if cm.is_dfinfo_present(data_frame) is True:
-        properties = cm.get_all_properties(data_frame)
-    else:
-        # If the data_frame is not in the catalog, then return immedidately.
-        return False
-
-    # If the properties are present in the catalog, then write properties to
-    # disk
-    if len(properties) > 0:
-        for property_name, property_value in six.iteritems(properties):
-            # If the property value is not of type string, then just write it
-            #  as 'POINTER'. This will be useful while writing the candidate
-            # sets to disk. The candidate set will have properties such as
-            # ltable and rtable which are DataFrames. We do not have a simple
-            # way to write them to disk and link them back the candidate set
-            # while reading back from disk. So to get around this problem we
-            # will use 'POINTER' as the special value to indicate objects
-            # other than strings.
-            if isinstance(property_value, six.string_types) is False:
-                metadata_dict[property_name] = 'POINTER'
-            else:
-                metadata_dict[property_name] = property_value
-
-        # Write the properties to a file in disk. The file will one property
-        # per line. We follow a special syntax to write the properties. The
-        # syntax is:
-        # #property_name=property_value
-        with open(file_path, 'w') as file_handler:
-            for property_name, property_value in six.iteritems(metadata_dict):
-                file_handler.write('#%s=%s\n' % (property_name, property_value))
-
-    return True
-
-
-def _is_metadata_file_present(file_path, extension='.metadata'):
-    """
-    Check if the metadata file is present.
-    """
-    # Get the file name and the extension from the file path.
-    file_name, _ = os.path.splitext(file_path)
-    # Create a file name with the given extension.
-    file_name = ''.join([file_name, extension])
-    # Check if the file already exists.
-    return os.path.exists(file_name)
-
-
-def _get_metadata_from_file(file_path):
-    """
-    Get the metadata information from the file.
-    """
-    # Initialize a dictionary to store the metadata read from the file.
-    metadata = dict()
-
-    # Get the number of lines from the file
-    num_lines = sum(1 for _ in open(file_path))
-
-    # If there are some contents in the file (i.e num_lines > 0),
-    # read its contents.
-    if num_lines > 0:
-        with open(file_path) as file_handler:
-            for _ in range(num_lines):
-                line = next(file_handler)
-                # Consider only the lines that are starting with '#'
-                if line.startswith('#'):
-                    # Remove the leading '#'
-                    line = line.lstrip('#')
-                    # Split the line with '=' as the delimiter
-                    tokens = line.split('=')
-                    # Based on the special syntax we use, there should be
-                    # exactly two tokens after we split using '='
-                    assert len(tokens) is 2, 'Error in file, he num tokens ' \
-                                             'is not 2'
-                    # Retrieve the property_names and values.
-                    property_name = tokens[0].strip()
-                    property_value = tokens[1].strip()
-                    # If the property value is not 'POINTER' then store it in
-                    #  the metadata dictionary.
-                    if property_value is not 'POINTER':
-                        metadata[property_name] = property_value
-
-    # Return the metadata dictionary and the number of lines in the file.
-    return metadata, num_lines
-
-
-def _update_metadata_for_read_cmd(metadata, **kwargs):
-    """
-    Update metadata for read_csv_metadata method.
-    """
-    # Create a copy of incoming metadata. We will update the incoming
-    # metadata dict with kwargs.
-    copy_metadata = metadata.copy()
-
-    # The updation is going to happen in two steps: (1) overriding the
-    # properties in metadata dict using kwargs, and (2) adding the properties
-    #  to metadata dict from kwargs.
-
-    # Step 1
-    # We will override the properties in the metadata dict with the
-    # properties from kwargs.
-
-    # Get the property from metadata dict.
-    for property_name in copy_metadata.keys():
-        # If the same property is present in kwargs, then override it in the
-        # metadata dict.
-        if property_name in kwargs:
-            property_value = kwargs.pop(property_name)
-            if property_value is not None:
-                metadata[property_name] = property_value
-            else:
-                # Warn the users if the metadata dict had a valid value,
-                # but the kwargs sets it to None.
-                logger.warning(
-                    '%s key had a value (%s)in file but input arg is set to '
-                    'None' % (property_name, metadata[property_name]))
-                # Remove the property from the dictionary.
-                metadata.pop(property_name)  # remove the key-value pair
-
-    # Step 2
-    # Add the properties from kwargs.
-    # We should be careful here. The kwargs contains the key-value pairs that
-    # are used by read_csv method (of pandas). We will just pick the
-    # properties that we expect from the read_csv_metadata method.
-    properties = ['key', 'ltable', 'rtable', 'fk_ltable', 'fk_rtable']
-
-    # For the properties that we expect, read from kwargs and update the
-    # metadata dict.
-    for property_name in properties:
-        if property_name in kwargs:
-            property_value = kwargs.pop(property_name)
-            if property_value is not None:
-                metadata[property_name] = property_value
-            else:
-                # Warn the users if the properties in the kwargs is set to None.
-                logger.warning('Metadata %s is set to None', property_name)
-                # Remove the property from the metadata dict.
-                metadata.pop(property_name, None)
-
-    return metadata, kwargs
-
-
-def _check_metadata_for_read_cmd(metadata):
-    """
-    Check the metadata for read_csv_metadata command
-    """
-
-    # Do basic validation checks for the metadata.
-
-    # We require consistency of properties given for the canidate set. We
-    # expect the user to provide all the required properties for the
-    # candidate set.
-    required_properties = ['ltable', 'rtable', 'fk_ltable', 'fk_rtable']
-
-    # Check what the user has given
-    given_properties = set(required_properties).intersection(metadata.keys())
-
-    # Check if all the required properties are given
-    if len(given_properties) > 0:
-        # Check the lengths are same. If not, this means that the user is
-        # missing something. So, raise an error.
-        if len(given_properties) is not len(required_properties):
-            logger.error(
-                'Dataframe requires all valid ltable, rtable, fk_ltable, '
-                'fk_rtable parameters set')
-            raise AssertionError(
-                'Dataframe requires all valid ltable, rtable, fk_ltable, '
-                'fk_rtable parameters set')
-
-        # ltable is expected to be of type pandas DataFrame. If not raise an
-        # error.
-        if not isinstance(metadata['ltable'], pd.DataFrame):
-            logger.error('The parameter ltable must be set to valid Dataframe')
-            raise AssertionError(
-                'The parameter ltable must be set to valid Dataframe')
-
-        # rtable is expected to be of type pandas DataFrame. If not raise an
-        # error.
-        if not isinstance(metadata['rtable'], pd.DataFrame):
-            logger.error('The parameter rtable must be set to valid Dataframe')
-            raise AssertionError(
-                'The parameter rtable must be set to valid Dataframe')
-    # If the length of comman properties is 0, it will fall out to return
-    # True, which is ok.
-    return True
-
-
-def _check_file_path(file_path):
-    """
-    Check validity (access privileges and existence of a file already)of the
-    given file path.
-    """
-    # returns a tuple can_write, file_exists
-    if os.path.exists(file_path):
-        # the file is there
-        return True, True
-    elif os.access(os.path.dirname(file_path), os.W_OK):
-        return True, False
-        # the file does not exists but write privileges are given
-    else:
-        return False, False
diff --git a/requirements.yml b/requirements.yml
index 8baa41a..509f010 100644
--- a/requirements.yml
+++ b/requirements.yml
@@ -1,4 +1,4 @@
-name: py_entitymatching_dev
+name: py_labeler_dev
 channels:
   - conda-forge
   - uwmagellan

From 36b1a1d48ec5ac872fbf8204acaa9b7871f39bb8 Mon Sep 17 00:00:00 2001
From: pavankm <kpavan@protonmail.com>
Date: Sat, 30 Dec 2017 15:27:28 -0600
Subject: [PATCH 03/12] remove check for version + remove unused cmd from
 ipynb

---
 notebooks/Sampling and Labeling.ipynb | 85 +++++++++++++--------------
 py_labeler/labeler/labeler.py         |  6 +-
 2 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/notebooks/Sampling and Labeling.ipynb b/notebooks/Sampling and Labeling.ipynb
index 18d7631..c14f57b 100644
--- a/notebooks/Sampling and Labeling.ipynb	
+++ b/notebooks/Sampling and Labeling.ipynb	
@@ -17,7 +17,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
      "end_time": "2017-12-30T18:12:19.100155Z",
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -46,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -171,7 +171,7 @@
        "4               1988     Joseph Kuan           94122               1982  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -182,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -191,7 +191,7 @@
        "14"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -209,7 +209,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -218,16 +218,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "'5.6.2'"
+       "'5.9.3'"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -238,22 +238,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "ImportError",
-     "evalue": "PyQt 5.9.3 or greater is required",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-6-0f3149945f8c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Label the data set\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# Specify the name for the label column\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mG\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlabeler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlabel_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'gold_label'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m~/miniconda3/lib/python3.5/site-packages/py_labeler-0.1.0-py3.5.egg/py_labeler/labeler/labeler.py\u001b[0m in \u001b[0;36mlabel_table\u001b[0;34m(df, label_column_name)\u001b[0m\n\u001b[1;32m    178\u001b[0m         \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Python 3.3 or greater is required\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    179\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mQT_VERSION_STR\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;34m'5.9.3'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mImportError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"PyQt 5.9.3 or greater is required\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    181\u001b[0m     \u001b[0m_validate_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabel_column_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    182\u001b[0m     \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdeep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mImportError\u001b[0m: PyQt 5.9.3 or greater is required"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Label the data set\n",
     "# Specify the name for the label column\n",
@@ -262,7 +249,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -378,7 +365,7 @@
        "4               1988     Joseph Kuan           94122               1982  "
       ]
      },
-     "execution_count": 8,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -389,7 +376,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -422,7 +409,9 @@
        "      <th>rtable_name</th>\n",
        "      <th>rtable_zipcode</th>\n",
        "      <th>rtable_birth_year</th>\n",
-       "      <th>label</th>\n",
+       "      <th>gold_label</th>\n",
+       "      <th>comments</th>\n",
+       "      <th>tags</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -437,7 +426,9 @@
        "      <td>Mark Levene</td>\n",
        "      <td>94107</td>\n",
        "      <td>1987</td>\n",
-       "      <td>Yes</td>\n",
+       "      <td>Not-Matched</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -450,7 +441,9 @@
        "      <td>Bill Bridge</td>\n",
        "      <td>94107</td>\n",
        "      <td>1986</td>\n",
-       "      <td>Not-Matched</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -463,7 +456,9 @@
        "      <td>Michael Brodie</td>\n",
        "      <td>94107</td>\n",
        "      <td>1987</td>\n",
-       "      <td>Yes</td>\n",
+       "      <td>Not-Labeled</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -476,7 +471,9 @@
        "      <td>Mike Franklin</td>\n",
        "      <td>94122</td>\n",
        "      <td>1988</td>\n",
-       "      <td>Not-Matched</td>\n",
+       "      <td>Not-Sure</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -489,7 +486,9 @@
        "      <td>Joseph Kuan</td>\n",
        "      <td>94122</td>\n",
        "      <td>1982</td>\n",
-       "      <td>Yes</td>\n",
+       "      <td>Not-Labeled</td>\n",
+       "      <td></td>\n",
+       "      <td></td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -510,15 +509,15 @@
        "3               1988   Mike Franklin           94122               1988   \n",
        "4               1988     Joseph Kuan           94122               1982   \n",
        "\n",
-       "         label  \n",
-       "0          Yes  \n",
-       "1  Not-Matched  \n",
-       "2          Yes  \n",
-       "3  Not-Matched  \n",
-       "4          Yes  "
+       "    gold_label comments tags  \n",
+       "0  Not-Matched                \n",
+       "1          Yes                \n",
+       "2  Not-Labeled                \n",
+       "3     Not-Sure                \n",
+       "4  Not-Labeled                "
       ]
      },
-     "execution_count": 13,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -545,7 +544,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.5.2"
+   "version": "3.6.3"
   }
  },
  "nbformat": 4,
diff --git a/py_labeler/labeler/labeler.py b/py_labeler/labeler/labeler.py
index 6ecd7c5..a7e9dfa 100644
--- a/py_labeler/labeler/labeler.py
+++ b/py_labeler/labeler/labeler.py
@@ -10,7 +10,7 @@
     from PyQt5.QtWebChannel import QWebChannel
     from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEnginePage, QWebEngineScript
     from PyQt5.QtWidgets import QApplication
-    from PyQt5.QtCore import QT_VERSION_STR
+#   from PyQt5.QtCore import QT_VERSION_STR
 except ImportError:
     raise ImportError('PyQt5 is not installed. Please install PyQt5 to use '
                       'GUI related functions in py_labeler.')
@@ -176,8 +176,8 @@ def label_table(df, label_column_name):
     """
     if sys.version_info < (3, 5):
         raise ImportError("Python 3.3 or greater is required")
-    if QT_VERSION_STR < '5.9.3':
-        raise ImportError("PyQt 5.9.3 or greater is required")
+    # if QT_VERSION_STR < '5.9.2':
+    #    raise ImportError("PyQt 5.9.2 or greater is required")
     _validate_inputs(df, label_column_name)
     df = df.copy(deep=True)
 

From 65f1432e713a8dca6871d42a01b7e2de840750c0 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Tue, 2 Jan 2018 21:04:28 -0600
Subject: [PATCH 04/12] [FIX] Canceling save to file gives error

---
 py_labeler/labeler/view/templates/common_js.html | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/py_labeler/labeler/view/templates/common_js.html b/py_labeler/labeler/view/templates/common_js.html
index cae5af2..0495a61 100644
--- a/py_labeler/labeler/view/templates/common_js.html
+++ b/py_labeler/labeler/view/templates/common_js.html
@@ -5,6 +5,7 @@
 <script type="text/javascript">
 
     var save_file_name = "{{ save_file_name}}";
+    var default_file_name = " {{ save_file_name }}"
     var current_page = {{ current_page|int }};
     var valid_file_name = /^[\w\-\/\\. ]+$/;
 
@@ -102,11 +103,9 @@
     }
 
     function save_data() {
-        save_file_name = prompt("Please enter your name", save_file_name);
-        if (save_file_name == null || !save_file_name.trim())
-            alert("Save file name can't be empty");
-        if (save_file_name == null || !save_file_name.trim() || !valid_file_name.test(save_file_name.trim())) {
-            alert("Valid file names must have only alphabets, numbers, _ , - and spaces")
+        save_file_name = prompt("Please enter your name", default_file_name);
+        if (!save_file_name.trim() || !valid_file_name.test(save_file_name.trim())) {
+            alert("Save file name can't be empty. valid file names must have only alphabets, numbers, _ , - and spaces")
         }
         else {
             new QWebChannel(qt.webChannelTransport, function (channel) {

From 2abdc959ce40425ed283007951350063f560c453 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Tue, 2 Jan 2018 22:37:27 -0600
Subject: [PATCH 05/12] cleanup

---
 py_labeler/labeler/view/templates/common_js.html | 16 +++++++++-------
 py_labeler/utils/generic_helper.py               |  7 -------
 requirements.txt                                 |  2 +-
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/py_labeler/labeler/view/templates/common_js.html b/py_labeler/labeler/view/templates/common_js.html
index 0495a61..abf4c22 100644
--- a/py_labeler/labeler/view/templates/common_js.html
+++ b/py_labeler/labeler/view/templates/common_js.html
@@ -104,13 +104,15 @@
 
     function save_data() {
         save_file_name = prompt("Please enter your name", default_file_name);
-        if (!save_file_name.trim() || !valid_file_name.test(save_file_name.trim())) {
-            alert("Save file name can't be empty. valid file names must have only alphabets, numbers, _ , - and spaces")
-        }
-        else {
-            new QWebChannel(qt.webChannelTransport, function (channel) {
-                channel.objects.tuple_pair_display_controller.save_data(save_file_name);
-            });
+        if (save_file_name != null) {
+            if (!save_file_name.trim() || !valid_file_name.test(save_file_name.trim())) {
+                alert("Save file name can't be empty. valid file names must have only alphabets, numbers, _ , - and spaces")
+            }
+            else {
+                new QWebChannel(qt.webChannelTransport, function (channel) {
+                    channel.objects.tuple_pair_display_controller.save_data(save_file_name);
+                });
+            }
         }
 
     }
diff --git a/py_labeler/utils/generic_helper.py b/py_labeler/utils/generic_helper.py
index 87f1ca1..a7d8022 100644
--- a/py_labeler/utils/generic_helper.py
+++ b/py_labeler/utils/generic_helper.py
@@ -2,18 +2,11 @@
 import logging
 import os
 
-import pandas as pd
-import six
-
-# import py_labeler.catalog.catalog_manager as cm
 from py_labeler.utils import install_path
 
-# from py_labeler.utils.catalog_helper import check_fk_constraint
-
 logger = logging.getLogger(__name__)
 
 
 def get_install_path():
     path_list = install_path.split(os.sep)
     return os.sep.join(path_list[0:len(path_list) - 1])
-
diff --git a/requirements.txt b/requirements.txt
index a5fc0d1..a2f6b43 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 pandas
-nose
+six
 Jinja2
 pyqt5
\ No newline at end of file

From 4551d890fed1920f8d07aeb292d1f06ab15ec1ae Mon Sep 17 00:00:00 2001
From: pavankm <kpavan@protonmail.com>
Date: Thu, 4 Jan 2018 00:58:24 -0600
Subject: [PATCH 06/12] update installation instructions to talk about conda
 v/s virtualenv

---
 docs/user_manual/installation.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/user_manual/installation.rst b/docs/user_manual/installation.rst
index 2150251..1743fe9 100644
--- a/docs/user_manual/installation.rst
+++ b/docs/user_manual/installation.rst
@@ -28,6 +28,18 @@ command::
 
 The above command will install py_labeler and all of its dependencies.
 
+Note about working with conda environments
+------------------------------------------
+We recommened working with a virtual environment created using the 
+
+    virtualenv
+
+command.
+
+If you are working with an environment created using conda, note that conda does not provide the latest version of PyQt5, which py_labeler needs.
+An already installed package such as jupyter that uses conda's version of PyQt5 may conflict with the version py_labeler needs.
+
+
 Installing from Source Distribution
 -----------------------------------
 Clone the py_labeler package from GitHub

From 5e8a617e09840795d24a8de44308d190fffa84fe Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Fri, 5 Jan 2018 12:12:35 -0600
Subject: [PATCH 07/12] update conda recipie

---
 conda.recipe/meta.yaml | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
index 95cadec..1a723ac 100644
--- a/conda.recipe/meta.yaml
+++ b/conda.recipe/meta.yaml
@@ -1,6 +1,6 @@
 package:
   name: py_labeler
-  version: "0.2.0"
+  version: "0.1.0"
 
 source:
     git_url: ../
@@ -10,23 +10,16 @@ requirements:
   build:
     - python
     - setuptools
-    - py_stringsimjoin
-    - cloudpickle
-    - pyparsing
-    - scikit-learn
-    - pyqt
-    - pandas-profiling
-#    - xgboost
+    - pandas
+    - six
+    - jinja2
 
   run:
     - python
-    - py_stringsimjoin
-    - cloudpickle
-    - pyparsing
-    - scikit-learn
-    - pyqt
-    - pandas-profiling
-#    - xgboost
+    - setuptools
+    - pandas
+    - six
+    - jinja2
 
 test:
   # Python imports
@@ -34,7 +27,7 @@ test:
     - py_labeler
 
   # commands:
-    # You can put test commands to be run here.  Use this to test that the
+    # You can put test commands to be run here.  Use this to test that the
     # entry points work.
 
 

From 7a9c22dc3977fac069600b8e0b341840d4f87245 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Fri, 5 Jan 2018 12:31:33 -0600
Subject: [PATCH 08/12] changed environment creation doc to depend on
 virtualenv rather than conda

---
 docs/contributing.rst | 122 ++++++++++++++++++++----------------------
 1 file changed, 58 insertions(+), 64 deletions(-)

diff --git a/docs/contributing.rst b/docs/contributing.rst
index 4660aaa..1c38710 100644
--- a/docs/contributing.rst
+++ b/docs/contributing.rst
@@ -1,15 +1,15 @@
 .. _contributing:
 
 *********************************
-Contributing to magellan_labeler
+Contributing to py_labeler
 *********************************
 
 .. contents:: Table of contents:
-   :local:
+    :local:
 
 This document is adapted from `pandas how to contribute guidelines
 <http://pandas.pydata.org/pandas-docs/stable/contributing.html>`_ for
-*magellan_labeler* package.
+*py_labeler* package.
 
 Where to start?
 ===============
@@ -17,11 +17,11 @@ Where to start?
 All contributions, bug reports, bug fixes, documentation improvements,
 enhancements and ideas are welcome.
 
-If you are simply looking to start working with the *magellan_labeler* codebase, navigate to the
-`GitHub "issues" tab <https://github.com/anhaidgroup/magellan_labeler/issues>`_ and start looking through
+If you are simply looking to start working with the *py_labeler* codebase, navigate to the
+`GitHub "issues" tab <https://github.com/anhaidgroup/py_labeler/issues>`_ and start looking through
 interesting issues.
 
-Or maybe through using *magellan_labeler* you have an idea of your own or are looking for something
+Or maybe through using *py_labeler* you have an idea of your own or are looking for something
 in the documentation and thinking 'this can be improved'...you can do something
 about it!
 
@@ -31,10 +31,10 @@ Feel free to ask questions on the `mailing list
 Bug reports and enhancement requests
 ====================================
 
-Bug reports are an important part of making *magellan_labeler* more stable.Having a
+Bug reports are an important part of making *py_labeler* more stable. Having a
 complete bug report will allow others to reproduce the bug and provide insight into
 fixing. We use GitHub issue tracker to track bugs. It is important that you provide the
-exact version of *magellan_labeler* where the bug is found. Trying the bug-producing
+exact version of *py_labeler* where the bug is found. Trying the bug-producing
 code out on the *master* branch is often a worthwhile exercise to confirm the bug still
 exists. It is also worth searching existing bug reports and pull requests to see if the
 issue has already been reported and/or fixed.
@@ -59,7 +59,7 @@ Bug reports must:
 #. Explain why the current behavior is wrong/not desired and what you expect instead.
 
 
-The issue will then show up to the *magellan_labeler* community and be open to
+The issue will then show up to the *py_labeler* community and be open to
 comments/ideas from others.
 
 
@@ -67,17 +67,17 @@ Working with the code
 =====================
 
 Now that you have an issue you want to fix, enhancement to add, or documentation to
-improve, you need to learn how to work with GitHub and the *magellan_labeler* code base.
+improve, you need to learn how to work with GitHub and the *py_labeler* code base.
 
 Version control, Git, and GitHub
 --------------------------------
 
 To the new user, working with Git is one of the more daunting aspects of contributing
-to *magellan_labeler*. It can very quickly become overwhelming, but sticking to the
+to *py_labeler*. It can very quickly become overwhelming, but sticking to the
 guidelines below will help keep the process straightforward and mostly trouble free.
 As always, if you are having difficulties please feel free to ask for help.
 
-The code is hosted on `GitHub <https://www.github.com/anhaidgroup/magellan_labeler>`_. To
+The code is hosted on `GitHub <https://www.github.com/anhaidgroup/py_labeler>`_. To
 contribute you will need to sign up for a `free GitHub account
 <https://github.com/signup/free>`_. We use `Git <http://git-scm.com/>`_ for
 version control to allow many people to work together on the project.
@@ -99,16 +99,16 @@ you can work seamlessly between your local repository and GitHub.
 Forking
 -------
 
-You will need your own fork to work on the code. Go to the `magellan_labeler project
-page <https://github.com/anhaidgroup/magellan_labeler>`_ and hit the ``Fork`` button. You will
+You will need your own fork to work on the code. Go to the `py_labeler project
+page <https://github.com/anhaidgroup/py_labeler>`_ and hit the ``Fork`` button. You will
 want to clone your fork to your machine::
 
-    git clone git@github.com:<your-user-name>/magellan_labeler.git <local-repo-name>
+    git clone git@github.com:<your-user-name>/py_labeler.git <local-repo-name>
     cd <local-repo-name>
-    git remote add upstream git://github.com/anhaidgroup/magellan_labeler.git
+    git remote add upstream git://github.com/anhaidgroup/py_labeler.git
 
 This creates the directory `local-repo-name` and connects your repository to
-the upstream (main project) *magellan_labeler* repository.
+the upstream (main project) *py_labeler* repository.
 
 The testing suite will run automatically on Travis-CI once your pull request is
 submitted.  However, if you wish to run the test suite on a branch prior to
@@ -131,7 +131,7 @@ The above can be simplified to::
 
 This changes your working directory to the *new_feature* branch.  Keep any
 changes in this branch specific to one bug or feature so it is clear
-what the branch brings to *magellan_labeler*. You can have many new features
+what the branch brings to *py_labeler*. You can have many new features
 and switch in between them using the git checkout command.
 
 To update this branch, you need to retrieve the changes from the master branch::
@@ -139,7 +139,7 @@ To update this branch, you need to retrieve the changes from the master branch::
     git fetch upstream
     git rebase upstream/master
 
-This will replay your commits on top of the lastest magellan_labeler git master.  If this
+This will replay your commits on top of the latest py_labeler git master.  If this
 leads to merge conflicts, you must resolve them before submitting your pull
 request.  If you have uncommitted changes, you will need to ``stash`` them prior
 to updating.  This will effectively store your changes and they can be reapplied
@@ -150,54 +150,48 @@ after updating.
 Creating a development environment
 ----------------------------------
 
-An easy way to create a *magellan_labeler* development environment is as follows.
+An easy way to create a *py_labeler* development environment is as follows.
 
-- Install either :ref:`Anaconda <install.anaconda>` or :ref:`miniconda <install.miniconda>`
+- Install ``virtualenv``
 - Make sure that you have :ref:`cloned the repository <contributing.forking>`
-- ``cd`` to the *magellan_labeler* source directory
+- ``cd`` to the *py_labeler* source directory
 
-Tell conda to create a new environment, named ``magellan_labeler_dev``, or any other
+Tell virtualenv to create a new environment, named ``py_labeler_dev``, or any other
 name you would like for this environment, by running::
 
-    conda create -n magellan_labeler_dev --file requirements.yml
+    virtualenv --python=python3.5 VIRTUALENV_PATH
+
+Where *VIRTUALENV_PATH* is the directory in which the environment is to be created.
 
+For a python 3.6 environment::
 
-For a python 3 environment::
+    virtualenv --python=python3.6 VIRTUALENV_PATH
 
-      conda create -n magellan_labeler_dev python=3 --file requirements.yml
+Once this is done, install the dependencies of *py_labeler* using::
 
+    pip install -r requirements.txt
 
 This will create the new environment, and not touch any of your existing environments,
 nor any existing python installation. It will install all of the basic dependencies of
-*magellan_labeler*. You need to install the *nose* package which is used for
+*py_labeler*. You need to install the *nose* package which is used for
 testing, as follows::
 
-      conda install -n magellan_labeler_dev nose
-
-To work in this environment, Windows users should ``activate`` it as follows::
-
-      activate magellan_labeler_dev
+      pip install nose
 
-Mac OSX / Linux users should use::
+To work in this environment::
 
-      source activate magellan_labeler_dev
+      source VIRTUALENV_PATH/bin/activate
 
 You will then see a confirmation message to indicate you are in the new development environment.
 
-To view your environments::
-
-      conda info -e
-
-To return to your home root environment in Windows::
+To return to your home root environment::
 
       deactivate
 
-To return to your home root environment in OSX / Linux::
-
-      source deactivate
 
-See the full conda docs `here <http://conda.pydata.org/docs>`__.
+See the full virtualenv docs `here <https://virtualenv.pypa.io/en/stable/>`__.
 
+Alternatively you could use virtualenvwrapper which is a set of extensions to virtualenv. Refer to the documentation `here <https://virtualenvwrapper.readthedocs.io>`__.
 
 .. _contributing.documentation:
 
@@ -206,7 +200,7 @@ Contributing to the documentation
 
 If you're not the developer type, contributing to the documentation is still
 of huge value. You don't even have to be an expert on
-*magellan_labeler* to do so! Something as simple as rewriting small passages for clarity
+*py_labeler* to do so! Something as simple as rewriting small passages for clarity
 as you reference the docs is a simple but effective way to contribute. The
 next person to read that passage will be in your debt!
 
@@ -218,7 +212,7 @@ help the next person.
 .. contents:: Documentation:
     :local:
 
-About the *magellan_labeler* documentation
+About the *py_labeler* documentation
 -------------------------------------------
 
 The documentation is written in **reStructuredText**, which is almost like writing
@@ -229,8 +223,8 @@ complex changes to the documentation as well.
 
 Some other important things to know about the docs:
 
-- The *magellan_labeler* documentation consists of two parts: the docstrings in the code
-  itself and the docs in this folder ``magellan_labeler/docs/``.
+- The *py_labeler* documentation consists of two parts: the docstrings in the code
+  itself and the docs in this folder ``py_labeler/docs/``.
 
   The docstrings provide a clear explanation of the usage of the individual
   functions, while the documentation in this folder consists of tutorial-like
@@ -244,28 +238,28 @@ Some other important things to know about the docs:
   extend it in a similar manner.
 
 
-How to build the *magellan_labeler* documentation
+How to build the *py_labeler* documentation
 --------------------------------------------------
 
 Requirements
 ~~~~~~~~~~~~
 
-To build the *magellan_labeler* docs there are some extra requirements: you will need to
+To build the *py_labeler* docs there are some extra requirements: you will need to
 have ``sphinx`` and ``ipython`` installed.
 
 It is easiest to :ref:`create a development environment <contributing.dev_env>`, then install::
 
-      conda install -n magellan_labeler_dev sphinx ipython
+      pip install sphinx ipython
 
 Building the documentation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 So how do you build the docs? Navigate to your local
-``magellan_labeler/docs/`` directory in the console and run::
+``py_labeler/docs/`` directory in the console and run::
 
     make html
 
-Then you can find the HTML output in the folder ``magellan_labeler/docs/_build/html/``.
+Then you can find the HTML output in the folder ``py_labeler/docs/_build/html/``.
 
 If you want to do a full clean build, do::
 
@@ -283,21 +277,21 @@ Contributing to the code base
 
 Code standards
 --------------
-*magellan_labeler* follows `Google Python Style Guide <https://google.github.io/styleguide/pyguide.html>`_.
+*py_labeler* follows `Google Python Style Guide <https://google.github.io/styleguide/pyguide.html>`_.
 
-Please try to maintain backward compatibility. *magellan_labeler* has lots of users with lots of
+Please try to maintain backward compatibility. *py_labeler* has lots of users with lots of
 existing code, so don't break it if at all possible.  If you think breakage is required,
 clearly state why as part of the pull request.  Also, be careful when changing method
 signatures and add deprecation warnings where needed.
 
 Writing tests
 -------------
-Adding tests is one of the most common requests after code is pushed to *magellan_labeler*.  Therefore,
+Adding tests is one of the most common requests after code is pushed to *py_labeler*.  Therefore,
 it is worth getting in the habit of writing tests ahead of time so this is never an issue.
 
 Unit testing
 ~~~~~~~~~~~~
-Like many packages, *magellan_labeler* uses the `Nose testing system
+Like many packages, *py_labeler* uses the `Nose testing system
 <http://nose.readthedocs.org/en/latest/index.html>`_.
 
 All tests should go into the ``tests`` subdirectory of the specific package.
@@ -305,13 +299,13 @@ This folder contains many current examples of tests, and we suggest looking to t
 inspiration.
 
 The tests can then be run directly inside your Git clone (without having to
-install *magellan_labeler*) by typing::
+install *py_labeler*) by typing::
 
     nosetests
 
 
 
-Contributing your changes to *magellan_labeler*
+Contributing your changes to *py_labeler*
 ================================================
 
 Committing your code
@@ -373,12 +367,12 @@ You can see the remote repositories::
 If you added the upstream repository as described above you will see something
 like::
 
-    origin  git@github.com:<yourname>/magellan_labeler.git (fetch)
-    origin  git@github.com:<yourname>/magellan_labeler.git (push)
-    upstream        git://github.com/anhaidgroup/magellan_labeler.git (fetch)
-    upstream        git://github.com/anhaidgroup/magellan_labeler.git (push)
+    origin  git@github.com:<yourname>/py_labeler.git (fetch)
+    origin  git@github.com:<yourname>/py_labeler.git (push)
+    upstream        git://github.com/anhaidgroup/py_labeler.git (fetch)
+    upstream        git://github.com/anhaidgroup/py_labeler.git (push)
 
-Now your code is on GitHub, but it is not yet a part of the *magellan_labeler* project.  For that to
+Now your code is on GitHub, but it is not yet a part of the *py_labeler* project.  For that to
 happen, a pull request needs to be submitted on GitHub.
 
 Review your code
@@ -389,7 +383,7 @@ again make sure that you have followed all the guidelines outlined in this docum
 regarding code style, tests, performance tests, and documentation. You should also
 double check your branch changes against the branch it was based on:
 
-#. Navigate to your repository on GitHub -- https://github.com/<your-user-name>/magellan_labeler
+#. Navigate to your repository on GitHub -- https://github.com/<your-user-name>/py_labeler
 #. Click on ``Branches``
 #. Click on the ``Compare`` button for your feature branch
 #. Select the ``base`` and ``compare`` branches, if necessary. This will be ``master`` and

From e65357a765a05b243a573c0c1ad5e72dcafaead8 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Fri, 5 Jan 2018 12:32:04 -0600
Subject: [PATCH 09/12] removed 'Magellan Labeler' page title

---
 py_labeler/labeler/view/templates/horizontal_layout.html | 2 +-
 py_labeler/labeler/view/templates/single_layout.html     | 2 +-
 py_labeler/labeler/view/templates/vertical_layout.html   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/py_labeler/labeler/view/templates/horizontal_layout.html b/py_labeler/labeler/view/templates/horizontal_layout.html
index 09fc603..a61277f 100644
--- a/py_labeler/labeler/view/templates/horizontal_layout.html
+++ b/py_labeler/labeler/view/templates/horizontal_layout.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <title>Magellan Labeller</title>
+    <title>Py Labeller</title>
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
 
diff --git a/py_labeler/labeler/view/templates/single_layout.html b/py_labeler/labeler/view/templates/single_layout.html
index fed3e65..60f9bbb 100644
--- a/py_labeler/labeler/view/templates/single_layout.html
+++ b/py_labeler/labeler/view/templates/single_layout.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <title>Magellan Labeller</title>
+    <title>Py Labeller</title>
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
 
diff --git a/py_labeler/labeler/view/templates/vertical_layout.html b/py_labeler/labeler/view/templates/vertical_layout.html
index ca93a65..25b0613 100644
--- a/py_labeler/labeler/view/templates/vertical_layout.html
+++ b/py_labeler/labeler/view/templates/vertical_layout.html
@@ -1,7 +1,7 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
-    <title>Magellan Labeller</title>
+    <title>Py Labeller</title>
     <meta charset="utf-8">
     <meta name="viewport" content="width=device-width, initial-scale=1">
 
@@ -19,7 +19,7 @@
             {% for tuple_pair in tuple_pairs %}
                 <div class="tuple-pair-div">
                     <div>
-                        <table class="{{tuple_pair[label_column_name]+' table table-bordered' }}" id={{tuple_pair['_id'] }}>
+                        <table class="{{ tuple_pair[label_column_name]+' table table-bordered' }}" id={{ tuple_pair['_id'] }}>
                             {% for attribute in attributes %}
                                 <tr>
                                     <td>

From cb40b06efcce5504d3c4824fc1c64f1626c246d6 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Thu, 11 Jan 2018 00:07:52 -0600
Subject: [PATCH 10/12] update documentation

---
 docs/user_manual/installation.rst |  6 ++++--
 docs/user_manual/labeling.rst     | 19 ++++++++-----------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/docs/user_manual/installation.rst b/docs/user_manual/installation.rst
index 1743fe9..c09e91e 100644
--- a/docs/user_manual/installation.rst
+++ b/docs/user_manual/installation.rst
@@ -28,9 +28,11 @@ command::
 
 The above command will install py_labeler and all of its dependencies.
 
-Note about working with conda environments
+Note about working with conda/virtual environments
 ------------------------------------------
-We recommened working with a virtual environment created using the 
+Due to dependencies, py_labeler works only with virtual environments created with virtualenv or a pure python 3.5+ environment.
+
+We recommend working with a virtual environment created using the
 
     virtualenv
 
diff --git a/docs/user_manual/labeling.rst b/docs/user_manual/labeling.rst
index 7c1d044..a9b7c99 100644
--- a/docs/user_manual/labeling.rst
+++ b/docs/user_manual/labeling.rst
@@ -1,19 +1,16 @@
 Labeling Tool
 -------------
 
-WARNING: The new labeler is only available in python version 3.5 and above only.
+The command `label_table` is used to label the samples.
+An example use is shown below:
 
-A new command `label_table` has been added to label the samples. This new
-labeler is currently in pre-alpha stage and is still incomplete. Use at your
-own risk. An example use is shown below:
+    >>> import py_labeler as pl
+    >>> G = pl.label_table(S, label_column_name='gold_labels')
 
-    >>> G = em.label_table(S, label_column_name='gold_labels')
-
-The new labeler completes the same task as `label_table` in that it will take
-an input table `S` with pairs of tuples and create a copy table `G` with
-additional label, comment, and tags columns. The command will open a GUI that
-allows the user to label each pair of tuples with with either 'Yes', 'No', or
-'Not-Sure'.
+The new labeler will take an input table `S` with pairs of tuples and create
+a copy table `G` with additional label, comment, and tags columns. The command
+will open a GUI that allows the user to label each pair of tuples with
+either 'Yes', 'No', or 'Not-Sure'.
 
 Please refer to the API reference of :py:meth:`~py_labeler.label_table`
 for more details

From 08fb7fd7520671bdaa49c31a64c5f1c15184bd51 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Thu, 11 Jan 2018 00:09:38 -0600
Subject: [PATCH 11/12] rename ipython notebook

---
 notebooks/{Sampling and Labeling.ipynb => Labeling.ipynb} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename notebooks/{Sampling and Labeling.ipynb => Labeling.ipynb} (100%)

diff --git a/notebooks/Sampling and Labeling.ipynb b/notebooks/Labeling.ipynb
similarity index 100%
rename from notebooks/Sampling and Labeling.ipynb
rename to notebooks/Labeling.ipynb

From 145d0b3aa186b52c3760b921b34ec47ae4d40bc0 Mon Sep 17 00:00:00 2001
From: pavankm <pavank@protonmail.com>
Date: Fri, 12 Jan 2018 13:40:39 -0600
Subject: [PATCH 12/12] updated guide to point to release version of ipython
 notebook

---
 docs/user_manual/guides.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user_manual/guides.rst b/docs/user_manual/guides.rst
index 39e3bda..145c9f2 100644
--- a/docs/user_manual/guides.rst
+++ b/docs/user_manual/guides.rst
@@ -13,7 +13,7 @@ gives a quick tour on installing and using Jupyter notebook.
 
 Sampling and labeling
 ---------------------
-* Sampling and labeling: `Jupyter notebook <https://nbviewer.jupyter.org/github/anhaidgroup/py_labeler/blob/master/notebooks/Sampling%20and%20Labeling.ipynb>`_
+* Sampling and labeling: `Jupyter notebook <https://nbviewer.jupyter.org/github/anhaidgroup/py_labeler/blob/rel_0.1.x/notebooks/Labeling.ipynb>`_
 
 End-to-End EM Workflows
 -----------------------