import spacy

from text2story.core.exceptions import UninstalledModel, InvalidLanguage
from text2story.core.utils import normalize_tag, chunknize_actors

# this stores the pipeline of models used to extract narrative components
# for a given language (whose code is the key of this dictionary)
pipeline = {}
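# A sketch of the state this dictionary is expected to reach (not enforced anywhere
# in the code): after load("fr") has run, it should hold roughly
#   {"fr": <spaCy fr_core_news_lg pipeline>, "fr_time": <fr_tei2go pipeline>}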
def load(lang: str):
    """
    Defining a load method is mandatory; otherwise the package raises errors.
    If no setup is needed, define an empty method whose body is just pass.

    @param lang: the language code of the models to load, e.g. pt, en, fr
    @return: None
    """
    if not spacy.util.is_package('fr_core_news_lg'):
        spacy.cli.download('fr_core_news_lg')
    pipeline['fr'] = spacy.load('fr_core_news_lg')

    # the TEI2GO model handles temporal expressions and must be installed separately
    try:
        pipeline['fr_time'] = spacy.load(lang + "_tei2go")
    except OSError:
        model_name = lang + "_tei2go"
        command = f"pip install https://huggingface.co/hugosousa/{lang}_tei2go/resolve/main/{lang}_tei2go-any-py3-none-any.whl"
        raise UninstalledModel(model_name, command)
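
# A minimal usage sketch (hedged: the first call needs network access, since
# fr_core_news_lg is downloaded on demand, and fr_tei2go must already be
# installed or UninstalledModel is raised with the pip command to run):
#
#   load("fr")
#   doc = pipeline["fr"]("Emmanuel Macron a visité Lisbonne en mars 2022.")
#   print([(ent.text, ent.label_) for ent in doc.ents])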


def extract_participants(lang, text):
    """
    Parameters
    ----------
    lang : str
        the language of the text to be annotated
    text : str
        the text to be annotated

    Returns
    -------
    list[tuple[tuple[int, int], str, str]]
        the list of actors identified, where each actor is represented by a tuple
        with its character span, normalized POS tag and normalized entity type

    Raises
    ------
    InvalidLanguage if the given language is invalid/unsupported
    """
    if lang not in ['fr']:
        raise InvalidLanguage(lang)

    doc = pipeline[lang](text)

    # build an IOB-tagged token list: (character span, normalized POS, normalized NE tag)
    iob_token_list = []
    for token in doc:
        start_character_offset = token.idx
        end_character_offset = token.idx + len(token)
        character_span = (start_character_offset, end_character_offset)
        pos = normalize_tag(token.pos_)
        ne = token.ent_iob_ + "-" + normalize_tag(token.ent_type_) if token.ent_iob_ != 'O' else 'O'

        iob_token_list.append((character_span, pos, ne))

    # merge contiguous B-/I- tagged tokens into actor-level chunks
    actor_list = chunknize_actors(iob_token_list)

    return actor_list
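
# Example call (a sketch; the exact spans and tag strings depend on the
# fr_core_news_lg model and on normalize_tag / chunknize_actors):
#
#   load("fr")
#   extract_participants("fr", "Marie a rencontré Paul à Paris.")
#   # -> a list like [((0, 5), <normalized POS>, <normalized entity type>), ...]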


def extract_times(lang, text, publication_time=None):
    """
    Parameters
    ----------
    lang : str
        the language of the text to be annotated
    text : str
        the text to be annotated
    publication_time : str, optional
        the publication date of the text (currently unused by this annotator)

    Returns
    -------
    list[tuple[tuple[int, int], str, str]]
        a list consisting of the times identified, where each time is represented by a tuple
        with the start and end character offsets, the temporal expression type and its text,
        respectively

    Raises
    ------
    InvalidLanguage if the given language is invalid/unsupported
    """
    if lang not in ["fr"]:
        raise InvalidLanguage(lang)

    # temporal expressions are extracted with the dedicated TEI2GO model
    timex_lst = pipeline["fr_time"](text).ents

    ans = []
    for timex in timex_lst:
        start = timex.start_char
        end = timex.end_char
        label = timex.label_
        timex_text = timex.text

        ans.append(((start, end), label, timex_text))
    return ans
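
# A minimal, hedged end-to-end sketch: it only runs when this module is executed
# directly, the sample sentence is illustrative, and the actual output depends on
# the fr_core_news_lg and fr_tei2go model versions installed.
if __name__ == "__main__":
    sample = "Le président a rencontré les syndicats à Paris le 3 mars 2020."
    load("fr")
    print(extract_participants("fr", sample))
    print(extract_times("fr", sample))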