diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6769e21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/examples/kafka-linear-regression/.env.example b/examples/kafka-linear-regression/.env.example new file mode 100644 index 0000000..cfd9fbb --- /dev/null +++ b/examples/kafka-linear-regression/.env.example @@ -0,0 +1,3 @@ +UPSTASH_KAFKA_USER= +UPSTASH_KAFKA_PASS= +UPSTASH_KAFKA_ENDPOINT= \ No newline at end of file diff --git a/examples/kafka-linear-regression/README.md b/examples/kafka-linear-regression/README.md new file mode 100644 index 0000000..36d592e --- /dev/null +++ b/examples/kafka-linear-regression/README.md @@ -0,0 +1,46 @@ +# Linear Regression with Kafka and Pathway + +This example demonstrates simple linear regression on streaming data from Kafka. + +You can read the +[linear regression on data from Kafka tutorial](https://pathway.com/developers/tutorials/linear_regression_with_kafka/) +for more details. + +## Before you begin + +You'll need Python installed and access to a Kafka instance. If you don't have +access to a Kafka instance you can signup for an [Upstash](https://upstash.com) +account and use a free Kafka instance. + +## Setup the runtime environment + +Create a virtual environment, activate it, and install the example dependencies: + +```sh +python -m venv env +source env/bin/activate +pip install -r requirements.txt +``` + +Create a `.env` file with the following contents. Update the environment +variable values with credentials for your Kafka instance. + +``` +UPSTASH_KAFKA_USER= +UPSTASH_KAFKA_PASS= +UPSTASH_KAFKA_ENDPOINT= +``` + +## Run the examples + +Run the Pathway script: + +```sh +python realtime_regression.py +``` + +Run the Kafka script: + +```sh +python generate_kafka_stream.py +``` diff --git a/examples/kafka-linear-regression/generating_kafka_stream.py b/examples/kafka-linear-regression/generating_kafka_stream.py index dd369cc..fde60d4 100644 --- a/examples/kafka-linear-regression/generating_kafka_stream.py +++ b/examples/kafka-linear-regression/generating_kafka_stream.py @@ -5,6 +5,10 @@ import random import time +from dotenv import load_dotenv + +load_dotenv() + from kafka import KafkaProducer topic = "linear-regression" @@ -19,7 +23,7 @@ def get_value(i): # set kafka credentials -kafka_endpoint = "talented-cow-10356-eu1-kafka.upstash.io:9092" +kafka_endpoint = os.environ["UPSTASH_KAFKA_ENDPOINT"] kafka_user = os.environ["UPSTASH_KAFKA_USER"] kafka_pass = os.environ["UPSTASH_KAFKA_PASS"] diff --git a/examples/kafka-linear-regression/realtime_regression.py b/examples/kafka-linear-regression/realtime_regression.py index 9135fb8..46979c4 100644 --- a/examples/kafka-linear-regression/realtime_regression.py +++ b/examples/kafka-linear-regression/realtime_regression.py @@ -4,8 +4,12 @@ import pathway as pw +from dotenv import load_dotenv + +load_dotenv() + # set kafka credentials (from upstash) -kafka_endpoint = "talented-cow-10356-eu1-kafka.upstash.io:9092" +kafka_endpoint = os.environ["UPSTASH_KAFKA_ENDPOINT"] kafka_user = os.environ["UPSTASH_KAFKA_USER"] kafka_pass = os.environ["UPSTASH_KAFKA_PASS"] diff --git a/examples/kafka-linear-regression/requirements.txt b/examples/kafka-linear-regression/requirements.txt new file mode 100644 index 0000000..6501bc2 --- /dev/null +++ b/examples/kafka-linear-regression/requirements.txt @@ -0,0 +1,3 @@ +python-dotenv +kafka-python +pathway \ No newline at end of file