Skip to content

Commit

Permalink
Add support for SFTP (#10)
Browse files Browse the repository at this point in the history
* add sftp storage

* update readme

* add unit test sample config

* clean unit test

* add sftp documentation, improve gcs documentation

* hide progess bar for write_file_command + fix raise ValueError
  • Loading branch information
leo-schick authored Feb 13, 2023
1 parent 2e9e3ab commit 6622f52
Show file tree
Hide file tree
Showing 11 changed files with 302 additions and 1 deletion.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ print(mara_storage.shell.delete_file_command('data', file_name='hello-world.txt'

The following **command line clients** are used to access the various databases:

| Database | Client binary | Comments |
| Storage | Client binary | Comments |
| --- | --- | --- |
| Local storage | unix shell | Included in standard distributions. |
| SFTP storage | `sftp`, `curl` | |
| Google Cloud Storage | `gsutil` | From [https://cloud.google.com/storage/docs/gsutil_install](https://cloud.google.com/storage/docs/gsutil_install). |

 
Expand Down
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ This section focuses on the supported storages.

storages-overview
storages/local
storages/sftp
storages/gcs


Expand Down
7 changes: 7 additions & 0 deletions docs/storages/gcs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ Accessing a Google Cloud Storage (GCS) with the shell tool `gsutil`.
Installation
------------

Use extras `google-cloud-storage` to install all required packages.


.. code-block:: shell
$ pip install mara-storage[google-cloud-storage]
You need to install `gsutil`. Take a look at `Install gsutil <https://cloud.google.com/storage/docs/gsutil_install>`_.


Expand Down
72 changes: 72 additions & 0 deletions docs/storages/sftp.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
SFTP
====================

Accessing a SFTP drive.

Installation
------------

Use extras `sftp` to install all required packages.


.. code-block:: shell
$ pip install mara-storage[sftp]
Configuration examples
----------------------

.. tabs::

.. group-tab:: User and password login

.. code-block:: python
import mara_storage.storages
mara_storage.config.storages = lambda: {
'data': mara_storage.storages.SftpStorage(
host="<your_sftp_host>",
user="<your_login_user>",
password="<your_secure_user_password>",
# optional:
insecure = True # allow insegure SSL connections and transfers
}
.. group-tab:: Private key file
.. code-block:: python
import mara_storage.storages
mara_storage.config.storages = lambda: {
'data': mara_storage.storages.SftpStorage(
host="<your_sftp_host>",
user="<your_login_user>",
identity_file="~/.ssh/id_rsa", # path to your private key file
public_identity_file="~/.ssh/id_rsa.pub", # path to your public key file
# optional:
insecure = True # allow insegure SSL connections and transfers
}
|
|
API reference
-------------
This section contains database specific API in the module.
Configuration
~~~~~~~~~~~~~
.. module:: mara_storage.storages
:noindex:
.. autoclass:: SftpStorage
:special-members: __init__
:inherited-members:
:members:
7 changes: 7 additions & 0 deletions mara_storage/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ def __(storage: storages.LocalStorage, file_name: str):
return (storage.base_path.absolute() / file_name).is_file()


@file_exists.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str):
from . import sftp
with sftp.connection(storage) as sftp:
return sftp.exists(file_name)


@file_exists.register(storages.GoogleCloudStorage)
def __(storage: storages.GoogleCloudStorage, file_name: str):
import subprocess
Expand Down
10 changes: 10 additions & 0 deletions mara_storage/sftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pysftp

from mara_storage import storages


def connection(storage: storages.SftpStorage):
return pysftp.Connection(host=storage.host,
port=storage.port if storage.port else 22,
username=storage.user,
password=storage.password)
49 changes: 49 additions & 0 deletions mara_storage/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@ def __(storage: storages.LocalStorage, file_name: str, compression: Compression
return f'{uncompressor(compression)} '+shlex.quote(str( (storage.base_path / file_name).absolute() ))


@read_file_command.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str, compression: Compression = Compression.NONE):
if compression not in [Compression.NONE]:
raise ValueError(f'Only compression NONE is supported from storage type "{storage.__class__.__name__}"')
return ('curl -s'
+ (' -k' if storage.insecure else '')
+ (f' -u {storage.user}:' if storage.user else '')
+ (f'{storage.password}' if storage.user and storage.password else '')
+ (f' --key {storage.identity_file}' if storage.identity_file else '')
+ (f' --pubkey {storage.public_identity_file}' if storage.public_identity_file else '')
+ f' sftp://{storage.host}'
+ (f':{storage.port}' if storage.port else '')
+ f'/{shlex.quote(file_name)}'
+ (f'\\\n | {uncompressor(compression)} - ' if compression != Compression.NONE else ''))


@read_file_command.register(storages.GoogleCloudStorage)
def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
return ('gsutil '
Expand Down Expand Up @@ -90,6 +106,23 @@ def __(storage: storages.LocalStorage, file_name: str, compression: Compression
return 'cat - > ' + shlex.quote(str( full_path ))


@write_file_command.register(storages.SftpStorage)
def __(storage: storages.LocalStorage, file_name: str, compression: Compression = Compression.NONE):
if compression not in [Compression.NONE]:
raise ValueError(f'Only compression NONE is supported from storage type "{storage.__class__.__name__}"')
return ('curl -s'
+ (' -k' if storage.insecure else '')
+ (f' -u {storage.user}:' if storage.user else '')
+ (f'{storage.password}' if storage.password else '')
+ (f' --key {storage.identity_file}' if storage.identity_file else '')
+ (f' --pubkey {storage.public_identity_file}' if storage.public_identity_file else '')
+ ' -T'
+ ' -' # source
+ f' sftp://{storage.host}' # destination
+ (f':{storage.port}' if storage.port else '')
+ f'/{shlex.quote(file_name)}')


@write_file_command.register(storages.GoogleCloudStorage)
def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
if compression not in [Compression.NONE, Compression.GZIP]:
Expand Down Expand Up @@ -137,6 +170,22 @@ def __(storage: storages.LocalStorage, file_name: str, force: bool = True) -> st
+ shlex.quote(str( (storage.base_path / file_name).absolute() )))


@delete_file_command.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str, force: bool = True):
if not force:
ValueError(f'Only force=True is supported from storage type "{storage.__class__.__name__}"')

return ((f'sshpass -p {storage.password} ' if storage.password else '')
+ 'sftp'
+ (' -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' if storage.insecure else '')
+ (f' {storage.user}@' if storage.user else '')
+ storage.host
+ (f':{storage.port}' if storage.port else '')
+ (f' -i {storage.identity_file}' if storage.identity_file else '')
+ (f' << EOF\nrm {shlex.quote(file_name)}\nquit\nEOF')
)


@delete_file_command.register(storages.GoogleCloudStorage)
def __(storage: storages.GoogleCloudStorage, file_name: str, force: bool = True) -> str:
return ('gsutil '
Expand Down
24 changes: 24 additions & 0 deletions mara_storage/storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,30 @@ def __init__(self, base_path: pathlib.Path):
self.base_path = base_path


class SftpStorage(Storage):
def __init__(self, host: str, port: int = None, user: str = None, password: str = None,
insecure: bool = False, identity_file: str = None, public_identity_file: str = None):
"""
Connection information for a SFTP server
Args:
host: host name
port: tcp port
user: username
password: password
insecure: if True, the known_hosts file will not be checked
identity_file: path to a private key file to be used for private/public key authentication
public_identity_file: path to a public key file to be used for private/public key authentication
"""
self.host = host
self.port = port
self.user = user
self.password = password
self.insecure = insecure
self.identity_file = identity_file
self.public_identity_file = public_identity_file


class GoogleCloudStorage(Storage):
def __init__(self, bucket_name: str, project_id: str = None, location: str = None,
service_account_file: str = None, service_account_info: dict = None):
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ install_requires =

[options.extras_require]
test = pytest
sftp = pysftp
google-cloud-storage = google-cloud-storage; google-oauth
10 changes: 10 additions & 0 deletions tests/local_config.py.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# the SFTP storage config used to perform the test
SFTP_HOST = None
SFTP_PORT = None
SFTP_USERNAME = None
SFTP_PASSWORD = None
SFTP_INSECURE = False
SFTP_IDENTITY_FILE = None
SFTP_PUBLIC_IDENTITY_FILE = None


# the Google Cloud project id used to perform the test
GCS_PROJECT_ID = ''

Expand Down
119 changes: 119 additions & 0 deletions tests/test_sftp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import datetime
import pathlib
import pytest
import subprocess

from mara_storage.compression import Compression, compressor, file_extension as compression_file_extension
from mara_storage.client import StorageClient
from mara_storage import storages, info, shell, manage


from .local_config import SFTP_HOST, SFTP_PORT, SFTP_USERNAME, SFTP_PASSWORD, SFTP_INSECURE, SFTP_IDENTITY_FILE, SFTP_PUBLIC_IDENTITY_FILE

TEST_TOUCH_FILE_NAME = 'empty-file.txt'
TEST_FILE_NOT_EXISTS_FILE_NAME = 'file-does-not-exist.txt'
TEST_READ_FILE_NAME = 'read_test.txt'
TEST_WRITE_FILE_NAME = 'write_test.txt'
TEST_CONTENT = 'THIS IS A TEST CONTENT'

if not SFTP_HOST:
pytest.skip("skipping SFTP tests: variable SFTP_HOST not set in tests/local_config.py", allow_module_level=True)


@pytest.fixture
def storage():
return storages.SftpStorage(host=SFTP_HOST, port=SFTP_PORT, user=SFTP_USERNAME, password=SFTP_PASSWORD,
insecure=SFTP_INSECURE, identity_file=SFTP_IDENTITY_FILE,
public_identity_file=SFTP_PUBLIC_IDENTITY_FILE)


@pytest.fixture(autouse=True)
def test_before_and_after(storage: object):
#manage.ensure_storage(storage)
yield
#manage.drop_storage(storage, force=True)


def test_file_exists(storage: object):
assert isinstance(storage, storages.SftpStorage)

# prepare
write_commnand = shell.write_file_command(storage, TEST_TOUCH_FILE_NAME)
(exitcode, _) = subprocess.getstatusoutput(f"echo '' | {write_commnand}")
assert exitcode == 0
assert info.file_exists(storage, TEST_TOUCH_FILE_NAME)

# test
assert info.file_exists(storage, file_name=TEST_TOUCH_FILE_NAME)
assert not info.file_exists(storage, file_name=TEST_FILE_NOT_EXISTS_FILE_NAME)

# clean up test
delete_command = shell.delete_file_command(storage, TEST_TOUCH_FILE_NAME)
(exitcode, _) = subprocess.getstatusoutput(delete_command)
assert exitcode == 0


def test_read_file_command(storage: object):
assert isinstance(storage, storages.SftpStorage)

# prepare
compressions = [
Compression.NONE]
write_commnand = shell.write_file_command(storage, TEST_READ_FILE_NAME)
(exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {write_commnand}')
assert exitcode == 0
assert info.file_exists(storage, TEST_READ_FILE_NAME)

for compression in compressions:
if compression == Compression.NONE:
continue
raise NotImplementedError()

# test
for compression in compressions:
print(f'Test compression: {compression}')
file_extension = compression_file_extension(compression)
file_extension = f'.{file_extension}' if file_extension else ''
command = shell.read_file_command(storage, file_name=f'{TEST_READ_FILE_NAME}{file_extension}', compression=compression)
assert command

(exitcode, stdout) = subprocess.getstatusoutput(command)
assert exitcode == 0
assert stdout == TEST_CONTENT


def test_write_file_command(storage: object):
assert isinstance(storage, storages.SftpStorage)

command = shell.write_file_command(storage, file_name=TEST_WRITE_FILE_NAME)
assert command

(exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {command}')
assert exitcode == 0

assert info.file_exists(storage, file_name=TEST_WRITE_FILE_NAME)

def test_delete_file_command(storage: object):
assert isinstance(storage, storages.SftpStorage)

# prepare
write_commnand = shell.write_file_command(storage, TEST_TOUCH_FILE_NAME)
(exitcode, _) = subprocess.getstatusoutput(f"echo '' | {write_commnand}")
assert exitcode == 0
assert info.file_exists(storage, TEST_TOUCH_FILE_NAME)

# test
command = shell.delete_file_command(storage, file_name=TEST_TOUCH_FILE_NAME)
assert command

(exitcode, _) = subprocess.getstatusoutput(command)
assert exitcode == 0
assert not info.file_exists(storage, TEST_TOUCH_FILE_NAME)

# test if force option works as expected
command = shell.delete_file_command(storage, file_name=TEST_TOUCH_FILE_NAME, force=True)
assert command

(exitcode, _) = subprocess.getstatusoutput(command)
assert exitcode == 0
assert not info.file_exists(storage, TEST_TOUCH_FILE_NAME)

0 comments on commit 6622f52

Please sign in to comment.