Skip to content

Commit 6622f52

Browse files
authored
Add support for SFTP (#10)
* add sftp storage * update readme * add unit test sample config * clean unit test * add sftp documentation, improve gcs documentation * hide progress bar for write_file_command + fix raise ValueError
1 parent 2e9e3ab commit 6622f52

File tree

11 files changed

+302
-1
lines changed

11 files changed

+302
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,10 @@ print(mara_storage.shell.delete_file_command('data', file_name='hello-world.txt'
7676

7777
The following **command line clients** are used to access the various storages:
7878

79-
| Database | Client binary | Comments |
79+
| Storage | Client binary | Comments |
8080
| --- | --- | --- |
8181
| Local storage | unix shell | Included in standard distributions. |
82+
| SFTP storage | `sftp`, `curl` | |
8283
| Google Cloud Storage | `gsutil` | From [https://cloud.google.com/storage/docs/gsutil_install](https://cloud.google.com/storage/docs/gsutil_install). |
8384

8485
 

docs/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ This section focuses on the supported storages.
3030

3131
storages-overview
3232
storages/local
33+
storages/sftp
3334
storages/gcs
3435

3536

docs/storages/gcs.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ Accessing a Google Cloud Storage (GCS) with the shell tool `gsutil`.
66
Installation
77
------------
88

9+
Use extras `google-cloud-storage` to install all required packages.
10+
11+
12+
.. code-block:: shell
13+
14+
$ pip install mara-storage[google-cloud-storage]
15+
916
You need to install `gsutil`. Take a look at `Install gsutil <https://cloud.google.com/storage/docs/gsutil_install>`_.
1017

1118

docs/storages/sftp.rst

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
SFTP
2+
====================
3+
4+
Accessing an SFTP drive.
5+
6+
Installation
7+
------------
8+
9+
Use extras `sftp` to install all required packages.
10+
11+
12+
.. code-block:: shell
13+
14+
$ pip install mara-storage[sftp]
15+
16+
17+
Configuration examples
18+
----------------------
19+
20+
.. tabs::
21+
22+
.. group-tab:: User and password login
23+
24+
.. code-block:: python
25+
26+
import mara_storage.storages
27+
mara_storage.config.storages = lambda: {
28+
'data': mara_storage.storages.SftpStorage(
29+
host="<your_sftp_host>",
30+
user="<your_login_user>",
31+
password="<your_secure_user_password>",
32+
33+
# optional:
34+
insecure = True # allow insecure SSL connections and transfers
35+
)
}
36+
37+
.. group-tab:: Private key file
38+
39+
.. code-block:: python
40+
41+
import mara_storage.storages
42+
mara_storage.config.storages = lambda: {
43+
'data': mara_storage.storages.SftpStorage(
44+
host="<your_sftp_host>",
45+
user="<your_login_user>",
46+
identity_file="~/.ssh/id_rsa", # path to your private key file
47+
public_identity_file="~/.ssh/id_rsa.pub", # path to your public key file
48+
49+
# optional:
50+
insecure = True # allow insecure SSL connections and transfers
51+
)
}
52+
53+
|
54+
55+
|
56+
57+
API reference
58+
-------------
59+
60+
This section contains the storage-specific API of the module.
61+
62+
63+
Configuration
64+
~~~~~~~~~~~~~
65+
66+
.. module:: mara_storage.storages
67+
:noindex:
68+
69+
.. autoclass:: SftpStorage
70+
:special-members: __init__
71+
:inherited-members:
72+
:members:

mara_storage/info.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ def __(storage: storages.LocalStorage, file_name: str):
2323
return (storage.base_path.absolute() / file_name).is_file()
2424

2525

26+
@file_exists.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str):
    """
    Check whether a file exists on an SFTP storage

    Args:
        storage: the SFTP storage to look at
        file_name: path of the file on the SFTP server

    Returns:
        The result of the `exists` call on the SFTP connection
    """
    # imported lazily so that the module is only loaded when an SFTP storage is used
    from . import sftp as sftp_module

    with sftp_module.connection(storage) as sftp_connection:
        file_found = sftp_connection.exists(file_name)
    return file_found
31+
32+
2633
@file_exists.register(storages.GoogleCloudStorage)
2734
def __(storage: storages.GoogleCloudStorage, file_name: str):
2835
import subprocess

mara_storage/sftp.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import pysftp
2+
3+
from mara_storage import storages
4+
5+
6+
def connection(storage: storages.SftpStorage):
    """
    Open a pysftp connection to the SFTP server described by a storage

    Args:
        storage: the SFTP storage holding host, port and login information

    Returns:
        A `pysftp.Connection` for the configured host
    """
    options = {
        'host': storage.host,
        'port': storage.port if storage.port else 22,
        'username': storage.user,
        'password': storage.password,
    }

    # Honour the configured private key for key based logins. When a password
    # is configured as well, password authentication is kept as before.
    if storage.identity_file and not storage.password:
        options['private_key'] = storage.identity_file

    # `insecure` means that the host key must not be verified; without this the
    # flag would only affect the generated shell commands.
    if storage.insecure:
        connection_options = pysftp.CnOpts()
        connection_options.hostkeys = None
        options['cnopts'] = connection_options

    return pysftp.Connection(**options)

mara_storage/shell.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,22 @@ def __(storage: storages.LocalStorage, file_name: str, compression: Compression
3636
return f'{uncompressor(compression)} '+shlex.quote(str( (storage.base_path / file_name).absolute() ))
3737

3838

39+
@read_file_command.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
    """
    Build a `curl` shell command that reads a file from an SFTP storage

    Args:
        storage: the SFTP storage to read from
        file_name: path of the file on the SFTP server
        compression: compression of the file; only `Compression.NONE` is supported

    Returns:
        The shell command as a string

    Raises:
        ValueError: when a compression other than `Compression.NONE` is requested
    """
    if compression not in [Compression.NONE]:
        raise ValueError(f'Only compression NONE is supported from storage type "{storage.__class__.__name__}"')

    # quote "user:password" as a single shell word so that special characters
    # in the credentials can neither break nor extend the command
    credentials = ''
    if storage.user:
        credentials = ' -u ' + shlex.quote(f'{storage.user}:{storage.password or ""}')

    # the key file paths are deliberately left unquoted so that the shell
    # still expands a leading `~`
    return ('curl -s'
            + (' -k' if storage.insecure else '')
            + credentials
            + (f' --key {storage.identity_file}' if storage.identity_file else '')
            + (f' --pubkey {storage.public_identity_file}' if storage.public_identity_file else '')
            + f' sftp://{storage.host}'
            + (f':{storage.port}' if storage.port else '')
            + f'/{shlex.quote(file_name)}')
53+
54+
3955
@read_file_command.register(storages.GoogleCloudStorage)
4056
def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
4157
return ('gsutil '
@@ -90,6 +106,23 @@ def __(storage: storages.LocalStorage, file_name: str, compression: Compression
90106
return 'cat - > ' + shlex.quote(str( full_path ))
91107

92108

109+
@write_file_command.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
    """
    Build a `curl` shell command that writes stdin to a file on an SFTP storage

    Args:
        storage: the SFTP storage to write to
        file_name: path of the target file on the SFTP server
        compression: compression of the file; only `Compression.NONE` is supported

    Returns:
        The shell command as a string

    Raises:
        ValueError: when a compression other than `Compression.NONE` is requested
    """
    if compression not in [Compression.NONE]:
        raise ValueError(f'Only compression NONE is supported from storage type "{storage.__class__.__name__}"')

    # The password is only meaningful as part of `-u user:password`; it is left
    # out when no user is configured instead of being glued to `curl -s`.
    # Quoting keeps special characters in the credentials from breaking the command.
    credentials = ''
    if storage.user:
        credentials = ' -u ' + shlex.quote(f'{storage.user}:{storage.password or ""}')

    # the key file paths are deliberately left unquoted so that the shell
    # still expands a leading `~`
    return ('curl -s'
            + (' -k' if storage.insecure else '')
            + credentials
            + (f' --key {storage.identity_file}' if storage.identity_file else '')
            + (f' --pubkey {storage.public_identity_file}' if storage.public_identity_file else '')
            + ' -T'
            + ' -'  # source
            + f' sftp://{storage.host}'  # destination
            + (f':{storage.port}' if storage.port else '')
            + f'/{shlex.quote(file_name)}')
124+
125+
93126
@write_file_command.register(storages.GoogleCloudStorage)
94127
def __(storage: storages.GoogleCloudStorage, file_name: str, compression: Compression = Compression.NONE) -> str:
95128
if compression not in [Compression.NONE, Compression.GZIP]:
@@ -137,6 +170,22 @@ def __(storage: storages.LocalStorage, file_name: str, force: bool = True) -> st
137170
+ shlex.quote(str( (storage.base_path / file_name).absolute() )))
138171

139172

173+
@delete_file_command.register(storages.SftpStorage)
def __(storage: storages.SftpStorage, file_name: str, force: bool = True) -> str:
    """
    Build an `sftp` shell command that deletes a file on an SFTP storage

    Args:
        storage: the SFTP storage holding the file
        file_name: path of the file on the SFTP server
        force: must be True; other values are not supported for this storage type

    Returns:
        The shell command as a string

    Raises:
        ValueError: when `force` is not True
    """
    if not force:
        raise ValueError(f'Only force=True is supported from storage type "{storage.__class__.__name__}"')

    destination = (f'{storage.user}@' if storage.user else '') + storage.host

    # All options go before the destination. The port is passed with `-P`
    # because `host:<something>` is read by sftp as a remote path.
    # The identity file is left unquoted so that the shell expands a leading `~`.
    return ((f'sshpass -p {shlex.quote(storage.password)} ' if storage.password else '')
            + 'sftp'
            + (' -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' if storage.insecure else '')
            + (f' -P {storage.port}' if storage.port else '')
            + (f' -i {storage.identity_file}' if storage.identity_file else '')
            + f' {destination}'
            + f' << EOF\nrm {shlex.quote(file_name)}\nquit\nEOF')
187+
188+
140189
@delete_file_command.register(storages.GoogleCloudStorage)
141190
def __(storage: storages.GoogleCloudStorage, file_name: str, force: bool = True) -> str:
142191
return ('gsutil '

mara_storage/storages.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,30 @@ def __init__(self, base_path: pathlib.Path):
3232
self.base_path = base_path
3333

3434

35+
class SftpStorage(Storage):
    def __init__(self, host: str, port: int = None, user: str = None, password: str = None,
                 insecure: bool = False, identity_file: str = None, public_identity_file: str = None):
        """
        Configuration of a storage located on an SFTP server

        Args:
            host: name of the SFTP host
            port: TCP port of the SFTP server
            user: name of the login user
            password: password of the login user
            insecure: when True, the known_hosts file is not checked
            identity_file: path to the private key file used for private/public key authentication
            public_identity_file: path to the public key file used for private/public key authentication
        """
        # where to connect to
        self.host, self.port = host, port

        # how to log in
        self.user, self.password = user, password
        self.identity_file = identity_file
        self.public_identity_file = public_identity_file

        # whether host verification is skipped
        self.insecure = insecure
57+
58+
3559
class GoogleCloudStorage(Storage):
3660
def __init__(self, bucket_name: str, project_id: str = None, location: str = None,
3761
service_account_file: str = None, service_account_info: dict = None):

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,5 @@ install_requires =
2727

2828
[options.extras_require]
2929
test = pytest
30+
sftp = pysftp
3031
google-cloud-storage = google-cloud-storage; google-oauth

tests/local_config.py.example

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
# the SFTP storage config used to perform the test
2+
SFTP_HOST = None
3+
SFTP_PORT = None
4+
SFTP_USERNAME = None
5+
SFTP_PASSWORD = None
6+
SFTP_INSECURE = False
7+
SFTP_IDENTITY_FILE = None
8+
SFTP_PUBLIC_IDENTITY_FILE = None
9+
10+
111
# the Google Cloud project id used to perform the test
212
GCS_PROJECT_ID = ''
313

tests/test_sftp.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import datetime
2+
import pathlib
3+
import pytest
4+
import subprocess
5+
6+
from mara_storage.compression import Compression, compressor, file_extension as compression_file_extension
7+
from mara_storage.client import StorageClient
8+
from mara_storage import storages, info, shell, manage
9+
10+
11+
from .local_config import SFTP_HOST, SFTP_PORT, SFTP_USERNAME, SFTP_PASSWORD, SFTP_INSECURE, SFTP_IDENTITY_FILE, SFTP_PUBLIC_IDENTITY_FILE
12+
13+
TEST_TOUCH_FILE_NAME = 'empty-file.txt'
14+
TEST_FILE_NOT_EXISTS_FILE_NAME = 'file-does-not-exist.txt'
15+
TEST_READ_FILE_NAME = 'read_test.txt'
16+
TEST_WRITE_FILE_NAME = 'write_test.txt'
17+
TEST_CONTENT = 'THIS IS A TEST CONTENT'
18+
19+
if not SFTP_HOST:
20+
pytest.skip("skipping SFTP tests: variable SFTP_HOST not set in tests/local_config.py", allow_module_level=True)
21+
22+
23+
@pytest.fixture
def storage():
    """Create the SFTP storage under test from the values in tests/local_config.py."""
    settings = {
        'host': SFTP_HOST,
        'port': SFTP_PORT,
        'user': SFTP_USERNAME,
        'password': SFTP_PASSWORD,
        'insecure': SFTP_INSECURE,
        'identity_file': SFTP_IDENTITY_FILE,
        'public_identity_file': SFTP_PUBLIC_IDENTITY_FILE,
    }
    return storages.SftpStorage(**settings)
28+
29+
30+
@pytest.fixture(autouse=True)
def test_before_and_after(storage: object):
    """Wrap every test of this module; currently neither setup nor teardown is performed."""
    # setup (disabled): manage.ensure_storage(storage)
    yield
    # teardown (disabled): manage.drop_storage(storage, force=True)
35+
36+
37+
def test_file_exists(storage: object):
    """Check `info.file_exists` for an uploaded file and for a file that was never created."""
    assert isinstance(storage, storages.SftpStorage)

    # prepare: upload a file through the generated write command
    upload_command = shell.write_file_command(storage, TEST_TOUCH_FILE_NAME)
    upload_status, _ = subprocess.getstatusoutput(f"echo '' | {upload_command}")
    assert upload_status == 0
    assert info.file_exists(storage, TEST_TOUCH_FILE_NAME)

    # test
    assert info.file_exists(storage, file_name=TEST_TOUCH_FILE_NAME)
    assert not info.file_exists(storage, file_name=TEST_FILE_NOT_EXISTS_FILE_NAME)

    # clean up test: remove the uploaded file again
    removal_command = shell.delete_file_command(storage, TEST_TOUCH_FILE_NAME)
    removal_status, _ = subprocess.getstatusoutput(removal_command)
    assert removal_status == 0
54+
55+
56+
def test_read_file_command(storage: object):
    """Upload a file with known content and read it back through `shell.read_file_command`."""
    assert isinstance(storage, storages.SftpStorage)

    # prepare
    compressions = [Compression.NONE]
    upload_command = shell.write_file_command(storage, TEST_READ_FILE_NAME)
    upload_status, _ = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {upload_command}')
    assert upload_status == 0
    assert info.file_exists(storage, TEST_READ_FILE_NAME)

    # preparing compressed variants of the file is not implemented
    for compression in compressions:
        if compression != Compression.NONE:
            raise NotImplementedError()

    # test
    for compression in compressions:
        print(f'Test compression: {compression}')
        extension = compression_file_extension(compression)
        suffix = f'.{extension}' if extension else ''
        command = shell.read_file_command(storage, file_name=f'{TEST_READ_FILE_NAME}{suffix}',
                                          compression=compression)
        assert command

        read_status, output = subprocess.getstatusoutput(command)
        assert read_status == 0
        assert output == TEST_CONTENT
83+
84+
85+
def test_write_file_command(storage: object):
    """Write a file through `shell.write_file_command` and check that it exists afterwards."""
    assert isinstance(storage, storages.SftpStorage)

    command = shell.write_file_command(storage, file_name=TEST_WRITE_FILE_NAME)
    assert command

    (exitcode, _) = subprocess.getstatusoutput(f'echo "{TEST_CONTENT}" | {command}')
    assert exitcode == 0

    assert info.file_exists(storage, file_name=TEST_WRITE_FILE_NAME)

    # clean up test: do not leave the written file behind on the server
    delete_command = shell.delete_file_command(storage, TEST_WRITE_FILE_NAME)
    (exitcode, _) = subprocess.getstatusoutput(delete_command)
    assert exitcode == 0
95+
96+
def test_delete_file_command(storage: object):
    """Delete an uploaded file through `shell.delete_file_command`, then repeat with explicit force=True."""
    assert isinstance(storage, storages.SftpStorage)

    # prepare: upload the file that is going to be deleted
    upload_command = shell.write_file_command(storage, TEST_TOUCH_FILE_NAME)
    upload_status, _ = subprocess.getstatusoutput(f"echo '' | {upload_command}")
    assert upload_status == 0
    assert info.file_exists(storage, TEST_TOUCH_FILE_NAME)

    # test
    command = shell.delete_file_command(storage, file_name=TEST_TOUCH_FILE_NAME)
    assert command

    delete_status, _ = subprocess.getstatusoutput(command)
    assert delete_status == 0
    assert not info.file_exists(storage, TEST_TOUCH_FILE_NAME)

    # test if force option works as expected
    command = shell.delete_file_command(storage, file_name=TEST_TOUCH_FILE_NAME, force=True)
    assert command

    delete_status, _ = subprocess.getstatusoutput(command)
    assert delete_status == 0
    assert not info.file_exists(storage, TEST_TOUCH_FILE_NAME)

0 commit comments

Comments
 (0)