-
Notifications
You must be signed in to change notification settings - Fork 7
/
rmirro.py
executable file
·544 lines (450 loc) · 22.5 KB
/
rmirro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
#!/usr/bin/python3
import subprocess
import os
import json
import urllib.request
import uuid
import time
import argparse
import shutil
# Absolute path of the directory that contains this script
# (e.g. /some/absolute/path/rmirro)
DIR = os.path.dirname(os.path.abspath(__file__))

# Command-line interface: an optional SSH host name, a list of renderers and a verbosity flag
parser = argparse.ArgumentParser(
    prog="rmirro",
    description='Synchronize reMarkable with local directory "[name]/"',
)
parser.add_argument(
    "name",
    type=str,
    nargs="?",
    default="remarkable",
    help='SSH hostname of reMarkable reachable with "ssh [name]" without password (default: remarkable)',
)
parser.add_argument(
    "-r",
    "--renderers",
    default=["render_usb.py"],
    nargs="+",
    metavar="EX",
    help=(
        "list of one or more executables EX in this project's directory "
        'such that "EX infile outfile" renders a reMarkable document with stem infile to the PDF outfile '
        "(default: render_usb.py - using the official USB web interface renderer)"
    ),
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="print executed shell commands",
)
# TODO: --favorites-only (or by tags)
# TODO: --pull-only, --push-only, --backup, etc?
# TODO: let user exclude certain files? how would this pan out if they are suddenly included again?
# TODO: build symlink directory structure by tags?
# TODO: set --output directory
# TODO: support renderers that output e.g. SVG instead of PDF?
# Print an error message (prefixed with "ERROR: ") and terminate the program
# error: human-readable description of what went wrong
# Never returns: raises SystemExit with status code 1
def panic(error):
    # sys.exit is always available, whereas the bare exit() helper is only
    # injected by the site module (intended for interactive use)
    import sys

    print("ERROR: " + error)
    sys.exit(1)  # nonzero status code marks failure
# Run a command on the local computer
# cmd:       argument list (program followed by its arguments)
# exiterror: if not None and the command exits with a nonzero status,
#            print its captured stderr (if any) and panic with this message
# capture:   if True, stdout/stderr are captured (decoded as UTF-8) and available on the result;
#            if False, they are not captured and the result's stdout/stderr are None
# Returns the subprocess.CompletedProcess of the finished command
def pc_run(cmd, exiterror=None, capture=True):
    if args.verbose:
        print(">", subprocess.list2cmdline(cmd))  # print the command
    proc = subprocess.run(cmd, capture_output=capture, encoding="utf-8")
    if proc.returncode != 0 and exiterror is not None:
        # stderr is None when output was not captured; avoid printing a literal "None"
        if proc.stderr:
            print(proc.stderr, end="")
        panic(exiterror)
    return proc
# Interface to communicate with reMarkable and operate on its raw file system
class Remarkable:
    # Connect to the reMarkable reachable with "ssh [ssh_name]", back up its raw files,
    # download its *.metadata files and build a parent -> children lookup table
    # Panics if the host cannot be reached or does not identify itself as a reMarkable
    def __init__(self, ssh_name):
        self.ssh_name = ssh_name  # e.g. "remarkable"
        self.raw_dir_remote = "/home/root/.local/share/remarkable/xochitl"  # path to raw notes on RM
        self.processed_dir_local = os.path.abspath(f"{self.ssh_name}")  # path to rendered PDFs on PC (e.g. remarkable/)
        self.raw_dir_local = os.path.abspath(f"{self.ssh_name}_metadata")  # path to *.metadata files on PC (downloaded from RM) (e.g. remarkable_metadata/)
        self.backup_dir = os.path.abspath(f"{self.ssh_name}_backup")  # path to save a backup of all raw RM files on PC (e.g. remarkable_backup/)
        self.last_sync_path = self.processed_dir_local + "/.last_sync"  # path to a file on PC with the timestamp at which the last sync was performed

        # "ping" to check if we do indeed have a remarkable connected
        print(f"Connecting to {self.ssh_name}")
        hostname = self.run("uname -n", exiterror=f"Could not connect to {self.ssh_name} with SSH").stdout
        if hostname not in ("reMarkable\n", "imx8mm-ferrari\n"):  # covers (RM1, RM2) and (RMPP)
            panic(f"Could not verify that SSH host {self.ssh_name} is a reMarkable")
        print(f"Connected to {self.ssh_name}")

        self.backup()
        self.download_metadata()

        # RM .metadata files store only the *parent* of each file
        # keep track of every file's *children* too,
        # to effectively traverse its file tree later
        self.children_cache = {"": [], "trash": []}  # root and trash are "implicit/special", as they don't appear in filenames
        for id in self.ids():
            self.children_cache[id] = []  # initialize list for each file
        for id in self.ids():
            metadata = self.read_metadata(id)
            if args.verbose:
                print(f"Read {id = } with {metadata = }")
            parent_id = metadata["parent"]
            self.children_cache[parent_id].append(id)

    # Read the timestamp (in seconds) at which the last sync was performed
    # Returns infinity if no sync has been recorded yet
    def last_sync(self):
        if os.path.exists(self.last_sync_path):
            with open(self.last_sync_path, "r", encoding="utf-8") as file:
                return int(file.read())  # s
        return float("inf")  # never synced before (i.e. infinitely far in the future)

    # Write the timestamp t (in seconds) at which the last sync was performed
    # NOTE: the default for t is evaluated once, when this class is defined (i.e. at
    # program start), not at each call. This is kept on purpose: the recorded time
    # then never postdates the comparison of files done later in the same run.
    def write_last_sync(self, t=int(time.time())):
        with open(self.last_sync_path, "w", encoding="utf-8") as file:
            file.write(str(t) + "\n")  # s

    # Generate IDs of all RM files (the stems of the downloaded *.metadata files)
    def ids(self):
        for filename in os.listdir(self.raw_dir_local):
            stem, ext = os.path.splitext(filename)
            if ext == ".metadata":
                yield stem

    # Download all raw *.metadata files from RM with rsync (panics on failure)
    def download_metadata(self):
        print(f"Downloading metadata to {self.raw_dir_local}")
        os.makedirs(self.raw_dir_local, exist_ok=True)  # create directories if they do not exist
        # --delete-excluded deletes files on PC that are no longer on RM
        pc_run(
            [
                "rsync", "--info=progress2", "-az", "--delete-excluded",
                "--include=*.metadata", "--exclude=*",
                f"{self.ssh_name}:{self.raw_dir_remote}/", f"{self.raw_dir_local}/",
            ],
            exiterror="Failed downloading metadata",
            capture=False,
        )

    # Download all raw files from RM with rsync (panics on failure)
    def backup(self):
        print(f"Backing up raw files to {self.backup_dir}")
        os.makedirs(self.backup_dir, exist_ok=True)  # create directories if they do not exist
        # --delete deletes files on PC that are no longer on RM
        pc_run(
            [
                "rsync", "--info=progress2", "-az", "--delete",
                f"{self.ssh_name}:{self.raw_dir_remote}/", f"{self.backup_dir}/",
            ],
            exiterror="Failed backing up raw files",
            capture=False,
        )

    # Read a RM file that has been downloaded to PC and return its text
    # (decoded explicitly as UTF-8 instead of the locale-dependent default)
    def read_file(self, filename):
        with open(self.raw_dir_local + "/" + filename, "r", encoding="utf-8") as file:
            return file.read()

    # Read a RM JSON file that has been downloaded to PC and return the parsed value
    def read_json(self, filename):
        return json.loads(self.read_file(filename))

    # Read a RM .metadata file that has been downloaded to PC
    def read_metadata(self, id):
        return self.read_json(f"{id}.metadata")

    # Upload a file from the PC storage to RM (as dest_name in the raw RM directory)
    # Panics if the copy fails, so that a failed push is never reported as a successful sync
    def upload_file(self, src_path, dest_name):  # TODO: use same prefix as read methods
        pc_run(
            ["scp", src_path, f"{self.ssh_name}:{self.raw_dir_remote}/{dest_name}"],
            exiterror=f"Failed uploading {src_path} to {self.ssh_name}",
        )

    # Create a file in the PC storage and upload it to RM
    def write_file(self, filename, content):
        # write locally
        path_local = f"{self.raw_dir_local}/{filename}"
        with open(path_local, "w", encoding="utf-8") as file:
            file.write(content)
        # copy same file to remarkable
        self.upload_file(path_local, filename)

    # Create a JSON file in the PC storage and upload it to RM
    def write_json(self, filename, dict):
        self.write_file(filename, json.dumps(dict) + "\n")

    # Create a .metadata file in the PC storage and upload it to RM,
    # and register the file in the parent -> children lookup table
    def write_metadata(self, id, metadata):
        self.write_json(f"{id}.metadata", metadata)
        # update cache (parent -> child)
        siblings = self.children_cache[metadata["parent"]]
        if id not in siblings:
            siblings.append(id)
        # update cache (child -> nothing)
        if id not in self.children_cache:
            self.children_cache[id] = []

    # Create a .content file in the PC storage and upload it to RM
    def write_content(self, id, content):
        self.write_json(f"{id}.content", content)

    # Run a shell command on RM over SSH and return the finished process
    # (panics with exiterror, if given, when the command fails)
    def run(self, cmd, exiterror=None):
        return pc_run(["ssh", "-o", "ConnectTimeout=1", self.ssh_name, cmd], exiterror=exiterror)

    # Restart reMarkable's interface
    # (needed to show newly uploaded files)
    def restart(self):
        print("Restarting remarkable interface")
        self.run("systemctl restart xochitl")
# Some methods that are common to RM files and PC files
# (they rely on the children(), name() and path() methods of the concrete class)
class AbstractFile:
    # Recursively print the paths of all descendants of this file (like listing a directory)
    def list(self):
        for child in self.children():
            print(child.path())
            child.list()

    # Generate all descendants (children, children's children, ...) of this file
    # Hidden files (name starting with ".") are skipped together with everything below them
    def traverse(self):
        for child in self.children():
            # startswith() also copes with an empty name, where indexing [0] would raise IndexError
            if child.name().startswith("."):
                continue  # skip hidden files
            yield child  # child
            yield from child.traverse()  # child's children
# Represents a file stored on the reMarkable
# The IDs "" (root) and "trash" are special: they have no .metadata file,
# so every method that reads metadata must handle them before touching the disk
class RemarkableFile(AbstractFile):
    # Cached lookup of RM file IDs by their full paths (common to all instances)
    fullpath_to_id_cache = {}  # build as we go

    # Construct a RM file from its ID (default: the root directory)
    def __init__(self, id=""):
        self.is_root = id == ""
        self.is_trash = id == "trash"
        self.id = id
        if not self.trashed() and self.path() not in self.fullpath_to_id_cache:
            self.fullpath_to_id_cache[self.path()] = self.id  # cache
        # Verify this is a file XOR a directory, to make sure our logic is consistent
        assert self.is_file() != self.is_directory(), f"reMarkable file \"{self.id}\" is not a file XOR a directory"

    # Read and return metadata attributes as a dictionary
    # (not available for root and trash, which have no .metadata file)
    def metadata(self):
        return rm.read_metadata(self.id)

    # Return whether this file is trashed
    def trashed(self):
        if self.is_trash:
            return True
        if self.is_root:
            return False
        # On RM, a file can be marked as trashed even though its parent is not
        # What on earth should be done, then, to a non-trashed that is in a trashed directory?
        # Here, it is more sensible to say that a file is trashed if its parent is trashed
        return self.parent().trashed()

    # Generate this file's children
    def children(self):
        for child_id in rm.children_cache[self.id]:  # use cached parent-to-child lookup
            yield RemarkableFile(child_id)

    # Return this file's parent (directory), or None if it has none (root and trash)
    def parent(self):
        if self.is_root or self.is_trash:
            return None  # special entries: no metadata to read, no parent
        metadata = self.metadata()
        if "parent" in metadata:
            return RemarkableFile(metadata["parent"])
        raise AssertionError("file is an orphan")

    # Return this file's name (e.g. "document")
    def name(self):
        if self.is_root:
            return ""
        return self.metadata()["visibleName"]

    # Return this file's full path as it appears in the visual RM file system (e.g. notes/document.pdf)
    def path(self):
        if self.is_root:
            path = ""
        elif self.parent().is_root:
            path = self.name()  # handle separately to get "toplevelfile" instead of "/toplevelfile"
        else:
            path = self.parent().path() + "/" + self.name()
        # Any file (note, annotated PDF or EPUB) will be a PDF upon export
        if self.is_file() and not path.endswith((".pdf", ".epub")):
            path += ".pdf"  # add PDF extension to to-be-exported notes
        return path

    # Find a descendant of this file by its relative path to it
    # Returns None if there is no such descendant
    def find(self, path):
        if path == "":
            return self
        if not self.is_root:
            path = self.path() + "/" + path  # relative to full path
        if path in self.fullpath_to_id_cache:
            return RemarkableFile(self.fullpath_to_id_cache[path])  # use cache
        for file in self.traverse():
            if file.path() == path:
                return file
        return None

    # Returns whether this "file" is a directory (root and trash count as directories)
    def is_directory(self):
        return self.is_root or self.is_trash or self.metadata()["type"] == "CollectionType"

    # Returns whether this "file" is a file (i.e. document)
    def is_file(self):
        return not (self.is_root or self.is_trash) and self.metadata()["type"] == "DocumentType"

    # Returns timestamp at which file was last modified
    def last_modified(self):
        return 0 if self.is_root else int(self.metadata()["lastModified"]) // 1000  # s

    # Returns timestamp at which file was last accessed (opened)
    def last_accessed(self):
        return 0 if self.is_root else int(self.metadata()["lastOpened"]) // 1000  # s

    # Download this file to its corresponding location in the PC directory:
    # directories are created, documents are rendered to PDF by the first renderer that succeeds
    # Panics if every renderer fails
    def download(self):
        infile = rm.backup_dir + "/" + self.id  # already have raw file(s) from the backup
        outfile = rm.processed_dir_local + "/" + self.path()  # output folder/PDF location
        if self.is_directory():
            os.makedirs(outfile, exist_ok=True)  # make directories ourselves
        else:  # is file
            success = False
            for renderer in renderers:
                proc = pc_run([f"{DIR}/{renderer}", infile, outfile])  # try to render
                success = proc.returncode == 0
                if len(renderers) > 1 or args.verbose:
                    print(f"- {renderer}", "succeeded" if success else "failed")
                    print(proc.stderr, end="")
                if success:
                    break  # jump out upon first successful render
            # Double check that file was indeed downloaded
            success = success and os.path.exists(outfile)
            if not success:
                panic(f"All renderers failed to render {self.path()}")
            # Copy last access/modification time from RM to PC file system
            # (these are used to determine sync actions)
            atime = self.last_accessed()  # s
            mtime = self.last_modified()  # s
            os.utime(outfile, (atime, mtime))

    # Returns the corresponding file on PC, or None if it does not exist
    def on_computer(self):
        pc_file = ComputerFile(rm.processed_dir_local).find(self.path())
        return pc_file if pc_file.exists() else None
# Represents a file stored on the computer
class ComputerFile(AbstractFile):
    # Construct a PC file by its path
    def __init__(self, path):
        self._path = path

    # Returns whether the PC file exists
    def exists(self):
        return os.path.exists(self.path())

    # Returns the file's path
    def path(self):
        return self._path

    # Returns the file's filename without its extension
    def name(self):
        filename = os.path.basename(self.path())  # e.g. "document.pdf"
        stem, _ = os.path.splitext(filename)  # e.g. ("document", ".pdf")
        return stem  # without extension

    # Returns the file's extension
    def extension(self):
        _, ext = os.path.splitext(self.path())
        return ext

    # Returns the file's parent
    def parent(self):
        return ComputerFile(os.path.dirname(self.path()))

    # Returns whether the file is a directory
    def is_directory(self):
        return os.path.isdir(self.path())

    # Returns whether the "file" is a file (i.e. a document, i.e. not a directory)
    def is_file(self):
        return os.path.isfile(self.path())

    # Returns the file's children, if any
    def children(self):
        if not self.is_directory():
            return []
        return [ComputerFile(self.path() + "/" + name) for name in os.listdir(self.path())]

    # Returns a descendant of this file by its path relative to it
    # (the returned file is not checked for existence)
    def find(self, name):
        return ComputerFile(self.path() + "/" + name)

    # Returns the timestamp at which the file was created
    # NOTE(review): os.path.getctime is the metadata-change time on Unix and the
    # creation time on Windows - confirm this matches the intent on each platform
    def created(self):
        return int(os.path.getctime(self.path()))  # s

    # Returns the timestamp at which the file was last accessed
    def last_accessed(self):
        return int(os.path.getatime(self.path()))  # s

    # Returns the timestamp at which the file was last modified
    def last_modified(self):
        return int(os.path.getmtime(self.path()))  # s

    # Returns the path that the PC file would have on RM
    def path_on_remarkable(self):
        rm_path = os.path.relpath(self.path(), start=rm.processed_dir_local)  # path relative to base directory
        if rm_path == ".":
            rm_path = ""  # RM root
        return rm_path

    # Returns the corresponding file on RM, or None if it does not exist
    def on_remarkable(self):
        return rm_root.find(self.path_on_remarkable())

    # Upload this PC file to RM (updating the existing RM file, or creating a new one)
    # Panics if a document is neither a PDF nor an EPUB
    # TODO: could use RM web interface for uploading, if don't need to make new directories?
    # TODO: then it would not be necessary to restart the RM interface
    def upload(self):
        if self.is_file() and self.extension() not in (".pdf", ".epub"):
            panic(f"Extension of {self.path()} is not PDF or EPUB")
        rm_file = self.on_remarkable()
        if rm_file:
            # RM file already exists, so we will only update it
            id = rm_file.id
            metadata = rm_file.metadata()
        else:
            # RM file does not exist, so we have to create it from scratch
            rm_parent = self.parent().on_remarkable()
            assert rm_parent, "cannot upload file whose parent does not exist!"
            id = str(uuid.uuid4())  # create new ID
            assert id not in rm.ids(), f"{id} already exists on {rm.ssh_name}"
            # A directory keeps its full name: stripping an "extension" would rename
            # e.g. "my.notes" to "my" on RM, so it would no longer match its PC path
            if self.is_directory():
                visible_name = os.path.basename(self.path())
            else:
                visible_name = self.name()
            metadata = {
                "visibleName": visible_name,
                "parent": rm_parent.id,
                "modified": False,  # TODO: do I really need to set all these?
                "metadatamodified": False,
                "deleted": False,
                "pinned": False,
                "version": 0,
            }
        if self.is_directory():
            metadata["type"] = "CollectionType"
        else:  # is file
            metadata["type"] = "DocumentType"
            metadata["lastOpened"] = str(self.last_accessed() * 1000)  # s to ms, only files have this property
        metadata["lastModified"] = str(self.last_modified() * 1000)  # s to ms
        rm.write_metadata(id, metadata)
        rm.write_content(id, {})  # this file is required for RM to list file properly
        if metadata["type"] == "DocumentType":
            rm.upload_file(self.path(), f"{id}{self.extension()}")  # upload e.g. document.pdf in the "raw" form {id}.pdf

    # Remove (delete) this file on PC
    # (a directory is removed with os.rmdir, which only succeeds if it is empty)
    def remove(self):
        if self.is_directory():
            os.rmdir(self.path())
        else:
            os.remove(self.path())
# Decide how to synchronize one file, given its RM and PC representations
# (either of which is falsy when the file does not exist on that side)
# Returns a pair (action, reason), where action is "PULL", "PUSH", "DROP" or "SKIP"
def sync_action_and_reason(rm_file, pc_file):
    on_rm = bool(rm_file)
    on_pc = bool(pc_file)

    if on_rm and not on_pc:
        return "PULL", "only on RM"

    if on_rm and on_pc:
        # Directories that exist on both sides have nothing worth updating (their names don't change)
        if rm_file.is_file():
            rm_time = rm_file.last_modified()
            pc_time = pc_file.last_modified()
            if rm_time > pc_time:
                return "PULL", "newer on RM"
            if rm_time < pc_time:
                return "PUSH", "newer on PC"
        return "SKIP", "up-to-date"

    if on_pc:
        # Only on PC: was it removed from RM, or created on PC, since the last sync?
        # Pick the PC timestamp to compare against the last sync time
        if pc_file.is_directory():
            # A directory's modification time changes whenever its contents change,
            # whereas its creation time stays constant
            pc_time = pc_file.created()
        elif pc_file.created() > pc_file.last_modified():
            # Copying a file on PC often preserves its (old) modification time,
            # so the (new) creation time tells more
            # (only holds if the file does not exist on RM)
            pc_time = pc_file.created()
        else:
            # Normally, go by the time the file was last modified
            pc_time = pc_file.last_modified()
        added_after_sync = rm.last_sync() < pc_time
        return ("PUSH", "added on PC") if added_after_sync else ("DROP", "deleted on RM")

    return "SKIP", "up-to-date"
# Script entry point: compare RM and PC, list the planned actions, ask for confirmation, execute them
if __name__ == "__main__":
    # These module-level names are read by the functions and classes in this file
    args = parser.parse_args()
    ssh_name = args.name
    renderers = args.renderers

    rm = Remarkable(ssh_name)
    rm_root = RemarkableFile()
    pc_root = ComputerFile(rm.processed_dir_local)

    # Generate every unique (RM file, PC file) pair exactly once;
    # the side on which the file is missing is None
    def iterate_files():
        for rm_file in rm_root.traverse():
            yield (rm_file, rm_file.on_computer())
        for pc_file in pc_root.traverse():
            rm_file = pc_file.on_remarkable()
            if rm_file:
                continue  # files that are on RM were already generated by the first loop
            yield (rm_file, pc_file)

    print(f"Synchronizing PDFs with {rm.processed_dir_local}")
    print("Will use renderer(s)", " -> ".join(renderers))
    print("Comparing files and collecting commands")

    # Collect one (action, reason, path, rm_file, pc_file) command per file that needs work
    commands = {"PULL": [], "PUSH": [], "DROP": []}
    for rm_file, pc_file in iterate_files():
        action, reason = sync_action_and_reason(rm_file, pc_file)
        path = rm_file.path() if rm_file else pc_file.path_on_remarkable()
        if action == "SKIP":
            continue
        commands[action].append((action, reason, path, rm_file, pc_file))

    # Order the commands of each kind by path
    by_path = lambda command: command[2]
    pulls = sorted(commands["PULL"], key=by_path)  # pull shallow files first (creating directories before pulling their contents)
    pushes = sorted(commands["PUSH"], key=by_path)  # push shallow files first (creating directories before pushing their contents)
    drops = sorted(commands["DROP"], key=by_path, reverse=True)  # drop deep files first (deleting directories' contents before themselves)
    npull, npush, ndrop = len(pulls), len(pushes), len(drops)
    commands = pulls + pushes + drops  # one list: pull first, then push, then drop

    # Show the plan and ask before touching anything
    total = len(commands)
    for number, (action, reason, path, rm_file, pc_file) in enumerate(commands, start=1):
        suffix = f" ({reason})" if args.verbose else ""
        print(f"? ({number}/{total}) {action}: {path}" + suffix)
    if not commands:
        print("Did nothing (everything was up-to-date)")
        exit()
    answer = input(f"Pull {npull}, push {npush} and drop {ndrop} files (y/n)? ")
    if answer != "y":  # accept nothing but a resounding yes
        print("Aborted (no changes have been made)")
        exit()
    print(f"Pulling {npull}, pushing {npush} and dropping {ndrop} files")

    # Carry out the plan
    for number, (action, reason, path, rm_file, pc_file) in enumerate(commands, start=1):
        print(f"! ({number}/{total}) {action}: {path}")
        if action == "PULL":
            rm_file.download()
        elif action == "PUSH":
            pc_file.upload()
        elif action == "DROP":
            pc_file.remove()

    rm.write_last_sync()

    # RM interface must be restarted to show newly added files
    if npush > 0:
        rm.restart()
    print(f"Pulled {npull}, pushed {npush} and dropped {ndrop} files")