refactor/rewrite the cli

This commit is contained in:
Christian Zangl 2023-12-21 19:29:27 +01:00
parent 806ceb1706
commit a03d5b4267
No known key found for this signature in database
GPG Key ID: 6D468AC36E2A4B3D
18 changed files with 695 additions and 350 deletions

View File

@ -7,6 +7,3 @@ name = "pypi"
blake3 = ">=0.3.4"
[dev-packages]
[requires]
python_version = "3.11"

6
Pipfile.lock generated
View File

@ -1,12 +1,10 @@
{
"_meta": {
"hash": {
"sha256": "7d9212dcb9d58ac73c0d12a14a5102e0d3df649c4e95913e5bc8846bdb8d710a"
"sha256": "e8cd87a62cdc293b2cab0606525f4eb3bdfeb4f0373a64a5be10685b235d1078"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.11"
},
"requires": {},
"sources": [
{
"name": "pypi",

View File

@ -22,16 +22,20 @@ Some cloud providers re-encode your videos or compress your images to save space
## Installation
```
pip install --user chkbit
```
Or in its own environment:
The easiest way to install Python CLI tools is with [pipx](https://pipx.pypa.io/latest/installation/).
```
pipx install chkbit
```
You can also use pip:
```
pip install --user chkbit
```
**NOTE** version 3 now uses the blake3 hash algorithm by default as it is not only better but also faster than md5.
## Usage
Run `chkbit -u PATH` to create/update the chkbit index.
@ -39,13 +43,13 @@ Run `chkbit -u PATH` to create/update the chkbit index.
chkbit will
- create a `.chkbit` index in every subdirectory of the path it was given.
- update the index with md5/sha512/blake3 hashes for every file.
- update the index with blake3 (see --algo) hashes for every file.
- report damage for files that failed the integrity check since the last run (check the exit status).
Run `chkbit PATH` to verify only.
```
usage: chkbit [-h] [-u] [--algo ALGO] [-f] [-i] [-s] [-w N] [-q] [-v] [PATH ...]
usage: chkbit [-h] [-u] [--algo ALGO] [-f] [-s] [--index-name NAME] [--ignore-name NAME] [-w N] [--plain] [-q] [-v] [PATH ...]
Checks the data integrity of your files. See https://github.com/laktak/chkbit-py
@ -54,12 +58,14 @@ positional arguments:
options:
-h, --help show this help message and exit
-u, --update update indices (without this chkbit will only verify files)
--algo ALGO hash algorithm: md5, sha512, blake3
-u, --update update indices (without this chkbit will verify files in readonly mode)
--algo ALGO hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items
-i, --verify-index verify files in the index only (will not report new files)
-s, --skip-symlinks do not follow symlinks
-w N, --workers N number of workers to use, default=5
--index-name NAME filename where chkbit stores its hashes (default: .chkbit)
--ignore-name NAME filename that chkbit reads its ignore list from (default: .chkbitignore)
-w N, --workers N number of workers to use (default: 5)
--plain show plain status instead of being fancy
-q, --quiet quiet, don't show progress/information
-v, --verbose verbose output
@ -74,7 +80,7 @@ Status codes:
EXC: internal exception
```
chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster (requires about 128kB of memory per worker).
chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster if the IO throughput can also keep up.
## Repair
@ -123,7 +129,7 @@ When you run it again it first checks the modification time,
### I wish to use a stronger hash algorithm
chkbit now supports sha512 and blake3. You can specify it with `--algo sha512` or `--algo blake3`.
chkbit now uses blake3 by default. You can also specify it with `--algo sha512` or `--algo md5`.
Note that existing index files will use the hash that they were created with. If you wish to update all hashes you need to delete your existing indexes first.
@ -145,19 +151,30 @@ Create test and set the modified time:
```
$ echo foo1 > test; touch -t 201501010000 test
$ chkbit -u .
add ./test
Processed 1 file(s).
Indices were updated.
new ./test
Processed 1 file.
- 192.31 files/second
- 0.00 MB/second
- 1 directory was updated
- 1 file hash was added
- 0 file hashes were updated
```
`add` indicates the file was added.
`new` indicates a new file was added.
Now update test with a new modified:
```
$ echo foo2 > test; touch -t 201501010001 test # update test & modified
$ chkbit -u .
upd ./test
Processed 1 file(s).
Indices were updated.
Processed 1 file.
- 191.61 files/second
- 0.00 MB/second
- 1 directory was updated
- 0 file hashes were added
- 1 file hash was updated
```
`upd` indicates the file was updated.
@ -167,10 +184,13 @@ Now update test with the same modified to simulate damage:
$ echo foo3 > test; touch -t 201501010001 test
$ chkbit -u .
DMG ./test
Processed 0 file(s).
Processed 1 file.
- 173.93 files/second
- 0.00 MB/second
chkbit detected damage in these files:
./test
error: detected 1 file(s) with damage!
error: detected 1 file with damage!
```
`DMG` indicates damage.

View File

@ -1,9 +0,0 @@
#!/usr/bin/env python
import sys
if sys.version_info < (3, 6):
sys.exit("Python < 3.6 is not supported")
from chkbit.main import main
main()

View File

@ -1,4 +1,5 @@
from chkbit.status import Status
from chkbit.context import Context
from chkbit.hashfile import hashfile, hashtext
from chkbit.index import Index, Stat
from chkbit.indexthread import IndexThread
from chkbit.index import Index
from chkbit.index_thread import IndexThread

View File

@ -1,10 +1,35 @@
import queue
from chkbit import Status
class Context:
def __init__(self, verify_index, update, force, hash_algo, skip_symlinks):
self.verify_index = verify_index
self.update = update
def __init__(
self,
*,
num_workers=5,
force=False,
update=False,
hash_algo="blake3",
skip_symlinks=False,
index_filename=".chkbit",
ignore_filename=".chkbitignore",
):
self.num_workers = num_workers
self.force = force
self.update = update
self.hash_algo = hash_algo
self.skip_symlinks = skip_symlinks
self.index_filename = index_filename
self.ignore_filename = ignore_filename
self.result_queue = queue.Queue()
self.hit_queue = queue.Queue()
if hash_algo not in ["md5", "sha512", "blake3"]:
raise Exception(f"{hash_algo} is unknown.")
def log(self, stat: Status, path: str):
self.result_queue.put((0, stat, path))
def hit(self, *, cfiles: int = 0, cbytes: int = 0):
self.result_queue.put((1, cfiles, cbytes))

View File

@ -1,11 +1,12 @@
import hashlib
from typing import Callable
BLOCKSIZE = 2**10 * 128 # kb
def hashfile(path, hash_algo=None):
if not hash_algo or hash_algo == "md5":
def hashfile(path: str, hash_algo: str, *, hit: Callable[[str], None]):
if hash_algo == "md5":
h = hashlib.md5()
elif hash_algo == "sha512":
h = hashlib.sha512()
@ -14,14 +15,17 @@ def hashfile(path, hash_algo=None):
h = blake3()
else:
raise Exception(f"{hash_algo} is unknown.")
raise Exception(f"algo '{hash_algo}' is unknown.")
with open(path, "rb") as f:
while True:
buf = f.read(BLOCKSIZE)
if len(buf) <= 0:
l = len(buf)
if l <= 0:
break
h.update(buf)
if hit:
hit(l)
return h.hexdigest()

View File

@ -3,29 +3,14 @@ import os
import subprocess
import sys
import json
from enum import Enum
from chkbit import hashfile, hashtext
from chkbit import hashfile, hashtext, Status
VERSION = 2 # index version
INDEX = ".chkbit"
IGNORE = ".chkbitignore"
class Stat(Enum):
ERR_DMG = "DMG"
ERR_BITROT = "DMG" # legacy
ERR_IDX = "EIX"
WARN_OLD = "old"
NEW = "new"
UPDATE = "upd"
OK = "ok "
SKIP = "skp"
INTERNALEXCEPTION = "EXC"
FLAG_MOD = "fmod"
class Index:
def __init__(self, path, files, *, log=None):
def __init__(self, context, path, files):
self.context = context
self.path = path
self.files = files
self.old = {}
@ -34,15 +19,14 @@ class Index:
self.load_ignore()
self.updates = []
self.modified = True
self.log = log
@property
def ignore_file(self):
return os.path.join(self.path, IGNORE)
def ignore_filepath(self):
return os.path.join(self.path, self.context.ignore_filename)
@property
def idx_file(self):
return os.path.join(self.path, INDEX)
def index_filepath(self):
return os.path.join(self.path, self.context.index_filename)
def should_ignore(self, name):
for ignore in self.ignore:
@ -53,23 +37,23 @@ class Index:
def _setmod(self):
self.modified = True
def _log(self, stat, name):
if self.log:
self.log(stat, os.path.join(self.path, name))
def _log(self, stat: Status, name: str):
self.context.log(stat, os.path.join(self.path, name))
# calc new hashes for this index
def update(self, context):
def update(self):
for name in self.files:
if self.should_ignore(name):
self._log(Stat.SKIP, name)
self._log(Status.SKIP, name)
continue
a = context.hash_algo
a = self.context.hash_algo
# check previously used hash
if name in self.old:
old = self.old[name]
if "md5" in old:
a = "md5" # legacy structure
# legacy structure
a = "md5"
self.old[name] = {"mod": old["mod"], "a": a, "h": old["md5"]}
elif "a" in old:
a = old["a"]
@ -79,7 +63,7 @@ class Index:
def check_fix(self, force):
for name in self.new.keys():
if not name in self.old:
self._log(Stat.NEW, name)
self._log(Status.NEW, name)
self._setmod()
continue
@ -89,14 +73,14 @@ class Index:
bmod = b["mod"]
if a["h"] == b["h"]:
# ok, if the content stays the same the mod time does not matter
self._log(Stat.OK, name)
self._log(Status.OK, name)
if amod != bmod:
self._setmod()
continue
if amod == bmod:
# damage detected
self._log(Stat.ERR_DMG, name)
self._log(Status.ERR_DMG, name)
# replace with old so we don't lose the information on the next run
# unless force is set
if not force:
@ -105,17 +89,23 @@ class Index:
self._setmod()
elif amod < bmod:
# ok, the file was updated
self._log(Stat.UPDATE, name)
self._log(Status.UPDATE, name)
self._setmod()
elif amod > bmod:
self._log(Stat.WARN_OLD, name)
self._log(Status.WARN_OLD, name)
self._setmod()
def _calc_file(self, name, a):
path = os.path.join(self.path, name)
info = os.stat(path)
mtime = int(info.st_mtime * 1000)
return {"mod": mtime, "a": a, "h": hashfile(path, a)}
res = {
"mod": mtime,
"a": a,
"h": hashfile(path, a, hit=lambda l: self.context.hit(cbytes=l)),
}
self.context.hit(cfiles=1)
return res
def save(self):
if self.modified:
@ -123,7 +113,7 @@ class Index:
text = json.dumps(self.new, separators=(",", ":"))
data["idx_hash"] = hashtext(text)
with open(self.idx_file, "w", encoding="utf-8") as f:
with open(self.index_filepath, "w", encoding="utf-8") as f:
json.dump(data, f, separators=(",", ":"))
self.modified = False
return True
@ -131,10 +121,10 @@ class Index:
return False
def load(self):
if not os.path.exists(self.idx_file):
if not os.path.exists(self.index_filepath):
return False
self.modified = False
with open(self.idx_file, "r", encoding="utf-8") as f:
with open(self.index_filepath, "r", encoding="utf-8") as f:
data = json.load(f)
if "data" in data:
# extract old format from js version
@ -149,13 +139,13 @@ class Index:
text = json.dumps(self.old, separators=(",", ":"))
if data.get("idx_hash") != hashtext(text):
self.modified = True
self._log(Stat.ERR_IDX, self.idx_file)
self._log(Status.ERR_IDX, self.index_filepath)
return True
def load_ignore(self):
if not os.path.exists(self.ignore_file):
if not os.path.exists(self.ignore_filepath):
return
with open(self.ignore_file, "r", encoding="utf-8") as f:
with open(self.ignore_filepath, "r", encoding="utf-8") as f:
text = f.read()
self.ignore = list(

69
chkbit/index_thread.py Normal file
View File

@ -0,0 +1,69 @@
import os
import sys
import time
import threading
from chkbit import Index, Status
class IndexThread:
    """Worker that takes directories from a shared queue and verifies or
    updates the chkbit index for each one, feeding subdirectories back."""

    def __init__(self, thread_no, context, input_queue):
        self.thread_no = thread_no
        self.context = context
        self.update = context.update
        self.input_queue = input_queue
        # daemon thread so it never keeps the process alive on exit
        self.t = threading.Thread(target=self._run)
        self.t.daemon = True
        self.t.start()

    def _process_root(self, root):
        """Index a single directory and queue its subdirectories."""
        file_names = []
        dir_names = []

        # partition the directory entries, skipping hidden names
        for entry in os.listdir(path=root):
            if entry.startswith("."):
                continue
            full = os.path.join(root, entry)
            if os.path.isfile(full):
                file_names.append(entry)
            elif os.path.isdir(full):
                if not (self.context.skip_symlinks and os.path.islink(full)):
                    dir_names.append(entry)

        # load the stored index, hash the current files, then compare
        index = Index(self.context, root, file_names)
        index.load()
        index.update()
        index.check_fix(self.context.force)

        # persist only when running in update mode and something changed
        if self.update and index.save():
            self.context.log(Status.UPDATE_INDEX, "")

        # hand the subdirectories back to the worker pool
        for entry in dir_names:
            if index.should_ignore(entry):
                self.context.log(Status.SKIP, entry + "/")
            else:
                self.input_queue.put(os.path.join(root, entry))

    def _run(self):
        # a None sentinel tells the worker to stop (no task_done for it)
        while (root := self.input_queue.get()) is not None:
            try:
                self._process_root(root)
            except Exception as e:
                self.context.log(Status.INTERNALEXCEPTION, f"{root}: {e}")
            finally:
                self.input_queue.task_done()

    def join(self):
        self.t.join()

View File

@ -1,71 +0,0 @@
import os
import sys
import time
import threading
from chkbit import Index, Stat
class IndexThread:
def __init__(self, idx, context, res_queue, todo_queue):
self.idx = idx
self.verify_index_only = context.verify_index
self.update = context.update and not self.verify_index_only
self.context = context
self.todo_queue = todo_queue
self.res_queue = res_queue
self.t = threading.Thread(target=self.run)
self.t.daemon = True
self.t.start()
def _log(self, stat, path):
if not self.verify_index_only or stat != Stat.NEW:
self.res_queue.put((self.idx, stat, path))
def _process_root(self, parent):
files = []
dirs = []
# load files and subdirs
for name in os.listdir(path=parent):
path = os.path.join(parent, name)
if name[0] == ".":
continue
if os.path.isdir(path):
if self.context.skip_symlinks and os.path.islink(path):
pass
else:
dirs.append(name)
elif os.path.isfile(path):
files.append(name)
# load index
e = Index(parent, files, log=self._log)
if e.load() or not self.verify_index_only:
# calc the new hashes
e.update(self.context)
# compare
e.check_fix(self.context.force)
# save if update is set
if self.update:
if e.save():
self._log(Stat.FLAG_MOD, "")
# process subdirs
for name in dirs:
if not e.should_ignore(name):
self.todo_queue.put(os.path.join(parent, name))
else:
self._log(Stat.SKIP, name + "/")
def run(self):
while True:
parent = self.todo_queue.get()
if parent is None:
break
try:
self._process_root(parent)
except Exception as e:
self._log(Stat.INTERNALEXCEPTION, f"{parent}: {e}")
self.todo_queue.task_done()

View File

@ -1,189 +0,0 @@
import os
import sys
import time
import argparse
import queue
import threading
from chkbit import Context, IndexThread, Stat
STATUS_CODES = """
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
skp: skipped (see .chkbitignore)
EXC: internal exception
"""
class Main:
def __init__(self):
self.stdscr = None
self.dmg_list = []
self.err_list = []
self.modified = False
self.verbose = False
self.total = 0
self._parse_args()
def _log(self, idx, stat, path):
if stat == Stat.FLAG_MOD:
self.modified = True
else:
if stat == Stat.ERR_DMG:
self.dmg_list.append(path)
elif stat == Stat.INTERNALEXCEPTION:
self.err_list.append(path)
elif stat in [Stat.OK, Stat.UPDATE, Stat.NEW]:
self.total += 1
if self.verbose or not stat in [Stat.OK, Stat.SKIP]:
print(stat.value, path)
if not self.quiet and sys.stdout.isatty():
print(self.total, end="\r")
def _parse_args(self):
parser = argparse.ArgumentParser(
prog="chkbit",
description="Checks the data integrity of your files. See https://github.com/laktak/chkbit-py",
epilog=STATUS_CODES,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument(
"paths", metavar="PATH", type=str, nargs="*", help="directories to check"
)
parser.add_argument(
"-u",
"--update",
action="store_true",
help="update indices (without this chkbit will only verify files)",
)
parser.add_argument(
"--algo",
type=str,
default="md5",
help="hash algorithm: md5, sha512, blake3",
)
parser.add_argument(
"-f", "--force", action="store_true", help="force update of damaged items"
)
parser.add_argument(
"-i",
"--verify-index",
action="store_true",
help="verify files in the index only (will not report new files)",
)
parser.add_argument(
"-s", "--skip-symlinks", action="store_true", help="do not follow symlinks"
)
parser.add_argument(
"-w",
"--workers",
metavar="N",
action="store",
type=int,
default=5,
help="number of workers to use, default=5",
)
parser.add_argument(
"-q",
"--quiet",
action="store_true",
help="quiet, don't show progress/information",
)
parser.add_argument(
"-v", "--verbose", action="store_true", help="verbose output"
)
self.args = parser.parse_args()
self.verbose = self.args.verbose
self.quiet = self.args.quiet
if not self.args.paths:
parser.print_help()
def _res_worker(self):
while True:
item = self.res_queue.get()
if not item:
break
self._log(*item)
self.res_queue.task_done()
def process(self):
self.res_queue = queue.Queue()
# the todo queue is used to distribute the work
# to the index threads
todo_queue = queue.Queue()
# put the initial paths into the queue
for path in self.args.paths:
todo_queue.put(path)
context = Context(
self.args.verify_index,
self.args.update,
self.args.force,
self.args.algo,
self.args.skip_symlinks,
)
# start indexing
workers = [
IndexThread(idx, context, self.res_queue, todo_queue)
for idx in range(self.args.workers)
]
# log the results from the workers
res_worker = threading.Thread(target=self._res_worker)
res_worker.daemon = True
res_worker.start()
todo_queue.join()
self.res_queue.join()
def print_result(self):
if not self.quiet:
print(
f"Processed {self.total} file(s){' in readonly mode' if not self.args.update else ''}."
)
if self.modified:
print("Indices were updated.")
if self.dmg_list:
print("chkbit detected damage in these files:", file=sys.stderr)
for err in self.dmg_list:
print(err, file=sys.stderr)
print(
f"error: detected {len(self.dmg_list)} file(s) with damage!",
file=sys.stderr,
)
if self.err_list:
print("chkbit ran into errors:", file=sys.stderr)
for err in self.err_list:
print(err, file=sys.stderr)
if self.dmg_list or self.err_list:
sys.exit(1)
def main():
try:
m = Main()
if m.args.paths:
m.process()
m.print_result()
except KeyboardInterrupt:
print("abort")
sys.exit(1)

13
chkbit/status.py Normal file
View File

@ -0,0 +1,13 @@
from enum import Enum
class Status(Enum):
    """Per-file status codes reported while checking/updating an index.

    The value is the short tag printed in front of the file path.
    """

    ERR_DMG = "DMG"  # error: data damage detected
    ERR_IDX = "EIX"  # error: the index file itself is damaged
    WARN_OLD = "old"  # warning: file was replaced by an older version
    NEW = "new"  # a new file was added to the index
    UPDATE = "upd"  # an existing file's hash was updated
    OK = "ok "  # integrity check passed (padded to three chars)
    SKIP = "skp"  # skipped (matched the ignore list)
    INTERNALEXCEPTION = "EXC"  # internal exception while processing
    UPDATE_INDEX = "iup"  # an index file was written/updated

61
cli/cli.py Normal file
View File

@ -0,0 +1,61 @@
import os
import sys
class CLI:
    """Small helper for ANSI terminal output.

    Honors the NO_COLOR convention: when the NO_COLOR environment
    variable is set to a non-empty value, all color helpers return "".
    """

    NO_COLOR = os.environ.get("NO_COLOR", "")

    class style:
        # SGR attribute sequences
        reset = "\033[0m"
        bold = "\033[01m"
        disable = "\033[02m"
        underline = "\033[04m"
        reverse = "\033[07m"
        strikethrough = "\033[09m"
        invisible = "\033[08m"

    class esc:
        # cursor movement escapes
        up = "\033[A"
        down = "\033[B"
        right = "\033[C"
        left = "\033[D"

        @staticmethod
        def clear_line(opt=0):
            """Erase-in-line escape; opt: 0=to end, 1=from start, 2=all."""
            return f"\033[{opt}K"

    @staticmethod
    def write(*text):
        """Write every fragment to stdout, then flush once."""
        out = sys.stdout
        for part in text:
            out.write(str(part))
        out.flush()

    @staticmethod
    def printline(*text):
        """Write the fragments, clear the rest of the line, end the line."""
        CLI.write(*text, CLI.esc.clear_line(), "\n")

    @staticmethod
    def fg4(col):
        """4-bit system foreground color.

        black=0,red=1,green=2,orange=3,blue=4,purple=5,cyan=6,lightgrey=7
        darkgrey=8,lightred=9,lightgreen=10,yellow=11,lightblue=12,pink=13,lightcyan=14
        """
        if CLI.NO_COLOR:
            return ""
        code = 30 + col if col < 8 else 90 - 8 + col
        return f"\033[{code}m"

    @staticmethod
    def fg8(col):
        """8-bit xterm foreground color."""
        return "" if CLI.NO_COLOR else f"\033[38;5;{col}m"

    @staticmethod
    def bg8(col):
        """8-bit xterm background color."""
        return "" if CLI.NO_COLOR else f"\033[48;5;{col}m"

326
cli/main.py Normal file
View File

@ -0,0 +1,326 @@
import argparse
import os
import queue
import shutil
import sys
import threading
import time
from datetime import datetime, timedelta
from chkbit import Context, Status, IndexThread
from cli import CLI, Progress, RateCalc, sparkify
STATUS_CODES = """
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
skp: skipped (see .chkbitignore)
EXC: internal exception
"""
# minimum delay between terminal status-line redraws
UPDATE_INTERVAL = timedelta(milliseconds=700)
MB = 1024 * 1024  # bytes per megabyte, used for the MB/s display

# colors/styles used by the fancy progress line
CLI_BG = CLI.bg8(240)
CLI_SEP = "|"
CLI_SEP_FG = CLI.fg8(235)
CLI_FG1 = CLI.fg8(255)
CLI_FG2 = CLI.fg8(228)
CLI_FG3 = CLI.fg8(202)
CLI_OK_FG = CLI.fg4(2)
CLI_ALERT_FG = CLI.fg4(1)
class Main:
    """Implements the chkbit command line interface: argument parsing,
    worker/reporter thread orchestration, progress display and summary."""

    def __init__(self):
        self.stdscr = None
        self.dmg_list = []  # paths with detected damage
        self.err_list = []  # paths that raised internal exceptions
        self.num_idx_upd = 0  # number of index files written
        self.num_new = 0  # number of newly added file hashes
        self.num_upd = 0  # number of updated file hashes
        self.verbose = False
        self.progress = Progress.Fancy
        self.total = 0  # number of files processed
        # reserve ~70 columns for text; split the rest between two sparklines
        self.term_width = shutil.get_terminal_size()[0]
        max_stat = int((self.term_width - 70) / 2)
        self.fps = RateCalc(timedelta(seconds=1), max_stat=max_stat)  # files/s
        self.bps = RateCalc(timedelta(seconds=1), max_stat=max_stat)  # bytes/s

    def _log(self, stat: Status, path: str):
        """Tally one status event from a worker and print it if relevant."""
        if stat == Status.UPDATE_INDEX:
            self.num_idx_upd += 1
        else:
            if stat == Status.ERR_DMG:
                self.total += 1
                self.dmg_list.append(path)
            elif stat == Status.INTERNALEXCEPTION:
                self.err_list.append(path)
            elif stat in [Status.OK, Status.UPDATE, Status.NEW]:
                self.total += 1
                if stat == Status.UPDATE:
                    self.num_upd += 1
                elif stat == Status.NEW:
                    self.num_new += 1

            # quiet statuses (ok/skip) are only shown in verbose mode
            if self.verbose or not stat in [Status.OK, Status.SKIP]:
                CLI.printline(stat.value, " ", path)

    def _res_worker(self, context: Context):
        """Drain the result queue and periodically render progress.

        Runs on its own thread; exits when a falsy sentinel is queued.
        """
        last = datetime.now()
        while True:
            try:
                item = self.result_queue.get(timeout=0.2)
                now = datetime.now()
                if not item:
                    if self.progress == Progress.Fancy:
                        CLI.printline("")
                    break
                t, *p = item
                if t == 0:
                    # status message: log it and force a redraw next check
                    self._log(*p)
                    last = datetime.min
                else:
                    # perf sample: (files, bytes) processed
                    self.fps.push(now, p[0])
                    self.bps.push(now, p[1])
                self.result_queue.task_done()
            except queue.Empty:
                now = datetime.now()
                pass

            # throttled progress redraw
            if last + UPDATE_INTERVAL < now:
                last = now
                if self.progress == Progress.Fancy:
                    stat_f = f"{self.fps.last} files/s"
                    stat_b = f"{int(self.bps.last/MB)} MB/s"
                    # "$" is a placeholder replaced by colored separators below
                    stat = f"[{'RW' if context.update else 'RO'}:{context.num_workers}] {self.total:>5} files $ {sparkify(self.fps.stats)} {stat_f:13} $ {sparkify(self.bps.stats)} {stat_b}"
                    stat = stat[: self.term_width - 1]
                    stat = stat.replace("$", CLI_SEP_FG + CLI_SEP + CLI_FG2, 1)
                    stat = stat.replace("$", CLI_SEP_FG + CLI_SEP + CLI_FG3, 1)
                    CLI.write(
                        CLI_BG,
                        CLI_FG1,
                        stat,
                        CLI.esc.clear_line(),
                        CLI.style.reset,
                        "\r",
                    )
                elif self.progress == Progress.Plain:
                    print(self.total, end="\r")

    def process(self, args):
        """Run the indexing: start workers and the reporter, wait for both.

        Returns the Context so the caller can inspect the settings used.
        """
        # the input queue is used to distribute the work
        # to the index threads
        input_queue = queue.Queue()

        # put the initial paths into the queue
        for path in args.paths:
            input_queue.put(path)

        context = Context(
            num_workers=args.workers,
            force=args.force,
            update=args.update,
            hash_algo=args.algo,
            skip_symlinks=args.skip_symlinks,
            index_filename=args.index_name,
            ignore_filename=args.ignore_name,
        )
        self.result_queue = context.result_queue

        # start indexing
        workers = [
            IndexThread(i, context, input_queue) for i in range(context.num_workers)
        ]

        # log the results from the workers
        res_worker = threading.Thread(target=self._res_worker, args=(context,))
        res_worker.daemon = True
        res_worker.start()

        # wait for work to finish
        input_queue.join()

        # signal workers to exit
        for worker in workers:
            input_queue.put(None)
        # signal res_worker to exit
        self.result_queue.put(None)

        for worker in workers:
            worker.join()
        res_worker.join()

        return context

    def print_result(self, context):
        """Print the summary and damage/error lists; exit(1) on any damage
        or internal errors."""

        def cprint(col, text):
            # colored stdout line (color only in fancy mode)
            if self.progress == Progress.Fancy:
                CLI.printline(col, text, CLI.style.reset)
            else:
                print(text)

        def eprint(col, text):
            # colored stderr line (color only in fancy mode)
            if self.progress == Progress.Fancy:
                CLI.write(col)
                print(text, file=sys.stderr)
                CLI.write(CLI.style.reset)
            else:
                print(text, file=sys.stderr)

        # pluralization helpers: iunit appends "s", iunit2 picks a phrase
        iunit = lambda x, u: f"{x} {u}{'s' if x!=1 else ''}"
        iunit2 = lambda x, u1, u2: f"{x} {u2 if x!=1 else u1}"

        if self.progress != Progress.Quiet:
            cprint(
                CLI_OK_FG,
                f"Processed {iunit(self.total, 'file')}{' in readonly mode' if not context.update else ''}.",
            )
            if self.progress == Progress.Fancy and self.total > 0:
                elapsed = (datetime.now() - self.fps.start).total_seconds()
                print(f"- {(self.fps.total+self.fps.current)/elapsed:.2f} files/second")
                print(f"- {(self.bps.total+self.bps.current)/MB/elapsed:.2f} MB/second")

            if context.update:
                if self.num_idx_upd:
                    cprint(
                        CLI_OK_FG,
                        f"- {iunit2(self.num_idx_upd, 'directory was', 'directories were')} updated\n"
                        + f"- {iunit2(self.num_new, 'file hash was', 'file hashes were')} added\n"
                        + f"- {iunit2(self.num_upd, 'file hash was', 'file hashes were')} updated",
                    )
            elif self.num_new + self.num_upd > 0:
                # readonly run found changes it was not allowed to record
                cprint(
                    CLI_ALERT_FG,
                    f"No changes were made (specify -u to update):\n"
                    + f"- {iunit(self.num_new, 'file')} would have been added and\n"
                    + f"- {iunit(self.num_upd, 'file')} would have been updated.",
                )

        if self.dmg_list:
            eprint(CLI_ALERT_FG, "chkbit detected damage in these files:")
            for err in self.dmg_list:
                print(err, file=sys.stderr)
            n = len(self.dmg_list)
            eprint(
                CLI_ALERT_FG,
                f"error: detected {iunit(n, 'file')} with damage!",
            )
        if self.err_list:
            eprint(CLI_ALERT_FG, "chkbit ran into errors:")
            for err in self.err_list:
                print(err, file=sys.stderr)

        if self.dmg_list or self.err_list:
            sys.exit(1)

    def run(self):
        """Parse the command line and run the verification/update."""
        parser = argparse.ArgumentParser(
            prog="chkbit",
            description="Checks the data integrity of your files. See https://github.com/laktak/chkbit-py",
            epilog=STATUS_CODES,
            formatter_class=argparse.RawDescriptionHelpFormatter,
        )
        parser.add_argument(
            "paths", metavar="PATH", type=str, nargs="*", help="directories to check"
        )
        parser.add_argument(
            "-u",
            "--update",
            action="store_true",
            help="update indices (without this chkbit will verify files in readonly mode)",
        )
        parser.add_argument(
            "--algo",
            type=str,
            default="blake3",
            help="hash algorithm: md5, sha512, blake3 (default: blake3)",
        )
        parser.add_argument(
            "-f", "--force", action="store_true", help="force update of damaged items"
        )
        parser.add_argument(
            "-s", "--skip-symlinks", action="store_true", help="do not follow symlinks"
        )
        parser.add_argument(
            "--index-name",
            metavar="NAME",
            type=str,
            default=".chkbit",
            help="filename where chkbit stores its hashes (default: .chkbit)",
        )
        parser.add_argument(
            "--ignore-name",
            metavar="NAME",
            type=str,
            default=".chkbitignore",
            help="filename that chkbit reads its ignore list from (default: .chkbitignore)",
        )
        parser.add_argument(
            "-w",
            "--workers",
            metavar="N",
            action="store",
            type=int,
            default=5,
            help="number of workers to use (default: 5)",
        )
        parser.add_argument(
            "--plain",
            action="store_true",
            help="show plain status instead of being fancy",
        )
        parser.add_argument(
            "-q",
            "--quiet",
            action="store_true",
            help="quiet, don't show progress/information",
        )
        parser.add_argument(
            "-v", "--verbose", action="store_true", help="verbose output"
        )
        args = parser.parse_args()
        self.verbose = args.verbose

        # choose the progress mode: quiet > non-tty summary > plain > fancy
        if args.quiet:
            self.progress = Progress.Quiet
        elif not sys.stdout.isatty():
            self.progress = Progress.Summary
        elif args.plain:
            self.progress = Progress.Plain

        if args.paths:
            context = self.process(args)
            self.print_result(context)
        else:
            parser.print_help()
def main():
    """CLI entry point: run the app, mapping Ctrl-C to exit code 1."""
    try:
        app = Main()
        app.run()
    except KeyboardInterrupt:
        print("abort")
        sys.exit(1)


if __name__ == "__main__":
    main()

8
cli/progress.py Normal file
View File

@ -0,0 +1,8 @@
from enum import Enum
class Progress(Enum):
    """How the CLI reports progress while running.

    Fix: the members were accidentally assigned one-tuples
    (``Quiet = (0,)`` — a stray trailing comma per line); plain ints are
    the intended values. Nothing in the CLI reads ``.value``, so this is
    safe for all existing comparisons of the members themselves.
    """

    Quiet = 0  # no progress/summary output (-q)
    Summary = 1  # final summary only (stdout is not a tty)
    Plain = 2  # simple textual counter (--plain)
    Fancy = 3  # full-color status line with sparklines (default)

28
cli/rate_calc.py Normal file
View File

@ -0,0 +1,28 @@
from datetime import datetime, timedelta
class RateCalc:
    """Accumulates event counts into fixed-width time buckets.

    Keeps a rolling window of the last ``max_stat`` closed buckets
    (for sparkline display) plus a running total.
    """

    def __init__(self, interval: timedelta, max_stat: int):
        self.interval = interval
        self.max_stat = max(max_stat, 10)  # keep at least 10 history slots
        self.reset()

    def reset(self):
        """Clear all counters and restart the clock at 'now'."""
        now = datetime.now()
        self.start = now
        self.updated = now
        self.total = 0
        self.current = 0
        self.stats = [0] * self.max_stat

    @property
    def last(self):
        """Value of the most recently closed bucket."""
        return self.stats[-1]

    def push(self, ts: datetime, value: int):
        """Add ``value`` at time ``ts``, closing any buckets that ended
        before ``ts`` first."""
        while not self.updated + self.interval >= ts:
            # close the current bucket and trim the window
            self.stats = (self.stats + [self.current])[-self.max_stat :]
            self.total += self.current
            self.current = 0
            self.updated = self.updated + self.interval
        self.current += value

71
cli/sparklines.py Normal file
View File

@ -0,0 +1,71 @@
import math, os, re, string, sys
"""
Copyright (c) 2021, Brandon Whaley <redkrieg@gmail.com>, et al.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
spark_chars = "▁▂▃▄▅▆▇█"
"""Eight unicode characters of (nearly) steadily increasing height."""


def sparkify(series, minimum=None, maximum=None, rows=1):
    """Converts <series> to a sparkline string.

    Example:
    >>> sparkify([ 0.5, 1.2, 3.5, 7.3, 8.0, 12.5, float("nan"), 15.0, 14.2, 11.8, 6.1,
    ... 1.9 ])
    u'▁▁▂▄▅▇ ██▆▄▂'

    >>> sparkify([1, 1, -2, 3, -5, 8, -13])
    u'▆▆▅▆▄█▁'

    Raises ValueError if input data cannot be converted to float.
    Raises TypeError if series is not an iterable.
    """
    series = [float(n) for n in series]
    # an all-NaN/inf series renders as blanks
    if all(not math.isfinite(n) for n in series):
        return " " * len(series)
    # unless given, scale to the finite min/max of the data
    minimum = min(filter(math.isfinite, series)) if minimum is None else minimum
    maximum = max(filter(math.isfinite, series)) if maximum is None else maximum
    data_range = maximum - minimum
    if data_range == 0.0:
        # Graph a baseline if every input value is equal.
        return "".join([spark_chars[0] if math.isfinite(i) else " " for i in series])
    row_res = len(spark_chars)
    resolution = row_res * rows
    coefficient = (resolution - 1.0) / data_range

    def clamp(n):
        # limit n to [minimum, maximum]
        return min(max(n, minimum), maximum)

    def spark_index(n):
        """An integer from 0 to (resolution-1) proportional to the data range"""
        return int(round((clamp(n) - minimum) * coefficient))

    output = []
    # render the top row (rows-1) first, down to the bottom row (0)
    for r in range(rows - 1, -1, -1):
        row_out = []
        row_min = row_res * r
        row_max = row_min + row_res - 1
        for n in series:
            if not math.isfinite(n):
                row_out.append(" ")
                continue
            i = spark_index(n)
            if i < row_min:
                # value falls entirely below this row's band
                row_out.append(" ")
            elif i > row_max:
                # value exceeds this row's band: full block
                row_out.append(spark_chars[-1])
            else:
                row_out.append(spark_chars[i % row_res])
        output.append("".join(row_out))
    return os.linesep.join(output)

View File

@ -1,6 +1,6 @@
[project]
name = "chkbit"
version = "2.4.0"
version = "3.0.0"
description = "chkbit checks the data integrity of your files"
authors = [
{name = "Christian Zangl", email = "laktak@cdak.net"},
@ -16,4 +16,7 @@ license = {file = "LICENSE"}
Homepage = "https://github.com/laktak/chkbit-py"
[project.scripts]
chkbit = "chkbit.main:main"
chkbit = "cli.main:main"
[tool.setuptools.packages.find]
include = ["chkbit","cli"]