diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d785337 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,16 @@ +# EditorConfig helps developers define and maintain consistent +# coding styles between different editors and IDEs +# editorconfig.org + +# top-most EditorConfig file +root = true + +[*] +insert_final_newline = true +charset = utf-8 +trim_trailing_whitespace = true +indent_style = space +indent_size = 4 + +[*.md] +indent_size = 2 diff --git a/README.md b/README.md index 90ffb23..0d9ee26 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,24 @@ # chkbit -chkbit is a lightweight tool to check data integrity and to detect bitrot. +chkbit is a lightweight tool to check the data integrity of your files. It allows you to verify *that the data has not changed* since you put it there and that it is still the same when you move it somewhere else. -chkbit is independent of the file system and can help you detect bitrot on you primary system, on backups and in the cloud. +### On your Disk -## TL;DR +chkbit starts with your primary disk. It creates checksums for each folder that will follow your data onto your backups. -Any cloud or local storage media can be affected by data corruption and/or bitrot. While some filesystems have built in protection, this protection is limited to the storage media. +Even though your filesystems should have built-in checksums, it is usually not trivial to take them onto another medium. -chkbit will create an hash that follows your data from local media to cloud or backup. This enables you to verify the integrity of your data wherever it is moved. +### On your backup -- run chkbit on your system -- move the data to a new system (backup/restore) -- verify that everything is OK with chkbit +No matter what storage media or filesystem you use, chkbit stores its indexes in hidden files that are backed up together with your data. -## What is bitrot? 
+When you run chkbit-verify on your backup media you can make sure that every byte was correctly transferred. -0 bits flipped | 1 bit flipped | 2 bits flipped | 3 bits flipped --------------- | -------------- | -------------- | -------------- -![](https://upload.wikimedia.org/wikipedia/commons/thumb/2/2f/Bitrot_in_JPEG_files%2C_0_bits_flipped.jpg/180px-Bitrot_in_JPEG_files%2C_0_bits_flipped.jpg) | ![](https://upload.wikimedia.org/wikipedia/commons/thumb/1/1a/Bitrot_in_JPEG_files%2C_1_bit_flipped.jpg/180px-Bitrot_in_JPEG_files%2C_1_bit_flipped.jpg) | ![](https://upload.wikimedia.org/wikipedia/commons/thumb/3/37/Bitrot_in_JPEG_files%2C_2_bits_flipped.jpg/180px-Bitrot_in_JPEG_files%2C_2_bits_flipped.jpg) | ![](https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Bitrot_in_JPEG_files%2C_3_bits_flipped.jpg/180px-Bitrot_in_JPEG_files%2C_3_bits_flipped.jpg) | +If your backup media fails or experiences [bitrot/data degradation](https://en.wikipedia.org/wiki/Data_degradation), chkbit allows you to discover what files were damaged and need to be replaced by other backups. -Data degradation (aka bitrot) is the gradual corruption of computer data due to an accumulation of non-critical failures in a data storage device. It results from the gradual decay of storage media over the course of years or longer. Causes vary by medium. +### Data in the Cloud -**For more information** see [Wikipedia - Data_degradation](https://en.wikipedia.org/wiki/Data_degradation). - -This is the successor to [chkbit/node](https://github.com/laktak/chkbit). It will use and upgrade the index files created by the node version. +Some cloud providers re-encode your videos or compress your images to save space. chkbit will alert you of any changes. ## Installation @@ -46,14 +40,14 @@ chkbit will - create a `.chkbit` index in every subdirectory of the path it was given. - update the index with md5 hashes for every file. -- report bitrot for files that rotted since the last run (check the exit status). 
+- report damage for files that failed the integrity check since the last run (check the exit status). Run `chkbit PATH` to verify only. ``` usage: chkbit.py [-h] [-u] [-f] [-i] [-q] [-v] [PATH [PATH ...]] -Checks files for bitrot. See https://github.com/laktak/chkbit-py +Checks the data integrity of your files. See https://github.com/laktak/chkbit-py positional arguments: PATH @@ -67,7 +61,7 @@ optional arguments: -v, --verbose verbose output Status codes: - ROT: error, bitrot detected + DMG: error, data damage detected EIX: error, index damaged old: warning, file replaced by an older version new: new file @@ -79,14 +73,14 @@ Status codes: ## Repair -chkbit cannot repair bitrot, its job is simply to detect it. +chkbit cannot repair damage; its job is simply to detect it. You should - backup regularly. - run chkbit *before* each backup. -- check for bitrot on the backup media. -- in case of bitrot *restore* from a checked backup. +- check for damage on the backup media. +- in case of damage *restore* from a checked backup. ## Ignore files @@ -147,16 +141,16 @@ Indices were updated. `upd` indicates the file was updated. -Now update test with the same modified to simulate bitrot: +Now update test with the same modified time to simulate damage: ``` $ echo foo3 > test; touch -t 201501010001 test $ chkbit -u . -ROT ./test +DMG ./test Processed 0 file(s). -chkbit detected bitrot in these files: +chkbit detected damage in these files: ./test -error: detected 1 file(s) with bitrot! +error: detected 1 file(s) with damage! ``` -`ROT` indicates bitrot. +`DMG` indicates damage. 
diff --git a/chkbit/index.py b/chkbit/index.py index 25700f0..491947e 100644 --- a/chkbit/index.py +++ b/chkbit/index.py @@ -12,7 +12,8 @@ IGNORE = ".chkbitignore" class Stat(Enum): - ERR_BITROT = "ROT" + ERR_DMG = "DMG" + ERR_BITROT = "DMG" # legacy ERR_IDX = "EIX" WARN_OLD = "old" NEW = "new" @@ -84,8 +85,8 @@ class Index: continue if amod == bmod: - # rot detected - self._log(Stat.ERR_BITROT, name) + # damage detected + self._log(Stat.ERR_DMG, name) # replace with old so we don't loose the information on the next run # unless force is set if not force: diff --git a/chkbit/main.py b/chkbit/main.py index aefbda4..3b3a025 100644 --- a/chkbit/main.py +++ b/chkbit/main.py @@ -8,7 +8,7 @@ from chkbit import IndexThread, Stat STATUS_CODES = """ Status codes: - ROT: error, bitrot detected + DMG: error, data damage detected EIX: error, index damaged old: warning, file replaced by an older version new: new file @@ -22,7 +22,7 @@ Status codes: class Main: def __init__(self): self.stdscr = None - self.bitrot_list = [] + self.dmg_list = [] self.err_list = [] self.modified = False self.verbose = False @@ -34,8 +34,8 @@ class Main: if stat == Stat.FLAG_MOD: self.modified = True else: - if stat == Stat.ERR_BITROT: - self.bitrot_list.append(path) + if stat == Stat.ERR_DMG: + self.dmg_list.append(path) elif stat == Stat.INTERNALEXCEPTION: self.err_list.append(path) elif stat in [Stat.OK, Stat.UPDATE, Stat.NEW]: @@ -47,7 +47,7 @@ class Main: def _parse_args(self): parser = argparse.ArgumentParser( - description="Checks files for bitrot. See https://github.com/laktak/chkbit-py", + description="Checks the data integrity of your files. 
See https://github.com/laktak/chkbit-py", epilog=STATUS_CODES, formatter_class=argparse.RawDescriptionHelpFormatter, ) @@ -71,10 +71,6 @@ class Main: help="verify files in the index only (will not report new files)", ) - # parser.add_argument( - # "-d", "--delete", action="store_true", help="remove all .chkbit files from target" - # ) - parser.add_argument( "-q", "--quiet", @@ -126,12 +122,12 @@ class Main: if self.modified: print("Indices were updated.") - if self.bitrot_list: - print("chkbit detected bitrot in these files:", file=sys.stderr) - for err in self.bitrot_list: + if self.dmg_list: + print("chkbit detected damage in these files:", file=sys.stderr) + for err in self.dmg_list: print(err, file=sys.stderr) print( - f"error: detected {len(self.bitrot_list)} file(s) with bitrot!", + f"error: detected {len(self.dmg_list)} file(s) with damage!", file=sys.stderr, ) if self.err_list: @@ -139,7 +135,7 @@ class Main: for err in self.err_list: print(err, file=sys.stderr) - if self.bitrot_list or self.err_list: + if self.dmg_list or self.err_list: sys.exit(1) diff --git a/setup.py b/setup.py index ee39bee..b08bc8b 100644 --- a/setup.py +++ b/setup.py @@ -11,11 +11,11 @@ with open(os.path.join(os.path.dirname(__file__), "README.md"), encoding="utf-8" setup( name="chkbit", - version="2.0.3", + version="2.1.0", url="https://github.com/laktak/chkbit-py", author="Christian Zangl", author_email="laktak@cdak.net", - description="chkbit is a lightweight bitrot detection tool.", + description="chkbit checks the data integrity of your files", long_description=readme, long_description_content_type="text/markdown", entry_points={"console_scripts": ["chkbit = chkbit.main:main"]},