From 647f248d07438df8f5871f0adaf0b5379eba1501 Mon Sep 17 00:00:00 2001 From: Peter McGoron Date: Sat, 9 Dec 2023 10:58:59 -0500 Subject: [PATCH] init --- COPYING | 202 ++++++++++++++++++++ README.rst | 20 ++ sha-256.c | 226 +++++++++++++++++++++++ sha-256.h | 103 +++++++++++ wlb.c | 531 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1082 insertions(+) create mode 100644 COPYING create mode 100644 README.rst create mode 100644 sha-256.c create mode 100644 sha-256.h create mode 100644 wlb.c diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..769b4b3 --- /dev/null +++ b/README.rst @@ -0,0 +1,20 @@ +--------------------- +Windows Local Backup +--------------------- + +Windows local backup system. Supports anything Sqlite3 supports. +Tested on x86 TCC for Windows XP. + +Backups are stored in a SQLite database. Each backup is timestamped +at the time of execution, with an optional name. + +Each file in the backup has its SHA256 sum included. If several files +have an identical SHA256 sum, only one copy of the file is stored. The +relative directory of each file is stored. + +----- +Notes +----- + +Do not run backups while other programs are writing to the files being +backed up, or those programs might crash. The backup program should still work fine. diff --git a/sha-256.c b/sha-256.c new file mode 100644 index 0000000..5a69250 --- /dev/null +++ b/sha-256.c @@ -0,0 +1,226 @@ +#include "sha-256.h" + +#define TOTAL_LEN_LEN 8 + +/* + * Comments from pseudo-code at https://en.wikipedia.org/wiki/SHA-2 are reproduced here. + * When useful for clarification, portions of the pseudo-code are reproduced here too. + */ + +/* + * @brief Rotate a 32-bit value by a number of bits to the right. + * @param value The value to be rotated. + * @param count The number of bits to rotate by. + * @return The rotated value.
+ */ +static inline uint32_t right_rot(uint32_t value, unsigned int count) +{ + /* + * Defined behaviour in standard C for all count where 0 < count < 32, which is what we need here. + */ + return value >> count | value << (32 - count); +} + +/* + * @brief Update a hash value under calculation with a new chunk of data. + * @param h Pointer to the first hash item, of a total of eight. + * @param p Pointer to the chunk data, which has a standard length. + * + * @note This is the SHA-256 work horse. + */ +static inline void consume_chunk(uint32_t *h, const uint8_t *p) +{ + unsigned i, j; + uint32_t ah[8]; + + /* Initialize working variables to current hash value: */ + for (i = 0; i < 8; i++) + ah[i] = h[i]; + + /* + * The w-array is really w[64], but since we only need 16 of them at a time, we save stack by + * calculating 16 at a time. + * + * This optimization was not there initially and the rest of the comments about w[64] are kept in their + * initial state. + */ + + /* + * create a 64-entry message schedule array w[0..63] of 32-bit words (The initial values in w[0..63] + * don't matter, so many implementations zero them here) copy chunk into first 16 words w[0..15] of the + * message schedule array + */ + uint32_t w[16]; + + /* Compression function main loop: */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 16; j++) { + if (i == 0) { + w[j] = + (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 | (uint32_t)p[2] << 8 | (uint32_t)p[3]; + p += 4; + } else { + /* Extend the first 16 words into the remaining 48 words w[16..63] of the + * message schedule array: */ + const uint32_t s0 = right_rot(w[(j + 1) & 0xf], 7) ^ right_rot(w[(j + 1) & 0xf], 18) ^ + (w[(j + 1) & 0xf] >> 3); + const uint32_t s1 = right_rot(w[(j + 14) & 0xf], 17) ^ + right_rot(w[(j + 14) & 0xf], 19) ^ (w[(j + 14) & 0xf] >> 10); + w[j] = w[j] + s0 + w[(j + 9) & 0xf] + s1; + } + const uint32_t s1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25); + const uint32_t ch = (ah[4] & ah[5]) ^ 
(~ah[4] & ah[6]); + + /* + * Initialize array of round constants: + * (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311): + */ + static const uint32_t k[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, + 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, + 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, + 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, + 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116, + 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, + 0xc67178f2}; + + const uint32_t temp1 = ah[7] + s1 + ch + k[i << 4 | j] + w[j]; + const uint32_t s0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22); + const uint32_t maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]); + const uint32_t temp2 = s0 + maj; + + ah[7] = ah[6]; + ah[6] = ah[5]; + ah[5] = ah[4]; + ah[4] = ah[3] + temp1; + ah[3] = ah[2]; + ah[2] = ah[1]; + ah[1] = ah[0]; + ah[0] = temp1 + temp2; + } + } + + /* Add the compressed chunk to the current hash value: */ + for (i = 0; i < 8; i++) + h[i] += ah[i]; +} + +/* + * Public functions. See header file for documentation. 
+ */ + +void sha_256_init(struct Sha_256 *sha_256, uint8_t hash[SIZE_OF_SHA_256_HASH]) +{ + sha_256->hash = hash; + sha_256->chunk_pos = sha_256->chunk; + sha_256->space_left = SIZE_OF_SHA_256_CHUNK; + sha_256->total_len = 0; + /* + * Initialize hash values (first 32 bits of the fractional parts of the square roots of the first 8 primes + * 2..19): + */ + sha_256->h[0] = 0x6a09e667; + sha_256->h[1] = 0xbb67ae85; + sha_256->h[2] = 0x3c6ef372; + sha_256->h[3] = 0xa54ff53a; + sha_256->h[4] = 0x510e527f; + sha_256->h[5] = 0x9b05688c; + sha_256->h[6] = 0x1f83d9ab; + sha_256->h[7] = 0x5be0cd19; +} + +void sha_256_write(struct Sha_256 *sha_256, const void *data, size_t len) +{ + sha_256->total_len += len; + + /* + * The following cast is not necessary, and could even be considered as poor practice. However, it makes this + * file valid C++, which could be a good thing for some use cases. + */ + const uint8_t *p = (const uint8_t *)data; + + while (len > 0) { + /* + * If the input chunks have sizes that are multiples of the calculation chunk size, no copies are + * necessary. We operate directly on the input data instead. + */ + if (sha_256->space_left == SIZE_OF_SHA_256_CHUNK && len >= SIZE_OF_SHA_256_CHUNK) { + consume_chunk(sha_256->h, p); + len -= SIZE_OF_SHA_256_CHUNK; + p += SIZE_OF_SHA_256_CHUNK; + continue; + } + /* General case, no particular optimization. */ + const size_t consumed_len = len < sha_256->space_left ? 
len : sha_256->space_left; + memcpy(sha_256->chunk_pos, p, consumed_len); + sha_256->space_left -= consumed_len; + len -= consumed_len; + p += consumed_len; + if (sha_256->space_left == 0) { + consume_chunk(sha_256->h, sha_256->chunk); + sha_256->chunk_pos = sha_256->chunk; + sha_256->space_left = SIZE_OF_SHA_256_CHUNK; + } else { + sha_256->chunk_pos += consumed_len; + } + } +} + +uint8_t *sha_256_close(struct Sha_256 *sha_256) +{ + uint8_t *pos = sha_256->chunk_pos; + size_t space_left = sha_256->space_left; + uint32_t *const h = sha_256->h; + + /* + * The current chunk cannot be full. Otherwise, it would already have been consumed. I.e. there is space left for + * at least one byte. The next step in the calculation is to add a single one-bit to the data. + */ + *pos++ = 0x80; + --space_left; + + /* + * Now, the last step is to add the total data length at the end of the last chunk, and zero padding before + * that. But we do not necessarily have enough space left. If not, we pad the current chunk with zeroes, and add + * an extra chunk at the end. 
+ */ + if (space_left < TOTAL_LEN_LEN) { + memset(pos, 0x00, space_left); + consume_chunk(h, sha_256->chunk); + pos = sha_256->chunk; + space_left = SIZE_OF_SHA_256_CHUNK; + } + const size_t left = space_left - TOTAL_LEN_LEN; + memset(pos, 0x00, left); + pos += left; + size_t len = sha_256->total_len; + pos[7] = (uint8_t)(len << 3); + len >>= 5; + int i; + for (i = 6; i >= 0; --i) { + pos[i] = (uint8_t)len; + len >>= 8; + } + consume_chunk(h, sha_256->chunk); + /* Produce the final hash value (big-endian): */ + int j; + uint8_t *const hash = sha_256->hash; + for (i = 0, j = 0; i < 8; i++) { + hash[j++] = (uint8_t)(h[i] >> 24); + hash[j++] = (uint8_t)(h[i] >> 16); + hash[j++] = (uint8_t)(h[i] >> 8); + hash[j++] = (uint8_t)h[i]; + } + return sha_256->hash; +} + +void calc_sha_256(uint8_t hash[SIZE_OF_SHA_256_HASH], const void *input, size_t len) +{ + struct Sha_256 sha_256; + sha_256_init(&sha_256, hash); + sha_256_write(&sha_256, input, len); + (void)sha_256_close(&sha_256); +} diff --git a/sha-256.h b/sha-256.h new file mode 100644 index 0000000..6ba59bf --- /dev/null +++ b/sha-256.h @@ -0,0 +1,103 @@ +#ifndef SHA_256_H +#define SHA_256_H + +#include <stdint.h> +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * @brief Size of the SHA-256 sum. This times eight is 256 bits. + */ +#define SIZE_OF_SHA_256_HASH 32 + +/* + * @brief Size of the chunks used for the calculations. + * + * @note This should mostly be ignored by the user, although when using the streaming API, it has an impact for + * performance. Add chunks whose size is a multiple of this, and you will avoid a lot of superfluous copying in RAM! + */ +#define SIZE_OF_SHA_256_CHUNK 64 + +/* + * @brief The opaque SHA-256 type, that should be instantiated when using the streaming API. + * + * @note Although the details are exposed here, in order to make instantiation easy, you should refrain from directly + * accessing the fields, as they may change in the future.
+ */ +struct Sha_256 { + uint8_t *hash; + uint8_t chunk[SIZE_OF_SHA_256_CHUNK]; + uint8_t *chunk_pos; + size_t space_left; + size_t total_len; + uint32_t h[8]; +}; + +/* + * @brief The simple SHA-256 calculation function. + * @param hash Hash array, where the result is delivered. + * @param input Pointer to the data the hash shall be calculated on. + * @param len Length of the input data, in byte. + * + * @note If all of the data you are calculating the hash value on is available in a contiguous buffer in memory, this is + * the function you should use. + * + * @note If either of the passed pointers is NULL, the results are unpredictable. + */ +void calc_sha_256(uint8_t hash[SIZE_OF_SHA_256_HASH], const void *input, size_t len); + +/* + * @brief Initialize a SHA-256 streaming calculation. + * @param sha_256 A pointer to a SHA-256 structure. + * @param hash Hash array, where the result will be delivered. + * + * @note If all of the data you are calculating the hash value on is not available in a contiguous buffer in memory, this is + * where you should start. Instantiate a SHA-256 structure, for instance by simply declaring it locally, make your hash + * buffer available, and invoke this function. Once a SHA-256 hash has been calculated (see further below) a SHA-256 + * structure can be initialized again for the next calculation. + * + * @note If either of the passed pointers is NULL, the results are unpredictable. + */ +void sha_256_init(struct Sha_256 *sha_256, uint8_t hash[SIZE_OF_SHA_256_HASH]); + +/* + * @brief Stream more input data for an on-going SHA-256 calculation. + * @param sha_256 A pointer to a previously initialized SHA-256 structure. + * @param data Pointer to the data to be added to the calculation. + * @param len Length of the data to add, in byte. 
+ * + * @note This function may be invoked an arbitrary number of times between initialization and closing, but the maximum + * data length is limited by the SHA-256 algorithm: the total number of bits (i.e. the total number of bytes times + * eight) must be representable by a 64-bit unsigned integer. While that is not a practical limitation, the results are + * unpredictable if that limit is exceeded. + * + * @note This function may be invoked on empty data (zero length), although that obviously will not add any data. + * + * @note If either of the passed pointers is NULL, the results are unpredictable. + */ +void sha_256_write(struct Sha_256 *sha_256, const void *data, size_t len); + +/* + * @brief Conclude a SHA-256 streaming calculation, making the hash value available. + * @param sha_256 A pointer to a previously initialized SHA-256 structure. + * @return Pointer to the hash array, where the result is delivered. + * + * @note After this function has been invoked, the result is available in the hash buffer that initially was provided. A + * pointer to the hash value is returned for convenience, but you should feel free to ignore it: it is simply a pointer + * to the first byte of your initially provided hash array. + * + * @note If the passed pointer is NULL, the results are unpredictable. + * + * @note Invoking this function for a calculation with no data (the writing function has never been invoked, or it only + * has been invoked with empty data) is legal. It will calculate the SHA-256 value of the empty string. + */ +uint8_t *sha_256_close(struct Sha_256 *sha_256); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/wlb.c b/wlb.c new file mode 100644 index 0000000..2876d48 --- /dev/null +++ b/wlb.c @@ -0,0 +1,531 @@ +#include +#include +#include +#include "sqlite3.h" +#include "sha-2/sha-256.h" +#define SHASIZ SIZE_OF_SHA_256_HASH + +/* TODO: "\\?\" trick? + * Windows limits path lengths to 260 characters. Explorer on Windows XP + * has this issue too. 
+ * + * UTF issues? + */ + +/************************************************************************* + * Compatibility defines for non-Unicode systems + ************************************************************************/ + +#ifdef UNICODE +# define ENCODING SQLITE_UTF16 +# define sqlite3_open_U sqlite3_open16 +# define sqlite3_errmsg_U sqlite3_errmsg16 +# define sqlite3_bind_text_U sqlite3_bind_text16 +#else +# define ENCODING SQLITE_UTF8 +# define sqlite3_open_U sqlite3_open +# define sqlite3_errmsg_U sqlite3_errmsg +# define sqlite3_bind_text_U sqlite3_bind_text +#endif + +/**************** + * Globals + ***************/ + +sqlite3 *g_db = NULL; +int g_verbose = 0; +sqlite_int64 g_backup_id = -1; + +/******************** + * Utility Functions + *******************/ + +static void _log(TCHAR *msg, ...) +{ + va_list va; + + if (g_verbose) { + va_start(va, msg); + _tvprintf(msg, va); + va_end(va); + } +} +/* String literals only; ## drops the trailing comma when no varargs are passed (GNU extension) */ +#define log(msg, ...) _log(TEXT(msg), ##__VA_ARGS__) + +static void _die(TCHAR *emsg, ...) +{ + va_list va; + va_start(va, emsg); + _tvprintf(emsg, va); + va_end(va); + + sqlite3_close(g_db); + exit(1); +} +/* String literals only */ +#define die(emsg, ...)
_die(TEXT(emsg), __VA_ARGS__) + +/********************************** + * Database constants + *********************************/ + +#define UPPER_VERSION 0x776c62 /* 'wlb' in ASCII */ +#define CUR_VERSION 0x776c6201 +#define S_(s) #s +#define S(s) S_(s) /* two levels so CUR_VERSION is expanded before # stringifies it */ +#define INIT_SCRIPT "\ +BEGIN; \ +PRAGMA user_version = " S(CUR_VERSION) ";\ +CREATE TABLE chunks (\ + rowid INTEGER PRIMARY KEY, \ + sha256 TEXT UNIQUE NOT NULL, \ + data BLOB NOT NULL\ +); \ +CREATE TABLE backup_ids (\ + ts TEXT UNIQUE NOT NULL,\ + rowid INTEGER PRIMARY KEY\ +); \ +CREATE TABLE backups (\ + backup INTEGER NOT NULL REFERENCES backup_ids ON DELETE CASCADE, \ + path TEXT NOT NULL, \ + chunk INTEGER NOT NULL REFERENCES chunks ON DELETE RESTRICT\ +); \ +CREATE INDEX backups_paths ON backups (path); \ +CREATE INDEX backups_backup ON backups (backup); \ +CREATE INDEX backups_chunk ON backups (chunk);\ +COMMIT;" + +/* If ``did_not_exist`` is nonzero, run INIT_SCRIPT on g_db to create the + * schema. Otherwise die unless PRAGMA user_version equals CUR_VERSION. + */ +static void check_db_version(int did_not_exist) +{ + int user_version; + sqlite3_stmt *stmt; + + /* DB did not exist beforehand: initialize DB */ + if (did_not_exist) { + if (sqlite3_exec(g_db, INIT_SCRIPT, NULL, NULL, NULL) != SQLITE_OK) + die("Error initializing database: %s\n", + sqlite3_errmsg_U(g_db)); + log("Intialized database\n"); + return; + } + + /* DB existed beforehand: do version check */ + if (sqlite3_prepare_v2(g_db, "PRAGMA user_version;", -1, &stmt, NULL) + != SQLITE_OK) + die("Error preparing user version check: %s\n", + sqlite3_errmsg_U(g_db)); + if (sqlite3_step(stmt) != SQLITE_ROW) + die("Error executing user version check: %s\n", + sqlite3_errmsg_U(g_db)); + user_version = sqlite3_column_int(stmt, 0); + sqlite3_finalize(stmt); + + if (user_version != CUR_VERSION) { + die("Bad DB version found (expected %d), got %d\n", + CUR_VERSION & 0xFF, user_version & 0xFF); + } +} + +/* Initialize the backup ID used in this session.
+ * This is only called if the user requests an archive of a directory. + */ +static void initialize_backup_id(void) +{ + sqlite3_stmt *stmt; + + if (sqlite3_prepare_v2(g_db, + "INSERT INTO backup_ids (ts) VALUES (datetime()) RETURNING rowid;", + -1, &stmt, NULL) != SQLITE_OK) { + die("failed to prepare inserting timestamp: %s\n", + sqlite3_errmsg_U(g_db)); + } + + if (sqlite3_step(stmt) != SQLITE_ROW) + die("failed to insert timestamp: %s\n", + sqlite3_errmsg_U(g_db)); + g_backup_id = sqlite3_column_int64(stmt, 0); + sqlite3_finalize(stmt); +} + +/* Opens the DB at ``fn`` into g_db, initializes or version-checks it, then begins a transaction. */ +static void open_db(TCHAR *fn) +{ + int did_not_exist = GetFileAttributes(fn) + == INVALID_FILE_ATTRIBUTES; + char *errmsg; + + if (sqlite3_open_U(fn, &g_db) != SQLITE_OK) + die("Error opening database %s: %s\n", fn, + sqlite3_errmsg_U(g_db)); + check_db_version(did_not_exist); + + /* Begin a transaction; no COMMIT is issued in this function */ + if (sqlite3_exec(g_db, "BEGIN;", NULL, NULL, &errmsg) != SQLITE_OK) + die("failed to begin transaction: %s\n", errmsg); +} + +/* Insert a backup record into the database. + * + * This part occurs after the file data has been inserted or identified + * by SHA256 hash.
+ */ +static int insert_backup_record(TCHAR *name, sqlite_int64 chunk_id) +{ + sqlite3_stmt *stmt; + int r = 0; + + if (sqlite3_prepare_v2(g_db, + "INSERT INTO backups (backup, path, chunk) VALUES (?,?,?);", + -1, &stmt, NULL) != SQLITE_OK) { + _tprintf("Could not prepare backup insert statement for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + return 0; + } + + if (sqlite3_bind_int64(stmt, 1, g_backup_id) != SQLITE_OK) { + _tprintf("Could not bind backup id for %s: %s\n", name, + sqlite3_errmsg_U(g_db)); + goto end; + } + + if (sqlite3_bind_text_U(stmt, 2, name, -1, SQLITE_TRANSIENT) != SQLITE_OK) { + _tprintf("Could not bind path for %s: %s\n", name, + sqlite3_errmsg_U(g_db)); + goto end; + } + + if (sqlite3_bind_int64(stmt, 3, chunk_id) != SQLITE_OK) { + _tprintf("Could not bind chunk for %s: %s\n", name, + sqlite3_errmsg_U(g_db)); + goto end; + } + + if (sqlite3_step(stmt) != SQLITE_DONE) { + _tprintf("Error in inserting chunk for %s: %s\n", name, + sqlite3_errmsg_U(g_db)); + goto end; + } + + r = 1; +end: + sqlite3_finalize(stmt); + return r; +} + +/* Write a file to a BLOB in ``chunks`` at row ``rowid``. + * + * Returns 0 on failure and 1 on success.
+ */ +static int write_chunk(TCHAR *name, HANDLE f, sqlite_int64 rowid) +{ + sqlite3_blob *blob; + char buf[4096]; + DWORD read = 0; + int has_written = 0; + int r = 0; + + if (sqlite3_blob_open(g_db, "main", "chunks", "data", rowid, 1, + &blob) != SQLITE_OK) { + _tprintf("Could not open BLOB to write chunk for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + return 0; + } + + for (;;) { + if (ReadFile(f, buf, sizeof(buf), &read, NULL) == FALSE) { + _tprintf("failed to read in %s for writing chunk\n", + name); + goto end; + } + + if (read == 0) + break; + if (sqlite3_blob_write(blob, buf, read, has_written) + != SQLITE_OK) { + _tprintf("Could not write to BLOB for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + goto end; + } + + has_written += read; + } + + r = 1; +end: + sqlite3_blob_close(blob); + return r; +} + +/* Insert a chunk with a specified SHA256 checksum into the database. + * + * Returns 1 on success, 0 on failure. On success, ``rowid`` contains + * the row in ``chunks`` that contains the data of ``f``. + */ +static int insert_chunk(TCHAR *name, HANDLE f, sqlite_int64 *rowid, + uint8_t sha256[SHASIZ]) +{ + LARGE_INTEGER fsize = {0}; + sqlite3_stmt *stmt; + int r = 0; + + if (SetFilePointer(f, 0, NULL, FILE_BEGIN) + == INVALID_SET_FILE_POINTER) { + _tprintf("rewind for %s failed\n", name); + return 0; + } + + if (GetFileSizeEx(f, &fsize) == 0) { + _tprintf("Could not get the file size of %s\n", name); + return 0; + } + + /* Chunks are fixed size in SQLite. They need to be pre-allocated + * with the size of a file before a file can be written to it.
+ */ + if (sqlite3_prepare_v2(g_db, + "INSERT INTO chunks (sha256, data) VALUES (hex(?), zeroblob(?)) RETURNING rowid;", + -1, &stmt, NULL) != SQLITE_OK) { + _tprintf("Could not prepare chunk insertion for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + return 0; + } + + if (sqlite3_bind_blob(stmt, 1, sha256, SHASIZ, SQLITE_TRANSIENT) + != SQLITE_OK) { + _tprintf("Could not bind sha256 to statement for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + goto finalize; + } + + if (sqlite3_bind_int64(stmt, 2, fsize.QuadPart) != SQLITE_OK) { + _tprintf("Could not bind file size to statement for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + goto finalize; + } + + if (sqlite3_step(stmt) != SQLITE_ROW) { + _tprintf("Could not step chunk insertion statement for %s: %s\n", + name, sqlite3_errmsg_U(g_db)); + goto finalize; + } + + *rowid = sqlite3_column_int64(stmt, 0); + r = write_chunk(name, f, *rowid); +finalize: + sqlite3_finalize(stmt); + return r; +} + +/* Check if the SHA256 checksum already exists in the database. + * + * If the checksum exists, ``rowid`` is filled with the row in ``chunks`` + * that contains that sha256 sum. If it does not exist, then ``rowid`` + * contains ``-1``. + * + * Returns 0 if an error occurred, and 1 if no error occurred. + */ +static int check_sha256(TCHAR *name, uint8_t sha256[SHASIZ], + sqlite_int64 *rowid) +{ + sqlite3_stmt *stmt; + int r = 0; + + /* Since ``sha256`` is binary, use ``hex`` to convert the binary + * string to a text representation.
+ */ + if (sqlite3_prepare_v2(g_db, + "SELECT rowid FROM chunks WHERE sha256 = hex(?);", + -1, &stmt, NULL) != SQLITE_OK) { + _tprintf("failed to prepare sha256 statement for %s: %s\n", + name, sqlite_errmsg_U(g_db)); + goto end; + } + + if (sqlite3_bind_blob(&stmt, 1, sha256, SHASIZ, SQLITE_TRANSIENT) + != SQLITE_OK) { + _tprintf("failed to bind sha256 value for %s: %s\n", + name, sqlite_errmsg_U(g_db)); + goto end; + } + + switch (sqlite3_step(&stmt)) { + case SQLITE_ROW: + /* there is a sha256 value */ + *rowid = sqlite3_column_int(stmt, 0); + break; + case SQLITE_DONE: + /* The chunk was never entered */ + *rowid = -1; + break; + } + + r = 1; +end: + sqlite3_finalize(stmt); + return r; +} + +/* Calculate the SHA256 checksum of the file in ``f``, and place a binary + * representation of the checksum into ``shastr``. + * + * Returns 0 if there was an error, 1 for success. + */ +static int calculate_sha256(TCHAR *name, HANDLE f, uint8_t sha256[SHASIZ]) +{ + DWORD read = 0; + uint8_t buf[SIZE_OF_SHA_256_CHUNK * 64]; + struct Sha_256 sha_state; + int i; + + sha_256_init(&sha_state, sha256); + for (;;) { + if (ReadFile(f, buf, sizeof(buf), &read, NULL) == FALSE) { + _tprintf("failed to read in %s for sha256 calculation\n", + name); + return 0; + } + + /* ReadFile() reads 0 bytes on EOF. */ + if (read == 0) + break; + sha_256_write(&sha_state, buf, read); + } + + sha_256_close(&sha_state); + return 1; +} + +/* Archive a file. 
*/ +static void archive_file(TCHAR *name) +{ + HANDLE f; + sqlite_int64 chunks_rowid = -1; + uint8_t sha256[SHASIZ]; + char *errstr; + int success = 0; + + if (sqlite3_exec(g_db, "SAVEPOINT file;", NULL, NULL, &errstr) + != SQLITE_OK) + die("failed to initialize file savepoint: %s\n", errstr); + + f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (f == INVALID_HANDLE_VALUE) { + _tprintf("could not open %s\n", name); + goto end; + } + + if (!calculate_sha256(name, f, sha256)) + goto end; + if (!check_sha256(sha256, &chunks_rowid)) + goto end; + + if (chunks_rowid < 0) { + if (!insert_chunk(name, f, &chunks_rowid, sha256)) + goto end; + } + + if (!insert_backup_record(name, chunks_rowid)) + goto end; + success = 1; + + log("Archived %s\n", name); +end: + if (sqlite3_exec(g_db, success ? "RELEASE file;" : "ROLLBACK TO file;", NULL, NULL, + &errstr) != SQLITE_OK) + die("failed to rollback file savepoint: %s\n", errstr); + CloseHandle(f); +} + +/* Recursively archive a directory. */ +static void archive_directory(TCHAR *dirname) +{ + WIN32_FIND_DATA fdata; + HANDLE dhandle; + TCHAR pathname[PATH_MAX]; + dhandle = FindFirstFile(dirname, &fdata); + if (dhandle == INVALID_HANDLE_VALUE) { + _tprintf("Failed to open directory %s\n", dirname); + return; + } + + do { + /* Windows versions prior to 11 (!) include _snprintf() as + * a non-standard version of snprintf(). _snprintf() will + * return -1 if the string does not fit in the buffer. 
+ */ + if (_sntprintf(pathname, PATH_MAX, "%s\\%s", dirname, + fdata.cFileName) < 0) { + _tprintf("Pathname for %s\\%s too long\n", dirname, + fdata.cFileName); + continue; + } + + if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + archive_directory(pathname); + else + archive_file(pathname); + } while (FindNextFile(dhandle, &fdata) != 0); + + FindClose(dhandle); +} + +/* Check if the path is a file or a directory, and either archive + * the file or archive the entire directory recursively. */ +static void archive_file_or_directory(TCHAR *name) +{ + DWORD attr = GetFileAttributes(fn); + if (attr == INVALID_FILE_ATTRIBUTES) { + _tprintf(TEXT("failed to open %s\n"), name); + return; + } + + if (g_backup_id < 0) + initialize_backup_id(); + + if (attr & FILE_ATTRIBUTE_DIRECTORY) + archive_directory(name); + else + archive_file(name); +} + +/* Print instructions to console and exit. */ +static void usage(void) +{ + _tprintf(TEXT("\nwlb [\\V] [\\H] [\\D DBNAME] [\\A DIRECTORIES...]\n")); + _tprintf(TEXT("\\V: Verbose\n")); + _tprintf(TEXT("\\H: Display the help\n")); + _tprintf(TEXT("\\D: Database (create if does not exist)\n")); + _tprintf(TEXT("\\A: Archive directories\n")); + exit(0); +} + +int _tmain(int argc, TCHAR *argv[]) +{ + int i; + + for (i = 1; i < argc; i++) { + if (_tcscmp(argv[i], TEXT("\\H")) == 0) { + usage(); + } else if (_tcscmp(argv[i], TEXT("\\A")) == 0) { + i++; + archive_file_or_directory(argv[i]); + } else if (_tcscmp(argv[i], TEXT("\\D")) == 0) { + i++; + open_db(argv[i]); + } else if (_tcscmp(argv[i], TEXT("\\V")) == 0) { + g_verbose = 1; + } else { + usage(); + } + } + + if (g_db) + sqlite3_exec(g_db, "COMMIT;", NULL, NULL, NULL); + sqlite3_close(g_db); + return 0; +}