aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2023-12-09 10:58:59 -0500
committerGravatar Peter McGoron 2023-12-09 10:58:59 -0500
commit647f248d07438df8f5871f0adaf0b5379eba1501 (patch)
tree12eaa10f106eb10ae53098dd920c189fa7f1d86b
init
-rw-r--r--COPYING202
-rw-r--r--README.rst20
-rw-r--r--sha-256.c226
-rw-r--r--sha-256.h103
-rw-r--r--wlb.c531
5 files changed, 1082 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..769b4b3
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,20 @@
+---------------------
+Windows Local Backup
+---------------------
+
+Windows local backup system. Supports anything Sqlite3 supports.
+Tested on x86 TCC for Windows XP.
+
+Backups are stored in a SQLite database. Each backup is timestamped
+at the time of execution, with an optional name.
+
+Each file in the backup has its SHA256 sum included. If a file has
+an identical SHA256 sum, only one copy of the file is stored. The
+relative directory of each file is stored.
+
+-----
+Notes
+-----
+
+Do not run backups while programs are writing to things or your
+program might crash. The backup program should still work fine.
diff --git a/sha-256.c b/sha-256.c
new file mode 100644
index 0000000..5a69250
--- /dev/null
+++ b/sha-256.c
@@ -0,0 +1,226 @@
+#include "sha-256.h"
+
+/* Size in bytes of the message-length field that sha_256_close() writes at the end of the final chunk. */
+#define TOTAL_LEN_LEN 8
+
+/*
+ * Comments from pseudo-code at https://en.wikipedia.org/wiki/SHA-2 are reproduced here.
+ * When useful for clarification, portions of the pseudo-code are reproduced here too.
+ */
+
+/*
+ * @brief Rotate a 32-bit word to the right.
+ * @param value The word to rotate.
+ * @param count How many bit positions to rotate by.
+ * @return The word with its bits rotated right by count positions.
+ *
+ * @note Only well defined for 0 < count < 32: a count of 0 (or 32 and above) would shift by the full width of
+ * the type. Every call in this file uses a count inside that range.
+ */
+static inline uint32_t right_rot(uint32_t value, unsigned int count)
+{
+	/* Bits that stay inside the word after moving right. */
+	const uint32_t shifted_down = value >> count;
+	/* Bits that fell off the low end and re-enter at the top. */
+	const uint32_t wrapped_around = value << (32 - count);
+
+	return shifted_down | wrapped_around;
+}
+
+/*
+ * @brief Fold one chunk of message data into a hash value under calculation.
+ * @param h Pointer to the first of the eight 32-bit hash words; updated in place.
+ * @param p Pointer to the chunk data; 64 bytes are read from it.
+ *
+ * @note This is the SHA-256 compression function. The message schedule is conceptually w[0..63], but only the
+ * most recent 16 words are ever needed, so they are kept in a 16-entry ring indexed by (round & 0xf).
+ */
+static inline void consume_chunk(uint32_t *h, const uint8_t *p)
+{
+	/*
+	 * Round constants
+	 * (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311):
+	 */
+	static const uint32_t k[] = {
+		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4,
+		0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
+		0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f,
+		0x4a7484aa, 0x5cb0a9dc, 0x76f988da, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+		0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc,
+		0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+		0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, 0x19a4c116,
+		0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+		0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7,
+		0xc67178f2};
+	uint32_t ah[8];
+	uint32_t w[16];
+	unsigned round, n;
+
+	/* Initialize working variables to current hash value: */
+	for (n = 0; n < 8; n++)
+		ah[n] = h[n];
+
+	/* Compression function main loop, one pass per round: */
+	for (round = 0; round < 64; round++) {
+		const unsigned slot = round & 0xf;
+		uint32_t big_sigma0, big_sigma1, ch, maj, temp1, temp2;
+
+		if (round < 16) {
+			/* First 16 rounds: load the next big-endian word straight from the chunk. */
+			w[slot] = (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 | (uint32_t)p[2] << 8 | (uint32_t)p[3];
+			p += 4;
+		} else {
+			/*
+			 * Remaining rounds: extend the schedule. Relative to the current slot, +1 is the word
+			 * from 15 rounds ago, +14 the word from 2 rounds ago, and +9 the word from 7 rounds ago.
+			 */
+			const uint32_t w15 = w[(slot + 1) & 0xf];
+			const uint32_t w2 = w[(slot + 14) & 0xf];
+			const uint32_t small_sigma0 = right_rot(w15, 7) ^ right_rot(w15, 18) ^ (w15 >> 3);
+			const uint32_t small_sigma1 = right_rot(w2, 17) ^ right_rot(w2, 19) ^ (w2 >> 10);
+
+			w[slot] = w[slot] + small_sigma0 + w[(slot + 9) & 0xf] + small_sigma1;
+		}
+
+		big_sigma1 = right_rot(ah[4], 6) ^ right_rot(ah[4], 11) ^ right_rot(ah[4], 25);
+		ch = (ah[4] & ah[5]) ^ (~ah[4] & ah[6]);
+		temp1 = ah[7] + big_sigma1 + ch + k[round] + w[slot];
+
+		big_sigma0 = right_rot(ah[0], 2) ^ right_rot(ah[0], 13) ^ right_rot(ah[0], 22);
+		maj = (ah[0] & ah[1]) ^ (ah[0] & ah[2]) ^ (ah[1] & ah[2]);
+		temp2 = big_sigma0 + maj;
+
+		/* Shift every working variable down one place, then patch in the two computed ones. */
+		for (n = 7; n > 0; n--)
+			ah[n] = ah[n - 1];
+		ah[4] += temp1;
+		ah[0] = temp1 + temp2;
+	}
+
+	/* Add the compressed chunk to the current hash value: */
+	for (n = 0; n < 8; n++)
+		h[n] += ah[n];
+}
+
+/*
+ * Public functions. See header file for documentation.
+ */
+
+void sha_256_init(struct Sha_256 *sha_256, uint8_t hash[SIZE_OF_SHA_256_HASH])
+{
+	/*
+	 * Initial hash values (first 32 bits of the fractional parts of the square roots of the first 8 primes
+	 * 2..19):
+	 */
+	static const uint32_t initial_state[8] = {
+		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
+	unsigned n;
+
+	for (n = 0; n < 8; n++)
+		sha_256->h[n] = initial_state[n];
+
+	/* Nothing consumed yet: the staging chunk is empty and fully available. */
+	sha_256->total_len = 0;
+	sha_256->space_left = SIZE_OF_SHA_256_CHUNK;
+	sha_256->chunk_pos = sha_256->chunk;
+
+	/* Remember where sha_256_close() has to deliver the result. */
+	sha_256->hash = hash;
+}
+
+void sha_256_write(struct Sha_256 *sha_256, const void *data, size_t len)
+{
+	/*
+	 * The cast is not needed in C; it is kept so that this file remains valid C++ as well.
+	 */
+	const uint8_t *src = (const uint8_t *)data;
+	size_t remaining = len;
+
+	sha_256->total_len += len;
+
+	while (remaining != 0) {
+		const int staging_is_empty = sha_256->space_left == SIZE_OF_SHA_256_CHUNK;
+
+		if (staging_is_empty && remaining >= SIZE_OF_SHA_256_CHUNK) {
+			/*
+			 * Fast path: nothing is buffered and a whole chunk is available in the input, so hash
+			 * it in place without copying.
+			 */
+			consume_chunk(sha_256->h, src);
+			src += SIZE_OF_SHA_256_CHUNK;
+			remaining -= SIZE_OF_SHA_256_CHUNK;
+		} else {
+			/* General path: top up the staging chunk with as much as fits. */
+			size_t take = sha_256->space_left;
+
+			if (remaining < take)
+				take = remaining;
+
+			memcpy(sha_256->chunk_pos, src, take);
+			sha_256->space_left -= take;
+			src += take;
+			remaining -= take;
+
+			if (sha_256->space_left != 0) {
+				sha_256->chunk_pos += take;
+			} else {
+				/* Staging chunk is full: hash it and start over with an empty one. */
+				consume_chunk(sha_256->h, sha_256->chunk);
+				sha_256->chunk_pos = sha_256->chunk;
+				sha_256->space_left = SIZE_OF_SHA_256_CHUNK;
+			}
+		}
+	}
+}
+
+uint8_t *sha_256_close(struct Sha_256 *sha_256)
+{
+	uint32_t *const state = sha_256->h;
+	uint8_t *const digest = sha_256->hash;
+	uint8_t *cursor = sha_256->chunk_pos;
+	size_t room = sha_256->space_left;
+	size_t byte_count;
+	int idx;
+
+	/*
+	 * sha_256_write() hashes the staging chunk as soon as it fills up, so at least one byte is free here.
+	 * Padding starts with a single one-bit, i.e. the byte 0x80.
+	 */
+	*cursor = 0x80;
+	cursor++;
+	room--;
+
+	/*
+	 * The last TOTAL_LEN_LEN bytes of the final chunk hold the message length. If they do not fit any more,
+	 * zero-fill and hash the current chunk, then continue with a fresh one.
+	 */
+	if (room < TOTAL_LEN_LEN) {
+		memset(cursor, 0x00, room);
+		consume_chunk(state, sha_256->chunk);
+		cursor = sha_256->chunk;
+		room = SIZE_OF_SHA_256_CHUNK;
+	}
+
+	/* Zero everything up to the length field. */
+	room -= TOTAL_LEN_LEN;
+	memset(cursor, 0x00, room);
+	cursor += room;
+
+	/*
+	 * Store the length in bits, big-endian. The lowest byte is (bytes << 3); dropping five more bits of the
+	 * byte count then leaves exactly the value of the next-higher byte, and so on upwards.
+	 */
+	byte_count = sha_256->total_len;
+	cursor[7] = (uint8_t)(byte_count << 3);
+	byte_count >>= 5;
+	for (idx = 6; idx >= 0; --idx) {
+		cursor[idx] = (uint8_t)byte_count;
+		byte_count >>= 8;
+	}
+	consume_chunk(state, sha_256->chunk);
+
+	/* Produce the final hash value (big-endian): */
+	for (idx = 0; idx < 8; idx++) {
+		uint8_t *const out = digest + 4 * idx;
+
+		out[0] = (uint8_t)(state[idx] >> 24);
+		out[1] = (uint8_t)(state[idx] >> 16);
+		out[2] = (uint8_t)(state[idx] >> 8);
+		out[3] = (uint8_t)state[idx];
+	}
+	return sha_256->hash;
+}
+
+void calc_sha_256(uint8_t hash[SIZE_OF_SHA_256_HASH], const void *input, size_t len)
+{
+	/* One-shot convenience wrapper around the streaming API: init, one write, close. */
+	struct Sha_256 ctx;
+
+	sha_256_init(&ctx, hash);
+	sha_256_write(&ctx, input, len);
+	sha_256_close(&ctx);
+}
diff --git a/sha-256.h b/sha-256.h
new file mode 100644
index 0000000..6ba59bf
--- /dev/null
+++ b/sha-256.h
@@ -0,0 +1,103 @@
+#ifndef SHA_256_H
+#define SHA_256_H
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * @brief Size of the SHA-256 sum. This times eight is 256 bits.
+ */
+#define SIZE_OF_SHA_256_HASH 32
+
+/*
+ * @brief Size of the chunks used for the calculations.
+ *
+ * @note This should mostly be ignored by the user, although when using the streaming API, it has an impact for
+ * performance. Add chunks whose size is a multiple of this, and you will avoid a lot of superfluous copying in RAM!
+ */
+#define SIZE_OF_SHA_256_CHUNK 64
+
+/*
+ * @brief The opaque SHA-256 type, that should be instantiated when using the streaming API.
+ *
+ * @note Although the details are exposed here, in order to make instantiation easy, you should refrain from directly
+ * accessing the fields, as they may change in the future.
+ *
+ * @note NOTE(review): total_len is a size_t. Where size_t is narrower than 64 bits, the running byte count wraps
+ * before the 64-bit bit-length limit of the algorithm is reached - confirm this is acceptable for the inputs used.
+ */
+struct Sha_256 {
+ /* Caller-supplied output buffer; stored by sha_256_init() and filled by sha_256_close(). */
+ uint8_t *hash;
+ /* Staging buffer for input that does not yet make up a whole chunk. */
+ uint8_t chunk[SIZE_OF_SHA_256_CHUNK];
+ /* Next free byte inside chunk. */
+ uint8_t *chunk_pos;
+ /* Number of free bytes remaining in chunk. */
+ size_t space_left;
+ /* Total number of bytes passed to sha_256_write() so far. */
+ size_t total_len;
+ /* The eight 32-bit words of the hash value under calculation. */
+ uint32_t h[8];
+};
+
+/*
+ * @brief The simple SHA-256 calculation function.
+ * @param hash Hash array, where the result is delivered.
+ * @param input Pointer to the data the hash shall be calculated on.
+ * @param len Length of the input data, in bytes.
+ *
+ * @note If all of the data you are calculating the hash value on is available in a contiguous buffer in memory, this is
+ * the function you should use.
+ *
+ * @note If either of the passed pointers is NULL, the results are unpredictable.
+ */
+void calc_sha_256(uint8_t hash[SIZE_OF_SHA_256_HASH], const void *input, size_t len);
+
+/*
+ * @brief Initialize a SHA-256 streaming calculation.
+ * @param sha_256 A pointer to a SHA-256 structure.
+ * @param hash Hash array, where the result will be delivered.
+ *
+ * @note If all of the data you are calculating the hash value on is not available in a contiguous buffer in memory, this is
+ * where you should start. Instantiate a SHA-256 structure, for instance by simply declaring it locally, make your hash
+ * buffer available, and invoke this function. Once a SHA-256 hash has been calculated (see further below) a SHA-256
+ * structure can be initialized again for the next calculation.
+ *
+ * @note If either of the passed pointers is NULL, the results are unpredictable.
+ */
+void sha_256_init(struct Sha_256 *sha_256, uint8_t hash[SIZE_OF_SHA_256_HASH]);
+
+/*
+ * @brief Stream more input data for an on-going SHA-256 calculation.
+ * @param sha_256 A pointer to a previously initialized SHA-256 structure.
+ * @param data Pointer to the data to be added to the calculation.
+ * @param len Length of the data to add, in bytes.
+ *
+ * @note This function may be invoked an arbitrary number of times between initialization and closing, but the maximum
+ * data length is limited by the SHA-256 algorithm: the total number of bits (i.e. the total number of bytes times
+ * eight) must be representable by a 64-bit unsigned integer. While that is not a practical limitation, the results are
+ * unpredictable if that limit is exceeded.
+ *
+ * @note NOTE(review): the running byte count is kept in a size_t field (total_len of struct Sha_256), so on targets
+ * where size_t is narrower than 64 bits the count wraps before the algorithmic limit above is reached.
+ *
+ * @note This function may be invoked on empty data (zero length), although that obviously will not add any data.
+ *
+ * @note If either of the passed pointers is NULL, the results are unpredictable.
+ */
+void sha_256_write(struct Sha_256 *sha_256, const void *data, size_t len);
+
+/*
+ * @brief Conclude a SHA-256 streaming calculation, making the hash value available.
+ * @param sha_256 A pointer to a previously initialized SHA-256 structure.
+ * @return Pointer to the hash array, where the result is delivered.
+ *
+ * @note After this function has been invoked, the result is available in the hash buffer that initially was provided. A
+ * pointer to the hash value is returned for convenience, but you should feel free to ignore it: it is simply a pointer
+ * to the first byte of your initially provided hash array.
+ *
+ * @note If the passed pointer is NULL, the results are unpredictable.
+ *
+ * @note Invoking this function for a calculation with no data (the writing function has never been invoked, or it only
+ * has been invoked with empty data) is legal. It will calculate the SHA-256 value of the empty string.
+ */
+uint8_t *sha_256_close(struct Sha_256 *sha_256);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/wlb.c b/wlb.c
new file mode 100644
index 0000000..2876d48
--- /dev/null
+++ b/wlb.c
@@ -0,0 +1,531 @@
+#include <windows.h>
+#include <stdlib.h>
+#include <tchar.h>
+#include "sqlite3.h"
+#include "sha-2/sha-256.h"
+#define SHASIZ SIZE_OF_SHA_256_HASH
+
+/* TODO: "\\?\" trick?
+ * Windows limits path lengths to 260 characters. Explorer on Windows XP
+ * has this issue too.
+ *
+ * UTF issues?
+ */
+
+/*************************************************************************
+ * Compatibility defines for non-Unicode systems
+ *
+ * The *_U names select the UTF-16 flavour of an SQLite call when the
+ * program is built with UNICODE (TCHAR is then a wide character) and the
+ * UTF-8 flavour otherwise. ENCODING is the matching SQLite text-encoding
+ * constant; SQLite spells these SQLITE_UTF16 / SQLITE_UTF8.
+ ************************************************************************/
+
+#ifdef UNICODE
+# define ENCODING SQLITE_UTF16
+# define sqlite3_open_U sqlite3_open16
+# define sqlite3_errmsg_U sqlite3_errmsg16
+# define sqlite3_bind_text_U sqlite3_bind_text16
+#else
+# define ENCODING SQLITE_UTF8
+# define sqlite3_open_U sqlite3_open
+# define sqlite3_errmsg_U sqlite3_errmsg
+# define sqlite3_bind_text_U sqlite3_bind_text
+#endif
+
+/****************
+ * Globals
+ ***************/
+
+/* The database connection; NULL until open_db() has opened it. */
+sqlite3 *g_db = NULL;
+/* When nonzero, _log() prints its messages; when zero they are suppressed. */
+int g_verbose = 0;
+/* Row in ``backup_ids`` for this run; -1 until initialize_backup_id() sets it. */
+sqlite_int64 g_backup_id = -1;
+
+/********************
+ * Utility Functions
+ *******************/
+
+/* Print a printf-style message, but only when ``g_verbose`` is nonzero.
+ *
+ * ``msg`` is the format string (TCHAR flavour); the remaining arguments
+ * are the values it consumes.
+ */
+static void _log(TCHAR *msg, ...)
+{
+	va_list va;
+
+	if (!g_verbose)
+		return;
+
+	/* va_start must name the last fixed parameter, which is ``msg``. */
+	va_start(va, msg);
+	_tvprintf(msg, va);
+	va_end(va);
+}
+/* String literals only.
+ *
+ * ``##__VA_ARGS__`` is the widely supported GNU comma-swallowing
+ * extension: it removes the comma after the format string when no further
+ * arguments are given, so that a call with a bare format string compiles.
+ */
+#define log(msg, ...) _log(TEXT(msg), ##__VA_ARGS__)
+
+/* Print a printf-style error message, close the database connection and
+ * terminate the process with exit status 1. Does not return.
+ */
+static void _die(TCHAR *emsg, ...)
+{
+	va_list args;
+
+	va_start(args, emsg);
+	_tvprintf(emsg, args);
+	va_end(args);
+
+	sqlite3_close(g_db);
+	exit(1);
+}
+/* String literals only */
+#define die(emsg, ...) _die(TEXT(emsg), __VA_ARGS__)
+
+/**********************************
+ * Database constants
+ *********************************/
+
+/* 'wlb' in ASCII */
+#define UPPER_VERSION 0x776c62
+/* UPPER_VERSION followed by a one-byte format revision (currently 0x01). */
+#define CUR_VERSION 0x776c6201
+/* Stringize the *expansion* of a macro. ``#`` applied directly to a macro
+ * parameter suppresses expansion of the argument, so S(CUR_VERSION) would
+ * yield "CUR_VERSION"; the extra level lets the argument expand first.
+ */
+#define S_(s) #s
+#define S(s) S_(s)
+/* Schema for a fresh database, stamped with CUR_VERSION as user_version. */
+#define INIT_SCRIPT "\
+BEGIN; \
+PRAGMA user_version = " S(CUR_VERSION) ";\
+CREATE TABLE chunks (\
+ rowid INTEGER PRIMARY KEY, \
+ sha256 TEXT UNIQUE NOT NULL, \
+ data BLOB NOT NULL\
+); \
+CREATE TABLE backup_ids (\
+ ts TEXT UNIQUE NOT NULL,\
+ rowid INTEGER PRIMARY KEY\
+); \
+CREATE TABLE backups (\
+ backup INTEGER NOT NULL REFERENCES backup_ids ON DELETE CASCADE, \
+ path TEXT NOT NULL, \
+ chunk INTEGER NOT NULL REFERENCES chunks ON DELETE RESTRICT\
+); \
+CREATE INDEX backups_paths ON backups (path); \
+CREATE INDEX backups_backup ON backups (backup); \
+CREATE INDEX backups_chunk ON backups (chunk);\
+COMMIT;"
+
+/* Make sure the open database is usable by this program.
+ *
+ * When ``did_not_exist`` is nonzero the database file is new, so the
+ * schema in INIT_SCRIPT is executed. Otherwise ``PRAGMA user_version`` is
+ * read and compared with CUR_VERSION. Every failure ends the program
+ * through die().
+ */
+static void check_db_version(int did_not_exist)
+{
+	sqlite3_stmt *query;
+	int found_version;
+	int rc;
+
+	if (!did_not_exist) {
+		/* DB existed beforehand: do version check */
+		rc = sqlite3_prepare_v2(g_db, "PRAGMA user_version;", -1, &query, NULL);
+		if (rc != SQLITE_OK)
+			die("Error preparing user version check: %s\n",
+			    sqlite3_errmsg_U(g_db));
+
+		rc = sqlite3_step(query);
+		if (rc != SQLITE_ROW)
+			die("Error executing user version check: %s\n",
+			    sqlite3_errmsg_U(g_db));
+
+		found_version = sqlite3_column_int(query, 0);
+		sqlite3_finalize(query);
+
+		/* Only the low byte (the format revision) is shown to the user. */
+		if (found_version != CUR_VERSION)
+			die("Bad DB version found (expected %d), got %d\n",
+			    CUR_VERSION & 0xFF, found_version & 0xFF);
+		return;
+	}
+
+	/* DB did not exist beforehand: initialize DB */
+	rc = sqlite3_exec(g_db, INIT_SCRIPT, NULL, NULL, NULL);
+	if (rc != SQLITE_OK)
+		die("Error initializing database: %s\n",
+		    sqlite3_errmsg_U(g_db));
+	log("Intialized database\n");
+}
+
+/* Initialize the backup ID used in this session.
+ *
+ * Inserts a row stamped with the current ``datetime()`` into
+ * ``backup_ids`` and stores its rowid in ``g_backup_id``. Any failure
+ * ends the program through die().
+ *
+ * This is only called if the user requests an archive of a directory.
+ */
+static void initialize_backup_id(void)
+{
+	sqlite3_stmt *stmt;
+
+	/* sqlite3_prepare_v2 (rather than the legacy sqlite3_prepare) makes
+	 * sqlite3_step report the specific error, so the message printed
+	 * below is meaningful.
+	 */
+	if (sqlite3_prepare_v2(g_db,
+	    "INSERT INTO backup_ids (ts) VALUES (datetime()) RETURNING rowid;",
+	    -1, &stmt, NULL) != SQLITE_OK) {
+		die("failed to prepare inserting timestamp: %s\n",
+		    sqlite3_errmsg_U(g_db));
+	}
+
+	/* RETURNING yields the new rowid as a result row. */
+	if (sqlite3_step(stmt) != SQLITE_ROW)
+		die("failed to insert timestamp: %s\n",
+		    sqlite3_errmsg_U(g_db));
+	g_backup_id = sqlite3_column_int64(stmt, 0);
+	sqlite3_finalize(stmt);
+}
+
+/* Opens the DB named ``fn`` into ``g_db`` and initializes it if the file
+ * did not exist, then begins a transaction. Any failure ends the program
+ * through die().
+ */
+static void open_db(TCHAR *fn)
+{
+	/* Must be sampled before opening: opening may create the file. */
+	int did_not_exist = GetFileAttributes(fn)
+	                    == INVALID_FILE_ATTRIBUTES;
+
+	if (sqlite3_open_U(fn, &g_db) != SQLITE_OK)
+		die("Error opening database %s: %s\n", fn,
+		    sqlite3_errmsg_U(g_db));
+	check_db_version(did_not_exist);
+
+	/* Open the transaction the rest of the run works in. Nothing in
+	 * this function commits it - presumably the caller does (TODO
+	 * confirm).
+	 *
+	 * The message is fetched with sqlite3_errmsg_U instead of
+	 * sqlite3_exec's errmsg out-parameter: that one is always a narrow
+	 * string, which does not match die()'s TCHAR format in a UNICODE
+	 * build, and it would have to be released with sqlite3_free().
+	 */
+	if (sqlite3_exec(g_db, "BEGIN;", NULL, NULL, NULL) != SQLITE_OK)
+		die("failed to begin transaction: %s\n",
+		    sqlite3_errmsg_U(g_db));
+}
+
+/* Insert a backup record into the database.
+ *
+ * Adds one row to ``backups`` linking the current backup
+ * (``g_backup_id``), the path ``name`` and the chunk row ``chunk_id``.
+ *
+ * This part occurs after the file data has been inserted or identified
+ * by SHA256 hash.
+ *
+ * Returns 1 on success and 0 on failure (after printing a message).
+ */
+static int insert_backup_record(TCHAR *name, sqlite_int64 chunk_id)
+{
+	sqlite3_stmt *stmt;
+	int r = 0;
+
+	if (sqlite3_prepare_v2(g_db,
+	    "INSERT INTO backups (backup, path, chunk) VALUES (?,?,?);",
+	    -1, &stmt, NULL) != SQLITE_OK) {
+		_tprintf(TEXT("Could not prepare backup insert statement for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		return 0;
+	}
+
+	if (sqlite3_bind_int64(stmt, 1, g_backup_id) != SQLITE_OK) {
+		_tprintf(TEXT("Could not bind backup id for %s: %s\n"), name,
+		         sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	/* -1: the text runs up to its terminator. SQLITE_TRANSIENT: SQLite
+	 * takes its own copy of the string.
+	 */
+	if (sqlite3_bind_text_U(stmt, 2, name, -1, SQLITE_TRANSIENT)
+	    != SQLITE_OK) {
+		_tprintf(TEXT("Could not bind path for %s: %s\n"), name,
+		         sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	if (sqlite3_bind_int64(stmt, 3, chunk_id) != SQLITE_OK) {
+		_tprintf(TEXT("Could not bind chunk for %s: %s\n"), name,
+		         sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	if (sqlite3_step(stmt) != SQLITE_DONE) {
+		_tprintf(TEXT("Error in inserting chunk for %s: %s\n"), name,
+		         sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	r = 1;
+end:
+	sqlite3_finalize(stmt);
+	return r;
+}
+
+/* Write a file to a BLOB in ``chunks`` at row ``rowid``.
+ *
+ * Reads ``f`` from its current position to end of file in 4096-byte
+ * pieces and writes them consecutively into the ``data`` column of that
+ * row. ``name`` is only used in messages.
+ *
+ * Returns 0 on failure and 1 on success.
+ */
+static int write_chunk(TCHAR *name, HANDLE f, sqlite_int64 rowid)
+{
+	sqlite3_blob *blob;
+	char buf[4096];
+	DWORD read = 0;
+	int has_written = 0;
+	int r = 0;
+
+	/* Flag 1 opens the BLOB for writing. */
+	if (sqlite3_blob_open(g_db, "main", "chunks", "data", rowid, 1,
+	                      &blob) != SQLITE_OK) {
+		_tprintf(TEXT("Could not open BLOB to write chunk for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		return 0;
+	}
+
+	for (;;) {
+		if (ReadFile(f, buf, sizeof(buf), &read, NULL) == FALSE) {
+			_tprintf(TEXT("failed to read in %s while writing chunk\n"),
+			         name);
+			goto end;
+		}
+
+		/* A successful read of zero bytes means end of file. */
+		if (read == 0)
+			break;
+		/* The first argument is the BLOB handle, not the connection;
+		 * ``has_written`` is the byte offset inside the BLOB.
+		 */
+		if (sqlite3_blob_write(blob, buf, (int)read, has_written)
+		    != SQLITE_OK) {
+			_tprintf(TEXT("Could not write to BLOB for %s: %s\n"),
+			         name, sqlite3_errmsg_U(g_db));
+			goto end;
+		}
+
+		has_written += (int)read;
+	}
+
+	r = 1;
+end:
+	/* The handle is released even if closing reports an error; such an
+	 * error still turns an apparent success into a failure.
+	 */
+	if (sqlite3_blob_close(blob) != SQLITE_OK && r) {
+		_tprintf(TEXT("Could not close BLOB for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		r = 0;
+	}
+	return r;
+}
+
+/* Insert a chunk with a specified SHA256 checksum into the database.
+ *
+ * Rewinds ``f``, creates a row in ``chunks`` holding the hex form of
+ * ``sha256`` and a zero-filled BLOB of the file's size, then copies the
+ * file into that BLOB with write_chunk(). ``name`` is only used in
+ * messages.
+ *
+ * Returns 1 on success, 0 on failure. On success, ``rowid`` contains
+ * the row in ``chunks`` that contains the data of ``f``.
+ */
+static int insert_chunk(TCHAR *name, HANDLE f, sqlite_int64 *rowid,
+                        uint8_t sha256[SHASIZ])
+{
+	LARGE_INTEGER fsize;
+	sqlite3_stmt *stmt;
+	int r = 0;
+
+	fsize.QuadPart = 0;
+
+	if (SetFilePointer(f, 0, NULL, FILE_BEGIN)
+	    == INVALID_SET_FILE_POINTER) {
+		_tprintf(TEXT("rewind for %s failed\n"), name);
+		return 0;
+	}
+
+	if (GetFileSizeEx(f, &fsize) == 0) {
+		_tprintf(TEXT("Could not get the file size of %s\n"), name);
+		return 0;
+	}
+
+	/* Incremental BLOB writes cannot change the size of a BLOB, so
+	 * the row is created with a zeroblob of the file's size before the
+	 * file is written into it.
+	 */
+	if (sqlite3_prepare_v2(g_db,
+	    "INSERT INTO chunks (sha256, data) VALUES (hex(?), zeroblob(?)) RETURNING rowid;",
+	    -1, &stmt, NULL) != SQLITE_OK) {
+		_tprintf(TEXT("Could not prepare chunk insertion for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		return 0;
+	}
+
+	if (sqlite3_bind_blob(stmt, 1, sha256, SHASIZ, SQLITE_TRANSIENT)
+	    != SQLITE_OK) {
+		_tprintf(TEXT("Could not bind sha256 to statement for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		goto finalize;
+	}
+
+	/* LARGE_INTEGER is a union; QuadPart is its 64-bit value. */
+	if (sqlite3_bind_int64(stmt, 2, fsize.QuadPart) != SQLITE_OK) {
+		_tprintf(TEXT("Could not bind file size to statement for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		goto finalize;
+	}
+
+	/* RETURNING yields the new rowid as a result row. */
+	if (sqlite3_step(stmt) != SQLITE_ROW) {
+		_tprintf(TEXT("Could not step chunk insertion statement for %s: %s\n"),
+		         name, sqlite3_errmsg_U(g_db));
+		goto finalize;
+	}
+
+	*rowid = sqlite3_column_int64(stmt, 0);
+	r = write_chunk(name, f, *rowid);
+finalize:
+	sqlite3_finalize(stmt);
+	return r;
+}
+
+/* Check if the SHA256 checksum already exists in the database.
+ *
+ * ``name`` is only used for diagnostics.
+ *
+ * If the checksum exists, ``rowid`` is filled with the row in ``chunks``
+ * that contains that sha256 sum. If it does not exist, then ``rowid``
+ * contains ``-1``.
+ *
+ * Returns 0 if an error occurred, and 1 if no error occurred.
+ */
+static int check_sha256(TCHAR *name, uint8_t sha256[SHASIZ],
+			sqlite_int64 *rowid)
+{
+	sqlite3_stmt *stmt = NULL;
+	int r = 0;
+
+	/* Since ``sha256`` is binary, use ``hex`` to convert the binary
+	 * string to a text representation.
+	 */
+	if (sqlite3_prepare_v2(g_db,
+		"SELECT rowid FROM chunks WHERE sha256 = hex(?);",
+		-1, &stmt, NULL) != SQLITE_OK) {
+		_tprintf(TEXT("failed to prepare sha256 statement for %s: %s\n"),
+			 name, sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	if (sqlite3_bind_blob(stmt, 1, sha256, SHASIZ, SQLITE_TRANSIENT)
+	    != SQLITE_OK) {
+		_tprintf(TEXT("failed to bind sha256 value for %s: %s\n"),
+			 name, sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	switch (sqlite3_step(stmt)) {
+	case SQLITE_ROW:
+		/* there is a sha256 value; rowids are 64-bit */
+		*rowid = sqlite3_column_int64(stmt, 0);
+		break;
+	case SQLITE_DONE:
+		/* The chunk was never entered */
+		*rowid = -1;
+		break;
+	default:
+		/* Any other result is a failed lookup, not "no match". */
+		_tprintf(TEXT("failed to look up sha256 value for %s: %s\n"),
+			 name, sqlite3_errmsg_U(g_db));
+		goto end;
+	}
+
+	r = 1;
+end:
+	/* sqlite3_finalize() accepts NULL, so a failed prepare is fine. */
+	sqlite3_finalize(stmt);
+	return r;
+}
+
+/* Calculate the SHA256 checksum of the file in ``f``, and place a binary
+ * representation of the checksum into ``sha256``.
+ *
+ * ``f`` is read from its current position until ReadFile() reports
+ * zero bytes; the file pointer is not rewound afterwards. ``name`` is
+ * only used for diagnostics.
+ *
+ * Returns 0 if there was an error, 1 for success.
+ */
+static int calculate_sha256(TCHAR *name, HANDLE f, uint8_t sha256[SHASIZ])
+{
+	DWORD read = 0;
+	uint8_t buf[SIZE_OF_SHA_256_CHUNK * 64];
+	struct Sha_256 sha_state;
+
+	sha_256_init(&sha_state, sha256);
+	for (;;) {
+		if (ReadFile(f, buf, sizeof(buf), &read, NULL) == FALSE) {
+			_tprintf(TEXT("failed to read in %s for sha256 calculation\n"),
+				 name);
+			return 0;
+		}
+
+		/* ReadFile() reads 0 bytes on EOF. */
+		if (read == 0)
+			break;
+		sha_256_write(&sha_state, buf, read);
+	}
+
+	sha_256_close(&sha_state);
+	return 1;
+}
+
+/* Archive a file.
+ *
+ * The work is wrapped in the savepoint ``file``: the file is hashed, the
+ * ``chunks`` table is searched for that hash, the contents are inserted
+ * as a new chunk when no match exists, and a backup record pointing at
+ * the chunk is added. On any failure the savepoint is rolled back so
+ * that nothing from this file remains; in both cases the savepoint is
+ * released before returning.
+ *
+ * Failing to create or finish the savepoint is fatal (die()).
+ */
+static void archive_file(TCHAR *name)
+{
+	HANDLE f = INVALID_HANDLE_VALUE;
+	sqlite_int64 chunks_rowid = -1;
+	uint8_t sha256[SHASIZ];
+	char *errstr = NULL;
+	int success = 0;
+
+	if (sqlite3_exec(g_db, "SAVEPOINT file;", NULL, NULL, &errstr)
+	    != SQLITE_OK)
+		die("failed to initialize file savepoint: %s\n", errstr);
+
+	f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL,
+		       OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (f == INVALID_HANDLE_VALUE) {
+		_tprintf(TEXT("could not open %s\n"), name);
+		goto end;
+	}
+
+	if (!calculate_sha256(name, f, sha256))
+		goto end;
+	if (!check_sha256(name, sha256, &chunks_rowid))
+		goto end;
+
+	/* A negative rowid means no chunk with this checksum exists yet. */
+	if (chunks_rowid < 0) {
+		if (!insert_chunk(name, f, &chunks_rowid, sha256))
+			goto end;
+	}
+
+	if (!insert_backup_record(name, chunks_rowid))
+		goto end;
+	success = 1;
+
+	log("Archived %s\n", name);
+end:
+	/* The handle is only valid when CreateFile() succeeded. */
+	if (f != INVALID_HANDLE_VALUE)
+		CloseHandle(f);
+
+	if (success) {
+		if (sqlite3_exec(g_db, "RELEASE file;", NULL, NULL, &errstr)
+		    != SQLITE_OK)
+			die("failed to release file savepoint: %s\n", errstr);
+	} else {
+		/* ROLLBACK TO keeps the savepoint on the stack, so it is
+		 * released explicitly afterwards.
+		 */
+		if (sqlite3_exec(g_db, "ROLLBACK TO file; RELEASE file;",
+				 NULL, NULL, &errstr) != SQLITE_OK)
+			die("failed to rollback file savepoint: %s\n", errstr);
+	}
+}
+
+/* Recursively archive a directory.
+ *
+ * Every entry of ``dirname`` except ``.`` and ``..`` is visited:
+ * subdirectories are archived recursively and everything else is passed
+ * to archive_file(). Entries whose full path does not fit in PATH_MAX
+ * characters are reported and skipped.
+ */
+static void archive_directory(TCHAR *dirname)
+{
+	WIN32_FIND_DATA fdata;
+	HANDLE dhandle;
+	TCHAR pathname[PATH_MAX];
+	int n;
+
+	/* _sntprintf() is the non-standard _snprintf()/_snwprintf(): it
+	 * returns a negative value if the string does not fit in the
+	 * buffer, and leaves the buffer unterminated when the string
+	 * fills it exactly. Both cases are treated as "too long".
+	 */
+
+	/* FindFirstFile() on a bare directory name matches the directory
+	 * itself; a wildcard is needed to enumerate its contents.
+	 */
+	n = _sntprintf(pathname, PATH_MAX, TEXT("%s\\*"), dirname);
+	if (n < 0 || n >= PATH_MAX) {
+		_tprintf(TEXT("Pathname for %s\\* too long\n"), dirname);
+		return;
+	}
+
+	dhandle = FindFirstFile(pathname, &fdata);
+	if (dhandle == INVALID_HANDLE_VALUE) {
+		_tprintf(TEXT("Failed to open directory %s\n"), dirname);
+		return;
+	}
+
+	do {
+		/* Skip the self and parent entries to avoid recursing
+		 * forever.
+		 */
+		if (_tcscmp(fdata.cFileName, TEXT(".")) == 0
+		    || _tcscmp(fdata.cFileName, TEXT("..")) == 0)
+			continue;
+
+		n = _sntprintf(pathname, PATH_MAX, TEXT("%s\\%s"), dirname,
+			       fdata.cFileName);
+		if (n < 0 || n >= PATH_MAX) {
+			_tprintf(TEXT("Pathname for %s\\%s too long\n"), dirname,
+				 fdata.cFileName);
+			continue;
+		}
+
+		if (fdata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+			archive_directory(pathname);
+		else
+			archive_file(pathname);
+	} while (FindNextFile(dhandle, &fdata) != 0);
+
+	FindClose(dhandle);
+}
+
+/* Check if the path is a file or a directory, and either archive
+ * the file or archive the entire directory recursively.
+ *
+ * If the attributes of ``name`` cannot be read, a message is printed
+ * and nothing is archived. The backup id is initialized on first use,
+ * i.e. while ``g_backup_id`` is still negative.
+ */
+static void archive_file_or_directory(TCHAR *name)
+{
+	DWORD attr = GetFileAttributes(name);
+	if (attr == INVALID_FILE_ATTRIBUTES) {
+		_tprintf(TEXT("failed to open %s\n"), name);
+		return;
+	}
+
+	if (g_backup_id < 0)
+		initialize_backup_id();
+
+	if (attr & FILE_ATTRIBUTE_DIRECTORY)
+		archive_directory(name);
+	else
+		archive_file(name);
+}
+
+/* Write the command line summary to the console, then terminate the
+ * process with exit status 0. This function does not return.
+ */
+static void usage(void)
+{
+	static const TCHAR *const help_lines[] = {
+		TEXT("\nwlb [\\V] [\\H] [\\D DBNAME] [\\A DIRECTORIES...]\n"),
+		TEXT("\\V: Verbose\n"),
+		TEXT("\\H: Display the help\n"),
+		TEXT("\\D: Database (create if does not exist)\n"),
+		TEXT("\\A: Archive directories\n"),
+	};
+	size_t idx;
+
+	/* None of the lines contain conversion specifiers, so printing
+	 * them through "%s" produces the same output.
+	 */
+	for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
+		_tprintf(TEXT("%s"), help_lines[idx]);
+
+	exit(0);
+}
+
+/* Return nonzero if ``arg`` is one of the recognized option switches. */
+static int is_option_switch(const TCHAR *arg)
+{
+	return _tcscmp(arg, TEXT("\\H")) == 0
+	    || _tcscmp(arg, TEXT("\\A")) == 0
+	    || _tcscmp(arg, TEXT("\\D")) == 0
+	    || _tcscmp(arg, TEXT("\\V")) == 0;
+}
+
+/* Program entry point.
+ *
+ * Arguments are processed left to right:
+ *   \V            enable verbose output
+ *   \D DBNAME     open the database
+ *   \A PATHS...   archive every following argument up to the next switch
+ *   \H            show the help
+ *
+ * A malformed command line (unknown argument, or a switch without its
+ * operand) stops processing and shows the help. Any open database is
+ * committed and closed before the help is shown, so work already done
+ * is kept.
+ *
+ * Returns 0 on success and 1 if the final COMMIT fails.
+ */
+int _tmain(int argc, TCHAR *argv[])
+{
+	int i;
+	int show_usage = 0;
+	int status = 0;
+
+	for (i = 1; i < argc && !show_usage; i++) {
+		if (_tcscmp(argv[i], TEXT("\\A")) == 0) {
+			/* At least one path must follow the switch. */
+			if (i + 1 >= argc || is_option_switch(argv[i + 1])) {
+				show_usage = 1;
+				continue;
+			}
+			while (i + 1 < argc && !is_option_switch(argv[i + 1])) {
+				i++;
+				archive_file_or_directory(argv[i]);
+			}
+		} else if (_tcscmp(argv[i], TEXT("\\D")) == 0) {
+			/* argv[argc] is NULL; never hand it to open_db(). */
+			if (i + 1 >= argc) {
+				show_usage = 1;
+				continue;
+			}
+			i++;
+			open_db(argv[i]);
+		} else if (_tcscmp(argv[i], TEXT("\\V")) == 0) {
+			g_verbose = 1;
+		} else {
+			/* \H and anything unrecognized */
+			show_usage = 1;
+		}
+	}
+
+	if (g_db) {
+		if (sqlite3_exec(g_db, "COMMIT;", NULL, NULL, NULL)
+		    != SQLITE_OK) {
+			_tprintf(TEXT("failed to commit: %s\n"),
+				 sqlite3_errmsg_U(g_db));
+			status = 1;
+		}
+	}
+	sqlite3_close(g_db);
+
+	/* usage() exits with status 0, so a commit failure takes
+	 * precedence over showing the help.
+	 */
+	if (show_usage && status == 0)
+		usage();
+	return status;
+}