Skip to content

Commit

Permalink
Solve bug removing records with encrypted KBs. Solve bug with bsort fo…
Browse files Browse the repository at this point in the history
…r the new format
  • Loading branch information
core software devel committed Sep 9, 2024
1 parent 48990c2 commit 64571d6
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 33 deletions.
8 changes: 4 additions & 4 deletions src/bsort.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ void close_sort(struct sort *sort)
}
}

int bsort(char *file_path)
int bsort(char *file_path, int rec_ln)
{
int char_start = 0;
int char_stop = 255;
int record_size=21;
int key_size=21;
int stack_size=5;
int record_size = rec_ln;
int key_size = rec_ln;
int stack_size = 5;
int cut_off = 4;

struct sort sort;
Expand Down
2 changes: 1 addition & 1 deletion src/bsort.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#ifndef __BSORT_H
#define __BSORT_H

int bsort(char *file_path);
int bsort(char *file_path, int rec_ln);

#endif
22 changes: 14 additions & 8 deletions src/collate.c
Original file line number Diff line number Diff line change
Expand Up @@ -527,15 +527,21 @@ bool key_in_delete_list(struct ldb_collate_data *collate, uint8_t *key, uint8_t
{
if (collate->in_table.definitions & LDB_TABLE_DEFINITION_ENCRYPTED)
{
unsigned char tuple_bin[MAX_CSV_LINE_LEN];
if(!decode && !ldb_decoder_lib_load())
return false;

int r_size = decode(DECODE_BASE64, NULL, NULL, collate->del_tuples->tuples[i]->data + char_to_skip, strlen(collate->del_tuples->tuples[i]->data) - char_to_skip, tuple_bin);
if (r_size > 0)
result = !memcmp(tuple_bin, data + (collate->del_tuples->keys_number - 1) * collate->del_tuples->key_ln, r_size);
// A '*' in the delete tuple means its data is ignored, so the record must be removed.
if (strchr(collate->del_tuples->tuples[i]->data + char_to_skip, '*'))
result = true;
else
result = false;
{
unsigned char tuple_bin[MAX_CSV_LINE_LEN];
if(!decode && !ldb_decoder_lib_load())
return false;

int r_size = decode(DECODE_BASE64, NULL, NULL, collate->del_tuples->tuples[i]->data + char_to_skip, strlen(collate->del_tuples->tuples[i]->data) - char_to_skip, tuple_bin);
if (r_size > 0)
result = !memcmp(tuple_bin, data + (collate->del_tuples->keys_number - 1) * collate->del_tuples->key_ln, r_size);
else
result = false;
}
}
else
{
Expand Down
31 changes: 16 additions & 15 deletions src/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,13 @@ bool csv_sort(ldb_importation_config_t * config)
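* @param rec_ln record length in bytes, passed through to bsort()
* @return false if the file is empty, otherwise the result of bsort()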
*/
bool bin_sort(char *file_path, bool sort)
bool bin_sort(char *file_path, int rec_ln)
{
if (!ldb_file_size(file_path))
return false;
if (!sort)
return true;

log_info("Sorting %s\n", file_path);
return bsort(file_path);
return bsort(file_path, rec_ln);
}


Expand Down Expand Up @@ -200,7 +199,7 @@ int ldb_import_snippets(ldb_importation_config_t * config)
strcpy(oss_wfp.db, config->dbname);
strcpy(oss_wfp.table, config->table);
oss_wfp.key_ln = 4;
oss_wfp.rec_ln = 18;
oss_wfp.rec_ln = config->opt.params.key_size + 2; //key_ln + 2 bytes for line number
oss_wfp.ts_ln = 2;
oss_wfp.tmp = config->opt.params.overwrite;

Expand All @@ -210,8 +209,8 @@ int ldb_import_snippets(ldb_importation_config_t * config)
size_t bytecounter = 0;
int tick = 10000; // activate progress every "tick" records

/* raw record length = wfp crc32(3) + file md5(16) + line(2) = 21 bytes */
int raw_ln = 21;
/* raw record length = wfp crc32 (3) + file key (16-byte md5 or 8-byte key) + line (2) = 21 or 13 bytes */
int raw_ln = 3 + config->opt.params.key_size + 2;

/* First three bytes are the 2nd-4th bytes of the wfp */
int rec_ln = raw_ln - 3;
Expand All @@ -220,12 +219,15 @@ int ldb_import_snippets(ldb_importation_config_t * config)
uint8_t key1 = first_byte(config->csv_path);

/* File size should be a multiple of raw_ln bytes */
if (ldb_file_size(config->csv_path) % 21)
if (ldb_file_size(config->csv_path) % (raw_ln))
{
printf("File %s does not contain 21-byte records\n", config->csv_path);
printf("File %s does not contain %d-byte records\n", config->csv_path, raw_ln);
exit(EXIT_FAILURE);
}

if (config->opt.params.sort)
bin_sort(config->csv_path, raw_ln);

/* Load ignored wfps into boolean array */
bool *bl = calloc(256 * 256 * 256, 1);
for (int i = 0; i < IGNORED_WFP_LN; i += 4)
Expand Down Expand Up @@ -264,7 +266,7 @@ int ldb_import_snippets(ldb_importation_config_t * config)

/* Create table if it doesn't exist */
if (!ldb_table_exists(config->dbname, config->table))
ldb_create_table_new(config->dbname, config->table, 4, rec_ln, 1, LDB_TABLE_DEFINITION_STANDARD);
ldb_create_table_new(config->dbname, config->table, LDB_KEY_LN, rec_ln, 1, LDB_TABLE_DEFINITION_STANDARD);

/* Open ldb */
out = ldb_open(oss_wfp, last_wfp, "r+");
Expand Down Expand Up @@ -317,7 +319,7 @@ int ldb_import_snippets(ldb_importation_config_t * config)
/* Skip duplicated records. Since the records to be imported are sorted, it will be faster
to compare them from last to first byte. Also, we only compare the file key (key_size bytes) */
if (record_ln > 0)
if (!reverse_memcmp(record + record_ln - rec_ln, rec, 16))
if (!reverse_memcmp(record + record_ln - rec_ln, rec, config->opt.params.key_size))
{
memcpy(record + record_ln, rec, rec_ln);
record_ln += rec_ln;
Expand Down Expand Up @@ -1431,11 +1433,11 @@ int import_collate_sector(ldb_importation_config_t *config)
tmptable.tmp = true;
tmptable.key_ln = LDB_KEY_LN;

int max_rec_len = config->opt.params.is_wfp_table == 1 ? 18 : config->opt.params.collate_max_rec;
int max_rec_len = config->opt.params.is_wfp_table == 1 ? config->opt.params.key_size + 2 : config->opt.params.collate_max_rec;

if (ldbtable.rec_ln && ldbtable.rec_ln != max_rec_len)
{
log_info("E076 Max record length should equal fixed record length (%d)\n", ldbtable.rec_ln);
log_info("E076 Max record length should equal fixed record length (%d vs %d)\n", ldbtable.rec_ln, max_rec_len);
return LDB_ERROR_RECORD_LENGHT_INVAID;
}
else if (max_rec_len < ldbtable.key_ln)
Expand Down Expand Up @@ -1533,8 +1535,7 @@ int ldb_import(ldb_importation_config_t * job)
}
else if (config.opt.params.is_wfp_table)
{
if (bin_sort(config.csv_path, config.opt.params.sort))
result = ldb_import_snippets(&config);
result = ldb_import_snippets(&config);
}
else
{
Expand Down
20 changes: 15 additions & 5 deletions src/recordset.c
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,11 @@ bool ldb_csvprint(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8
/* Print key in hex (first CSV field) */
for (int i = 0; i < LDB_KEY_LN; i++)
printf("%02x", key[i]);
for (int i = 0; i < subkey_ln; i++)
printf("%02x", subkey[i]);
if (subkey)
{
for (int i = 0; i < subkey_ln; i++)
printf("%02x", subkey[i]);
}
}
//print secondaries keys
else
Expand All @@ -303,14 +306,15 @@ bool ldb_csvprint(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8
int *hex_bytes = ptr;
int remaining_hex = 0;
//print everything as hex

if (*hex_bytes < 0)
remaining_hex = size;
remaining_hex = size - table->key_ln * (table->keys-1);
else
remaining_hex = *hex_bytes - table->key_ln * table->keys;
remaining_hex = *hex_bytes - table->key_ln * (table->keys-1);

if (remaining_hex < 0) remaining_hex = 0;

if (table->key_ln * table->keys + remaining_hex >= size)
if (table->key_ln * (table->keys - 1) + remaining_hex >= size)
{
fwrite("\n", 1, 1, stdout);
return false;
Expand All @@ -323,6 +327,12 @@ bool ldb_csvprint(struct ldb_table * table, uint8_t *key, uint8_t *subkey, uint8
printf("%02x", data[table->key_ln * (table->keys-1) + i]);
}
/* Print remaining CSV data */
if (remaining_hex >= size)
{
fwrite("\n", 1, 1, stdout);
return false;
}

printf(",");
for (int i = table->key_ln * (table->keys - 1) + remaining_hex; i < size; i++)
fwrite(data + i, 1, 1, stdout);
Expand Down

0 comments on commit 64571d6

Please sign in to comment.