Skip to content

Commit

Permalink
sidecar files #114, version bump
Browse files Browse the repository at this point in the history
  • Loading branch information
simon987 committed Oct 25, 2020
1 parent 7a505c2 commit 641a8ec
Show file tree
Hide file tree
Showing 20 changed files with 504 additions and 361 deletions.
18 changes: 8 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ add_subdirectory(third-party/libscan)
set(ARGPARSE_SHARED off)
add_subdirectory(third-party/argparse)

add_executable(
sist2
add_executable(sist2

# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c

src/main.c
src/sist.h
src/io/walk.h src/io/walk.c
Expand All @@ -25,12 +28,9 @@ add_executable(
src/util.c src/util.h
src/ctx.h src/types.h
src/log.c src/log.h

# argparse
third-party/argparse/argparse.h third-party/argparse/argparse.c

src/cli.c src/cli.h
src/stats.c src/stats.h src/ctx.c)
src/stats.c src/stats.h src/ctx.c
src/parsing/sidecar.c src/parsing/sidecar.h)

target_link_directories(sist2 PRIVATE BEFORE ${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}/lib/)
set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib)
Expand Down Expand Up @@ -73,15 +73,14 @@ if (SIST_DEBUG)
sist2
PRIVATE
-fsanitize=address
# -static
)
set_target_properties(
sist2
PROPERTIES
OUTPUT_NAME sist2_debug
)
else ()
# set(VCPKG_BUILD_TYPE release)
# set(VCPKG_BUILD_TYPE release)
target_compile_options(
sist2
PRIVATE
Expand All @@ -106,7 +105,6 @@ target_link_libraries(
argparse
unofficial::glib::glib
unofficial::mongoose::mongoose
# OpenSSL::SSL OpenSSL::Crypto
CURL::libcurl

${UUID_LIB}
Expand Down
55 changes: 50 additions & 5 deletions docs/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
* [link to specific indices](#link-to-specific-indices)
* [exec-script](#exec-script)
* [tagging](#tagging)
* [sidecar files](#sidecar-files)

```
Usage: sist2 scan [OPTION]... PATH
Expand Down Expand Up @@ -153,10 +154,13 @@ documents.idx/
├── agg_mime.csv
├── agg_date.csv
├── agg_size.csv
├── thumbs
├── thumbs/
| ├── data.mdb
| └── lock.mdb
└── tags
├── tags/
| ├── data.mdb
| └── lock.mdb
└── meta/
├── data.mdb
└── lock.mdb
```
Expand All @@ -183,9 +187,11 @@ by a third party application. The 'external' index must have the following forma
my_index/
├── descriptor.json
├── _index_0
└── thumbs
├── data.mdb
└── lock.mdb
├── thumbs/
| ├── data.mdb
| └── lock.mdb
└── meta/
└── <empty>
```

*descriptor.json*:
Expand Down Expand Up @@ -349,3 +355,42 @@ See [Automatic tagging](#automatic-tagging) for information about tag
### Automatic tagging

See [scripting](scripting.md) documentation.

# Sidecar files

When scanning, sist2 reads metadata from `.s2meta` JSON files and uses it to overwrite the
metadata of the document each sidecar file accompanies. Sidecar files also work inside archives.
Sidecar files themselves are not saved in the index.

This feature makes it possible to use third-party applications, such as speech-to-text or
OCR tools, to attach additional metadata to a file.

**Example**

```
~/Documents/
├── Video.mp4
└── Video.mp4.s2meta
```

The sidecar file must have exactly the same file path as the file it describes, with the `.s2meta` suffix appended.

`Video.mp4.s2meta`:
```json
{
"content": "This sidecar file will overwrite some metadata fields of Video.mp4",
"author": "Some author",
"duration": 12345,
"bitrate": 67890,
"some_arbitrary_field": [1,2,3]
}
```

```
sist2 scan ~/Documents -o ./docs.idx
sist2 index ./docs.idx
```

*NOTE*: It is technically possible to overwrite the `tag` value using sidecar files; however,
when reindexing, it is not currently possible to restore both manual tags and sidecar tags
without user scripts.
1 change: 1 addition & 0 deletions scripts/mime.csv
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,4 @@ image/x-sony-arw, arw
image/x-sony-sr2, sr2
image/x-sony-srf, srf
image/x-epson-erf, erf
sist2/sidecar, s2meta
5 changes: 5 additions & 0 deletions scripts/mime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
ext_in_hash = set()

major_mime = {
"sist2": 0,
"model": 1,
"example": 2,
"message": 3,
Expand Down Expand Up @@ -122,7 +123,11 @@ def mime_id(mime):
elif mime in raw:
mime_id += " | 0x00800000"
elif mime == "application/x-empty":
cnt -= 1
return "1"
elif mime == "sist2/sidecar":
cnt -= 1
return "2"
return mime_id


Expand Down
2 changes: 2 additions & 0 deletions src/ctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ typedef struct {
tpool_t *pool;
store_t *tag_store;
GHashTable *tags;
store_t *meta_store;
GHashTable *meta;
} IndexCtx_t;

typedef struct {
Expand Down
37 changes: 30 additions & 7 deletions src/io/serialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,13 +247,8 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
}
dyn_buffer_write_char(&buf, '\0');

if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, buf.buf);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}
char full_filename[PATH_MAX];
strcpy(full_filename, buf.buf);

cJSON_AddStringToObject(document, "extension", buf.buf + line.ext);
if (*(buf.buf + line.ext - 1) == '.') {
Expand Down Expand Up @@ -334,8 +329,36 @@ void read_index_bin(const char *path, const char *index_id, index_func func) {
key = getc(file);
}

cJSON *meta_obj = NULL;
if (IndexCtx.meta != NULL) {
const char *meta_string = g_hash_table_lookup(IndexCtx.meta, full_filename);
if (meta_string != NULL) {
meta_obj = cJSON_Parse(meta_string);

cJSON *child;
for (child = meta_obj->child; child != NULL; child = child->next) {
char meta_key[4096];
strcpy(meta_key, child->string);
cJSON_DeleteItemFromObject(document, meta_key);
cJSON_AddItemReferenceToObject(document, meta_key, child);
}
}
}

if (IndexCtx.tags != NULL) {
const char *tags_string = g_hash_table_lookup(IndexCtx.tags, full_filename);
if (tags_string != NULL) {
cJSON *tags_arr = cJSON_Parse(tags_string);
cJSON_DeleteItemFromObject(document, "tag");
cJSON_AddItemToObject(document, "tag", tags_arr);
}
}

func(document, uuid_str);
cJSON_Delete(document);
if (meta_obj) {
cJSON_Delete(meta_obj);
}
}
dyn_buffer_destroy(&buf);
fclose(file);
Expand Down
14 changes: 10 additions & 4 deletions src/io/store.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,13 @@ void store_destroy(store_t *store) {
void store_write(store_t *store, char *key, size_t key_len, char *buf, size_t buf_len) {

if (LogCtx.very_verbose) {
char uuid_str[UUID_STR_LEN];
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
if (key_len == 16) {
char uuid_str[UUID_STR_LEN] = {0, };
uuid_unparse((unsigned char *) key, uuid_str);
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", uuid_str, buf_len)
} else {
LOG_DEBUGF("store.c", "Store write {%s} %lu bytes", key, buf_len)
}
}

MDB_val mdb_key;
Expand Down Expand Up @@ -136,7 +140,9 @@ GHashTable *store_read_all(store_t *store) {
count += 1;
}

LOG_DEBUGF("store.c", "Read tags for %d documents", count);
const char *path;
mdb_env_get_path(store->env, &path);
LOG_DEBUGF("store.c", "Read %d entries from %s", count, path);

mdb_cursor_close(cur);
mdb_txn_abort(txn);
Expand Down
1 change: 1 addition & 0 deletions src/io/store.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

/*
 * Size constants handed to store_create() for the different stores.
 *
 * Every replacement list is parenthesized so that the macro behaves as a
 * single value when used inside a larger expression
 * (e.g. `x / STORE_SIZE_TAG` must divide by 16384, not by 1024 and then
 * multiply by 16).
 */
#define STORE_SIZE_TN (1024 * 1024 * 5)
#define STORE_SIZE_TAG (1024 * 16)
/* The meta store is created with the same size as the tag store. */
#define STORE_SIZE_META STORE_SIZE_TAG

typedef struct store_t {
MDB_dbi dbi;
Expand Down
10 changes: 9 additions & 1 deletion src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#define EPILOG "Made by simon987 <me@simon987.net>. Released under GPL-3.0"


static const char *const Version = "2.8.4";
static const char *const Version = "2.8.5";
static const char *const usage[] = {
"sist2 scan [OPTION]... PATH",
"sist2 index [OPTION]... INDEX",
Expand Down Expand Up @@ -182,6 +182,10 @@ void sist2_scan(scan_args_t *args) {
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.store = store_create(store_path, STORE_SIZE_TN);

snprintf(store_path, PATH_MAX, "%smeta", ScanCtx.index.path);
mkdir(store_path, S_IWUSR | S_IRUSR | S_IXUSR);
ScanCtx.index.meta_store = store_create(store_path, STORE_SIZE_META);

scan_print_header();

if (args->incremental != NULL) {
Expand Down Expand Up @@ -289,6 +293,10 @@ void sist2_index(index_args_t *args) {
IndexCtx.tag_store = store_create(path_tmp, STORE_SIZE_TAG);
IndexCtx.tags = store_read_all(IndexCtx.tag_store);

snprintf(path_tmp, sizeof(path_tmp), "%s/meta", args->index_path);
IndexCtx.meta_store = store_create(path_tmp, STORE_SIZE_META);
IndexCtx.meta = store_read_all(IndexCtx.meta_store);

index_func f;
if (args->print) {
f = print_json;
Expand Down
1 change: 1 addition & 0 deletions src/parsing/mime.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
/*
 * Extracts bits 16-19 of a mime id and shifts them down to the low bits.
 *
 * Both the argument and the whole expansion are parenthesized so that the
 * macro is safe with compound arguments (`MAJOR_MIME(a | b)`) and inside
 * larger expressions (`MAJOR_MIME(x) + 1`, `MAJOR_MIME(x) == y`).
 */
#define MAJOR_MIME(mime_id) (((mime_id) & 0x000F0000) >> 16)

/* Reserved mime ids. */
#define MIME_EMPTY 1
#define MIME_SIST2_SIDECAR 2

/* Flag bit tested by SHOULD_PARSE(). */
#define DONT_PARSE 0x80000000
/*
 * Evaluates to non-zero when all of the following hold:
 *   - ScanCtx.fast is 0,
 *   - the DONT_PARSE bit is not set in mime_id,
 *   - mime_id is not 0.
 *
 * ScanCtx must be visible at the expansion site. The argument is
 * parenthesized at each use so compound expressions such as `a | b`
 * are evaluated as a whole; note that it is evaluated more than once,
 * so it must not have side effects.
 */
#define SHOULD_PARSE(mime_id) (ScanCtx.fast == 0 && ((mime_id) & DONT_PARSE) != DONT_PARSE && (mime_id) != 0)
Expand Down
Loading

0 comments on commit 641a8ec

Please sign in to comment.