aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- example-config.ini 6
-rw-r--r-- src/Config.c 7
-rw-r--r-- src/Config.h 1
-rw-r--r-- src/Main.c 7
-rw-r--r-- src/Routes/Search.c 59
-rw-r--r-- src/Scraping/Scraping.h 5
-rw-r--r-- src/Scraping/ScrapingParsers.c 102
7 files changed, 152 insertions, 35 deletions
diff --git a/example-config.ini b/example-config.ini
index e0f1f02..fc6ea8d 100644
--- a/example-config.ini
+++ b/example-config.ini
@@ -25,3 +25,9 @@ domain = https://search.example.com
# Cache TTL for infobox data in seconds (default: 86400 = 24 hours)
#ttl_infobox = 86400
+
+[engines]
+# Use * for all engines, or specify a comma-separated list (e.g., ddg,yahoo)
+# Use *,-engine to exclude specific engines (e.g., *,-startpage)
+# Available engines: ddg, startpage, yahoo, mojeek
+engines="*"
diff --git a/src/Config.c b/src/Config.c
index b4a0f21..0c243bd 100644
--- a/src/Config.c
+++ b/src/Config.c
@@ -54,6 +54,8 @@ int load_config(const char *filename, Config *config) {
value_end--;
}
+ while (*value == ' ' || *value == '\t')
+ value++;
while (*value == '"' || *value == '\'')
value++;
@@ -91,6 +93,11 @@ int load_config(const char *filename, Config *config) {
} else if (strcmp(key, "ttl_infobox") == 0) {
config->cache_ttl_infobox = atoi(value);
}
+ } else if (strcmp(section, "engines") == 0) {
+ if (strcmp(key, "engines") == 0) {
+ strncpy(config->engines, value, sizeof(config->engines) - 1);
+ config->engines[sizeof(config->engines) - 1] = '\0';
+ }
}
}
}
diff --git a/src/Config.h b/src/Config.h
index 33ff527..4143bbd 100644
--- a/src/Config.h
+++ b/src/Config.h
@@ -42,6 +42,7 @@ typedef struct {
char cache_dir[512];
int cache_ttl_search;
int cache_ttl_infobox;
+ char engines[512];
} Config;
int load_config(const char *filename, Config *config);
diff --git a/src/Main.c b/src/Main.c
index be5080b..8aa161d 100644
--- a/src/Main.c
+++ b/src/Main.c
@@ -16,7 +16,7 @@
#include "Scraping/Scraping.h"
Config global_config;
-
+
int handle_opensearch(UrlParams *params) {
(void)params;
extern Config global_config;
@@ -51,7 +51,8 @@ int main() {
.randomize_password = 0,
.cache_dir = DEFAULT_CACHE_DIR,
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
- .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
+ .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX,
+ .engines = ""};
if (load_config("config.ini", &cfg) != 0) {
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
@@ -59,6 +60,8 @@ int main() {
global_config = cfg;
+ apply_engines_config(cfg.engines);
+
if (cache_init(cfg.cache_dir) != 0) {
fprintf(stderr,
"[WARN] Failed to initialize cache, continuing without caching\n");
diff --git a/src/Routes/Search.c b/src/Routes/Search.c
index ad167fb..bc35fb6 100644
--- a/src/Routes/Search.c
+++ b/src/Routes/Search.c
@@ -273,26 +273,39 @@ int results_handler(UrlParams *params) {
}
}
+ int enabled_engine_count = 0;
+ for (int i = 0; i < ENGINE_COUNT; i++) {
+ if (ENGINE_REGISTRY[i].enabled) {
+ enabled_engine_count++;
+ }
+ }
+
ScrapeJob jobs[ENGINE_COUNT];
SearchResult *all_results[ENGINE_COUNT];
+ int engine_idx = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
- all_results[i] = NULL;
- jobs[i].engine = &ENGINE_REGISTRY[i];
- jobs[i].query = raw_query;
- jobs[i].out_results = &all_results[i];
- jobs[i].max_results = MAX_RESULTS_PER_ENGINE;
- jobs[i].results_count = 0;
- jobs[i].page = page;
- jobs[i].handle = NULL;
- jobs[i].response.memory = NULL;
- jobs[i].response.size = 0;
- jobs[i].response.capacity = 0;
- jobs[i].http_status = 0;
- jobs[i].status = SCRAPE_STATUS_PENDING;
+ if (ENGINE_REGISTRY[i].enabled) {
+ all_results[engine_idx] = NULL;
+ jobs[engine_idx].engine = &ENGINE_REGISTRY[i];
+ jobs[engine_idx].query = raw_query;
+ jobs[engine_idx].out_results = &all_results[engine_idx];
+ jobs[engine_idx].max_results = MAX_RESULTS_PER_ENGINE;
+ jobs[engine_idx].results_count = 0;
+ jobs[engine_idx].page = page;
+ jobs[engine_idx].handle = NULL;
+ jobs[engine_idx].response.memory = NULL;
+ jobs[engine_idx].response.size = 0;
+ jobs[engine_idx].response.capacity = 0;
+ jobs[engine_idx].http_status = 0;
+ jobs[engine_idx].status = SCRAPE_STATUS_PENDING;
+ engine_idx++;
+ }
}
- scrape_engines_parallel(jobs, ENGINE_COUNT);
+ if (enabled_engine_count > 0) {
+ scrape_engines_parallel(jobs, enabled_engine_count);
+ }
if (page == 1) {
for (int i = 0; i < HANDLER_COUNT; i++) {
@@ -301,10 +314,10 @@ int results_handler(UrlParams *params) {
}
if (btnI) {
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
if (jobs[i].results_count > 0 && all_results[i][0].url) {
char *redirect_url = strdup(all_results[i][0].url);
- for (int j = 0; j < ENGINE_COUNT; j++) {
+ for (int j = 0; j < enabled_engine_count; j++) {
for (int k = 0; k < jobs[j].results_count; k++) {
free(all_results[j][k].url);
free(all_results[j][k].title);
@@ -327,7 +340,7 @@ int results_handler(UrlParams *params) {
return 0;
}
}
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
free(all_results[i]);
}
if (page == 1) {
@@ -369,7 +382,7 @@ int results_handler(UrlParams *params) {
}
int warning_count = 0;
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
if (warning_message_for_job(&jobs[i]))
warning_count++;
}
@@ -379,7 +392,7 @@ int results_handler(UrlParams *params) {
int *warning_inner_counts = NULL;
int warning_index = 0;
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
const char *warning_message = warning_message_for_job(&jobs[i]);
if (!warning_message)
continue;
@@ -407,7 +420,7 @@ int results_handler(UrlParams *params) {
}
int total_results = 0;
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
total_results += jobs[i].results_count;
}
@@ -427,7 +440,7 @@ int results_handler(UrlParams *params) {
send_response(html);
free(html);
}
- for (int i = 0; i < ENGINE_COUNT; i++)
+ for (int i = 0; i < enabled_engine_count; i++)
free(all_results[i]);
if (page == 1) {
for (int i = 0; i < HANDLER_COUNT; i++) {
@@ -441,7 +454,7 @@ int results_handler(UrlParams *params) {
}
int unique_count = 0;
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
for (int j = 0; j < jobs[i].results_count; j++) {
char *display_url = all_results[i][j].url;
@@ -524,7 +537,7 @@ int results_handler(UrlParams *params) {
free(html);
}
- for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int i = 0; i < enabled_engine_count; i++) {
free(all_results[i]);
}
}
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
index 014285f..be65e5a 100644
--- a/src/Scraping/Scraping.h
+++ b/src/Scraping/Scraping.h
@@ -15,6 +15,7 @@ typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results);
typedef struct {
+ const char *id;
const char *name;
const char *base_url;
const char *host_header;
@@ -24,6 +25,7 @@ typedef struct {
int page_multiplier;
int page_base;
ParserFunc parser;
+ int enabled;
} SearchEngine;
typedef struct {
@@ -54,8 +56,9 @@ typedef struct {
ScrapeStatus status;
} ScrapeJob;
-extern const SearchEngine ENGINE_REGISTRY[];
+extern SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT;
+void apply_engines_config(const char *engines_str);
size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
void *userp);
diff --git a/src/Scraping/ScrapingParsers.c b/src/Scraping/ScrapingParsers.c
index 874cf54..96aaded 100644
--- a/src/Scraping/ScrapingParsers.c
+++ b/src/Scraping/ScrapingParsers.c
@@ -310,38 +310,122 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
static int parse_mojeek(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results);
-const SearchEngine ENGINE_REGISTRY[] = {
- {.name = "DuckDuckGo Lite",
+SearchEngine ENGINE_REGISTRY[] = {
+ {.id = "ddg",
+ .name = "DuckDuckGo Lite",
.base_url = "https://lite.duckduckgo.com/lite/?q=",
.host_header = "lite.duckduckgo.com",
.referer = "https://lite.duckduckgo.com/",
.page_param = "s",
.page_multiplier = 30,
.page_base = 0,
- .parser = parse_ddg_lite},
- {.name = "Startpage",
+ .parser = parse_ddg_lite,
+ .enabled = 1},
+ {.id = "startpage",
+ .name = "Startpage",
.base_url = "https://www.startpage.com/sp/search?q=",
.host_header = "www.startpage.com",
.referer = "https://www.startpage.com/",
.page_param = "page",
.page_multiplier = 1,
.page_base = 1,
- .parser = parse_startpage},
- {.name = "Yahoo",
+ .parser = parse_startpage,
+ .enabled = 1},
+ {.id = "yahoo",
+ .name = "Yahoo",
.base_url = "https://search.yahoo.com/search?p=",
.host_header = "search.yahoo.com",
.referer = "https://search.yahoo.com/",
.page_param = "b",
.page_multiplier = 10,
.page_base = 1,
- .parser = parse_yahoo},
- {.name = "Mojeek",
+ .parser = parse_yahoo,
+ .enabled = 1},
+ {.id = "mojeek",
+ .name = "Mojeek",
.base_url = "https://www.mojeek.com/search?q=",
.host_header = "www.mojeek.com",
.referer = "https://www.mojeek.com/",
.page_param = "s",
.page_multiplier = 10,
.page_base = 1,
- .parser = parse_mojeek}};
+ .parser = parse_mojeek,
+ .enabled = 1}};
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
+
+/*
+ * Case-insensitive equality test for two engine identifiers.
+ *
+ * Only the ASCII letters 'A'..'Z' are folded to lower case; every other
+ * byte is compared as-is. Returns 1 when both strings have the same length
+ * and match after folding, 0 otherwise. Neither pointer may be NULL.
+ */
+static int engine_id_compare(const char *engine_id, const char *config_id) {
+  const char *lhs = engine_id;
+  const char *rhs = config_id;
+
+  for (; *lhs != '\0' && *rhs != '\0'; lhs++, rhs++) {
+    char a = *lhs;
+    char b = *rhs;
+
+    if ('A' <= a && a <= 'Z')
+      a = a - 'A' + 'a';
+    if ('A' <= b && b <= 'Z')
+      b = b - 'A' + 'a';
+
+    if (a != b)
+      return 0;
+  }
+
+  /* The loop stops when at least one string is exhausted; the ids are
+   * equal only if both ended at the same position. */
+  return *lhs == '\0' && *rhs == '\0';
+}
+
+/* Return the ENGINE_REGISTRY entry whose id matches engine_id according to
+ * engine_id_compare, or NULL when no entry matches. */
+static SearchEngine *find_engine_by_id(const char *engine_id) {
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    if (engine_id_compare(ENGINE_REGISTRY[i].id, engine_id))
+      return &ENGINE_REGISTRY[i];
+  }
+  return NULL;
+}
+
+/* Set the enabled flag of every ENGINE_REGISTRY entry to the given value. */
+static void set_all_engines_enabled(int enabled) {
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    ENGINE_REGISTRY[i].enabled = enabled;
+  }
+}
+
+/*
+ * Apply the "engines" configuration value to ENGINE_REGISTRY.
+ *
+ * engines_str is a comma-separated list processed left to right:
+ *   *      enable every engine
+ *   id     enable the engine with that id
+ *   -id    disable the engine with that id
+ * Spaces and tabs around each item are ignored and items that are empty
+ * after trimming are skipped. Ids are matched with engine_id_compare.
+ *
+ * A NULL or empty string enables every engine. Otherwise all engines start
+ * disabled and only the listed items take effect. Unknown ids are reported
+ * on stderr and skipped. If the string cannot be duplicated for parsing,
+ * the registry is left exactly as it was.
+ */
+void apply_engines_config(const char *engines_str) {
+  if (!engines_str || engines_str[0] == '\0') {
+    set_all_engines_enabled(1);
+    return;
+  }
+
+  /* Duplicate before clearing the registry: if the allocation fails we must
+   * not leave every engine disabled as a side effect. */
+  char *copy = strdup(engines_str);
+  if (!copy) {
+    fprintf(stderr, "[WARN] Out of memory while parsing engines list, "
+                    "keeping current engine selection\n");
+    return;
+  }
+
+  set_all_engines_enabled(0);
+
+  char *saveptr = NULL;
+  for (char *token = strtok_r(copy, ",", &saveptr); token;
+       token = strtok_r(NULL, ",", &saveptr)) {
+    /* Trim surrounding blanks so "ddg , yahoo" parses like "ddg,yahoo". */
+    while (*token == ' ' || *token == '\t')
+      token++;
+    size_t len = strlen(token);
+    while (len > 0 && (token[len - 1] == ' ' || token[len - 1] == '\t'))
+      token[--len] = '\0';
+    if (len == 0)
+      continue;
+
+    if (strcmp(token, "*") == 0) {
+      set_all_engines_enabled(1);
+      continue;
+    }
+
+    /* A leading '-' followed by a name means "exclude"; a lone "-" is
+     * looked up as an ordinary (unknown) id. */
+    int enable = 1;
+    const char *engine_id = token;
+    if (token[0] == '-' && token[1] != '\0') {
+      enable = 0;
+      engine_id = token + 1;
+    }
+
+    SearchEngine *engine = find_engine_by_id(engine_id);
+    if (engine) {
+      engine->enabled = enable;
+    } else {
+      fprintf(stderr, "[WARN] Unknown engine: %s\n", engine_id);
+    }
+  }
+
+  free(copy);
+
+  int enabled_count = 0;
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    if (ENGINE_REGISTRY[i].enabled)
+      enabled_count++;
+  }
+  if (enabled_count == 0) {
+    fprintf(stderr, "[WARN] Engines configuration enables no engines\n");
+  }
+}