aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Config.c67
-rw-r--r--src/Config.h11
-rw-r--r--src/Infobox/Calculator.c115
-rw-r--r--src/Infobox/Calculator.h9
-rw-r--r--src/Infobox/Dictionary.c246
-rw-r--r--src/Infobox/Dictionary.h10
-rw-r--r--src/Infobox/Infobox.c13
-rw-r--r--src/Infobox/Infobox.h13
-rw-r--r--src/Infobox/Wikipedia.c165
-rw-r--r--src/Infobox/Wikipedia.h9
-rw-r--r--src/Main.c49
-rw-r--r--src/Routes/Home.c14
-rw-r--r--src/Routes/Home.h8
-rw-r--r--src/Routes/Images.c278
-rw-r--r--src/Routes/Images.h8
-rw-r--r--src/Routes/Search.c275
-rw-r--r--src/Routes/Search.h8
-rw-r--r--src/Scraping/Scraping.c459
-rw-r--r--src/Scraping/Scraping.h58
-rw-r--r--src/Utility/Display.c46
-rw-r--r--src/Utility/Display.h6
-rw-r--r--src/Utility/Unescape.c80
-rw-r--r--src/Utility/Unescape.h10
-rw-r--r--src/Utility/Utility.c8
-rw-r--r--src/Utility/Utility.h6
25 files changed, 1971 insertions, 0 deletions
diff --git a/src/Config.c b/src/Config.c
new file mode 100644
index 0000000..4a93980
--- /dev/null
+++ b/src/Config.c
@@ -0,0 +1,67 @@
+#include "Config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int load_config(const char *filename, Config *config) {
+ FILE *file = fopen(filename, "r");
+ if (!file) {
+ return -1;
+ }
+
+ char line[512];
+ char section[64] = "";
+
+ while (fgets(line, sizeof(line), file)) {
+
+ line[strcspn(line, "\r\n")] = 0;
+
+ if (line[0] == '\0' || line[0] == '#' || line[0] == ';') {
+ continue;
+ }
+
+ if (line[0] == '[') {
+ char *end = strchr(line, ']');
+ if (end) {
+ *end = '\0';
+ snprintf(section, sizeof(section), "%.*s", (int)(sizeof(section) - 1), line + 1);
+ section[sizeof(section) - 1] = '\0';
+ }
+ continue;
+ }
+
+ char *delimiter = strchr(line, '=');
+ if (delimiter) {
+ *delimiter = '\0';
+ char *key = line;
+ char *value = delimiter + 1;
+
+ while (*key == ' ' || *key == '\t') key++;
+ while (*value == ' ' || *value == '\t') value++;
+
+ char *key_end = key + strlen(key) - 1;
+ while (key_end > key && (*key_end == ' ' || *key_end == '\t')) {
+ *key_end = '\0';
+ key_end--;
+ }
+
+ char *value_end = value + strlen(value) - 1;
+ while (value_end > value && (*value_end == ' ' || *value_end == '\t')) {
+ *value_end = '\0';
+ value_end--;
+ }
+
+ if (strcmp(section, "server") == 0) {
+ if (strcmp(key, "host") == 0) {
+ strncpy(config->host, value, sizeof(config->host) - 1);
+ config->host[sizeof(config->host) - 1] = '\0';
+ } else if (strcmp(key, "port") == 0) {
+ config->port = atoi(value);
+ }
+ }
+ }
+ }
+
+ fclose(file);
+ return 0;
+} \ No newline at end of file
diff --git a/src/Config.h b/src/Config.h
new file mode 100644
index 0000000..384ed94
--- /dev/null
+++ b/src/Config.h
@@ -0,0 +1,11 @@
+#ifndef CONFIG_H
+#define CONFIG_H
+
/* Runtime configuration for the HTTP server. */
typedef struct {
  char host[256];  /* Bind address, e.g. "0.0.0.0". */
  int port;        /* TCP port to listen on. */
} Config;

/* Populate `config` from an INI file; returns 0 on success, -1 if the
 * file cannot be opened.  Keys missing from the file leave the
 * corresponding fields untouched. */
int load_config(const char *filename, Config *config);
+
+#endif \ No newline at end of file
diff --git a/src/Infobox/Calculator.c b/src/Infobox/Calculator.c
new file mode 100644
index 0000000..b80ce21
--- /dev/null
+++ b/src/Infobox/Calculator.c
@@ -0,0 +1,115 @@
+#include "Calculator.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+
/* HTML log of each arithmetic step performed by the most recent call
 * to evaluate(); consumed by fetch_calc_data(). */
static char logic_log[4096];

/* Cursor over the expression string being parsed. */
typedef struct {
  const char *buffer;
  int pos;
} Parser;

static double parse_expression(Parser *p);

/* Append one "<div>a op b = result</div>" step to logic_log, silently
 * truncating when the log is full.  Factored out of parse_term /
 * parse_expression, which previously duplicated this verbatim. */
static void log_step(double lhs, char op, double rhs, double result) {
  char step[256];
  snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", lhs, op,
           rhs, result);
  strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
}

static void skip_ws(Parser *p) {
  while (p->buffer[p->pos] == ' ') p->pos++;
}

/* factor := '-' factor | '(' expression ')' | number */
static double parse_factor(Parser *p) {
  skip_ws(p);
  if (p->buffer[p->pos] == '-') {
    p->pos++;
    return -parse_factor(p);
  }
  if (p->buffer[p->pos] == '(') {
    p->pos++;
    double res = parse_expression(p);
    if (p->buffer[p->pos] == ')') p->pos++;
    return res;
  }
  /* strtod also advances past the number; unparsable text yields 0.0
   * with the cursor unmoved, which terminates the callers' loops. */
  char *endptr;
  double val = strtod(&p->buffer[p->pos], &endptr);
  p->pos = (int)(endptr - p->buffer);
  return val;
}

/* term := factor (('*' | '/') factor)*  -- left-associative. */
static double parse_term(Parser *p) {
  double left = parse_factor(p);
  while (1) {
    skip_ws(p);
    char op = p->buffer[p->pos];
    if (op != '*' && op != '/') break;
    p->pos++;
    double right = parse_factor(p);
    double old = left;
    left = (op == '*') ? left * right : left / right;
    log_step(old, op, right, left);
  }
  return left;
}

/* expression := term (('+' | '-') term)*  -- left-associative. */
static double parse_expression(Parser *p) {
  double left = parse_term(p);
  while (1) {
    skip_ws(p);
    char op = p->buffer[p->pos];
    if (op != '+' && op != '-') break;
    p->pos++;
    double right = parse_term(p);
    double old = left;
    left = (op == '+') ? left + right : left - right;
    log_step(old, op, right, left);
  }
  return left;
}

/* Evaluate an infix arithmetic expression supporting + - * /,
 * parentheses and unary minus.  Resets and refills logic_log with an
 * HTML trace of each step.  Returns 0.0 for NULL or empty input;
 * trailing unparsable text is silently ignored. */
double evaluate(const char *expr) {
  logic_log[0] = '\0';
  if (!expr || strlen(expr) == 0) return 0.0;
  Parser p = {expr, 0};
  return parse_expression(&p);
}
+
+InfoBox fetch_calc_data(char *math_input) {
+ InfoBox info = {NULL, NULL, NULL, NULL};
+ if (!math_input) return info;
+
+ double result = evaluate(math_input);
+
+ char html_output[5120];
+ snprintf(html_output, sizeof(html_output),
+ "<div class='calc-container' style='line-height: 1.6;'>"
+ "%s"
+ "<div style='margin-top: 8px; border-top: 1px solid #eee; "
+ "padding-top: 8px; font-size: 1.2em;'>"
+ "<b>%g</b>"
+ "</div>"
+ "</div>",
+ strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
+ result);
+
+ info.title = strdup("Calculation");
+ info.extract = strdup(html_output);
+ info.thumbnail_url =
+ strdup("/static/calculation.svg");
+ info.url = strdup("#");
+
+ return info;
+}
diff --git a/src/Infobox/Calculator.h b/src/Infobox/Calculator.h
new file mode 100644
index 0000000..275aed6
--- /dev/null
+++ b/src/Infobox/Calculator.h
@@ -0,0 +1,9 @@
+#ifndef CALCULATOR_H
+#define CALCULATOR_H
+
+#include "Infobox.h"
+
+double evaluate(const char *expr);
+InfoBox fetch_calc_data(char *math_input);
+
+#endif
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c
new file mode 100644
index 0000000..a835899
--- /dev/null
+++ b/src/Infobox/Dictionary.c
@@ -0,0 +1,246 @@
+#include "Dictionary.h"
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+
/* Leading phrases that mark a dictionary lookup.  At most one is
 * matched and stripped from the front of the query; listed roughly
 * longest-first so more specific phrases match before their own
 * prefixes (e.g. "definition of " before "def of ").  NULL-terminated. */
static const char *PREFIXES[] = {
    "what is the definition of ", "what's the definition of ",
    "what is the meaning of ", "what's the meaning of ",
    "what does the word ", "definition of ", "meaning of ", "def of ",
    "define ", "definition ", "define:", "def ", "def:",
    "what does ", "what is ", "what's ", "whats ",
    "meaning ", "dictionary ", "dict ", NULL
};

/* Trailing phrases that mark a dictionary lookup; stripped from the
 * end of the query.  NULL-terminated. */
static const char *SUFFIXES[] = {
    " definition", " def", " meaning", " mean", " means",
    " dictionary", " dict", " define", " defined",
    " definition?", " def?", " meaning?", " mean?", " means?",
    " in english", " in english?", NULL
};

/* Filler words repeatedly dropped from the front of the remaining
 * text before the word is extracted.  NULL-terminated. */
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
+
/* Case-insensitive substring search (portable strcasestr).
 * Returns a pointer to the first occurrence of `needle` inside
 * `haystack`, `haystack` itself for a NULL/empty needle (or NULL
 * haystack), and NULL when no match exists. */
static const char *strcasestr_impl(const char *haystack, const char *needle) {
  if (!haystack || !needle || !*needle) return haystack;

  const size_t needle_len = strlen(needle);
  for (const char *scan = haystack; *scan != '\0'; scan++) {
    if (strncasecmp(scan, needle, needle_len) == 0) {
      return scan;
    }
  }
  return NULL;
}
+
/* Growable response buffer for the dictionary page download. */
struct MemStruct { char *memory; size_t size; };

/* libcurl write callback: grow the MemStruct buffer and append the
 * incoming chunk, keeping the buffer NUL-terminated.  Returns the
 * number of bytes consumed, or 0 (which aborts the transfer) when the
 * buffer cannot be grown. */
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
  struct MemStruct *mem = (struct MemStruct *)userp;
  const size_t incoming = size * nmemb;

  char *grown = realloc(mem->memory, mem->size + incoming + 1);
  if (grown == NULL) return 0;

  mem->memory = grown;
  memcpy(mem->memory + mem->size, contents, incoming);
  mem->size += incoming;
  mem->memory[mem->size] = '\0';
  return incoming;
}
+
/* Evaluate an XPath expression against `doc` and return the text
 * content of the FIRST matching node as a heap-allocated string
 * (caller frees), or NULL when nothing matches or on allocation
 * failure. */
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
  xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
  if (!ctx) return NULL;
  xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
  /* The context is only needed for evaluation; the result object owns
   * its own node set, so the context can be freed immediately. */
  xmlXPathFreeContext(ctx);
  if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
    if (obj) xmlXPathFreeObject(obj);
    return NULL;
  }
  xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
  /* Copy into malloc'd memory so the caller can use plain free(). */
  char *result = content ? strdup((char *)content) : NULL;
  if (content) xmlFree(content);
  xmlXPathFreeObject(obj);
  return result;
}
+
/* Render the dictionary infobox body as a heap-allocated HTML string;
 * any NULL field is simply omitted.  Caller frees the result.
 * Returns NULL on allocation failure.
 *
 * Fix: the original reused the running snprintf total `n` as a buffer
 * offset without clamping.  snprintf returns the WOULD-BE length, so
 * once the output exceeded 4096 bytes, `sizeof(html) - n` underflowed
 * and subsequent calls wrote out of bounds. */
static char *build_html(const char *word, const char *pron, const char *pos,
                        const char *def, const char *ex) {
  char html[4096];
  size_t n = 0;

  /* Bounded append: advance n by what was written, clamped to the
   * buffer size, so the offset can never run past the end. */
#define APPEND(...)                                                   \
  do {                                                                \
    if (n < sizeof(html)) {                                           \
      int w = snprintf(html + n, sizeof(html) - n, __VA_ARGS__);      \
      if (w > 0) n += (size_t)w;                                      \
      if (n > sizeof(html)) n = sizeof(html);                         \
    }                                                                 \
  } while (0)

  APPEND("<div class='dict-container' style='line-height: 1.6;'>");
  if (word)
    APPEND("<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
  if (pron)
    APPEND("<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
  if (pos)
    APPEND("<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
  if (def)
    APPEND("<div style='margin-bottom: 8px;'>%s</div>", def);
  if (ex)
    APPEND("<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
  APPEND("</div>");
#undef APPEND

  /* Equivalent to strdup(html) without relying on POSIX extensions. */
  size_t out_len = strlen(html) + 1;
  char *out = malloc(out_len);
  if (out) memcpy(out, html, out_len);
  return out;
}
+
/* Reduce a raw search query to the single lowercase word to look up.
 * Pipeline (order matters): strip at most one leading trigger phrase
 * (PREFIXES), repeatedly drop leading filler words (SKIP_WORDS),
 * repeatedly cut at trailing trigger phrases (SUFFIXES), trim
 * trailing punctuation/spaces, lowercase, and keep only the first
 * space-separated token.  Returns a heap-allocated word (caller
 * frees) or NULL when nothing remains. */
static char *extract_word(const char *query) {
  if (!query) return NULL;

  const char *start = query;

  /* Strip at most one recognized leading phrase. */
  for (int i = 0; PREFIXES[i]; i++) {
    size_t len = strlen(PREFIXES[i]);
    if (strncasecmp(start, PREFIXES[i], len) == 0) {
      start += len;
      break;
    }
  }

  while (*start == ' ') start++;
  char *word = strdup(start);
  if (!word) return NULL;

  /* Drop leading filler words ("of", "the", ...) until none match. */
  int changed = 1;
  while (changed) {
    changed = 0;
    for (int i = 0; SKIP_WORDS[i]; i++) {
      size_t len = strlen(SKIP_WORDS[i]);
      if (strncasecmp(word, SKIP_WORDS[i], len) == 0) {
        memmove(word, word + len, strlen(word + len) + 1);
        changed = 1;
        break;
      }
    }
  }

  /* Cut at the first occurrence of any suffix phrase, repeating until
   * no suffix appears anywhere in the remaining text. */
  changed = 1;
  while (changed) {
    changed = 0;
    for (int i = 0; SUFFIXES[i]; i++) {
      const char *found = strcasestr_impl(word, SUFFIXES[i]);
      if (found) {
        char *pos = word + (found - word);
        *pos = '\0';
        changed = 1;
        break;
      }
    }
  }

  /* Trim trailing spaces and sentence punctuation. */
  size_t len = strlen(word);
  while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
         word[len-1] == '!' || word[len-1] == '.')) {
    word[--len] = '\0';
  }

  if (len == 0) { free(word); return NULL; }

  /* Lowercase, then keep only the first token. */
  for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
  char *space = strchr(word, ' ');
  if (space) *space = '\0';

  return word;
}
+
/* Heuristic: does `query` look like a dictionary lookup?
 * Returns 1 when the query starts with a trigger phrase, ends with
 * one, or has the shape "what is <single word>" (excluding
 * determiners such as "what is the/your/this ...); 0 otherwise. */
int is_dictionary_query(const char *query) {
  if (!query) return 0;

  /* Leading trigger phrase followed by at least one non-space char. */
  for (int i = 0; PREFIXES[i]; i++) {
    size_t len = strlen(PREFIXES[i]);
    if (strncasecmp(query, PREFIXES[i], len) == 0) {
      const char *after = query + len;
      while (*after == ' ') after++;
      if (*after != '\0') return 1;
    }
  }

  /* Trailing trigger phrase (allowing trailing punctuation), with a
   * sanity cap on how far into the query it may start. */
  for (int i = 0; SUFFIXES[i]; i++) {
    const char *pos = strcasestr_impl(query, SUFFIXES[i]);
    if (pos) {
      const char *after = pos + strlen(SUFFIXES[i]);
      while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
      if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
    }
  }

  /* "what is X" where X is a single word (optionally ending in '?'). */
  if (strncasecmp(query, "what is ", 8) == 0 ||
      strncasecmp(query, "what's ", 7) == 0 ||
      strncasecmp(query, "whats ", 6) == 0) {
    const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
                                strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
    /* Determiners suggest a question about a thing, not a word. */
    const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
                              "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
    for (int i = 0; articles[i]; i++) {
      if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
    }
    const char *space = strchr(word, ' ');
    if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
  }

  return 0;
}
+
+char *construct_dictionary_url(const char *query) {
+ char *word = extract_word(query);
+ if (!word) return NULL;
+
+ CURL *curl = curl_easy_init();
+ if (!curl) { free(word); return NULL; }
+
+ char *escaped = curl_easy_escape(curl, word, 0);
+ const char *base = "https://dictionary.cambridge.org/dictionary/english/";
+ char *url = malloc(strlen(base) + strlen(escaped) + 1);
+ if (url) {
+ strcpy(url, base);
+ strcat(url, escaped);
+ }
+
+ curl_free(escaped);
+ curl_easy_cleanup(curl);
+ free(word);
+ return url;
+}
+
/* Look up the word implied by `query` on Cambridge Dictionary and
 * build an InfoBox from the scraped page.  All fields stay NULL when
 * the lookup or scrape fails; caller releases the result with
 * free_infobox(). */
InfoBox fetch_dictionary_data(const char *query) {
  InfoBox info = {NULL, NULL, NULL, NULL};

  char *url = construct_dictionary_url(query);
  if (!url) return info;

  CURL *curl = curl_easy_init();
  if (!curl) { free(url); return info; }

  /* Page body accumulates in `chunk` via WriteCallback. */
  struct MemStruct chunk = {malloc(1), 0};
  curl_easy_setopt(curl, CURLOPT_URL, url);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

  if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
    htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
        HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
    if (doc) {
      /* XPath selectors tied to Cambridge Dictionary's markup; they
       * will silently return NULL if the site's HTML changes. */
      char *word = xpath_text(doc, "//span[@class='hw dhw']");
      char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
      char *pos = xpath_text(doc, "//span[@class='pos dpos']");
      char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
      char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");

      /* A headword plus a definition is the minimum useful result. */
      if (word && def) {
        info.title = strdup("Dictionary");
        info.extract = build_html(word, pron, pos, def, ex);
        info.thumbnail_url = strdup("/static/dictionary.jpg");
        info.url = strdup(url);
      }

      free(word); free(pron); free(pos); free(def); free(ex);
      xmlFreeDoc(doc);
    }
  }

  curl_easy_cleanup(curl);
  free(chunk.memory);
  free(url);
  return info;
}
diff --git a/src/Infobox/Dictionary.h b/src/Infobox/Dictionary.h
new file mode 100644
index 0000000..2f212c3
--- /dev/null
+++ b/src/Infobox/Dictionary.h
@@ -0,0 +1,10 @@
+#ifndef DICTIONARY_H
+#define DICTIONARY_H
+
+#include "Infobox.h"
+
+InfoBox fetch_dictionary_data(const char *word);
+char *construct_dictionary_url(const char *word);
+int is_dictionary_query(const char *query);
+
+#endif \ No newline at end of file
diff --git a/src/Infobox/Infobox.c b/src/Infobox/Infobox.c
new file mode 100644
index 0000000..5043c05
--- /dev/null
+++ b/src/Infobox/Infobox.c
@@ -0,0 +1,13 @@
+#include "Infobox.h"
+#include <stdlib.h>
+
+void free_infobox(InfoBox *info) {
+ if (info->title)
+ free(info->title);
+ if (info->thumbnail_url)
+ free(info->thumbnail_url);
+ if (info->extract)
+ free(info->extract);
+ if (info->url)
+ free(info->url);
+}
diff --git a/src/Infobox/Infobox.h b/src/Infobox/Infobox.h
new file mode 100644
index 0000000..a052b80
--- /dev/null
+++ b/src/Infobox/Infobox.h
@@ -0,0 +1,13 @@
+#ifndef INFOBOX_H
+#define INFOBOX_H
+
/* A search-result information panel.  Every field is heap-allocated
 * (or NULL when absent) and owned by the struct; release with
 * free_infobox(). */
typedef struct {
  char *title;          /* Panel heading, e.g. "Dictionary". */
  char *thumbnail_url;  /* Image URL shown alongside the text. */
  char *extract;        /* HTML body of the panel. */
  char *url;            /* Link target for the panel. */
} InfoBox;

/* Free all fields of *info; does not free the struct itself. */
void free_infobox(InfoBox *info);
+
+#endif
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
new file mode 100644
index 0000000..ed4645f
--- /dev/null
+++ b/src/Infobox/Wikipedia.c
@@ -0,0 +1,165 @@
+#include "Wikipedia.h"
+#include <curl/curl.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
/* Growable response buffer for the Wikipedia API download. */
struct WikiMemoryStruct {
  char *memory;
  size_t size;
};

/* Truncate *extract_ptr in place (reallocating) to at most max_chars
 * characters, preferring to cut just after a sentence-ending '.', '!'
 * or '?' found in the second half of the limit, then appending "...".
 * Strings already within the limit are left untouched; on allocation
 * failure the original string is kept. */
static void shorten_summary(char **extract_ptr, int max_chars) {
  if (extract_ptr == NULL || *extract_ptr == NULL) return;

  char *text = *extract_ptr;
  if ((int)strlen(text) <= max_chars) return;

  /* Scan backwards for a sentence boundary, never cutting below half
   * of max_chars. */
  int cut = max_chars;
  for (int i = max_chars; i > max_chars / 2; i--) {
    char c = text[i];
    if (c == '.' || c == '!' || c == '?') {
      cut = i + 1;
      break;
    }
  }

  char *shortened = malloc((size_t)cut + 4); /* text + "..." + NUL */
  if (shortened == NULL) return;

  memcpy(shortened, text, (size_t)cut);
  memcpy(shortened + cut, "...", 4);
  free(*extract_ptr);
  *extract_ptr = shortened;
}

/* libcurl write callback: grow the buffer and append the incoming
 * chunk, keeping it NUL-terminated.  Returns the number of bytes
 * consumed, or 0 (aborting the transfer) when memory runs out. */
static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
                                      void *userp) {
  struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
  const size_t incoming = size * nmemb;

  char *grown = realloc(mem->memory, mem->size + incoming + 1);
  if (grown == NULL) {
    fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
    return 0;
  }

  mem->memory = grown;
  memcpy(mem->memory + mem->size, contents, incoming);
  mem->size += incoming;
  mem->memory[mem->size] = '\0';
  return incoming;
}
+
/* Recursively walk the parsed MediaWiki API XML and fill `info` from
 * the <page title=...>, <thumbnail source=...> and <extract>
 * elements.  The extract is truncated to ~300 characters.  Fields
 * that never appear stay NULL. */
static void extract_wiki_info(xmlNode *node, InfoBox *info) {
  xmlNode *cur_node = NULL;

  for (cur_node = node; cur_node; cur_node = cur_node->next) {
    if (cur_node->type == XML_ELEMENT_NODE) {
      if (strcmp((const char *)cur_node->name, "page") == 0) {
        xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
        if (title) {
          info->title = strdup((const char *)title);

          /* Derive the article URL from the title: Wikipedia uses
           * underscores in place of spaces. */
          const char *base_article_url = "https://en.wikipedia.org/wiki/";
          char *formatted_title = strdup((const char *)title);
          /* NOTE(review): strdup's result is used unchecked below; on
           * OOM this dereferences NULL — consider guarding. */
          for (int i = 0; formatted_title[i]; i++) {
            if (formatted_title[i] == ' ') formatted_title[i] = '_';
          }

          info->url =
              malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
          if (info->url) {
            strcpy(info->url, base_article_url);
            strcat(info->url, formatted_title);
          }
          free(formatted_title);
          xmlFree(title);
        }
      }

      if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
        xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
        if (source) {
          info->thumbnail_url = strdup((const char *)source);
          xmlFree(source);
        }
      }

      if (strcmp((const char *)cur_node->name, "extract") == 0) {
        xmlChar *content = xmlNodeGetContent(cur_node);
        if (content) {
          info->extract = strdup((const char *)content);

          /* Keep the infobox summary short. */
          shorten_summary(&(info->extract), 300);
          xmlFree(content);
        }
      }
    }
    /* Depth-first: descend into this node's children. */
    extract_wiki_info(cur_node->children, info);
  }
}
+
/* Download the Wikipedia API response at `api_url` and parse it into
 * an InfoBox.  Fields that cannot be extracted stay NULL; caller
 * releases the result with free_infobox(). */
InfoBox fetch_wiki_data(char *api_url) {
  CURL *curl_handle;
  CURLcode res;
  struct WikiMemoryStruct chunk;
  InfoBox info = {NULL, NULL, NULL, NULL};

  /* Start with a 1-byte buffer; the write callback grows it. */
  chunk.memory = malloc(1);
  chunk.size = 0;

  curl_handle = curl_easy_init();

  if (curl_handle) {
    curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
                     WikiWriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

    res = curl_easy_perform(curl_handle);

    if (res == CURLE_OK) {
      /* The API returns XML (format=xml in the URL). */
      xmlDocPtr doc =
          xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
      if (doc != NULL) {
        xmlNode *root_element = xmlDocGetRootElement(doc);
        extract_wiki_info(root_element, &info);
        xmlFreeDoc(doc);
      }
    }

    curl_easy_cleanup(curl_handle);
    free(chunk.memory);
  }

  return info;
}
+
+char *construct_wiki_url(const char *search_term) {
+ CURL *curl = curl_easy_init();
+ if (!curl) return NULL;
+
+ char *escaped_term = curl_easy_escape(curl, search_term, 0);
+ const char *base =
+ "https://en.wikipedia.org/w/"
+ "api.php?action=query&prop=extracts|pageimages&exintro&"
+ "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
+
+ char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
+ if (full_url) {
+ strcpy(full_url, base);
+ strcat(full_url, escaped_term);
+ }
+
+ curl_free(escaped_term);
+ curl_easy_cleanup(curl);
+ return full_url;
+}
diff --git a/src/Infobox/Wikipedia.h b/src/Infobox/Wikipedia.h
new file mode 100644
index 0000000..8a8103e
--- /dev/null
+++ b/src/Infobox/Wikipedia.h
@@ -0,0 +1,9 @@
+#ifndef WIKIPEDIA_H
+#define WIKIPEDIA_H
+
+#include "Infobox.h"
+
+InfoBox fetch_wiki_data(char *api_url);
+char *construct_wiki_url(const char *search_term);
+
+#endif
diff --git a/src/Main.c b/src/Main.c
new file mode 100644
index 0000000..d1b2eb9
--- /dev/null
+++ b/src/Main.c
@@ -0,0 +1,49 @@
+#include <beaker.h>
+#include <curl/curl.h>
+#include <libxml/parser.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "Config.h"
+#include "Routes/Home.h"
+#include "Routes/Images.h"
+#include "Routes/Search.h"
+
/* Serve the OpenSearch descriptor so browsers can register this site
 * as a search provider. */
int handle_opensearch(UrlParams *params) {
  (void)params;
  serve_static_file_with_mime("opensearch.xml", "application/opensearchdescription+xml");
  return 0;
}
+
/* Program entry point: initialize libxml2 and libcurl, load the
 * optional config.ini, register the HTTP routes, and run the beaker
 * server until it exits. */
int main() {
  /* Verify the libxml2 headers match the linked library. */
  LIBXML_TEST_VERSION
  xmlInitParser();

  curl_global_init(CURL_GLOBAL_DEFAULT);

  /* Defaults used when config.ini is missing or incomplete. */
  Config config = {.host = "0.0.0.0", .port = 5000};

  if (load_config("config.ini", &config) != 0) {
    fprintf(stderr, "Warning: Could not load config file, using defaults\n");
  }

  set_handler("/", home_handler);
  set_handler("/opensearch.xml", handle_opensearch);
  set_handler("/search", results_handler);
  set_handler("/images", images_handler);

  fprintf(stderr, "Starting Omnisearch on %s:%d\n", config.host, config.port);

  /* Blocks for the lifetime of the server. */
  int result = beaker_run(config.host, config.port);

  if (result != 0) {
    fprintf(stderr, "Error: Beaker server failed to start.\n");
    curl_global_cleanup();
    xmlCleanupParser();
    return EXIT_FAILURE;
  }

  curl_global_cleanup();
  xmlCleanupParser();
  return EXIT_SUCCESS;
}
diff --git a/src/Routes/Home.c b/src/Routes/Home.c
new file mode 100644
index 0000000..81370ba
--- /dev/null
+++ b/src/Routes/Home.c
@@ -0,0 +1,14 @@
+#include "Home.h"
+#include <stdlib.h>
+
+int home_handler(UrlParams *params) {
+ (void)params;
+ TemplateContext ctx = new_context();
+ char *rendered_html = render_template("home.html", &ctx);
+ send_response(rendered_html);
+
+ free(rendered_html);
+ free_context(&ctx);
+
+ return 0;
+}
diff --git a/src/Routes/Home.h b/src/Routes/Home.h
new file mode 100644
index 0000000..5d01ab3
--- /dev/null
+++ b/src/Routes/Home.h
@@ -0,0 +1,8 @@
+#ifndef HOME_H
+#define HOME_H
+
+#include <beaker.h>
+
+int home_handler(UrlParams *params);
+
+#endif
diff --git a/src/Routes/Images.c b/src/Routes/Images.c
new file mode 100644
index 0000000..67ae94c
--- /dev/null
+++ b/src/Routes/Images.c
@@ -0,0 +1,278 @@
+#include "Images.h"
+#include "../Utility/Unescape.h"
+
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
/* Growable buffer accumulating the scraped results page. */
struct MemoryBlock {
  char *response;
  size_t size;
};

/* libcurl write callback: append the incoming chunk to the
 * MemoryBlock, keeping it NUL-terminated.  Returning 0 aborts the
 * transfer, which is how an out-of-memory condition is signalled. */
static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
                                 void *userp) {
  struct MemoryBlock *mem = (struct MemoryBlock *)userp;
  const size_t incoming = size * nmemb;

  char *grown = (char *)realloc(mem->response, mem->size + incoming + 1);
  if (grown == NULL) {
    return 0;
  }

  mem->response = grown;
  memcpy(mem->response + mem->size, data, incoming);
  mem->size += incoming;
  mem->response[mem->size] = '\0';
  return incoming;
}
+
+static char *fetch_images_html(const char *url) {
+ CURL *curl_handle;
+ struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
+ if (!chunk.response) {
+ return NULL;
+ }
+
+ curl_handle = curl_easy_init();
+ if (!curl_handle) {
+ free(chunk.response);
+ return NULL;
+ }
+
+ curl_easy_setopt(curl_handle, CURLOPT_URL, url);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
+ curl_easy_setopt(
+ curl_handle, CURLOPT_USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
+ curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
+ curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
+
+ CURLcode res = curl_easy_perform(curl_handle);
+ if (res != CURLE_OK) {
+ free(chunk.response);
+ curl_easy_cleanup(curl_handle);
+ return NULL;
+ }
+
+ curl_easy_cleanup(curl_handle);
+ return chunk.response;
+}
+
+int images_handler(UrlParams *params) {
+ TemplateContext ctx = new_context();
+ char *raw_query = "";
+
+ if (params) {
+ for (int i = 0; i < params->count; i++) {
+ if (strcmp(params->params[i].key, "q") == 0) {
+ raw_query = params->params[i].value;
+ break;
+ }
+ }
+ }
+
+ char *display_query = url_decode_query(raw_query);
+ context_set(&ctx, "query", display_query);
+
+ if (!raw_query || strlen(raw_query) == 0) {
+ send_response("<h1>No query provided</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+
+ CURL *tmp = curl_easy_init();
+ if (!tmp) {
+ send_response("<h1>Error initializing curl</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+ char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
+ curl_easy_cleanup(tmp);
+
+ if (!encoded_query) {
+ send_response("<h1>Error encoding query</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+
+ char url[1024];
+ snprintf(url, sizeof(url),
+ "https://www.bing.com/images/search?q=%s", encoded_query);
+ fprintf(stderr, "[DEBUG] Fetching URL: %s\n", url);
+
+ char *html = fetch_images_html(url);
+ if (!html) {
+ fprintf(stderr, "[DEBUG] Failed to fetch HTML\n");
+ send_response("<h1>Error fetching images</h1>");
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+
+ htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
+ if (!doc) {
+ free(html);
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+
+ xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+
+ if (!xpathCtx) {
+ xmlFreeDoc(doc);
+ free(html);
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
+ }
+
+ xmlXPathObjectPtr xpathObj =
+ xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
+
+ int image_count = 0;
+ char ***image_matrix = NULL;
+ int *inner_counts = NULL;
+
+ if (xpathObj && xpathObj->nodesetval) {
+ int nodes = xpathObj->nodesetval->nodeNr;
+ fprintf(stderr, "[DEBUG] Found %d image items\n", nodes);
+
+ int max_images = (nodes < 32) ? nodes : 32;
+ image_matrix = malloc(sizeof(char **) * max_images);
+ inner_counts = malloc(sizeof(int) * max_images);
+
+ for (int i = 0; i < nodes; i++) {
+ if (image_count >= 32) break;
+
+ xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
+ xmlNodePtr img_node = NULL;
+ xmlNodePtr tit_node = NULL;
+ xmlNodePtr des_node = NULL;
+ xmlNodePtr thumb_link = NULL;
+
+ for (xmlNodePtr child = node->children; child; child = child->next) {
+ if (child->type != XML_ELEMENT_NODE) continue;
+
+ if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
+ xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+ if (class) {
+ if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
+ thumb_link = child;
+ for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
+ if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
+ if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
+ for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
+ if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
+ img_node = cico_child;
+ break;
+ }
+ }
+ }
+ if (div_class) xmlFree(div_class);
+ }
+ }
+ } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
+ tit_node = child;
+ }
+ xmlFree(class);
+ }
+ } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+ if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
+ for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
+ if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
+ if (div_class) {
+ if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
+ des_node = meta_child;
+ }
+ xmlFree(div_class);
+ }
+ } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
+ xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
+ if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
+ tit_node = meta_child;
+ }
+ if (a_class) xmlFree(a_class);
+ }
+ }
+ }
+ if (class) xmlFree(class);
+ }
+ }
+
+ xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
+ xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
+ xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
+ xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
+
+ fprintf(stderr, "[DEBUG] Image %d: thumb=%s, full=%s, title=%s, site=%s\n",
+ image_count, iurl ? (char *)iurl : "nil",
+ full_url ? (char *)full_url : "nil",
+ title ? (char *)title : "nil",
+ rurl ? (char *)rurl : "nil");
+
+ if (iurl && strlen((char *)iurl) > 0) {
+ image_matrix[image_count] = malloc(sizeof(char *) * 4);
+ image_matrix[image_count][0] = strdup((char *)iurl);
+ image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
+ image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
+ image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
+ inner_counts[image_count] = 4;
+ image_count++;
+ }
+
+ if (iurl) xmlFree(iurl);
+ if (title) xmlFree(title);
+ if (rurl) xmlFree(rurl);
+ if (full_url) xmlFree(full_url);
+ }
+ }
+
+ context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
+ inner_counts);
+
+ char *rendered = render_template("images.html", &ctx);
+ if (rendered) {
+ send_response(rendered);
+ free(rendered);
+ } else {
+ send_response("<h1>Error rendering image results</h1>");
+ }
+
+ if (image_matrix) {
+ for (int i = 0; i < image_count; i++) {
+ for (int j = 0; j < 4; j++) {
+ free(image_matrix[i][j]);
+ }
+ free(image_matrix[i]);
+ }
+ free(image_matrix);
+ }
+ if (inner_counts) {
+ free(inner_counts);
+ }
+
+ if (xpathObj) xmlXPathFreeObject(xpathObj);
+ if (xpathCtx) xmlXPathFreeContext(xpathCtx);
+ if (doc) xmlFreeDoc(doc);
+ free(html);
+ curl_free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+
+ return 0;
+} \ No newline at end of file
diff --git a/src/Routes/Images.h b/src/Routes/Images.h
new file mode 100644
index 0000000..86f4a31
--- /dev/null
+++ b/src/Routes/Images.h
@@ -0,0 +1,8 @@
+#ifndef IMAGES_HANDLER_H
+#define IMAGES_HANDLER_H
+
+#include <beaker.h>
+
+int images_handler(UrlParams *params);
+
+#endif
diff --git a/src/Routes/Search.c b/src/Routes/Search.c
new file mode 100644
index 0000000..4e8c7ad
--- /dev/null
+++ b/src/Routes/Search.c
@@ -0,0 +1,275 @@
+#include "Search.h"
+#include "../Infobox/Wikipedia.h"
+#include "../Infobox/Calculator.h"
+#include "../Infobox/Dictionary.h"
+#include "../Scraping/Scraping.h"
+#include "../Utility/Display.h"
+#include "../Utility/Unescape.h"
+#include <ctype.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
/* Shared between results_handler and one infobox worker thread: the
 * thread reads `query`, fills `result`, and sets `success` to 1 only
 * when `result` holds usable data for rendering. */
typedef struct {
  const char *query;
  InfoBox result;
  int success;
} InfoBoxThreadData;
+
/* pthread entry: builds the Wikipedia URL for the query and fetches the
 * infobox. Success additionally requires a non-trivial extract
 * (> 10 chars) so near-empty stubs are not rendered. */
static void *wiki_thread_func(void *arg) {
  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
  char *dynamic_url = construct_wiki_url(data->query);
  if (dynamic_url) {
    data->result = fetch_wiki_data(dynamic_url);
    data->success =
        (data->result.title != NULL && data->result.extract != NULL &&
         strlen(data->result.extract) > 10);
    free(dynamic_url);
  } else {
    /* URL construction failed (presumably allocation) — no infobox. */
    data->success = 0;
  }
  return NULL;
}
+
/* Heuristic: does the query look like an arithmetic expression?
 * Requires at least one real digit, plus either an operator character
 * or a decimal point somewhere in the string (so "3.14" qualifies).
 *
 * Fixes vs. the previous version:
 *   - isdigit() is given an unsigned char; passing a raw (possibly
 *     negative) char, e.g. a UTF-8 byte, is undefined behavior.
 *   - '.' no longer counts as a "digit": previously any query
 *     containing a dot (e.g. "file.txt") was misclassified as a
 *     calculator query.
 *
 * Returns 1 for calculator-like queries, 0 otherwise (including NULL). */
static int is_calculator_query(const char *query) {
  if (!query) return 0;

  int has_digit = 0;
  int has_operator = 0;

  for (const char *p = query; *p; p++) {
    if (isdigit((unsigned char)*p)) {
      has_digit = 1;
    }
    if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '=' ||
        *p == '^') {
      has_operator = 1;
    }
  }

  return has_digit && (has_operator || strchr(query, '.') != NULL);
}
+
/* pthread entry: if the query looks like arithmetic (see
 * is_calculator_query), evaluates it via fetch_calc_data and marks
 * success when both title and extract were produced. */
static void *calc_thread_func(void *arg) {
  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;

  if (is_calculator_query(data->query)) {
    data->result = fetch_calc_data((char *)data->query);
    data->success =
        (data->result.title != NULL && data->result.extract != NULL);
  } else {
    data->success = 0;
  }

  return NULL;
}
+
/* pthread entry: if the query looks like a dictionary lookup (per
 * is_dictionary_query), fetches the definition infobox and marks
 * success when both title and extract were produced. */
static void *dict_thread_func(void *arg) {
  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;

  if (is_dictionary_query(data->query)) {
    data->result = fetch_dictionary_data(data->query);
    data->success =
        (data->result.title != NULL && data->result.extract != NULL);
  } else {
    data->success = 0;
  }

  return NULL;
}
+
+static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
+ int **inner_counts, int current_count) {
+ *collection =
+ (char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
+ *inner_counts =
+ (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
+
+ (*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
+ (*collection)[current_count][0] = infobox->title;
+ (*collection)[current_count][1] = infobox->thumbnail_url;
+ (*collection)[current_count][2] = infobox->extract;
+ (*collection)[current_count][3] = infobox->url;
+ (*inner_counts)[current_count] = 4;
+
+ return current_count + 1;
+}
+
/* GET /search handler: renders results.html for the query in ?q= and
 * the 1-based page in ?p=.
 *
 * Flow:
 *   1. read q/p and expose query/page/prev_page/next_page to the
 *      template context;
 *   2. on page 1 only, launch the three infobox workers (Wikipedia,
 *      calculator, dictionary) so they run in parallel with the scrape;
 *   3. scrape every engine in ENGINE_REGISTRY concurrently;
 *   4. collect successful infoboxes, dedupe scraped results by URL,
 *      render the template, and release everything.
 *
 * Returns 0 on success, -1 when no query was supplied. */
int results_handler(UrlParams *params) {
  TemplateContext ctx = new_context();
  char *raw_query = "";
  int page = 1;

  /* Pull q (query string) and p (page number) out of the URL params. */
  if (params) {
    for (int i = 0; i < params->count; i++) {
      if (strcmp(params->params[i].key, "q") == 0) {
        raw_query = params->params[i].value;
      } else if (strcmp(params->params[i].key, "p") == 0) {
        int parsed = atoi(params->params[i].value);
        if (parsed > 1) page = parsed;
      }
    }
  }

  context_set(&ctx, "query", raw_query);

  /* Pagination values for the template; prev_page is 0 on page 1. */
  char page_str[16], prev_str[16], next_str[16];
  snprintf(page_str, sizeof(page_str), "%d", page);
  snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
  snprintf(next_str, sizeof(next_str), "%d", page + 1);
  context_set(&ctx, "page", page_str);
  context_set(&ctx, "prev_page", prev_str);
  context_set(&ctx, "next_page", next_str);

  if (!raw_query || strlen(raw_query) == 0) {
    send_response("<h1>No query provided</h1>");
    free_context(&ctx);
    return -1;
  }

  /* Infoboxes are only shown on the first page; spawn their fetchers
   * now so they overlap with the (slower) engine scrape below. */
  pthread_t wiki_tid, calc_tid, dict_tid;
  InfoBoxThreadData wiki_data = {.query = raw_query, .success = 0};
  InfoBoxThreadData calc_data = {.query = raw_query, .success = 0};
  InfoBoxThreadData dict_data = {.query = raw_query, .success = 0};

  if (page == 1) {
    pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
    pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
    pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
  }

  /* One scrape job per registered engine, all sharing the raw query. */
  ScrapeJob jobs[ENGINE_COUNT];
  SearchResult *all_results[ENGINE_COUNT];

  for (int i = 0; i < ENGINE_COUNT; i++) {
    all_results[i] = NULL;
    jobs[i].engine = &ENGINE_REGISTRY[i];
    jobs[i].query = raw_query;
    jobs[i].out_results = &all_results[i];
    jobs[i].max_results = 10;
    jobs[i].results_count = 0;
    jobs[i].page = page;
  }

  scrape_engines_parallel(jobs, ENGINE_COUNT);

  if (page == 1) {
    pthread_join(wiki_tid, NULL);
    pthread_join(calc_tid, NULL);
    pthread_join(dict_tid, NULL);
  }

  /* Gather successful infoboxes in display order: dictionary,
   * calculator, then Wikipedia. Rows borrow the InfoBox strings. */
  char ***infobox_matrix = NULL;
  int *infobox_inner_counts = NULL;
  int infobox_count = 0;

  if (page == 1) {
    if (dict_data.success) {
      infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
                                                &infobox_inner_counts, infobox_count);
    }

    if (calc_data.success) {
      infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
                                                &infobox_inner_counts, infobox_count);
    }

    if (wiki_data.success) {
      infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
                                                &infobox_inner_counts, infobox_count);
    }
  }

  if (infobox_count > 0) {
    /* NOTE(review): only the outer containers are freed here — the
     * 4-pointer row arrays allocated by add_infobox_to_collection are
     * not, so they leak unless context_set_array_of_arrays takes
     * ownership of them. Also assumes it deep-copies the strings,
     * since free_infobox releases them below — confirm both. */
    context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
                                infobox_count, infobox_inner_counts);
    free(infobox_matrix);
    free(infobox_inner_counts);
  }

  int total_results = 0;
  for (int i = 0; i < ENGINE_COUNT; i++) {
    total_results += jobs[i].results_count;
  }

  if (total_results > 0) {
    /* Merge all engines' results, dropping URLs already seen
     * (earlier engines in the registry win). */
    char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
    int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
    char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
    int unique_count = 0;

    for (int i = 0; i < ENGINE_COUNT; i++) {
      for (int j = 0; j < jobs[i].results_count; j++) {
        char *display_url = all_results[i][j].url;

        int is_duplicate = 0;
        for (int k = 0; k < unique_count; k++) {
          if (strcmp(seen_urls[k], display_url) == 0) {
            is_duplicate = 1;
            break;
          }
        }

        if (is_duplicate) {
          free(all_results[i][j].url);
          free(all_results[i][j].title);
          free(all_results[i][j].snippet);
          continue;
        }

        /* Row layout: [href, pretty url, title, snippet]. */
        seen_urls[unique_count] = strdup(display_url);
        results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
        char *pretty_url = pretty_display_url(display_url);

        results_matrix[unique_count][0] = strdup(display_url);
        results_matrix[unique_count][1] = strdup(pretty_url);
        results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
        results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");

        results_inner_counts[unique_count] = 4;

        free(pretty_url);
        free(all_results[i][j].url);
        free(all_results[i][j].title);
        free(all_results[i][j].snippet);

        unique_count++;
      }
      free(all_results[i]);
    }

    context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);

    char *html = render_template("results.html", &ctx);
    if (html) {
      send_response(html);
      free(html);
    }

    for (int i = 0; i < unique_count; i++) {
      for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
      free(results_matrix[i]);
      free(seen_urls[i]);
    }
    free(seen_urls);
    free(results_matrix);
    free(results_inner_counts);
  } else {
    /* No scraped results — render the template anyway (it may still
     * show infoboxes and the empty state). */
    char *html = render_template("results.html", &ctx);
    if (html) {
      send_response(html);
      free(html);
    }
  }

  if (page == 1) {
    if (wiki_data.success) free_infobox(&wiki_data.result);
    if (calc_data.success) free_infobox(&calc_data.result);
    if (dict_data.success) free_infobox(&dict_data.result);
  }
  free_context(&ctx);

  return 0;
}
diff --git a/src/Routes/Search.h b/src/Routes/Search.h
new file mode 100644
index 0000000..c6bc146
--- /dev/null
+++ b/src/Routes/Search.h
@@ -0,0 +1,8 @@
+#ifndef SEARCH_HANDLER_H
+#define SEARCH_HANDLER_H
+
+#include <beaker.h>
+
+int results_handler(UrlParams *params);
+
+#endif
diff --git a/src/Scraping/Scraping.c b/src/Scraping/Scraping.c
new file mode 100644
index 0000000..42e05d6
--- /dev/null
+++ b/src/Scraping/Scraping.c
@@ -0,0 +1,459 @@
+#include "Scraping.h"
+#include "../Utility/Unescape.h"
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
+ void *userp) {
+ size_t realsize = size * nmemb;
+ MemoryBuffer *mem = (MemoryBuffer *)userp;
+
+ if (mem->size + realsize + 1 > mem->capacity) {
+
+ size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
+ while (new_cap < mem->size + realsize + 1) new_cap *= 2;
+
+ char *ptr = (char *)realloc(mem->memory, new_cap);
+ if (!ptr) {
+ return 0;
+ }
+ mem->memory = ptr;
+ mem->capacity = new_cap;
+ }
+
+ memcpy(&(mem->memory[mem->size]), contents, realsize);
+ mem->size += realsize;
+ mem->memory[mem->size] = 0;
+
+ return realsize;
+}
+
/* Returns a pointer to one of a fixed pool of common desktop browser
 * User-Agent strings, chosen with rand(). The returned string is
 * static — do not free it.
 *
 * Fixes vs. the previous version:
 *   - removed a stray backtick that corrupted the Linux/Chrome agent
 *     ("Chrome/120.0.0.0` Safari/537.36");
 *   - the pool size is derived from the array instead of a hard-coded
 *     "% 5", so adding an agent can't silently go stale. */
static const char *get_random_user_agent() {
  static const char *agents[] = {
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
      "like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
      "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
      "Gecko) "
      "Chrome/120.0.0.0 Safari/537.36",
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
      "Firefox/121.0",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
      "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
  return agents[rand() % (sizeof agents / sizeof agents[0])];
}
+
+static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
+ SearchResult **out_results, int max_results) {
+ (void)engine_name;
+ int found_count = 0;
+ xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+ if (!xpathCtx) {
+ return 0;
+ }
+
+ const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']";
+ xmlXPathObjectPtr xpathObj =
+ xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
+
+ if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
+ if (xpathObj) xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
+ }
+
+ int num_links = xpathObj->nodesetval->nodeNr;
+
+ int actual_alloc = (num_links < max_results) ? num_links : max_results;
+ *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
+ if (!*out_results) {
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
+ }
+
+ for (int i = 0; i < num_links && found_count < max_results; i++) {
+ xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
+ char *title = (char *)xmlNodeGetContent(linkNode);
+ char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
+ char *snippet_text = NULL;
+
+ xmlNodePtr current = linkNode->parent;
+ while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
+ current = current->parent;
+
+ if (current && current->next) {
+ xmlNodePtr snippetRow = current->next;
+ while (snippetRow &&
+ xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
+ snippetRow = snippetRow->next;
+ if (snippetRow) {
+
+ xpathCtx->node = snippetRow;
+ xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
+ (xmlChar *)".//td[@class='result-snippet']", xpathCtx);
+ if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
+ snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
+ }
+ if (sObj) xmlXPathFreeObject(sObj);
+ xpathCtx->node = NULL;
+
+ }
+ }
+
+ (*out_results)[found_count].url = unescape_search_url(url);
+ (*out_results)[found_count].title = strdup(title ? title : "No Title");
+ (*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
+
+ found_count++;
+
+ if (title) xmlFree(title);
+ if (url) xmlFree(url);
+ if (snippet_text) xmlFree(snippet_text);
+ }
+
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return found_count;
+}
+
/* Parses a Startpage results page. Each hit is a div whose class
 * contains 'result'; the link (a.result-link), heading (h2.wgl-title)
 * and description paragraph are extracted with XPath queries relative
 * to that container. Allocates *out_results (caller owns the array and
 * every string) and returns the number stored; 0 on any failure. */
static int parse_startpage(const char *engine_name, xmlDocPtr doc,
                           SearchResult **out_results, int max_results) {
  (void)engine_name;
  int found_count = 0;
  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
  if (!xpathCtx) {
    return 0;
  }

  const char *container_xpath = "//div[contains(@class, 'result')]";
  xmlXPathObjectPtr xpathObj =
      xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);

  if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
    if (xpathObj) xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);
    return 0;
  }

  int num_results = xpathObj->nodesetval->nodeNr;

  /* calloc zeroes the array so unfilled slots hold NULL pointers. */
  int actual_alloc = (num_results < max_results) ? num_results : max_results;
  *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
  if (!*out_results) {
    xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);
    return 0;
  }

  for (int i = 0; i < num_results && found_count < max_results; i++) {
    xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
    /* Scope the following relative ".//" queries to this container. */
    xpathCtx->node = resultNode;

    xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
        (xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
    char *url =
        (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
            ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
                                 (xmlChar *)"href")
            : NULL;

    xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
        (xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
    char *title =
        (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
            ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
            : NULL;

    xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
        (xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
    char *snippet_text =
        (snippetObj && snippetObj->nodesetval &&
         snippetObj->nodesetval->nodeNr > 0)
            ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
            : NULL;

    /* Only complete hits (both link and heading) are kept. */
    if (url && title) {
      (*out_results)[found_count].url = strdup(url);
      (*out_results)[found_count].title = strdup(title);
      (*out_results)[found_count].snippet =
          strdup(snippet_text ? snippet_text : "");
      found_count++;
    }

    if (title) xmlFree(title);
    if (url) xmlFree(url);
    if (snippet_text) xmlFree(snippet_text);
    if (linkObj) xmlXPathFreeObject(linkObj);
    if (titleObj) xmlXPathFreeObject(titleObj);
    if (snippetObj) xmlXPathFreeObject(snippetObj);
  }

  xpathCtx->node = NULL;

  xmlXPathFreeObject(xpathObj);
  xmlXPathFreeContext(xpathCtx);
  return found_count;
}
+
/* Parses a Yahoo results page. Each hit is a div whose class contains
 * 'algo-sr'; the outbound anchor (inside .compTitle), the h3.title
 * heading and the .compText paragraph are extracted relative to that
 * container. Yahoo links are r.search.yahoo.com redirects, so the href
 * is routed through unescape_search_url (RU= extraction). Allocates
 * *out_results (caller owns the array and every string) and returns
 * the number stored; 0 on any failure. */
static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
                       SearchResult **out_results, int max_results) {
  (void)engine_name;
  int found_count = 0;
  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
  if (!xpathCtx) {
    return 0;
  }

  const char *container_xpath = "//div[contains(@class, 'algo-sr')]";
  xmlXPathObjectPtr xpathObj =
      xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);

  if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
    if (xpathObj) xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);
    return 0;
  }

  int num_results = xpathObj->nodesetval->nodeNr;

  /* calloc zeroes the array so unfilled slots hold NULL pointers. */
  int actual_alloc = (num_results < max_results) ? num_results : max_results;
  *out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
  if (!*out_results) {
    xmlXPathFreeObject(xpathObj);
    xmlXPathFreeContext(xpathCtx);
    return 0;
  }

  for (int i = 0; i < num_results && found_count < max_results; i++) {
    xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
    /* Scope the following relative ".//" queries to this container. */
    xpathCtx->node = resultNode;

    xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
        (xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
        xpathCtx);
    char *url =
        (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
            ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
                                 (xmlChar *)"href")
            : NULL;

    xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
        (xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
    char *title =
        (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
            ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
            : NULL;

    xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
        (xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
    char *snippet_text =
        (snippetObj && snippetObj->nodesetval &&
         snippetObj->nodesetval->nodeNr > 0)
            ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
            : NULL;

    /* Only complete hits (both link and heading) are kept. */
    if (url && title) {
      (*out_results)[found_count].url = unescape_search_url(url);
      (*out_results)[found_count].title = strdup(title);
      (*out_results)[found_count].snippet =
          strdup(snippet_text ? snippet_text : "");
      found_count++;
    }

    if (title) xmlFree(title);
    if (url) xmlFree(url);
    if (snippet_text) xmlFree(snippet_text);
    if (linkObj) xmlXPathFreeObject(linkObj);
    if (titleObj) xmlXPathFreeObject(titleObj);
    if (snippetObj) xmlXPathFreeObject(snippetObj);
  }

  xpathCtx->node = NULL;
  xmlXPathFreeObject(xpathObj);
  xmlXPathFreeContext(xpathCtx);
  return found_count;
}
+
/* Engines queried in parallel for every search. Pagination is encoded
 * per engine: the request URL is suffixed with
 * &<page_param>=<(page-1)*page_multiplier + page_base>, so DDG Lite
 * takes a 0-based result offset in steps of 30, Startpage a plain
 * 1-based page number, and Yahoo a 1-based result offset in steps
 * of 10. */
const SearchEngine ENGINE_REGISTRY[] = {
    {.name = "DuckDuckGo Lite",
     .base_url = "https://lite.duckduckgo.com/lite/?q=",
     .host_header = "lite.duckduckgo.com",
     .referer = "https://lite.duckduckgo.com/",
     .page_param = "s",
     .page_multiplier = 30,
     .page_base = 0,
     .parser = parse_ddg_lite},
    {.name = "Startpage",
     .base_url = "https://www.startpage.com/sp/search?q=",
     .host_header = "www.startpage.com",
     .referer = "https://www.startpage.com/",
     .page_param = "page",
     .page_multiplier = 1,
     .page_base = 1,
     .parser = parse_startpage},
    {.name = "Yahoo",
     .base_url = "https://search.yahoo.com/search?p=",
     .host_header = "search.yahoo.com",
     .referer = "https://search.yahoo.com/",
     .page_param = "b",
     .page_multiplier = 10,
     .page_base = 1,
     .parser = parse_yahoo}};

/* Number of entries in ENGINE_REGISTRY. */
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
+
/* Applies the transfer options shared by every scrape request:
 * response collection into `chunk`, the per-engine header list, a
 * rotating User-Agent, HTTP/2, decompression (an empty
 * Accept-Encoding enables all encodings curl supports), DNS caching,
 * redirect following, a 15 s timeout, TLS verification, and an
 * in-memory cookie engine (empty COOKIEFILE). The headers list is
 * owned by the caller and must outlive the transfer. */
static void configure_curl_handle(CURL *curl, const char *full_url,
                                  MemoryBuffer *chunk,
                                  struct curl_slist *headers) {
  curl_easy_setopt(curl, CURLOPT_URL, full_url);
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)chunk);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, get_random_user_agent());

  curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

  /* Empty string = accept every encoding curl was built with. */
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");

  curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, 300L);

  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);
  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");
}
+
/* Runs all scrape jobs concurrently through the curl multi interface.
 *
 * For each job: builds the engine URL (escaped query + page offset),
 * assembles per-engine headers (stashed on the handle via
 * CURLOPT_PRIVATE so they can be freed after completion), and adds the
 * handle to the multi stack. After the event loop drains, each
 * completed response is parsed with the engine's parser into
 * job->out_results / job->results_count.
 *
 * Returns 0, or -1 if the multi handle could not be created. Jobs
 * whose easy handle or query escaping failed are silently skipped
 * (their results_count stays as the caller initialized it). */
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
  CURLM *multi_handle = curl_multi_init();
  if (!multi_handle) {
    return -1;
  }

  for (int i = 0; i < num_jobs; i++) {
    ScrapeJob *job = &jobs[i];
    job->handle = curl_easy_init();
    if (!job->handle) {
      continue;
    }

    /* NOTE(review): this malloc is unchecked — on failure the write
     * callback would copy into NULL. Consider bailing out here. */
    job->response.memory = (char *)malloc(16384);
    job->response.size = 0;
    job->response.capacity = 16384;

    char full_url[1024];
    char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
    if (!encoded_query) {
      curl_easy_cleanup(job->handle);
      job->handle = NULL;
      continue;
    }

    /* Map the 1-based page onto the engine's offset scheme. */
    int page = (job->page < 1) ? 1 : job->page;
    int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base;

    snprintf(full_url, sizeof(full_url), "%s%s&%s=%d",
             job->engine->base_url,
             encoded_query,
             job->engine->page_param,
             page_value);
    curl_free(encoded_query);

    struct curl_slist *headers = NULL;
    char host_buf[256], ref_buf[256];
    snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
    snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
    headers = curl_slist_append(headers, host_buf);
    headers = curl_slist_append(headers, ref_buf);
    headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
    headers = curl_slist_append(headers, "DNT: 1");

    configure_curl_handle(job->handle, full_url, &job->response, headers);

    /* Remember the header list so it can be freed after completion. */
    curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);

    curl_multi_add_handle(multi_handle, job->handle);
  }

  /* 100-200 ms random jitter before firing the requests — presumably
   * to look less bot-like; assumes rand() was seeded elsewhere (TODO
   * confirm, no srand() is visible in this file). */
  usleep(100000 + (rand() % 100000));

  int still_running = 0;
  curl_multi_perform(multi_handle, &still_running);

  /* Event loop: wait up to 1 s for socket activity, then let curl
   * advance every transfer, until none are still running. */
  do {
    int numfds = 0;
    CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);

    if (mc != CURLM_OK) {
      break;
    }

    curl_multi_perform(multi_handle, &still_running);
  } while (still_running);

  /* Harvest completed transfers, parse them, and release resources. */
  CURLMsg *msg;
  int msgs_left;
  while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
    if (msg->msg == CURLMSG_DONE) {
      CURL *handle = msg->easy_handle;

      /* Map the easy handle back to its job. */
      for (int i = 0; i < num_jobs; i++) {
        if (jobs[i].handle == handle) {
          ScrapeJob *job = &jobs[i];

          long response_code;
          curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);

          if (msg->data.result == CURLE_OK && job->response.size > 0) {
            xmlDocPtr doc = htmlReadMemory(
                job->response.memory, job->response.size, NULL, NULL,
                HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);

            if (doc) {
              job->results_count = job->engine->parser(
                  job->engine->name, doc, job->out_results, job->max_results);
              xmlFreeDoc(doc);
            }
          } else {
            job->results_count = 0;
          }

          /* Free the header list parked on the handle earlier. */
          struct curl_slist *headers;
          curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
          if (headers) curl_slist_free_all(headers);

          free(job->response.memory);
          curl_multi_remove_handle(multi_handle, handle);
          curl_easy_cleanup(handle);
          break;
        }
      }
    }
  }

  curl_multi_cleanup(multi_handle);
  return 0;
}
+
+int scrape_engine(const SearchEngine *engine, const char *query,
+ SearchResult **out_results, int max_results) {
+ ScrapeJob job = {
+ .engine = engine,
+ .query = (char *)query,
+ .out_results = out_results,
+ .max_results = max_results,
+ .results_count = 0,
+ .page = 1
+ };
+
+ scrape_engines_parallel(&job, 1);
+ return job.results_count;
+} \ No newline at end of file
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
new file mode 100644
index 0000000..d8a3b13
--- /dev/null
+++ b/src/Scraping/Scraping.h
@@ -0,0 +1,58 @@
+#ifndef SCRAPING_H
+#define SCRAPING_H
+
+#include <libxml/HTMLparser.h>
+#include <curl/curl.h>
+
+#define LOG_INFO(msg, ...) fprintf(stderr, "[INFO] " msg "\n", ##__VA_ARGS__)
+#define LOG_WARN(msg, ...) fprintf(stderr, "[WARN] " msg "\n", ##__VA_ARGS__)
+#define LOG_DEBUG(msg, ...) fprintf(stderr, "[DEBUG] " msg "\n", ##__VA_ARGS__)
+#define LOG_ERROR(msg, ...) fprintf(stderr, "[ERROR] " msg "\n", ##__VA_ARGS__)
+
/* One organic search hit; all three strings are heap-allocated and
 * owned by whoever received them from a parser. */
typedef struct {
  char *url;
  char *title;
  char *snippet;
} SearchResult;

/* Engine-specific HTML parser: reads `doc`, allocates *out_results
 * (at most max_results entries) and returns how many it filled in. */
typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
                          SearchResult **out_results, int max_results);

/* Static description of one upstream search engine. */
typedef struct {
  const char *name;
  const char *base_url;    /* search URL prefix; the query is appended */
  const char *host_header; /* value for the Host: request header */
  const char *referer;     /* value for the Referer: request header */

  const char *page_param;  /* query parameter carrying the page offset */
  int page_multiplier;     /* offset step per page */
  int page_base;           /* offset value of the first page */
  ParserFunc parser;
} SearchEngine;

/* Growable, NUL-terminated byte buffer used as the curl write target. */
typedef struct {
  char *memory;
  size_t size;     /* bytes currently stored (excluding the NUL) */
  size_t capacity; /* bytes allocated */
} MemoryBuffer;

/* State of one engine fetch within a parallel scrape. */
typedef struct {
  const SearchEngine *engine;
  char *query;
  SearchResult **out_results; /* where the parser stores its results */
  int max_results;
  int page;                   /* 1-based results page */
  CURL *handle;               /* managed by scrape_engines_parallel */
  MemoryBuffer response;      /* managed by scrape_engines_parallel */
  int results_count;          /* filled in once the scrape completes */
} ScrapeJob;

/* Built-in engine table, defined in Scraping.c. */
extern const SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT;

/* Scrapes a single engine synchronously (page 1); returns the number
 * of results written to *out_results. */
int scrape_engine(const SearchEngine *engine, const char *query,
                  SearchResult **out_results, int max_results);

/* Runs all jobs concurrently via the curl multi interface. Returns 0,
 * or -1 if the multi handle could not be created. */
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);
+
+#endif \ No newline at end of file
diff --git a/src/Utility/Display.c b/src/Utility/Display.c
new file mode 100644
index 0000000..492e998
--- /dev/null
+++ b/src/Utility/Display.c
@@ -0,0 +1,46 @@
+#include "Display.h"
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
/* Produces a human-friendly form of a URL for display: strips the
 * scheme and a leading "www.", drops one trailing slash, lowercases
 * everything, and renders path separators as " > "
 * (e.g. "https://www.Foo.com/A/B/" -> "foo.com > a > b").
 *
 * Returns a heap-allocated string the caller must free, or NULL on
 * NULL input / allocation failure. Inputs longer than 511 characters
 * (after stripping) are truncated.
 *
 * Fix vs. the previous version: the length used to index `temp` was
 * taken from the untruncated input, so URLs longer than the 512-byte
 * buffer read and wrote past its end. The length is now measured on
 * the truncated copy. snprintf also guarantees NUL termination, which
 * strncpy did not. */
char *pretty_display_url(const char *input) {
  if (!input) return NULL;

  const char *start = input;

  /* Skip "scheme://" if present. */
  const char *protocol_pos = strstr(input, "://");
  if (protocol_pos) {
    start = protocol_pos + 3;
  }

  if (strncasecmp(start, "www.", 4) == 0) {
    start += 4;
  }

  char temp[512];
  snprintf(temp, sizeof(temp), "%s", start);

  /* Measure AFTER truncation so the index stays inside the buffer. */
  size_t temp_len = strlen(temp);
  if (temp_len > 0 && temp[temp_len - 1] == '/') {
    temp[--temp_len] = '\0';
  }

  /* Worst case every char is '/' and expands to " > " (3 chars). */
  char *output = (char *)malloc(temp_len * 3 + 1);
  if (!output) return NULL;

  size_t j = 0;
  for (size_t i = 0; temp[i] != '\0'; i++) {
    if (temp[i] == '/') {
      output[j++] = ' ';
      output[j++] = '>';
      output[j++] = ' ';
    } else {
      output[j++] = (char)tolower((unsigned char)temp[i]);
    }
  }
  output[j] = '\0';

  return output;
}
diff --git a/src/Utility/Display.h b/src/Utility/Display.h
new file mode 100644
index 0000000..bbaf421
--- /dev/null
+++ b/src/Utility/Display.h
@@ -0,0 +1,6 @@
+#ifndef DISPLAY_H
+#define DISPLAY_H
+
+char *pretty_display_url(const char *input);
+
+#endif
diff --git a/src/Utility/Unescape.c b/src/Utility/Unescape.c
new file mode 100644
index 0000000..e2968b2
--- /dev/null
+++ b/src/Utility/Unescape.c
@@ -0,0 +1,80 @@
+#include "Unescape.h"
+#include "Utility.h"
+#include <stdlib.h>
+#include <string.h>
+
/* Extracts and percent-decodes the real destination URL from a search
 * engine redirect link:
 *   - DuckDuckGo: ...?uddg=<encoded>&...   (value ends at the next '&')
 *   - Yahoo:      .../RU=<encoded>/RK=...  (value ends at the next '/')
 * Any other input is returned as an unmodified copy. '+' is rendered
 * as a space, and malformed %XX sequences are copied through verbatim.
 *
 * Returns a heap-allocated string the caller must free, or NULL on
 * NULL input / allocation failure.
 *
 * Cleanups vs. the previous version: the two branches had dead,
 * inconsistent NULL checks after a successful strstr (one returned
 * NULL, the other a copy) — removed; and the output buffer was
 * over-allocated 3x (decoding never grows the text, len + 1 is
 * sufficient). */
char *unescape_search_url(const char *input) {
  if (!input) return NULL;

  const char *start = NULL;
  const char *end = NULL;
  size_t len = 0;

  if ((start = strstr(input, "uddg=")) != NULL) {
    start += strlen("uddg=");
    end = strchr(start, '&');
    len = end ? (size_t)(end - start) : strlen(start);
  } else if ((start = strstr(input, "RU=")) != NULL) {
    start += strlen("RU=");
    end = strchr(start, '/');
    len = end ? (size_t)(end - start) : strlen(start);
  } else {
    /* Not a known redirect format — hand back a plain copy. */
    return strdup(input);
  }

  char *output = (char *)malloc(len + 1);
  if (!output) return NULL;

  size_t i = 0, j = 0;
  while (i < len) {
    if (start[i] == '%' && i + 2 < len) {
      int high = hex_to_int(start[i + 1]);
      int low = hex_to_int(start[i + 2]);
      if (high != -1 && low != -1) {
        output[j++] = (char)((high << 4) | low);
        i += 3;
        continue;
      }
      /* Bad hex digits: fall through and copy the '%' literally. */
    }
    if (start[i] == '+') {
      /* '+' encodes a space in query-string values. */
      output[j++] = ' ';
      i++;
    } else {
      output[j++] = start[i++];
    }
  }
  output[j] = '\0';

  return output;
}
+
/* Decodes application/x-www-form-urlencoded text: '+' becomes a space
 * and a valid %XX sequence becomes the byte it encodes. Malformed
 * escapes — truncated ("100%") or with non-hex digits ("%zz") — are
 * copied through literally; the previous version fed them to strtol,
 * which yielded 0 and embedded a NUL that truncated the string.
 *
 * Returns a heap-allocated string the caller must free, or NULL on
 * NULL input / allocation failure (the old unchecked strdup would
 * dereference NULL on OOM). */
char *url_decode_query(const char *src) {
  if (!src) return NULL;

  static const char hex_digits[] = "0123456789abcdefABCDEF";

  /* Decoding never grows the text, so the input length bounds it. */
  char *res = (char *)malloc(strlen(src) + 1);
  if (!res) return NULL;

  char *p = res;
  while (*src) {
    if (*src == '+') {
      *p++ = ' ';
    } else if (*src == '%' && src[1] && src[2] &&
               strchr(hex_digits, src[1]) && strchr(hex_digits, src[2])) {
      char hex[3] = {src[1], src[2], '\0'};
      *p++ = (char)strtol(hex, NULL, 16);
      src += 2;
    } else {
      *p++ = *src;
    }
    src++;
  }
  *p = '\0';
  return res;
}
diff --git a/src/Utility/Unescape.h b/src/Utility/Unescape.h
new file mode 100644
index 0000000..0adb228
--- /dev/null
+++ b/src/Utility/Unescape.h
@@ -0,0 +1,10 @@
+#ifndef UNESCAPE_H
+#define UNESCAPE_H
+
+#include <stddef.h>
+
+char *unescape_search_url(const char *input);
+char *url_decode_query(const char *src);
+
+#endif
+
diff --git a/src/Utility/Utility.c b/src/Utility/Utility.c
new file mode 100644
index 0000000..8e5af92
--- /dev/null
+++ b/src/Utility/Utility.c
@@ -0,0 +1,8 @@
+#include "Utility.h"
+
/* Maps a single hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
 * value, or -1 for any other character. */
int hex_to_int(char c) {
  unsigned char u = (unsigned char)c;

  if (u >= '0' && u <= '9') {
    return u - '0';
  }

  /* Setting bit 5 folds 'A'-'F' onto 'a'-'f'; no other byte lands in
   * that range, so the mapping is unchanged for all inputs. */
  unsigned char folded = (unsigned char)(u | 0x20);
  if (folded >= 'a' && folded <= 'f') {
    return folded - 'a' + 10;
  }

  return -1;
}
diff --git a/src/Utility/Utility.h b/src/Utility/Utility.h
new file mode 100644
index 0000000..3b0181c
--- /dev/null
+++ b/src/Utility/Utility.h
@@ -0,0 +1,6 @@
+#ifndef UTILITY_H
+#define UTILITY_H
+
+int hex_to_int(char c);
+
+#endif