aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Infobox/Calculator.c168
-rw-r--r--src/Infobox/Dictionary.c376
-rw-r--r--src/Infobox/Infobox.c16
-rw-r--r--src/Infobox/Infobox.h8
-rw-r--r--src/Infobox/UnitConversion.c394
-rw-r--r--src/Infobox/Wikipedia.c144
-rw-r--r--src/Main.c40
-rw-r--r--src/Proxy/Proxy.c240
-rw-r--r--src/Routes/Home.c14
-rw-r--r--src/Routes/ImageProxy.c88
-rw-r--r--src/Routes/Images.c302
-rw-r--r--src/Routes/Search.c336
-rw-r--r--src/Scraping/Scraping.c570
-rw-r--r--src/Scraping/Scraping.h10
-rw-r--r--src/Utility/Display.c72
-rw-r--r--src/Utility/Utility.c8
16 files changed, 1393 insertions, 1393 deletions
diff --git a/src/Infobox/Calculator.c b/src/Infobox/Calculator.c
index b80ce21..22563f7 100644
--- a/src/Infobox/Calculator.c
+++ b/src/Infobox/Calculator.c
@@ -8,108 +8,108 @@
static char logic_log[4096];
typedef struct {
- const char *buffer;
- int pos;
+ const char *buffer;
+ int pos;
} Parser;
static double parse_expression(Parser *p);
static void skip_ws(Parser *p) {
- while (p->buffer[p->pos] == ' ') p->pos++;
+ while (p->buffer[p->pos] == ' ') p->pos++;
}
static double parse_factor(Parser *p) {
- skip_ws(p);
- if (p->buffer[p->pos] == '-') {
- p->pos++;
- return -parse_factor(p);
- }
- if (p->buffer[p->pos] == '(') {
- p->pos++;
- double res = parse_expression(p);
- if (p->buffer[p->pos] == ')') p->pos++;
- return res;
- }
- char *endptr;
- double val = strtod(&p->buffer[p->pos], &endptr);
- p->pos = (int)(endptr - p->buffer);
- return val;
+ skip_ws(p);
+ if (p->buffer[p->pos] == '-') {
+ p->pos++;
+ return -parse_factor(p);
+ }
+ if (p->buffer[p->pos] == '(') {
+ p->pos++;
+ double res = parse_expression(p);
+ if (p->buffer[p->pos] == ')') p->pos++;
+ return res;
+ }
+ char *endptr;
+ double val = strtod(&p->buffer[p->pos], &endptr);
+ p->pos = (int)(endptr - p->buffer);
+ return val;
}
static double parse_term(Parser *p) {
- double left = parse_factor(p);
- while (1) {
- skip_ws(p);
- char op = p->buffer[p->pos];
- if (op == '*' || op == '/') {
- p->pos++;
- double right = parse_factor(p);
- double old = left;
- left = (op == '*') ? left * right : left / right;
-
- char step[256];
-
- snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
- right, left);
- strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
- } else
- break;
- }
- return left;
+ double left = parse_factor(p);
+ while (1) {
+ skip_ws(p);
+ char op = p->buffer[p->pos];
+ if (op == '*' || op == '/') {
+ p->pos++;
+ double right = parse_factor(p);
+ double old = left;
+ left = (op == '*') ? left * right : left / right;
+
+ char step[256];
+
+ snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
+ right, left);
+ strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
+ } else
+ break;
+ }
+ return left;
}
static double parse_expression(Parser *p) {
- double left = parse_term(p);
- while (1) {
- skip_ws(p);
- char op = p->buffer[p->pos];
- if (op == '+' || op == '-') {
- p->pos++;
- double right = parse_term(p);
- double old = left;
- left = (op == '+') ? left + right : left - right;
-
- char step[256];
-
- snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
- right, left);
- strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
- } else
- break;
- }
- return left;
+ double left = parse_term(p);
+ while (1) {
+ skip_ws(p);
+ char op = p->buffer[p->pos];
+ if (op == '+' || op == '-') {
+ p->pos++;
+ double right = parse_term(p);
+ double old = left;
+ left = (op == '+') ? left + right : left - right;
+
+ char step[256];
+
+ snprintf(step, sizeof(step), "<div>%g %c %g = <b>%g</b></div>", old, op,
+ right, left);
+ strncat(logic_log, step, sizeof(logic_log) - strlen(logic_log) - 1);
+ } else
+ break;
+ }
+ return left;
}
double evaluate(const char *expr) {
- logic_log[0] = '\0';
- if (!expr || strlen(expr) == 0) return 0.0;
- Parser p = {expr, 0};
- return parse_expression(&p);
+ logic_log[0] = '\0';
+ if (!expr || strlen(expr) == 0) return 0.0;
+ Parser p = {expr, 0};
+ return parse_expression(&p);
}
InfoBox fetch_calc_data(char *math_input) {
- InfoBox info = {NULL, NULL, NULL, NULL};
- if (!math_input) return info;
-
- double result = evaluate(math_input);
-
- char html_output[5120];
- snprintf(html_output, sizeof(html_output),
- "<div class='calc-container' style='line-height: 1.6;'>"
- "%s"
- "<div style='margin-top: 8px; border-top: 1px solid #eee; "
- "padding-top: 8px; font-size: 1.2em;'>"
- "<b>%g</b>"
- "</div>"
- "</div>",
- strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
- result);
-
- info.title = strdup("Calculation");
- info.extract = strdup(html_output);
- info.thumbnail_url =
- strdup("/static/calculation.svg");
- info.url = strdup("#");
-
- return info;
+ InfoBox info = {NULL, NULL, NULL, NULL};
+ if (!math_input) return info;
+
+ double result = evaluate(math_input);
+
+ char html_output[5120];
+ snprintf(html_output, sizeof(html_output),
+ "<div class='calc-container' style='line-height: 1.6;'>"
+ "%s"
+ "<div style='margin-top: 8px; border-top: 1px solid #eee; "
+ "padding-top: 8px; font-size: 1.2em;'>"
+ "<b>%g</b>"
+ "</div>"
+ "</div>",
+ strlen(logic_log) > 0 ? logic_log : "<div>Constant value</div>",
+ result);
+
+ info.title = strdup("Calculation");
+ info.extract = strdup(html_output);
+ info.thumbnail_url =
+ strdup("/static/calculation.svg");
+ info.url = strdup("#");
+
+ return info;
}
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c
index ca4e5cd..1900c21 100644
--- a/src/Infobox/Dictionary.c
+++ b/src/Infobox/Dictionary.c
@@ -11,239 +11,239 @@
#include <ctype.h>
static const char *PREFIXES[] = {
- "what is the definition of ", "what's the definition of ",
- "what is the meaning of ", "what's the meaning of ",
- "what does the word ", "definition of ", "meaning of ", "def of ",
- "define ", "definition ", "define:", "def ", "def:",
- "what does ", "what is ", "what's ", "whats ",
- "meaning ", "dictionary ", "dict ", NULL
+ "what is the definition of ", "what's the definition of ",
+ "what is the meaning of ", "what's the meaning of ",
+ "what does the word ", "definition of ", "meaning of ", "def of ",
+ "define ", "definition ", "define:", "def ", "def:",
+ "what does ", "what is ", "what's ", "whats ",
+ "meaning ", "dictionary ", "dict ", NULL
};
static const char *SUFFIXES[] = {
- " definition", " def", " meaning", " mean", " means",
- " dictionary", " dict", " define", " defined",
- " definition?", " def?", " meaning?", " mean?", " means?",
- " in english", " in english?", NULL
+ " definition", " def", " meaning", " mean", " means",
+ " dictionary", " dict", " define", " defined",
+ " definition?", " def?", " meaning?", " mean?", " means?",
+ " in english", " in english?", NULL
};
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
static const char *strcasestr_impl(const char *haystack, const char *needle) {
- if (!haystack || !needle || !*needle) return haystack;
- size_t len = strlen(needle);
- for (const char *h = haystack; *h; h++) {
- if (strncasecmp(h, needle, len) == 0) return h;
- }
- return NULL;
+ if (!haystack || !needle || !*needle) return haystack;
+ size_t len = strlen(needle);
+ for (const char *h = haystack; *h; h++) {
+ if (strncasecmp(h, needle, len) == 0) return h;
+ }
+ return NULL;
}
struct MemStruct { char *memory; size_t size; };
static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
- size_t realsize = size * nmemb;
- struct MemStruct *mem = (struct MemStruct *)userp;
- char *ptr = realloc(mem->memory, mem->size + realsize + 1);
- if (!ptr) return 0;
- mem->memory = ptr;
- memcpy(&(mem->memory[mem->size]), contents, realsize);
- mem->size += realsize;
- mem->memory[mem->size] = 0;
- return realsize;
+ size_t realsize = size * nmemb;
+ struct MemStruct *mem = (struct MemStruct *)userp;
+ char *ptr = realloc(mem->memory, mem->size + realsize + 1);
+ if (!ptr) return 0;
+ mem->memory = ptr;
+ memcpy(&(mem->memory[mem->size]), contents, realsize);
+ mem->size += realsize;
+ mem->memory[mem->size] = 0;
+ return realsize;
}
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
- xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
- if (!ctx) return NULL;
- xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
- xmlXPathFreeContext(ctx);
- if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
- if (obj) xmlXPathFreeObject(obj);
- return NULL;
- }
- xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
- char *result = content ? strdup((char *)content) : NULL;
- if (content) xmlFree(content);
- xmlXPathFreeObject(obj);
- return result;
+ xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
+ if (!ctx) return NULL;
+ xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
+ xmlXPathFreeContext(ctx);
+ if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
+ if (obj) xmlXPathFreeObject(obj);
+ return NULL;
+ }
+ xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
+ char *result = content ? strdup((char *)content) : NULL;
+ if (content) xmlFree(content);
+ xmlXPathFreeObject(obj);
+ return result;
}
static char *build_html(const char *word, const char *pron, const char *pos,
- const char *def, const char *ex) {
- char html[4096];
- int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>");
- if (word) n += snprintf(html + n, sizeof(html) - n,
- "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
- if (pron) n += snprintf(html + n, sizeof(html) - n,
- "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
- if (pos) n += snprintf(html + n, sizeof(html) - n,
- "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
- if (def) n += snprintf(html + n, sizeof(html) - n,
- "<div style='margin-bottom: 8px;'>%s</div>", def);
- if (ex) n += snprintf(html + n, sizeof(html) - n,
- "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
- snprintf(html + n, sizeof(html) - n, "</div>");
- return strdup(html);
+ const char *def, const char *ex) {
+ char html[4096];
+ int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>");
+ if (word) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
+ if (pron) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
+ if (pos) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
+ if (def) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='margin-bottom: 8px;'>%s</div>", def);
+ if (ex) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
+ snprintf(html + n, sizeof(html) - n, "</div>");
+ return strdup(html);
}
static char *extract_word(const char *query) {
- if (!query) return NULL;
+ if (!query) return NULL;
- const char *start = query;
+ const char *start = query;
- for (int i = 0; PREFIXES[i]; i++) {
- size_t len = strlen(PREFIXES[i]);
- if (strncasecmp(start, PREFIXES[i], len) == 0) {
- start += len;
- break;
- }
+ for (int i = 0; PREFIXES[i]; i++) {
+ size_t len = strlen(PREFIXES[i]);
+ if (strncasecmp(start, PREFIXES[i], len) == 0) {
+ start += len;
+ break;
}
-
- while (*start == ' ') start++;
- char *word = strdup(start);
- if (!word) return NULL;
-
- int changed = 1;
- while (changed) {
- changed = 0;
- for (int i = 0; SKIP_WORDS[i]; i++) {
- size_t len = strlen(SKIP_WORDS[i]);
- if (strncasecmp(word, SKIP_WORDS[i], len) == 0) {
- memmove(word, word + len, strlen(word + len) + 1);
- changed = 1;
- break;
- }
- }
+ }
+
+ while (*start == ' ') start++;
+ char *word = strdup(start);
+ if (!word) return NULL;
+
+ int changed = 1;
+ while (changed) {
+ changed = 0;
+ for (int i = 0; SKIP_WORDS[i]; i++) {
+ size_t len = strlen(SKIP_WORDS[i]);
+ if (strncasecmp(word, SKIP_WORDS[i], len) == 0) {
+ memmove(word, word + len, strlen(word + len) + 1);
+ changed = 1;
+ break;
+ }
}
+ }
- changed = 1;
- while (changed) {
- changed = 0;
- for (int i = 0; SUFFIXES[i]; i++) {
- const char *found = strcasestr_impl(word, SUFFIXES[i]);
- if (found) {
- char *pos = word + (found - word);
- *pos = '\0';
- changed = 1;
- break;
- }
- }
+ changed = 1;
+ while (changed) {
+ changed = 0;
+ for (int i = 0; SUFFIXES[i]; i++) {
+ const char *found = strcasestr_impl(word, SUFFIXES[i]);
+ if (found) {
+ char *pos = word + (found - word);
+ *pos = '\0';
+ changed = 1;
+ break;
+ }
}
+ }
- size_t len = strlen(word);
- while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
- word[len-1] == '!' || word[len-1] == '.')) {
- word[--len] = '\0';
- }
+ size_t len = strlen(word);
+ while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
+ word[len-1] == '!' || word[len-1] == '.')) {
+ word[--len] = '\0';
+ }
- if (len == 0) { free(word); return NULL; }
+ if (len == 0) { free(word); return NULL; }
- for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
- char *space = strchr(word, ' ');
- if (space) *space = '\0';
+ for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
+ char *space = strchr(word, ' ');
+ if (space) *space = '\0';
- return word;
+ return word;
}
int is_dictionary_query(const char *query) {
- if (!query) return 0;
-
- for (int i = 0; PREFIXES[i]; i++) {
- size_t len = strlen(PREFIXES[i]);
- if (strncasecmp(query, PREFIXES[i], len) == 0) {
- const char *after = query + len;
- while (*after == ' ') after++;
- if (*after != '\0') return 1;
- }
+ if (!query) return 0;
+
+ for (int i = 0; PREFIXES[i]; i++) {
+ size_t len = strlen(PREFIXES[i]);
+ if (strncasecmp(query, PREFIXES[i], len) == 0) {
+ const char *after = query + len;
+ while (*after == ' ') after++;
+ if (*after != '\0') return 1;
}
-
- for (int i = 0; SUFFIXES[i]; i++) {
- const char *pos = strcasestr_impl(query, SUFFIXES[i]);
- if (pos) {
- const char *after = pos + strlen(SUFFIXES[i]);
- while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
- if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
- }
+ }
+
+ for (int i = 0; SUFFIXES[i]; i++) {
+ const char *pos = strcasestr_impl(query, SUFFIXES[i]);
+ if (pos) {
+ const char *after = pos + strlen(SUFFIXES[i]);
+ while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
+ if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
}
-
- if (strncasecmp(query, "what is ", 8) == 0 ||
- strncasecmp(query, "what's ", 7) == 0 ||
- strncasecmp(query, "whats ", 6) == 0) {
- const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
- strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
- const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
- "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
- for (int i = 0; articles[i]; i++) {
- if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
- }
- const char *space = strchr(word, ' ');
- if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
+ }
+
+ if (strncasecmp(query, "what is ", 8) == 0 ||
+ strncasecmp(query, "what's ", 7) == 0 ||
+ strncasecmp(query, "whats ", 6) == 0) {
+ const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
+ strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
+ const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
+ "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
+ for (int i = 0; articles[i]; i++) {
+ if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
}
+ const char *space = strchr(word, ' ');
+ if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
+ }
- return 0;
+ return 0;
}
char *construct_dictionary_url(const char *query) {
- char *word = extract_word(query);
- if (!word) return NULL;
-
- CURL *curl = curl_easy_init();
- if (!curl) { free(word); return NULL; }
-
- char *escaped = curl_easy_escape(curl, word, 0);
- const char *base = "https://dictionary.cambridge.org/dictionary/english/";
- char *url = malloc(strlen(base) + strlen(escaped) + 1);
- if (url) {
- strcpy(url, base);
- strcat(url, escaped);
- }
-
- curl_free(escaped);
- curl_easy_cleanup(curl);
- free(word);
- return url;
+ char *word = extract_word(query);
+ if (!word) return NULL;
+
+ CURL *curl = curl_easy_init();
+ if (!curl) { free(word); return NULL; }
+
+ char *escaped = curl_easy_escape(curl, word, 0);
+ const char *base = "https://dictionary.cambridge.org/dictionary/english/";
+ char *url = malloc(strlen(base) + strlen(escaped) + 1);
+ if (url) {
+ strcpy(url, base);
+ strcat(url, escaped);
+ }
+
+ curl_free(escaped);
+ curl_easy_cleanup(curl);
+ free(word);
+ return url;
}
InfoBox fetch_dictionary_data(const char *query) {
- InfoBox info = {NULL, NULL, NULL, NULL};
-
- char *url = construct_dictionary_url(query);
- if (!url) return info;
-
- CURL *curl = curl_easy_init();
- if (!curl) { free(url); return info; }
-
- struct MemStruct chunk = {malloc(1), 0};
- curl_easy_setopt(curl, CURLOPT_URL, url);
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
- curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
- apply_proxy_settings(curl);
-
- if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
- htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
- HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
- if (doc) {
- char *word = xpath_text(doc, "//span[@class='hw dhw']");
- char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
- char *pos = xpath_text(doc, "//span[@class='pos dpos']");
- char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
- char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
-
- if (word && def) {
- info.title = strdup("Dictionary");
- info.extract = build_html(word, pron, pos, def, ex);
- info.thumbnail_url = strdup("/static/dictionary.jpg");
- info.url = strdup(url);
- }
-
- free(word); free(pron); free(pos); free(def); free(ex);
- xmlFreeDoc(doc);
- }
+ InfoBox info = {NULL, NULL, NULL, NULL};
+
+ char *url = construct_dictionary_url(query);
+ if (!url) return info;
+
+ CURL *curl = curl_easy_init();
+ if (!curl) { free(url); return info; }
+
+ struct MemStruct chunk = {malloc(1), 0};
+ curl_easy_setopt(curl, CURLOPT_URL, url);
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
+ curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+ apply_proxy_settings(curl);
+
+ if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
+ htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+ if (doc) {
+ char *word = xpath_text(doc, "//span[@class='hw dhw']");
+ char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
+ char *pos = xpath_text(doc, "//span[@class='pos dpos']");
+ char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
+ char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
+
+ if (word && def) {
+ info.title = strdup("Dictionary");
+ info.extract = build_html(word, pron, pos, def, ex);
+ info.thumbnail_url = strdup("/static/dictionary.jpg");
+ info.url = strdup(url);
+ }
+
+ free(word); free(pron); free(pos); free(def); free(ex);
+ xmlFreeDoc(doc);
}
+ }
- curl_easy_cleanup(curl);
- free(chunk.memory);
- free(url);
- return info;
+ curl_easy_cleanup(curl);
+ free(chunk.memory);
+ free(url);
+ return info;
} \ No newline at end of file
diff --git a/src/Infobox/Infobox.c b/src/Infobox/Infobox.c
index 5043c05..337b057 100644
--- a/src/Infobox/Infobox.c
+++ b/src/Infobox/Infobox.c
@@ -2,12 +2,12 @@
#include <stdlib.h>
void free_infobox(InfoBox *info) {
- if (info->title)
- free(info->title);
- if (info->thumbnail_url)
- free(info->thumbnail_url);
- if (info->extract)
- free(info->extract);
- if (info->url)
- free(info->url);
+ if (info->title)
+ free(info->title);
+ if (info->thumbnail_url)
+ free(info->thumbnail_url);
+ if (info->extract)
+ free(info->extract);
+ if (info->url)
+ free(info->url);
}
diff --git a/src/Infobox/Infobox.h b/src/Infobox/Infobox.h
index a052b80..ecb16b6 100644
--- a/src/Infobox/Infobox.h
+++ b/src/Infobox/Infobox.h
@@ -2,10 +2,10 @@
#define INFOBOX_H
typedef struct {
- char *title;
- char *thumbnail_url;
- char *extract;
- char *url;
+ char *title;
+ char *thumbnail_url;
+ char *extract;
+ char *url;
} InfoBox;
void free_infobox(InfoBox *info);
diff --git a/src/Infobox/UnitConversion.c b/src/Infobox/UnitConversion.c
index 6a08b9c..933dd71 100644
--- a/src/Infobox/UnitConversion.c
+++ b/src/Infobox/UnitConversion.c
@@ -82,24 +82,24 @@ static const UnitDef *find_unit(const char *str) {
size_t j = 0;
for (size_t i = 0; i < len && j < 63; i++) {
- if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i+1] == 0xB0) {
- i++;
- continue;
- }
- if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') {
- normalized[j++] = '2';
- i++;
- continue;
- }
- normalized[j++] = tolower((unsigned char)str[i]);
+ if ((unsigned char)str[i] == 0xC2 && (unsigned char)str[i+1] == 0xB0) {
+ i++;
+ continue;
+ }
+ if (str[i] == '^' && i + 1 < len && str[i + 1] == '2') {
+ normalized[j++] = '2';
+ i++;
+ continue;
+ }
+ normalized[j++] = tolower((unsigned char)str[i]);
}
normalized[j] = '\0';
for (int i = 0; i < UNIT_COUNT; i++) {
- if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i];
- for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
- if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i];
- }
+ if (strcmp(normalized, UNITS[i].name) == 0) return &UNITS[i];
+ for (int k = 0; k < 4 && UNITS[i].alias[k]; k++) {
+ if (strcmp(normalized, UNITS[i].alias[k]) == 0) return &UNITS[i];
+ }
}
return NULL;
}
@@ -108,37 +108,37 @@ int is_unit_conv_query(const char *query) {
if (!query) return 0;
const char *patterns[] = {
- " to ", " in ", " into ",
- " = ", " equals ", " equal ",
- " convert ", " conversion ",
- " -> ", " → ",
- NULL
+ " to ", " in ", " into ",
+ " = ", " equals ", " equal ",
+ " convert ", " conversion ",
+ " -> ", " → ",
+ NULL
};
int has_pattern = 0;
for (int i = 0; patterns[i]; i++) {
- if (strstr(query, patterns[i])) {
- has_pattern = 1;
- break;
- }
+ if (strstr(query, patterns[i])) {
+ has_pattern = 1;
+ break;
+ }
}
if (!has_pattern) {
- const char *last_space = strrchr(query, ' ');
- if (last_space) {
- const UnitDef *u = find_unit(last_space + 1);
- if (u) {
- const char *before = query;
- while (*before && is_whitespace(*before)) before++;
- const char *num_end = before;
- while (*num_end &&
- (isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
- *num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) {
- num_end++;
- }
- if (num_end > before) has_pattern = 1;
- }
+ const char *last_space = strrchr(query, ' ');
+ if (last_space) {
+ const UnitDef *u = find_unit(last_space + 1);
+ if (u) {
+ const char *before = query;
+ while (*before && is_whitespace(*before)) before++;
+ const char *num_end = before;
+ while (*num_end &&
+ (isdigit(*num_end) || *num_end == '.' || *num_end == '-' ||
+ *num_end == '+' || *num_end == '/' || *num_end == '\'' || *num_end == '"')) {
+ num_end++;
}
+ if (num_end > before) has_pattern = 1;
+ }
+ }
}
return has_pattern;
@@ -153,58 +153,58 @@ static double parse_value(const char **ptr) {
if (*p == '-' || *p == '+') p++;
while (*p >= '0' && *p <= '9') {
- value = value * 10 + (*p - '0');
- has_num = 1;
- p++;
+ value = value * 10 + (*p - '0');
+ has_num = 1;
+ p++;
}
if (*p == '.') {
+ p++;
+ double frac = 0.1;
+ while (*p >= '0' && *p <= '9') {
+ value += (*p - '0') * frac;
+ frac *= 0.1;
+ has_num = 1;
p++;
- double frac = 0.1;
- while (*p >= '0' && *p <= '9') {
- value += (*p - '0') * frac;
- frac *= 0.1;
- has_num = 1;
- p++;
- }
+ }
}
if (*p == '/' && has_num) {
+ p++;
+ double denom = 0.0;
+ int has_denom = 0;
+ while (*p >= '0' && *p <= '9') {
+ denom = denom * 10 + (*p - '0');
+ has_denom = 1;
p++;
- double denom = 0.0;
- int has_denom = 0;
- while (*p >= '0' && *p <= '9') {
- denom = denom * 10 + (*p - '0');
- has_denom = 1;
- p++;
- }
- if (has_denom && denom > 0) {
- value = value / denom;
- }
+ }
+ if (has_denom && denom > 0) {
+ value = value / denom;
+ }
}
while (*p == '\'' || *p == '"') {
- double extra = 0.0;
+ double extra = 0.0;
+ p++;
+ while (*p >= '0' && *p <= '9') {
+ extra = extra * 10 + (*p - '0');
+ p++;
+ }
+ if (*p == '.') {
p++;
+ double frac = 0.1;
while (*p >= '0' && *p <= '9') {
- extra = extra * 10 + (*p - '0');
- p++;
- }
- if (*p == '.') {
- p++;
- double frac = 0.1;
- while (*p >= '0' && *p <= '9') {
- extra += (*p - '0') * frac;
- frac *= 0.1;
- p++;
- }
+ extra += (*p - '0') * frac;
+ frac *= 0.1;
+ p++;
}
- if (*p == '\'' || *p == '"') p++;
- value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
+ }
+ if (*p == '\'' || *p == '"') p++;
+ value += extra * (p[-1] == '\'' ? 0.3048 : 0.0254);
}
if (!has_num) {
- *ptr = p;
- return 0.0;
+ *ptr = p;
+ return 0.0;
}
*ptr = p;
@@ -235,29 +235,29 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
const char *to_pos = NULL;
size_t keyword_len = 0;
for (int i = 0; to_keywords[i]; i++) {
- const char *found = strstr(p, to_keywords[i]);
- if (found) {
- to_pos = found + strlen(to_keywords[i]);
- keyword_len = strlen(to_keywords[i]);
- break;
- }
+ const char *found = strstr(p, to_keywords[i]);
+ if (found) {
+ to_pos = found + strlen(to_keywords[i]);
+ keyword_len = strlen(to_keywords[i]);
+ break;
+ }
}
if (!to_pos) {
- const char *last_space = strrchr(p, ' ');
- if (last_space && last_space > p) {
- char from_part[64] = {0};
- size_t len = last_space - p;
- if (len < 63) {
- strncpy(from_part, p, len);
- *from_unit = find_unit(from_part);
- if (*from_unit) {
- *to_unit = find_unit(last_space + 1);
- return *to_unit ? 1 : 0;
- }
- }
+ const char *last_space = strrchr(p, ' ');
+ if (last_space && last_space > p) {
+ char from_part[64] = {0};
+ size_t len = last_space - p;
+ if (len < 63) {
+ strncpy(from_part, p, len);
+ *from_unit = find_unit(from_part);
+ if (*from_unit) {
+ *to_unit = find_unit(last_space + 1);
+ return *to_unit ? 1 : 0;
+ }
}
- return 0;
+ }
+ return 0;
}
char from_part[64] = {0};
@@ -271,20 +271,20 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
*from_unit = find_unit(from_part);
if (!*from_unit) {
- char *end = from_part + strlen(from_part);
- while (end > from_part) {
- while (end > from_part && is_whitespace(end[-1])) end--;
- if (end <= from_part) break;
- char *start = end;
- while (start > from_part && !is_whitespace(start[-1])) start--;
- size_t word_len = end - start;
- memmove(from_part + word_len + 1, from_part, start - from_part);
- from_part[word_len] = ' ';
- from_part[word_len + 1] = '\0';
- *from_unit = find_unit(from_part);
- if (*from_unit) break;
- end = start;
- }
+ char *end = from_part + strlen(from_part);
+ while (end > from_part) {
+ while (end > from_part && is_whitespace(end[-1])) end--;
+ if (end <= from_part) break;
+ char *start = end;
+ while (start > from_part && !is_whitespace(start[-1])) start--;
+ size_t word_len = end - start;
+ memmove(from_part + word_len + 1, from_part, start - from_part);
+ from_part[word_len] = ' ';
+ from_part[word_len + 1] = '\0';
+ *from_unit = find_unit(from_part);
+ if (*from_unit) break;
+ end = start;
+ }
}
if (!*from_unit) return 0;
@@ -297,30 +297,30 @@ static int parse_conversion_query(const char *query, double *value, const UnitDe
size_t to_len = 0;
const char *tp = to_pos;
while (*tp && !is_separator(*tp) && to_len < 63) {
- to_part[to_len++] = *tp++;
+ to_part[to_len++] = *tp++;
}
to_part[to_len] = '\0';
*to_unit = find_unit(to_part);
if (!*to_unit) {
- const char *try_ptr = to_pos;
- while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++;
- char try_buf[64] = {0};
- size_t try_len = 0;
- while (*try_ptr && try_len < 63) {
- try_buf[try_len++] = *try_ptr++;
- }
- while (try_len > 0) {
- *to_unit = find_unit(try_buf);
- if (*to_unit) {
- strcpy(to_part, try_buf);
- break;
- }
- char *last_space = strrchr(try_buf, ' ');
- if (!last_space) break;
- *last_space = '\0';
- try_len = strlen(try_buf);
+ const char *try_ptr = to_pos;
+ while (*try_ptr && is_whitespace(*try_ptr)) try_ptr++;
+ char try_buf[64] = {0};
+ size_t try_len = 0;
+ while (*try_ptr && try_len < 63) {
+ try_buf[try_len++] = *try_ptr++;
+ }
+ while (try_len > 0) {
+ *to_unit = find_unit(try_buf);
+ if (*to_unit) {
+ strcpy(to_part, try_buf);
+ break;
}
+ char *last_space = strrchr(try_buf, ' ');
+ if (!last_space) break;
+ *last_space = '\0';
+ try_len = strlen(try_buf);
+ }
}
return *to_unit ? 1 : 0;
@@ -343,7 +343,7 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
if (from->type != to->type) return 0;
if (from->type == UNIT_TEMP) {
- return convert_temp(value, from, to);
+ return convert_temp(value, from, to);
}
double base_value = value * from->to_base;
@@ -353,23 +353,23 @@ static double convert_value(double value, const UnitDef *from, const UnitDef *to
static void format_number(double val, char *buf, size_t bufsize) {
if (bufsize == 0) return;
if (val == 0) {
- snprintf(buf, bufsize, "0");
- return;
+ snprintf(buf, bufsize, "0");
+ return;
}
if (fabs(val) < 0.01 && fabs(val) > 0) {
- snprintf(buf, bufsize, "%.2g", val);
+ snprintf(buf, bufsize, "%.2g", val);
} else if (fabs(val) < 1) {
- snprintf(buf, bufsize, "%.2f", val);
- char *p = buf + strlen(buf) - 1;
- while (p > buf && *p == '0') *p-- = '\0';
- if (*p == '.') *p = '\0';
+ snprintf(buf, bufsize, "%.2f", val);
+ char *p = buf + strlen(buf) - 1;
+ while (p > buf && *p == '0') *p-- = '\0';
+ if (*p == '.') *p = '\0';
} else if (fmod(val + 0.0001, 1.0) < 0.0002) {
- snprintf(buf, bufsize, "%.0f", val);
+ snprintf(buf, bufsize, "%.0f", val);
} else {
- snprintf(buf, bufsize, "%.2f", val);
- char *p = buf + strlen(buf) - 1;
- while (p > buf && *p == '0') *p-- = '\0';
- if (*p == '.') *p = '\0';
+ snprintf(buf, bufsize, "%.2f", val);
+ char *p = buf + strlen(buf) - 1;
+ while (p > buf && *p == '0') *p-- = '\0';
+ if (*p == '.') *p = '\0';
}
}
@@ -383,74 +383,74 @@ static const char *pluralize(const char *unit, double value, char *buf, size_t b
buf[bufsize - 1] = '\0';
if (strcmp(unit, "foot") == 0 || strcmp(unit, "square foot") == 0) {
- if (is_one) strcpy(buf, unit);
- else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
- return buf;
+ if (is_one) strcpy(buf, unit);
+ else strcpy(buf, strcmp(unit, "square foot") == 0 ? "square feet" : "feet");
+ return buf;
}
if (strcmp(unit, "inch") == 0 || strcmp(unit, "square inch") == 0) {
- if (is_one) strcpy(buf, unit);
- else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
- return buf;
+ if (is_one) strcpy(buf, unit);
+ else strcpy(buf, strcmp(unit, "square inch") == 0 ? "square inches" : "inches");
+ return buf;
}
if (strcmp(unit, "stone") == 0) {
- if (is_one) strcpy(buf, "stone");
- else strcpy(buf, "stones");
- return buf;
+ if (is_one) strcpy(buf, "stone");
+ else strcpy(buf, "stones");
+ return buf;
}
if (strcmp(unit, "celsius") == 0 ||
- strcmp(unit, "fahrenheit") == 0 ||
- strcmp(unit, "kelvin") == 0) {
- strcpy(buf, unit);
- return buf;
+ strcmp(unit, "fahrenheit") == 0 ||
+ strcmp(unit, "kelvin") == 0) {
+ strcpy(buf, unit);
+ return buf;
}
if (unit[len-1] == 's' ||
- unit[len-1] == 'x' ||
- unit[len-1] == 'z' ||
- (len >= 2 && unit[len-2] == 'c' && unit[len-1] == 'h') ||
- (len >= 2 && unit[len-2] == 's' && unit[len-1] == 'h')) {
- if (!is_one) {
- buf[len] = 'e';
- buf[len+1] = '\0';
- }
+ unit[len-1] == 'x' ||
+ unit[len-1] == 'z' ||
+ (len >= 2 && unit[len-2] == 'c' && unit[len-1] == 'h') ||
+ (len >= 2 && unit[len-2] == 's' && unit[len-1] == 'h')) {
+ if (!is_one) {
+ buf[len] = 'e';
+ buf[len+1] = '\0';
+ }
} else if (unit[len-1] == 'y' && len >= 2 &&
- !(unit[len-2] == 'a' || unit[len-2] == 'e' ||
- unit[len-2] == 'i' || unit[len-2] == 'o' ||
- unit[len-2] == 'u')) {
- if (is_one) {
- buf[len-1] = '\0';
- } else {
- buf[len] = 's';
- buf[len+1] = '\0';
- }
+ !(unit[len-2] == 'a' || unit[len-2] == 'e' ||
+ unit[len-2] == 'i' || unit[len-2] == 'o' ||
+ unit[len-2] == 'u')) {
+ if (is_one) {
+ buf[len-1] = '\0';
+ } else {
+ buf[len] = 's';
+ buf[len+1] = '\0';
+ }
} else if (len >= 2 && unit[len-2] == 'f' && unit[len-1] == 'e') {
- if (is_one) {
- buf[len-2] = '\0';
- } else {
- buf[len-1] = 's';
- buf[len] = '\0';
- }
+ if (is_one) {
+ buf[len-2] = '\0';
+ } else {
+ buf[len-1] = 's';
+ buf[len] = '\0';
+ }
} else if (unit[len-1] == 'f' && len >= 1) {
- if (is_one) {
- buf[len-1] = '\0';
- } else {
- buf[len-1] = 'v';
- buf[len] = 'e';
- buf[len+1] = 's';
- buf[len+2] = '\0';
- }
+ if (is_one) {
+ buf[len-1] = '\0';
+ } else {
+ buf[len-1] = 'v';
+ buf[len] = 'e';
+ buf[len+1] = 's';
+ buf[len+2] = '\0';
+ }
} else if (unit[len-1] == 'e' && len >= 2 && unit[len-2] == 'f') {
- if (is_one) {
- buf[len-2] = '\0';
- } else {
- buf[len-1] = 's';
- buf[len] = '\0';
- }
+ if (is_one) {
+ buf[len-2] = '\0';
} else {
- if (!is_one) {
- buf[len] = 's';
- buf[len+1] = '\0';
- }
+ buf[len-1] = 's';
+ buf[len] = '\0';
+ }
+ } else {
+ if (!is_one) {
+ buf[len] = 's';
+ buf[len+1] = '\0';
+ }
}
return buf;
@@ -466,12 +466,12 @@ static char *build_html(double value, const UnitDef *from, double result, const
pluralize(to->name, result, to_name_buf, sizeof(to_name_buf));
int n = snprintf(html, sizeof(html),
- "<div class='unit-conv-container' style='line-height: 1.6;'>"
- "<div style='font-size: 1.3em; margin-bottom: 8px;'>"
- "<b>%s %s</b> = <b>%s %s</b>"
- "</div>",
- val_buf, from_name_buf,
- res_buf, to_name_buf);
+ "<div class='unit-conv-container' style='line-height: 1.6;'>"
+ "<div style='font-size: 1.3em; margin-bottom: 8px;'>"
+ "<b>%s %s</b> = <b>%s %s</b>"
+ "</div>",
+ val_buf, from_name_buf,
+ res_buf, to_name_buf);
snprintf(html + n, sizeof(html) - n, "</div>");
return html;
}
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
index dff26f6..09c13c6 100644
--- a/src/Infobox/Wikipedia.c
+++ b/src/Infobox/Wikipedia.c
@@ -23,32 +23,32 @@ static void shorten_summary(char **extract_ptr, int max_chars) {
int end_pos = max_chars;
for (int i = max_chars; i > (max_chars / 2); i--) {
- if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
- end_pos = i + 1;
- break;
- }
+ if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
+ end_pos = i + 1;
+ break;
+ }
}
char *new_text = (char *)malloc(end_pos + 4);
if (new_text) {
- strncpy(new_text, text, end_pos);
- new_text[end_pos] = '\0';
- strcat(new_text, "...");
- free(*extract_ptr);
- *extract_ptr = new_text;
+ strncpy(new_text, text, end_pos);
+ new_text[end_pos] = '\0';
+ strcat(new_text, "...");
+ free(*extract_ptr);
+ *extract_ptr = new_text;
}
}
static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
- void *userp) {
+ void *userp) {
size_t realsize = size * nmemb;
struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
if (ptr == NULL) {
- fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
- return 0;
+ fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
+ return 0;
}
mem->memory = ptr;
@@ -63,48 +63,48 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
xmlNode *cur_node = NULL;
for (cur_node = node; cur_node; cur_node = cur_node->next) {
- if (cur_node->type == XML_ELEMENT_NODE) {
- if (strcmp((const char *)cur_node->name, "page") == 0) {
- xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
- if (title) {
- info->title = strdup((const char *)title);
-
- const char *base_article_url = "https://en.wikipedia.org/wiki/";
- char *formatted_title = strdup((const char *)title);
- for (int i = 0; formatted_title[i]; i++) {
- if (formatted_title[i] == ' ') formatted_title[i] = '_';
- }
-
- info->url =
- malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
- if (info->url) {
- strcpy(info->url, base_article_url);
- strcat(info->url, formatted_title);
- }
- free(formatted_title);
- xmlFree(title);
- }
+ if (cur_node->type == XML_ELEMENT_NODE) {
+ if (strcmp((const char *)cur_node->name, "page") == 0) {
+ xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
+ if (title) {
+ info->title = strdup((const char *)title);
+
+ const char *base_article_url = "https://en.wikipedia.org/wiki/";
+ char *formatted_title = strdup((const char *)title);
+ for (int i = 0; formatted_title[i]; i++) {
+ if (formatted_title[i] == ' ') formatted_title[i] = '_';
}
- if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
- xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
- if (source) {
- info->thumbnail_url = strdup((const char *)source);
- xmlFree(source);
- }
+ info->url =
+ malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
+ if (info->url) {
+ strcpy(info->url, base_article_url);
+ strcat(info->url, formatted_title);
}
+ free(formatted_title);
+ xmlFree(title);
+ }
+ }
+
+ if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
+ xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
+ if (source) {
+ info->thumbnail_url = strdup((const char *)source);
+ xmlFree(source);
+ }
+ }
- if (strcmp((const char *)cur_node->name, "extract") == 0) {
- xmlChar *content = xmlNodeGetContent(cur_node);
- if (content) {
- info->extract = strdup((const char *)content);
+ if (strcmp((const char *)cur_node->name, "extract") == 0) {
+ xmlChar *content = xmlNodeGetContent(cur_node);
+ if (content) {
+ info->extract = strdup((const char *)content);
- shorten_summary(&(info->extract), 300);
- xmlFree(content);
- }
- }
+ shorten_summary(&(info->extract), 300);
+ xmlFree(content);
+ }
}
- extract_wiki_info(cur_node->children, info);
+ }
+ extract_wiki_info(cur_node->children, info);
}
}
@@ -120,27 +120,27 @@ InfoBox fetch_wiki_data(char *api_url) {
curl_handle = curl_easy_init();
if (curl_handle) {
- curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
- curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
- WikiWriteMemoryCallback);
- curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
- curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
- apply_proxy_settings(curl_handle);
-
- res = curl_easy_perform(curl_handle);
-
- if (res == CURLE_OK) {
- xmlDocPtr doc =
- xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
- if (doc != NULL) {
- xmlNode *root_element = xmlDocGetRootElement(doc);
- extract_wiki_info(root_element, &info);
- xmlFreeDoc(doc);
- }
+ curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
+ WikiWriteMemoryCallback);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
+ curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
+ apply_proxy_settings(curl_handle);
+
+ res = curl_easy_perform(curl_handle);
+
+ if (res == CURLE_OK) {
+ xmlDocPtr doc =
+ xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
+ if (doc != NULL) {
+ xmlNode *root_element = xmlDocGetRootElement(doc);
+ extract_wiki_info(root_element, &info);
+ xmlFreeDoc(doc);
}
+ }
- curl_easy_cleanup(curl_handle);
- free(chunk.memory);
+ curl_easy_cleanup(curl_handle);
+ free(chunk.memory);
}
return info;
@@ -152,14 +152,14 @@ char *construct_wiki_url(const char *search_term) {
char *escaped_term = curl_easy_escape(curl, search_term, 0);
const char *base =
- "https://en.wikipedia.org/w/"
- "api.php?action=query&prop=extracts|pageimages&exintro&"
- "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
+ "https://en.wikipedia.org/w/"
+ "api.php?action=query&prop=extracts|pageimages&exintro&"
+ "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
if (full_url) {
- strcpy(full_url, base);
- strcat(full_url, escaped_term);
+ strcpy(full_url, base);
+ strcat(full_url, escaped_term);
}
curl_free(escaped_term);
diff --git a/src/Main.c b/src/Main.c
index 4475c5d..83cfaf0 100644
--- a/src/Main.c
+++ b/src/Main.c
@@ -14,9 +14,9 @@
#include "Routes/Search.h"
int handle_opensearch(UrlParams *params) {
- (void)params;
- serve_static_file_with_mime("opensearch.xml", "application/opensearchdescription+xml");
- return 0;
+ (void)params;
+ serve_static_file_with_mime("opensearch.xml", "application/opensearchdescription+xml");
+ return 0;
}
int main() {
@@ -31,32 +31,32 @@ int main() {
curl_global_init(CURL_GLOBAL_DEFAULT);
Config config = {
- .host = "0.0.0.0",
- .port = 5000,
- .proxy = "",
- .proxy_list_file = "",
- .max_proxy_retries = 3,
- .randomize_username = 0,
- .randomize_password = 0
+ .host = "0.0.0.0",
+ .port = 5000,
+ .proxy = "",
+ .proxy_list_file = "",
+ .max_proxy_retries = 3,
+ .randomize_username = 0,
+ .randomize_password = 0
};
if (load_config("config.ini", &config) != 0) {
- fprintf(stderr, "Warning: Could not load config file, using defaults\n");
+ fprintf(stderr, "Warning: Could not load config file, using defaults\n");
}
if (config.proxy_list_file[0] != '\0') {
- if (load_proxy_list(config.proxy_list_file) < 0) {
- fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n");
- }
+ if (load_proxy_list(config.proxy_list_file) < 0) {
+ fprintf(stderr, "Warning: Failed to load proxy list, continuing without proxies\n");
+ }
}
max_proxy_retries = config.max_proxy_retries;
set_proxy_config(config.proxy, config.randomize_username, config.randomize_password);
if (proxy_url[0] != '\0') {
- fprintf(stderr, "Using proxy: %s\n", proxy_url);
+ fprintf(stderr, "Using proxy: %s\n", proxy_url);
} else if (proxy_count > 0) {
- fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file);
+ fprintf(stderr, "Using %d proxies from %s\n", proxy_count, config.proxy_list_file);
}
set_handler("/", home_handler);
@@ -70,10 +70,10 @@ int main() {
int result = beaker_run(config.host, config.port);
if (result != 0) {
- fprintf(stderr, "Error: Beaker server failed to start.\n");
- curl_global_cleanup();
- xmlCleanupParser();
- return EXIT_FAILURE;
+ fprintf(stderr, "Error: Beaker server failed to start.\n");
+ curl_global_cleanup();
+ xmlCleanupParser();
+ return EXIT_FAILURE;
}
curl_global_cleanup();
diff --git a/src/Proxy/Proxy.c b/src/Proxy/Proxy.c
index 939aea0..9908350 100644
--- a/src/Proxy/Proxy.c
+++ b/src/Proxy/Proxy.c
@@ -17,15 +17,15 @@ static const char RAND_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRST
static void generate_random_string(char *buf, size_t len) {
for (size_t i = 0; i < len - 1; i++) {
- buf[i] = RAND_CHARS[rand() % (sizeof(RAND_CHARS) - 1)];
+ buf[i] = RAND_CHARS[rand() % (sizeof(RAND_CHARS) - 1)];
}
buf[len - 1] = '\0';
}
void set_proxy_config(const char *proxy_str, int rand_user, int rand_pass) {
if (proxy_str && proxy_str[0]) {
- strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1);
- proxy_url[sizeof(proxy_url) - 1] = '\0';
+ strncpy(proxy_url, proxy_str, sizeof(proxy_url) - 1);
+ proxy_url[sizeof(proxy_url) - 1] = '\0';
}
randomize_username = rand_user;
randomize_password = rand_pass;
@@ -40,49 +40,49 @@ static Proxy parse_proxy_line(const char *line) {
if (len == 0) return proxy;
if (strncmp(line, "http://", 7) == 0) {
- proxy.type = PROXY_HTTP;
- host_start = line + 7;
+ proxy.type = PROXY_HTTP;
+ host_start = line + 7;
} else if (strncmp(line, "socks5://", 9) == 0) {
- proxy.type = PROXY_SOCKS5;
- host_start = line + 9;
+ proxy.type = PROXY_SOCKS5;
+ host_start = line + 9;
} else if (strncmp(line, "socks4://", 9) == 0) {
- proxy.type = PROXY_SOCKS4;
- host_start = line + 9;
+ proxy.type = PROXY_SOCKS4;
+ host_start = line + 9;
} else {
- host_start = line;
+ host_start = line;
}
const char *at = strchr(host_start, '@');
if (at) {
- char cred_buf[128];
- size_t cred_len = at - host_start;
- if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1;
- strncpy(cred_buf, host_start, cred_len);
- cred_buf[cred_len] = '\0';
-
- char *colon = strchr(cred_buf, ':');
- if (colon) {
- size_t user_len = colon - cred_buf;
- if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1;
- strncpy(proxy.username, cred_buf, user_len);
- proxy.username[user_len] = '\0';
- strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
- proxy.password[sizeof(proxy.password) - 1] = '\0';
- }
- host_start = at + 1;
+ char cred_buf[128];
+ size_t cred_len = at - host_start;
+ if (cred_len >= sizeof(cred_buf)) cred_len = sizeof(cred_buf) - 1;
+ strncpy(cred_buf, host_start, cred_len);
+ cred_buf[cred_len] = '\0';
+
+ char *colon = strchr(cred_buf, ':');
+ if (colon) {
+ size_t user_len = colon - cred_buf;
+ if (user_len >= sizeof(proxy.username)) user_len = sizeof(proxy.username) - 1;
+ strncpy(proxy.username, cred_buf, user_len);
+ proxy.username[user_len] = '\0';
+ strncpy(proxy.password, colon + 1, sizeof(proxy.password) - 1);
+ proxy.password[sizeof(proxy.password) - 1] = '\0';
+ }
+ host_start = at + 1;
}
port_start = strchr(host_start, ':');
if (port_start) {
- char host_buf[256];
- size_t host_len = port_start - host_start;
- if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1;
- strncpy(host_buf, host_start, host_len);
- host_buf[host_len] = '\0';
- snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
- proxy.port = atoi(port_start + 1);
+ char host_buf[256];
+ size_t host_len = port_start - host_start;
+ if (host_len >= sizeof(host_buf)) host_len = sizeof(host_buf) - 1;
+ strncpy(host_buf, host_start, host_len);
+ host_buf[host_len] = '\0';
+ snprintf(proxy.host, sizeof(proxy.host), "%.*s", (int)host_len, host_buf);
+ proxy.port = atoi(port_start + 1);
} else {
- snprintf(proxy.host, sizeof(proxy.host), "%s", host_start);
+ snprintf(proxy.host, sizeof(proxy.host), "%s", host_start);
}
return proxy;
@@ -90,71 +90,71 @@ static Proxy parse_proxy_line(const char *line) {
int load_proxy_list(const char *filename) {
if (!filename || filename[0] == '\0') {
- return 0;
+ return 0;
}
pthread_mutex_lock(&proxy_mutex);
if (proxy_list) {
- free(proxy_list);
- proxy_list = NULL;
+ free(proxy_list);
+ proxy_list = NULL;
}
proxy_count = 0;
FILE *file = fopen(filename, "r");
if (!file) {
- pthread_mutex_unlock(&proxy_mutex);
- fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename);
- return -1;
+ pthread_mutex_unlock(&proxy_mutex);
+ fprintf(stderr, "[WARN] Could not open proxy list file: %s\n", filename);
+ return -1;
}
int capacity = 16;
proxy_list = (Proxy *)malloc(capacity * sizeof(Proxy));
if (!proxy_list) {
- fclose(file);
- return -1;
+ fclose(file);
+ return -1;
}
proxy_count = 0;
char line[512];
while (fgets(line, sizeof(line), file)) {
- line[strcspn(line, "\r\n")] = 0;
+ line[strcspn(line, "\r\n")] = 0;
- if (line[0] == '\0' || line[0] == '#') {
- continue;
- }
+ if (line[0] == '\0' || line[0] == '#') {
+ continue;
+ }
- char *p = line;
- while (*p == ' ' || *p == '\t') p++;
+ char *p = line;
+ while (*p == ' ' || *p == '\t') p++;
- char *end = p + strlen(p) - 1;
- while (end > p && (*end == ' ' || *end == '\t')) {
- *end = '\0';
- end--;
- }
+ char *end = p + strlen(p) - 1;
+ while (end > p && (*end == ' ' || *end == '\t')) {
+ *end = '\0';
+ end--;
+ }
- if (p[0] == '\0') continue;
+ if (p[0] == '\0') continue;
- Proxy proxy = parse_proxy_line(p);
- if (proxy.port == 0) {
- continue;
- }
+ Proxy proxy = parse_proxy_line(p);
+ if (proxy.port == 0) {
+ continue;
+ }
- if (proxy_count >= capacity) {
- capacity *= 2;
- Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
- if (!new_list) {
- free(proxy_list);
- proxy_list = NULL;
- proxy_count = 0;
- fclose(file);
- pthread_mutex_unlock(&proxy_mutex);
- return -1;
- }
- proxy_list = new_list;
+ if (proxy_count >= capacity) {
+ capacity *= 2;
+ Proxy *new_list = (Proxy *)realloc(proxy_list, capacity * sizeof(Proxy));
+ if (!new_list) {
+ free(proxy_list);
+ proxy_list = NULL;
+ proxy_count = 0;
+ fclose(file);
+ pthread_mutex_unlock(&proxy_mutex);
+ return -1;
}
+ proxy_list = new_list;
+ }
- proxy_list[proxy_count++] = proxy;
+ proxy_list[proxy_count++] = proxy;
}
fclose(file);
@@ -166,8 +166,8 @@ int load_proxy_list(const char *filename) {
void free_proxy_list(void) {
pthread_mutex_lock(&proxy_mutex);
if (proxy_list) {
- free(proxy_list);
- proxy_list = NULL;
+ free(proxy_list);
+ proxy_list = NULL;
}
proxy_count = 0;
pthread_mutex_unlock(&proxy_mutex);
@@ -176,8 +176,8 @@ void free_proxy_list(void) {
Proxy *get_random_proxy(void) {
pthread_mutex_lock(&proxy_mutex);
if (proxy_count == 0) {
- pthread_mutex_unlock(&proxy_mutex);
- return NULL;
+ pthread_mutex_unlock(&proxy_mutex);
+ return NULL;
}
int start = rand() % proxy_count;
@@ -185,19 +185,19 @@ Proxy *get_random_proxy(void) {
Proxy *selected = NULL;
while (checked < proxy_count) {
- int idx = (start + checked) % proxy_count;
- if (proxy_list[idx].failures < max_proxy_retries) {
- selected = &proxy_list[idx];
- break;
- }
- checked++;
+ int idx = (start + checked) % proxy_count;
+ if (proxy_list[idx].failures < max_proxy_retries) {
+ selected = &proxy_list[idx];
+ break;
+ }
+ checked++;
}
if (!selected) {
- for (int i = 0; i < proxy_count; i++) {
- proxy_list[i].failures = 0;
- }
- selected = &proxy_list[rand() % proxy_count];
+ for (int i = 0; i < proxy_count; i++) {
+ proxy_list[i].failures = 0;
+ }
+ selected = &proxy_list[rand() % proxy_count];
}
pthread_mutex_unlock(&proxy_mutex);
@@ -213,45 +213,45 @@ void record_proxy_failure(Proxy *proxy) {
void apply_proxy_settings(CURL *curl) {
if (proxy_url[0] != '\0') {
- curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url);
- if (strncmp(proxy_url, "socks5://", 9) == 0) {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
- } else if (strncmp(proxy_url, "socks4://", 9) == 0) {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
- } else {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
- }
+ curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url);
+ if (strncmp(proxy_url, "socks5://", 9) == 0) {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
+ } else if (strncmp(proxy_url, "socks4://", 9) == 0) {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
+ } else {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
+ }
- if (randomize_username || randomize_password) {
- char userpwd[256];
- char username[32] = {0};
- char password[32] = {0};
+ if (randomize_username || randomize_password) {
+ char userpwd[256];
+ char username[32] = {0};
+ char password[32] = {0};
- if (randomize_username) generate_random_string(username, sizeof(username));
- if (randomize_password) generate_random_string(password, sizeof(password));
+ if (randomize_username) generate_random_string(username, sizeof(username));
+ if (randomize_password) generate_random_string(password, sizeof(password));
- snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password);
- curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
- }
+ snprintf(userpwd, sizeof(userpwd), "%s:%s", username, password);
+ curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
+ }
} else if (proxy_count > 0) {
- Proxy *proxy = get_random_proxy();
- if (proxy) {
- char proxy_url_buf[512];
- snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host, proxy->port);
- curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf);
- if (proxy->type == PROXY_HTTP) {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
- } else if (proxy->type == PROXY_SOCKS4) {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
- } else {
- curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
- }
-
- if (proxy->username[0] != '\0' || proxy->password[0] != '\0') {
- char userpwd[128];
- snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username, proxy->password);
- curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
- }
+ Proxy *proxy = get_random_proxy();
+ if (proxy) {
+ char proxy_url_buf[512];
+ snprintf(proxy_url_buf, sizeof(proxy_url_buf), "%s:%d", proxy->host, proxy->port);
+ curl_easy_setopt(curl, CURLOPT_PROXY, proxy_url_buf);
+ if (proxy->type == PROXY_HTTP) {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
+ } else if (proxy->type == PROXY_SOCKS4) {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
+ } else {
+ curl_easy_setopt(curl, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
}
+
+ if (proxy->username[0] != '\0' || proxy->password[0] != '\0') {
+ char userpwd[128];
+ snprintf(userpwd, sizeof(userpwd), "%s:%s", proxy->username, proxy->password);
+ curl_easy_setopt(curl, CURLOPT_PROXYUSERPWD, userpwd);
+ }
+ }
}
}
diff --git a/src/Routes/Home.c b/src/Routes/Home.c
index 81370ba..4526a9d 100644
--- a/src/Routes/Home.c
+++ b/src/Routes/Home.c
@@ -2,13 +2,13 @@
#include <stdlib.h>
int home_handler(UrlParams *params) {
- (void)params;
- TemplateContext ctx = new_context();
- char *rendered_html = render_template("home.html", &ctx);
- send_response(rendered_html);
+ (void)params;
+ TemplateContext ctx = new_context();
+ char *rendered_html = render_template("home.html", &ctx);
+ send_response(rendered_html);
- free(rendered_html);
- free_context(&ctx);
+ free(rendered_html);
+ free_context(&ctx);
- return 0;
+ return 0;
}
diff --git a/src/Routes/ImageProxy.c b/src/Routes/ImageProxy.c
index 5141cd5..10fbd19 100644
--- a/src/Routes/ImageProxy.c
+++ b/src/Routes/ImageProxy.c
@@ -16,9 +16,9 @@ typedef struct {
static int is_allowed_domain(const char *url) {
const char *protocol = strstr(url, "://");
if (!protocol) {
- protocol = url;
+ protocol = url;
} else {
- protocol += 3;
+ protocol += 3;
}
const char *path = strchr(protocol, '/');
@@ -26,49 +26,49 @@ static int is_allowed_domain(const char *url) {
char host[256] = {0};
if (host_len >= sizeof(host)) {
- host_len = sizeof(host) - 1;
+ host_len = sizeof(host) - 1;
}
strncpy(host, protocol, host_len);
const char *allowed_domains[] = {
- "mm.bing.net",
- "th.bing.com",
- NULL
+ "mm.bing.net",
+ "th.bing.com",
+ NULL
};
for (int i = 0; allowed_domains[i] != NULL; i++) {
- size_t domain_len = strlen(allowed_domains[i]);
- size_t host_str_len = strlen(host);
-
- if (host_str_len >= domain_len) {
- const char *suffix = host + host_str_len - domain_len;
- if (strcmp(suffix, allowed_domains[i]) == 0) {
- return 1;
- }
+ size_t domain_len = strlen(allowed_domains[i]);
+ size_t host_str_len = strlen(host);
+
+ if (host_str_len >= domain_len) {
+ const char *suffix = host + host_str_len - domain_len;
+ if (strcmp(suffix, allowed_domains[i]) == 0) {
+ return 1;
}
}
+ }
return 0;
}
static size_t write_callback(void *contents, size_t size, size_t nmemb,
- void *userp) {
+ void *userp) {
size_t realsize = size * nmemb;
MemoryBuffer *buf = (MemoryBuffer *)userp;
if (buf->size + realsize > MAX_IMAGE_SIZE) {
- return 0;
+ return 0;
}
if (buf->size + realsize > buf->capacity) {
- size_t new_capacity = buf->capacity * 2;
- if (new_capacity < buf->size + realsize) {
- new_capacity = buf->size + realsize;
- }
- char *new_data = realloc(buf->data, new_capacity);
- if (!new_data) return 0;
- buf->data = new_data;
- buf->capacity = new_capacity;
+ size_t new_capacity = buf->capacity * 2;
+ if (new_capacity < buf->size + realsize) {
+ new_capacity = buf->size + realsize;
+ }
+ char *new_data = realloc(buf->data, new_capacity);
+ if (!new_data) return 0;
+ buf->data = new_data;
+ buf->capacity = new_capacity;
}
memcpy(buf->data + buf->size, contents, realsize);
@@ -79,38 +79,38 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
int image_proxy_handler(UrlParams *params) {
const char *url = NULL;
for (int i = 0; i < params->count; i++) {
- if (strcmp(params->params[i].key, "url") == 0) {
- url = params->params[i].value;
- break;
- }
+ if (strcmp(params->params[i].key, "url") == 0) {
+ url = params->params[i].value;
+ break;
+ }
}
if (!url || strlen(url) == 0) {
- send_response("Missing 'url' parameter");
- return 0;
+ send_response("Missing 'url' parameter");
+ return 0;
}
if (!is_allowed_domain(url)) {
- send_response("Domain not allowed");
- return 0;
+ send_response("Domain not allowed");
+ return 0;
}
CURL *curl = curl_easy_init();
if (!curl) {
- send_response("Failed to initialize curl");
- return 0;
+ send_response("Failed to initialize curl");
+ return 0;
}
MemoryBuffer buf = {
- .data = malloc(8192),
- .size = 0,
- .capacity = 8192
+ .data = malloc(8192),
+ .size = 0,
+ .capacity = 8192
};
if (!buf.data) {
- curl_easy_cleanup(curl);
- send_response("Memory allocation failed");
- return 0;
+ curl_easy_cleanup(curl);
+ send_response("Memory allocation failed");
+ return 0;
}
curl_easy_setopt(curl, CURLOPT_URL, url);
@@ -130,15 +130,15 @@ int image_proxy_handler(UrlParams *params) {
char content_type[64] = {0};
if (content_type_ptr) {
- strncpy(content_type, content_type_ptr, sizeof(content_type) - 1);
+ strncpy(content_type, content_type_ptr, sizeof(content_type) - 1);
}
curl_easy_cleanup(curl);
if (res != CURLE_OK || response_code != 200) {
- free(buf.data);
- send_response("Failed to fetch image");
- return 0;
+ free(buf.data);
+ send_response("Failed to fetch image");
+ return 0;
}
const char *mime_type = strlen(content_type) > 0 ? content_type : "image/jpeg";
diff --git a/src/Routes/Images.c b/src/Routes/Images.c
index e96d6fd..b997112 100644
--- a/src/Routes/Images.c
+++ b/src/Routes/Images.c
@@ -17,12 +17,12 @@ struct MemoryBlock {
};
static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
- void *userp) {
+ void *userp) {
size_t realsize = size * nmemb;
struct MemoryBlock *mem = (struct MemoryBlock *)userp;
char *ptr = (char *)realloc(mem->response, mem->size + realsize + 1);
if (ptr == NULL) {
- return 0;
+ return 0;
}
mem->response = ptr;
memcpy(&(mem->response[mem->size]), data, realsize);
@@ -35,30 +35,30 @@ static char *fetch_images_html(const char *url) {
CURL *curl_handle;
struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
if (!chunk.response) {
- return NULL;
+ return NULL;
}
curl_handle = curl_easy_init();
if (!curl_handle) {
- free(chunk.response);
- return NULL;
+ free(chunk.response);
+ return NULL;
}
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
curl_easy_setopt(
- curl_handle, CURLOPT_USERAGENT,
- "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
+ curl_handle, CURLOPT_USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
apply_proxy_settings(curl_handle);
CURLcode res = curl_easy_perform(curl_handle);
if (res != CURLE_OK) {
- free(chunk.response);
- curl_easy_cleanup(curl_handle);
- return NULL;
+ free(chunk.response);
+ curl_easy_cleanup(curl_handle);
+ return NULL;
}
curl_easy_cleanup(curl_handle);
@@ -71,15 +71,15 @@ int images_handler(UrlParams *params) {
int page = 1;
if (params) {
- for (int i = 0; i < params->count; i++) {
- if (strcmp(params->params[i].key, "q") == 0) {
- raw_query = params->params[i].value;
- } else if (strcmp(params->params[i].key, "p") == 0) {
- int parsed = atoi(params->params[i].value);
- if (parsed > 1) page = parsed;
- }
+ for (int i = 0; i < params->count; i++) {
+ if (strcmp(params->params[i].key, "q") == 0) {
+ raw_query = params->params[i].value;
+ } else if (strcmp(params->params[i].key, "p") == 0) {
+ int parsed = atoi(params->params[i].value);
+ if (parsed > 1) page = parsed;
}
}
+ }
context_set(&ctx, "query", raw_query);
@@ -87,7 +87,7 @@ int images_handler(UrlParams *params) {
snprintf(page_str, sizeof(page_str), "%d", page);
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
snprintf(next_str, sizeof(next_str), "%d", page + 1);
- context_set(&ctx, "page", page_str);
+ context_set(&ctx, "page", page_str);
context_set(&ctx, "prev_page", prev_str);
context_set(&ctx, "next_page", next_str);
@@ -95,198 +95,198 @@ int images_handler(UrlParams *params) {
context_set(&ctx, "query", display_query);
if (!raw_query || strlen(raw_query) == 0) {
- send_response("<h1>No query provided</h1>");
- if (display_query) free(display_query);
- free_context(&ctx);
- return -1;
+ send_response("<h1>No query provided</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
}
CURL *tmp = curl_easy_init();
if (!tmp) {
- send_response("<h1>Error initializing curl</h1>");
- if (display_query) free(display_query);
- free_context(&ctx);
- return -1;
+ send_response("<h1>Error initializing curl</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
}
char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
curl_easy_cleanup(tmp);
if (!encoded_query) {
- send_response("<h1>Error encoding query</h1>");
- if (display_query) free(display_query);
- free_context(&ctx);
- return -1;
+ send_response("<h1>Error encoding query</h1>");
+ if (display_query) free(display_query);
+ free_context(&ctx);
+ return -1;
}
char url[1024];
int first = (page - 1) * 32 + 1;
snprintf(url, sizeof(url),
- "https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first);
+ "https://www.bing.com/images/search?q=%s&first=%d", encoded_query, first);
char *html = fetch_images_html(url);
if (!html) {
- send_response("<h1>Error fetching images</h1>");
- free(encoded_query);
- free(display_query);
- free_context(&ctx);
- return -1;
+ send_response("<h1>Error fetching images</h1>");
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
}
htmlDocPtr doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
- HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
if (!doc) {
- free(html);
- free(encoded_query);
- free(display_query);
- free_context(&ctx);
- return -1;
+ free(html);
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
}
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) {
- xmlFreeDoc(doc);
- free(html);
- free(encoded_query);
- free(display_query);
- free_context(&ctx);
- return -1;
+ xmlFreeDoc(doc);
+ free(html);
+ free(encoded_query);
+ free(display_query);
+ free_context(&ctx);
+ return -1;
}
xmlXPathObjectPtr xpathObj =
- xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
+ xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
int image_count = 0;
char ***image_matrix = NULL;
int *inner_counts = NULL;
if (xpathObj && xpathObj->nodesetval) {
- int nodes = xpathObj->nodesetval->nodeNr;
-
- int max_images = (nodes < 32) ? nodes : 32;
- image_matrix = malloc(sizeof(char **) * max_images);
- inner_counts = malloc(sizeof(int) * max_images);
-
- for (int i = 0; i < nodes; i++) {
- if (image_count >= 32) break;
-
- xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
- xmlNodePtr img_node = NULL;
- xmlNodePtr tit_node = NULL;
- xmlNodePtr des_node = NULL;
- xmlNodePtr thumb_link = NULL;
-
- for (xmlNodePtr child = node->children; child; child = child->next) {
- if (child->type != XML_ELEMENT_NODE) continue;
-
- if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
- xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
- if (class) {
- if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
- thumb_link = child;
- for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
- if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
- xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
- if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
- for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
- if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
- img_node = cico_child;
- break;
- }
- }
- }
- if (div_class) xmlFree(div_class);
- }
- }
- } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
- tit_node = child;
+ int nodes = xpathObj->nodesetval->nodeNr;
+
+ int max_images = (nodes < 32) ? nodes : 32;
+ image_matrix = malloc(sizeof(char **) * max_images);
+ inner_counts = malloc(sizeof(int) * max_images);
+
+ for (int i = 0; i < nodes; i++) {
+ if (image_count >= 32) break;
+
+ xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
+ xmlNodePtr img_node = NULL;
+ xmlNodePtr tit_node = NULL;
+ xmlNodePtr des_node = NULL;
+ xmlNodePtr thumb_link = NULL;
+
+ for (xmlNodePtr child = node->children; child; child = child->next) {
+ if (child->type != XML_ELEMENT_NODE) continue;
+
+ if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
+ xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+ if (class) {
+ if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
+ thumb_link = child;
+ for (xmlNodePtr thumb_child = child->children; thumb_child; thumb_child = thumb_child->next) {
+ if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *div_class = xmlGetProp(thumb_child, (const xmlChar *)"class");
+ if (div_class && xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
+ for (xmlNodePtr cico_child = thumb_child->children; cico_child; cico_child = cico_child->next) {
+ if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") == 0) {
+ img_node = cico_child;
+ break;
}
- xmlFree(class);
}
- } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
- xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
- if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
- for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
- if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
- xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
- if (div_class) {
- if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
- des_node = meta_child;
- }
- xmlFree(div_class);
- }
- } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
- xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
- if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
- tit_node = meta_child;
- }
- if (a_class) xmlFree(a_class);
- }
- }
}
- if (class) xmlFree(class);
+ if (div_class) xmlFree(div_class);
}
+ }
+ } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
+ tit_node = child;
}
-
- xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
- xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
- xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
- xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
-
- if (iurl && strlen((char *)iurl) > 0) {
- char *proxy_url = NULL;
- CURL *esc_curl = curl_easy_init();
- if (esc_curl) {
- char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
- if (encoded) {
- size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
- proxy_url = malloc(proxy_len);
- if (proxy_url) {
- snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
- }
- curl_free(encoded);
+ xmlFree(class);
+ }
+ } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+ if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
+ for (xmlNodePtr meta_child = child->children; meta_child; meta_child = meta_child->next) {
+ if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
+ xmlChar *div_class = xmlGetProp(meta_child, (const xmlChar *)"class");
+ if (div_class) {
+ if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
+ des_node = meta_child;
}
- curl_easy_cleanup(esc_curl);
+ xmlFree(div_class);
}
+ } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
+ xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
+ if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
+ tit_node = meta_child;
+ }
+ if (a_class) xmlFree(a_class);
+ }
+ }
+ }
+ if (class) xmlFree(class);
+ }
+ }
- image_matrix[image_count] = malloc(sizeof(char *) * 4);
- image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
- image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
- image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
- image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
- inner_counts[image_count] = 4;
- image_count++;
+ xmlChar *iurl = img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
+ xmlChar *full_url = thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
+ xmlChar *title = des_node ? xmlNodeGetContent(des_node) : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
+ xmlChar *rurl = tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
+
+ if (iurl && strlen((char *)iurl) > 0) {
+ char *proxy_url = NULL;
+ CURL *esc_curl = curl_easy_init();
+ if (esc_curl) {
+ char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
+ if (encoded) {
+ size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
+ proxy_url = malloc(proxy_len);
+ if (proxy_url) {
+ snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
}
+ curl_free(encoded);
+ }
+ curl_easy_cleanup(esc_curl);
+ }
- if (iurl) xmlFree(iurl);
- if (title) xmlFree(title);
- if (rurl) xmlFree(rurl);
- if (full_url) xmlFree(full_url);
+ image_matrix[image_count] = malloc(sizeof(char *) * 4);
+ image_matrix[image_count][0] = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
+ image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
+ image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
+ image_matrix[image_count][3] = strdup(full_url ? (char *)full_url : "#");
+ inner_counts[image_count] = 4;
+ image_count++;
}
+
+ if (iurl) xmlFree(iurl);
+ if (title) xmlFree(title);
+ if (rurl) xmlFree(rurl);
+ if (full_url) xmlFree(full_url);
+ }
}
context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
- inner_counts);
+ inner_counts);
char *rendered = render_template("images.html", &ctx);
if (rendered) {
- send_response(rendered);
- free(rendered);
+ send_response(rendered);
+ free(rendered);
} else {
- send_response("<h1>Error rendering image results</h1>");
+ send_response("<h1>Error rendering image results</h1>");
}
if (image_matrix) {
- for (int i = 0; i < image_count; i++) {
- for (int j = 0; j < 4; j++) {
- free(image_matrix[i][j]);
- }
- free(image_matrix[i]);
+ for (int i = 0; i < image_count; i++) {
+ for (int j = 0; j < 4; j++) {
+ free(image_matrix[i][j]);
}
- free(image_matrix);
+ free(image_matrix[i]);
+ }
+ free(image_matrix);
}
if (inner_counts) {
- free(inner_counts);
+ free(inner_counts);
}
if (xpathObj) xmlXPathFreeObject(xpathObj);
diff --git a/src/Routes/Search.c b/src/Routes/Search.c
index dee7a9f..51fe415 100644
--- a/src/Routes/Search.c
+++ b/src/Routes/Search.c
@@ -23,13 +23,13 @@ static void *wiki_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
char *dynamic_url = construct_wiki_url(data->query);
if (dynamic_url) {
- data->result = fetch_wiki_data(dynamic_url);
- data->success =
- (data->result.title != NULL && data->result.extract != NULL &&
- strlen(data->result.extract) > 10);
- free(dynamic_url);
+ data->result = fetch_wiki_data(dynamic_url);
+ data->success =
+ (data->result.title != NULL && data->result.extract != NULL &&
+ strlen(data->result.extract) > 10);
+ free(dynamic_url);
} else {
- data->success = 0;
+ data->success = 0;
}
return NULL;
}
@@ -41,44 +41,44 @@ static int is_calculator_query(const char *query) {
int has_math_operator = 0;
for (const char *p = query; *p; p++) {
- if (isdigit(*p) || *p == '.') {
- has_digit = 1;
- }
- if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') {
- has_math_operator = 1;
- }
+ if (isdigit(*p) || *p == '.') {
+ has_digit = 1;
+ }
+ if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '^') {
+ has_math_operator = 1;
+ }
}
if (!has_digit || !has_math_operator) return 0;
int len = strlen(query);
for (int i = 0; i < len; i++) {
- char c = query[i];
- if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') {
- int has_num_before = 0;
- int has_num_after = 0;
-
- for (int j = i - 1; j >= 0; j--) {
- if (isdigit(query[j]) || query[j] == '.') {
- has_num_before = 1;
- break;
- }
- if (query[j] != ' ') break;
- }
+ char c = query[i];
+ if (c == '+' || c == '-' || c == '*' || c == '/' || c == '^') {
+ int has_num_before = 0;
+ int has_num_after = 0;
+
+ for (int j = i - 1; j >= 0; j--) {
+ if (isdigit(query[j]) || query[j] == '.') {
+ has_num_before = 1;
+ break;
+ }
+ if (query[j] != ' ') break;
+ }
- for (int j = i + 1; j < len; j++) {
- if (isdigit(query[j]) || query[j] == '.') {
- has_num_after = 1;
- break;
- }
- if (query[j] != ' ') break;
- }
+ for (int j = i + 1; j < len; j++) {
+ if (isdigit(query[j]) || query[j] == '.') {
+ has_num_after = 1;
+ break;
+ }
+ if (query[j] != ' ') break;
+ }
- if (has_num_before || has_num_after) {
- return 1;
- }
+ if (has_num_before || has_num_after) {
+ return 1;
}
}
+ }
return 0;
}
@@ -87,11 +87,11 @@ static void *calc_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_calculator_query(data->query)) {
- data->result = fetch_calc_data((char *)data->query);
- data->success =
- (data->result.title != NULL && data->result.extract != NULL);
+ data->result = fetch_calc_data((char *)data->query);
+ data->success =
+ (data->result.title != NULL && data->result.extract != NULL);
} else {
- data->success = 0;
+ data->success = 0;
}
return NULL;
@@ -101,11 +101,11 @@ static void *dict_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_dictionary_query(data->query)) {
- data->result = fetch_dictionary_data(data->query);
- data->success =
- (data->result.title != NULL && data->result.extract != NULL);
+ data->result = fetch_dictionary_data(data->query);
+ data->success =
+ (data->result.title != NULL && data->result.extract != NULL);
} else {
- data->success = 0;
+ data->success = 0;
}
return NULL;
@@ -115,22 +115,22 @@ static void *unit_thread_func(void *arg) {
InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
if (is_unit_conv_query(data->query)) {
- data->result = fetch_unit_conv_data(data->query);
- data->success =
- (data->result.title != NULL && data->result.extract != NULL);
+ data->result = fetch_unit_conv_data(data->query);
+ data->success =
+ (data->result.title != NULL && data->result.extract != NULL);
} else {
- data->success = 0;
+ data->success = 0;
}
return NULL;
}
static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
- int **inner_counts, int current_count) {
+ int **inner_counts, int current_count) {
*collection =
- (char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
+ (char ***)realloc(*collection, sizeof(char **) * (current_count + 1));
*inner_counts =
- (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
+ (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1));
(*collection)[current_count] = (char **)malloc(sizeof(char *) * 4);
(*collection)[current_count][0] = infobox->title ? strdup(infobox->title) : NULL;
@@ -148,15 +148,15 @@ int results_handler(UrlParams *params) {
int page = 1;
if (params) {
- for (int i = 0; i < params->count; i++) {
- if (strcmp(params->params[i].key, "q") == 0) {
- raw_query = params->params[i].value;
- } else if (strcmp(params->params[i].key, "p") == 0) {
- int parsed = atoi(params->params[i].value);
- if (parsed > 1) page = parsed;
- }
+ for (int i = 0; i < params->count; i++) {
+ if (strcmp(params->params[i].key, "q") == 0) {
+ raw_query = params->params[i].value;
+ } else if (strcmp(params->params[i].key, "p") == 0) {
+ int parsed = atoi(params->params[i].value);
+ if (parsed > 1) page = parsed;
}
}
+ }
context_set(&ctx, "query", raw_query);
@@ -164,14 +164,14 @@ int results_handler(UrlParams *params) {
snprintf(page_str, sizeof(page_str), "%d", page);
snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
snprintf(next_str, sizeof(next_str), "%d", page + 1);
- context_set(&ctx, "page", page_str);
+ context_set(&ctx, "page", page_str);
context_set(&ctx, "prev_page", prev_str);
context_set(&ctx, "next_page", next_str);
if (!raw_query || strlen(raw_query) == 0) {
- send_response("<h1>No query provided</h1>");
- free_context(&ctx);
- return -1;
+ send_response("<h1>No query provided</h1>");
+ free_context(&ctx);
+ return -1;
}
pthread_t wiki_tid, calc_tid, dict_tid, unit_tid;
@@ -181,36 +181,36 @@ int results_handler(UrlParams *params) {
InfoBoxThreadData unit_data = {.query = raw_query, .success = 0};
if (page == 1) {
- pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
- pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
- pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
- pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data);
+ pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data);
+ pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data);
+ pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data);
+ pthread_create(&unit_tid, NULL, unit_thread_func, &unit_data);
}
ScrapeJob jobs[ENGINE_COUNT];
SearchResult *all_results[ENGINE_COUNT];
for (int i = 0; i < ENGINE_COUNT; i++) {
- all_results[i] = NULL;
- jobs[i].engine = &ENGINE_REGISTRY[i];
- jobs[i].query = raw_query;
- jobs[i].out_results = &all_results[i];
- jobs[i].max_results = 10;
- jobs[i].results_count = 0;
- jobs[i].page = page;
- jobs[i].handle = NULL;
- jobs[i].response.memory = NULL;
- jobs[i].response.size = 0;
- jobs[i].response.capacity = 0;
+ all_results[i] = NULL;
+ jobs[i].engine = &ENGINE_REGISTRY[i];
+ jobs[i].query = raw_query;
+ jobs[i].out_results = &all_results[i];
+ jobs[i].max_results = 10;
+ jobs[i].results_count = 0;
+ jobs[i].page = page;
+ jobs[i].handle = NULL;
+ jobs[i].response.memory = NULL;
+ jobs[i].response.size = 0;
+ jobs[i].response.capacity = 0;
}
scrape_engines_parallel(jobs, ENGINE_COUNT);
if (page == 1) {
- pthread_join(wiki_tid, NULL);
- pthread_join(calc_tid, NULL);
- pthread_join(dict_tid, NULL);
- pthread_join(unit_tid, NULL);
+ pthread_join(wiki_tid, NULL);
+ pthread_join(calc_tid, NULL);
+ pthread_join(dict_tid, NULL);
+ pthread_join(unit_tid, NULL);
}
char ***infobox_matrix = NULL;
@@ -218,118 +218,118 @@ int results_handler(UrlParams *params) {
int infobox_count = 0;
if (page == 1) {
- if (dict_data.success) {
- infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
- &infobox_inner_counts, infobox_count);
- }
+ if (dict_data.success) {
+ infobox_count = add_infobox_to_collection(&dict_data.result, &infobox_matrix,
+ &infobox_inner_counts, infobox_count);
+ }
- if (calc_data.success) {
- infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
- &infobox_inner_counts, infobox_count);
- }
+ if (calc_data.success) {
+ infobox_count = add_infobox_to_collection(&calc_data.result, &infobox_matrix,
+ &infobox_inner_counts, infobox_count);
+ }
- if (unit_data.success) {
- infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix,
- &infobox_inner_counts, infobox_count);
- }
+ if (unit_data.success) {
+ infobox_count = add_infobox_to_collection(&unit_data.result, &infobox_matrix,
+ &infobox_inner_counts, infobox_count);
+ }
- if (wiki_data.success) {
- infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
- &infobox_inner_counts, infobox_count);
- }
+ if (wiki_data.success) {
+ infobox_count = add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
+ &infobox_inner_counts, infobox_count);
+ }
}
if (infobox_count > 0) {
- context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
- infobox_count, infobox_inner_counts);
- for (int i = 0; i < infobox_count; i++) {
- for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]);
- free(infobox_matrix[i]);
- }
- free(infobox_matrix);
- free(infobox_inner_counts);
+ context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
+ infobox_count, infobox_inner_counts);
+ for (int i = 0; i < infobox_count; i++) {
+ for (int j = 0; j < 4; j++) free(infobox_matrix[i][j]);
+ free(infobox_matrix[i]);
+ }
+ free(infobox_matrix);
+ free(infobox_inner_counts);
}
int total_results = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
- total_results += jobs[i].results_count;
+ total_results += jobs[i].results_count;
}
if (total_results > 0) {
- char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
- int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
- char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
- int unique_count = 0;
-
- for (int i = 0; i < ENGINE_COUNT; i++) {
- for (int j = 0; j < jobs[i].results_count; j++) {
- char *display_url = all_results[i][j].url;
-
- int is_duplicate = 0;
- for (int k = 0; k < unique_count; k++) {
- if (strcmp(seen_urls[k], display_url) == 0) {
- is_duplicate = 1;
- break;
- }
- }
-
- if (is_duplicate) {
- free(all_results[i][j].url);
- free(all_results[i][j].title);
- free(all_results[i][j].snippet);
- continue;
- }
-
- seen_urls[unique_count] = strdup(display_url);
- results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
- char *pretty_url = pretty_display_url(display_url);
-
- results_matrix[unique_count][0] = strdup(display_url);
- results_matrix[unique_count][1] = strdup(pretty_url);
- results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
- results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
-
- results_inner_counts[unique_count] = 4;
-
- free(pretty_url);
- free(all_results[i][j].url);
- free(all_results[i][j].title);
- free(all_results[i][j].snippet);
-
- unique_count++;
+ char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results);
+ int *results_inner_counts = (int *)malloc(sizeof(int) * total_results);
+ char **seen_urls = (char **)malloc(sizeof(char *) * total_results);
+ int unique_count = 0;
+
+ for (int i = 0; i < ENGINE_COUNT; i++) {
+ for (int j = 0; j < jobs[i].results_count; j++) {
+ char *display_url = all_results[i][j].url;
+
+ int is_duplicate = 0;
+ for (int k = 0; k < unique_count; k++) {
+ if (strcmp(seen_urls[k], display_url) == 0) {
+ is_duplicate = 1;
+ break;
}
- free(all_results[i]);
}
- context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);
-
- char *html = render_template("results.html", &ctx);
- if (html) {
- send_response(html);
- free(html);
+ if (is_duplicate) {
+ free(all_results[i][j].url);
+ free(all_results[i][j].title);
+ free(all_results[i][j].snippet);
+ continue;
}
- for (int i = 0; i < unique_count; i++) {
- for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
- free(results_matrix[i]);
- free(seen_urls[i]);
+ seen_urls[unique_count] = strdup(display_url);
+ results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4);
+ char *pretty_url = pretty_display_url(display_url);
+
+ results_matrix[unique_count][0] = strdup(display_url);
+ results_matrix[unique_count][1] = strdup(pretty_url);
+ results_matrix[unique_count][2] = all_results[i][j].title ? strdup(all_results[i][j].title) : strdup("Untitled");
+ results_matrix[unique_count][3] = all_results[i][j].snippet ? strdup(all_results[i][j].snippet) : strdup("");
+
+ results_inner_counts[unique_count] = 4;
+
+ free(pretty_url);
+ free(all_results[i][j].url);
+ free(all_results[i][j].title);
+ free(all_results[i][j].snippet);
+
+ unique_count++;
}
- free(seen_urls);
- free(results_matrix);
- free(results_inner_counts);
+ free(all_results[i]);
+ }
+
+ context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, results_inner_counts);
+
+ char *html = render_template("results.html", &ctx);
+ if (html) {
+ send_response(html);
+ free(html);
+ }
+
+ for (int i = 0; i < unique_count; i++) {
+ for (int j = 0; j < 4; j++) free(results_matrix[i][j]);
+ free(results_matrix[i]);
+ free(seen_urls[i]);
+ }
+ free(seen_urls);
+ free(results_matrix);
+ free(results_inner_counts);
} else {
- char *html = render_template("results.html", &ctx);
- if (html) {
- send_response(html);
- free(html);
- }
+ char *html = render_template("results.html", &ctx);
+ if (html) {
+ send_response(html);
+ free(html);
+ }
}
if (page == 1) {
- if (wiki_data.success) free_infobox(&wiki_data.result);
- if (calc_data.success) free_infobox(&calc_data.result);
- if (dict_data.success) free_infobox(&dict_data.result);
- if (unit_data.success) free_infobox(&unit_data.result);
+ if (wiki_data.success) free_infobox(&wiki_data.result);
+ if (calc_data.success) free_infobox(&calc_data.result);
+ if (dict_data.success) free_infobox(&dict_data.result);
+ if (unit_data.success) free_infobox(&unit_data.result);
}
free_context(&ctx);
diff --git a/src/Scraping/Scraping.c b/src/Scraping/Scraping.c
index 5b1b5d6..97f40a8 100644
--- a/src/Scraping/Scraping.c
+++ b/src/Scraping/Scraping.c
@@ -11,21 +11,21 @@
#include <unistd.h>
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
- void *userp) {
+ void *userp) {
size_t realsize = size * nmemb;
MemoryBuffer *mem = (MemoryBuffer *)userp;
if (mem->size + realsize + 1 > mem->capacity) {
- size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
- while (new_cap < mem->size + realsize + 1) new_cap *= 2;
+ size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
+ while (new_cap < mem->size + realsize + 1) new_cap *= 2;
- char *ptr = (char *)realloc(mem->memory, new_cap);
- if (!ptr) {
- return 0;
- }
- mem->memory = ptr;
- mem->capacity = new_cap;
+ char *ptr = (char *)realloc(mem->memory, new_cap);
+ if (!ptr) {
+ return 0;
+ }
+ mem->memory = ptr;
+ mem->capacity = new_cap;
}
memcpy(&(mem->memory[mem->size]), contents, realsize);
@@ -37,37 +37,37 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
static const char *get_random_user_agent() {
static const char *agents[] = {
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
- "like Gecko) Chrome/120.0.0.0 Safari/537.36",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
- "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
- "Gecko) "
- "Chrome/120.0.0.0` Safari/537.36",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
- "Firefox/121.0",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
- "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
+ "like Gecko) Chrome/120.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
+ "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
+ "Gecko) "
+ "Chrome/120.0.0.0` Safari/537.36",
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
+ "Firefox/121.0",
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
+ "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
return agents[rand() % 5];
}
static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
- SearchResult **out_results, int max_results) {
+ SearchResult **out_results, int max_results) {
(void)engine_name;
int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) {
- return 0;
+ return 0;
}
const char *link_xpath = "//tr[not(contains(@class, 'result-sponsored'))]//a[@class='result-link']";
xmlXPathObjectPtr xpathObj =
- xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
+ xmlXPathEvalExpression((xmlChar *)link_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
- if (xpathObj) xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ if (xpathObj) xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
int num_links = xpathObj->nodesetval->nodeNr;
@@ -75,49 +75,49 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_links < max_results) ? num_links : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) {
- xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
for (int i = 0; i < num_links && found_count < max_results; i++) {
- xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
- char *title = (char *)xmlNodeGetContent(linkNode);
- char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
- char *snippet_text = NULL;
-
- xmlNodePtr current = linkNode->parent;
- while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
- current = current->parent;
-
- if (current && current->next) {
- xmlNodePtr snippetRow = current->next;
- while (snippetRow &&
- xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
- snippetRow = snippetRow->next;
- if (snippetRow) {
-
- xpathCtx->node = snippetRow;
- xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
- (xmlChar *)".//td[@class='result-snippet']", xpathCtx);
- if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
- snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
- }
- if (sObj) xmlXPathFreeObject(sObj);
- xpathCtx->node = NULL;
+ xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
+ char *title = (char *)xmlNodeGetContent(linkNode);
+ char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
+ char *snippet_text = NULL;
+
+ xmlNodePtr current = linkNode->parent;
+ while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
+ current = current->parent;
+
+ if (current && current->next) {
+ xmlNodePtr snippetRow = current->next;
+ while (snippetRow &&
+ xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
+ snippetRow = snippetRow->next;
+ if (snippetRow) {
+
+ xpathCtx->node = snippetRow;
+ xmlXPathObjectPtr sObj = xmlXPathEvalExpression(
+ (xmlChar *)".//td[@class='result-snippet']", xpathCtx);
+ if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
+ snippet_text = (char *)xmlNodeGetContent(sObj->nodesetval->nodeTab[0]);
+ }
+ if (sObj) xmlXPathFreeObject(sObj);
+ xpathCtx->node = NULL;
- }
}
+ }
- (*out_results)[found_count].url = unescape_search_url(url);
- (*out_results)[found_count].title = strdup(title ? title : "No Title");
- (*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
+ (*out_results)[found_count].url = unescape_search_url(url);
+ (*out_results)[found_count].title = strdup(title ? title : "No Title");
+ (*out_results)[found_count].snippet = strdup(snippet_text ? snippet_text : "");
- found_count++;
+ found_count++;
- if (title) xmlFree(title);
- if (url) xmlFree(url);
- if (snippet_text) xmlFree(snippet_text);
+ if (title) xmlFree(title);
+ if (url) xmlFree(url);
+ if (snippet_text) xmlFree(snippet_text);
}
xmlXPathFreeObject(xpathObj);
@@ -126,22 +126,22 @@ static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
}
static int parse_startpage(const char *engine_name, xmlDocPtr doc,
- SearchResult **out_results, int max_results) {
+ SearchResult **out_results, int max_results) {
(void)engine_name;
int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) {
- return 0;
+ return 0;
}
const char *container_xpath = "//div[contains(@class, 'result')]";
xmlXPathObjectPtr xpathObj =
- xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
+ xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
- if (xpathObj) xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ if (xpathObj) xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
int num_results = xpathObj->nodesetval->nodeNr;
@@ -149,52 +149,52 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_results < max_results) ? num_results : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) {
- xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
for (int i = 0; i < num_results && found_count < max_results; i++) {
- xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
- xpathCtx->node = resultNode;
-
- xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
- (xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
- char *url =
- (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
- ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
- (xmlChar *)"href")
- : NULL;
-
- xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
- (xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
- char *title =
- (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
- ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
- : NULL;
-
- xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
- (xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
- char *snippet_text =
- (snippetObj && snippetObj->nodesetval &&
- snippetObj->nodesetval->nodeNr > 0)
- ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
- : NULL;
-
- if (url && title) {
- (*out_results)[found_count].url = strdup(url);
- (*out_results)[found_count].title = strdup(title);
- (*out_results)[found_count].snippet =
- strdup(snippet_text ? snippet_text : "");
- found_count++;
- }
+ xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
+ xpathCtx->node = resultNode;
+
+ xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
+ (xmlChar *)".//a[contains(@class, 'result-link')]", xpathCtx);
+ char *url =
+ (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
+ (xmlChar *)"href")
+ : NULL;
+
+ xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
+ (xmlChar *)".//h2[contains(@class, 'wgl-title')]", xpathCtx);
+ char *title =
+ (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
+ : NULL;
+
+ xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
+ (xmlChar *)".//p[contains(@class, 'description')]", xpathCtx);
+ char *snippet_text =
+ (snippetObj && snippetObj->nodesetval &&
+ snippetObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
+ : NULL;
+
+ if (url && title) {
+ (*out_results)[found_count].url = strdup(url);
+ (*out_results)[found_count].title = strdup(title);
+ (*out_results)[found_count].snippet =
+ strdup(snippet_text ? snippet_text : "");
+ found_count++;
+ }
- if (title) xmlFree(title);
- if (url) xmlFree(url);
- if (snippet_text) xmlFree(snippet_text);
- if (linkObj) xmlXPathFreeObject(linkObj);
- if (titleObj) xmlXPathFreeObject(titleObj);
- if (snippetObj) xmlXPathFreeObject(snippetObj);
+ if (title) xmlFree(title);
+ if (url) xmlFree(url);
+ if (snippet_text) xmlFree(snippet_text);
+ if (linkObj) xmlXPathFreeObject(linkObj);
+ if (titleObj) xmlXPathFreeObject(titleObj);
+ if (snippetObj) xmlXPathFreeObject(snippetObj);
}
xpathCtx->node = NULL;
@@ -205,22 +205,22 @@ static int parse_startpage(const char *engine_name, xmlDocPtr doc,
}
static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
- SearchResult **out_results, int max_results) {
+ SearchResult **out_results, int max_results) {
(void)engine_name;
int found_count = 0;
xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
if (!xpathCtx) {
- return 0;
+ return 0;
}
const char *container_xpath = "//div[contains(@class, 'algo-sr')]";
xmlXPathObjectPtr xpathObj =
- xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
+ xmlXPathEvalExpression((xmlChar *)container_xpath, xpathCtx);
if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
- if (xpathObj) xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ if (xpathObj) xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
int num_results = xpathObj->nodesetval->nodeNr;
@@ -228,53 +228,53 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
int actual_alloc = (num_results < max_results) ? num_results : max_results;
*out_results = (SearchResult *)calloc(actual_alloc, sizeof(SearchResult));
if (!*out_results) {
- xmlXPathFreeObject(xpathObj);
- xmlXPathFreeContext(xpathCtx);
- return 0;
+ xmlXPathFreeObject(xpathObj);
+ xmlXPathFreeContext(xpathCtx);
+ return 0;
}
for (int i = 0; i < num_results && found_count < max_results; i++) {
- xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
- xpathCtx->node = resultNode;
-
- xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
- (xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
- xpathCtx);
- char *url =
- (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
- ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
- (xmlChar *)"href")
- : NULL;
-
- xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
- (xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
- char *title =
- (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
- ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
- : NULL;
-
- xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
- (xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
- char *snippet_text =
- (snippetObj && snippetObj->nodesetval &&
- snippetObj->nodesetval->nodeNr > 0)
- ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
- : NULL;
-
- if (url && title) {
- (*out_results)[found_count].url = unescape_search_url(url);
- (*out_results)[found_count].title = strdup(title);
- (*out_results)[found_count].snippet =
- strdup(snippet_text ? snippet_text : "");
- found_count++;
- }
+ xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
+ xpathCtx->node = resultNode;
+
+ xmlXPathObjectPtr linkObj = xmlXPathEvalExpression(
+ (xmlChar *)".//div[contains(@class, 'compTitle')]//a[@target='_blank']",
+ xpathCtx);
+ char *url =
+ (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
+ (xmlChar *)"href")
+ : NULL;
+
+ xmlXPathObjectPtr titleObj = xmlXPathEvalExpression(
+ (xmlChar *)".//h3[contains(@class, 'title')]", xpathCtx);
+ char *title =
+ (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlNodeGetContent(titleObj->nodesetval->nodeTab[0])
+ : NULL;
+
+ xmlXPathObjectPtr snippetObj = xmlXPathEvalExpression(
+ (xmlChar *)".//div[contains(@class, 'compText')]//p", xpathCtx);
+ char *snippet_text =
+ (snippetObj && snippetObj->nodesetval &&
+ snippetObj->nodesetval->nodeNr > 0)
+ ? (char *)xmlNodeGetContent(snippetObj->nodesetval->nodeTab[0])
+ : NULL;
+
+ if (url && title) {
+ (*out_results)[found_count].url = unescape_search_url(url);
+ (*out_results)[found_count].title = strdup(title);
+ (*out_results)[found_count].snippet =
+ strdup(snippet_text ? snippet_text : "");
+ found_count++;
+ }
- if (title) xmlFree(title);
- if (url) xmlFree(url);
- if (snippet_text) xmlFree(snippet_text);
- if (linkObj) xmlXPathFreeObject(linkObj);
- if (titleObj) xmlXPathFreeObject(titleObj);
- if (snippetObj) xmlXPathFreeObject(snippetObj);
+ if (title) xmlFree(title);
+ if (url) xmlFree(url);
+ if (snippet_text) xmlFree(snippet_text);
+ if (linkObj) xmlXPathFreeObject(linkObj);
+ if (titleObj) xmlXPathFreeObject(titleObj);
+ if (snippetObj) xmlXPathFreeObject(snippetObj);
}
xpathCtx->node = NULL;
@@ -284,36 +284,36 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
}
const SearchEngine ENGINE_REGISTRY[] = {
- {.name = "DuckDuckGo Lite",
- .base_url = "https://lite.duckduckgo.com/lite/?q=",
- .host_header = "lite.duckduckgo.com",
- .referer = "https://lite.duckduckgo.com/",
- .page_param = "s",
- .page_multiplier = 30,
- .page_base = 0,
- .parser = parse_ddg_lite},
- {.name = "Startpage",
- .base_url = "https://www.startpage.com/sp/search?q=",
- .host_header = "www.startpage.com",
- .referer = "https://www.startpage.com/",
- .page_param = "page",
- .page_multiplier = 1,
- .page_base = 1,
- .parser = parse_startpage},
- {.name = "Yahoo",
- .base_url = "https://search.yahoo.com/search?p=",
- .host_header = "search.yahoo.com",
- .referer = "https://search.yahoo.com/",
- .page_param = "b",
- .page_multiplier = 10,
- .page_base = 1,
- .parser = parse_yahoo}};
+ {.name = "DuckDuckGo Lite",
+ .base_url = "https://lite.duckduckgo.com/lite/?q=",
+ .host_header = "lite.duckduckgo.com",
+ .referer = "https://lite.duckduckgo.com/",
+ .page_param = "s",
+ .page_multiplier = 30,
+ .page_base = 0,
+ .parser = parse_ddg_lite},
+ {.name = "Startpage",
+ .base_url = "https://www.startpage.com/sp/search?q=",
+ .host_header = "www.startpage.com",
+ .referer = "https://www.startpage.com/",
+ .page_param = "page",
+ .page_multiplier = 1,
+ .page_base = 1,
+ .parser = parse_startpage},
+ {.name = "Yahoo",
+ .base_url = "https://search.yahoo.com/search?p=",
+ .host_header = "search.yahoo.com",
+ .referer = "https://search.yahoo.com/",
+ .page_param = "b",
+ .page_multiplier = 10,
+ .page_base = 1,
+ .parser = parse_yahoo}};
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
static void configure_curl_handle(CURL *curl, const char *full_url,
- MemoryBuffer *chunk,
- struct curl_slist *headers) {
+ MemoryBuffer *chunk,
+ struct curl_slist *headers) {
curl_easy_setopt(curl, CURLOPT_URL, full_url);
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
@@ -340,62 +340,62 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
retry:
CURLM *multi_handle = curl_multi_init();
if (!multi_handle) {
- return -1;
+ return -1;
}
for (int i = 0; i < num_jobs; i++) {
- ScrapeJob *job = &jobs[i];
+ ScrapeJob *job = &jobs[i];
- if (job->handle) {
- curl_easy_cleanup(job->handle);
- job->handle = NULL;
- }
- if (job->response.memory) {
- free(job->response.memory);
- }
+ if (job->handle) {
+ curl_easy_cleanup(job->handle);
+ job->handle = NULL;
+ }
+ if (job->response.memory) {
+ free(job->response.memory);
+ }
- job->handle = curl_easy_init();
- if (!job->handle) {
- continue;
- }
+ job->handle = curl_easy_init();
+ if (!job->handle) {
+ continue;
+ }
- job->response.memory = (char *)malloc(16384);
- job->response.size = 0;
- job->response.capacity = 16384;
+ job->response.memory = (char *)malloc(16384);
+ job->response.size = 0;
+ job->response.capacity = 16384;
- char full_url[1024];
- char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
- if (!encoded_query) {
- curl_easy_cleanup(job->handle);
- job->handle = NULL;
- continue;
- }
+ char full_url[1024];
+ char *encoded_query = curl_easy_escape(job->handle, job->query, 0);
+ if (!encoded_query) {
+ curl_easy_cleanup(job->handle);
+ job->handle = NULL;
+ continue;
+ }
- int page = (job->page < 1) ? 1 : job->page;
- int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base;
+ int page = (job->page < 1) ? 1 : job->page;
+ int page_value = (page - 1) * job->engine->page_multiplier + job->engine->page_base;
- snprintf(full_url, sizeof(full_url), "%s%s&%s=%d",
- job->engine->base_url,
- encoded_query,
- job->engine->page_param,
- page_value);
- curl_free(encoded_query);
+ snprintf(full_url, sizeof(full_url), "%s%s&%s=%d",
+ job->engine->base_url,
+ encoded_query,
+ job->engine->page_param,
+ page_value);
+ curl_free(encoded_query);
- struct curl_slist *headers = NULL;
- char host_buf[256], ref_buf[256];
- snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
- snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
- headers = curl_slist_append(headers, host_buf);
- headers = curl_slist_append(headers, ref_buf);
- headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
- headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
- headers = curl_slist_append(headers, "DNT: 1");
+ struct curl_slist *headers = NULL;
+ char host_buf[256], ref_buf[256];
+ snprintf(host_buf, sizeof(host_buf), "Host: %s", job->engine->host_header);
+ snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", job->engine->referer);
+ headers = curl_slist_append(headers, host_buf);
+ headers = curl_slist_append(headers, ref_buf);
+ headers = curl_slist_append(headers, "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+ headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
+ headers = curl_slist_append(headers, "DNT: 1");
- configure_curl_handle(job->handle, full_url, &job->response, headers);
+ configure_curl_handle(job->handle, full_url, &job->response, headers);
- curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
+ curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
- curl_multi_add_handle(multi_handle, job->handle);
+ curl_multi_add_handle(multi_handle, job->handle);
}
usleep(100000 + (rand() % 100000));
@@ -404,86 +404,86 @@ retry:
curl_multi_perform(multi_handle, &still_running);
do {
- int numfds = 0;
- CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
+ int numfds = 0;
+ CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
- if (mc != CURLM_OK) {
- break;
- }
+ if (mc != CURLM_OK) {
+ break;
+ }
- curl_multi_perform(multi_handle, &still_running);
+ curl_multi_perform(multi_handle, &still_running);
} while (still_running);
CURLMsg *msg;
int msgs_left;
while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
- if (msg->msg == CURLMSG_DONE) {
- CURL *handle = msg->easy_handle;
-
- for (int i = 0; i < num_jobs; i++) {
- if (jobs[i].handle && jobs[i].handle == handle) {
- ScrapeJob *job = &jobs[i];
-
- long response_code;
- curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
-
- if (msg->data.result == CURLE_OK && job->response.size > 0) {
- xmlDocPtr doc = htmlReadMemory(
- job->response.memory, job->response.size, NULL, NULL,
- HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
-
- if (doc) {
- job->results_count = job->engine->parser(
- job->engine->name, doc, job->out_results, job->max_results);
- xmlFreeDoc(doc);
- }
- } else {
- job->results_count = 0;
- }
-
- struct curl_slist *headers;
- curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
- if (headers) curl_slist_free_all(headers);
-
- free(job->response.memory);
- job->response.memory = NULL;
- curl_multi_remove_handle(multi_handle, handle);
- if (handle) curl_easy_cleanup(handle);
- job->handle = NULL;
- break;
- }
+ if (msg->msg == CURLMSG_DONE) {
+ CURL *handle = msg->easy_handle;
+
+ for (int i = 0; i < num_jobs; i++) {
+ if (jobs[i].handle && jobs[i].handle == handle) {
+ ScrapeJob *job = &jobs[i];
+
+ long response_code;
+ curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code);
+
+ if (msg->data.result == CURLE_OK && job->response.size > 0) {
+ xmlDocPtr doc = htmlReadMemory(
+ job->response.memory, job->response.size, NULL, NULL,
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+
+ if (doc) {
+ job->results_count = job->engine->parser(
+ job->engine->name, doc, job->out_results, job->max_results);
+ xmlFreeDoc(doc);
}
+ } else {
+ job->results_count = 0;
+ }
+
+ struct curl_slist *headers;
+ curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
+ if (headers) curl_slist_free_all(headers);
+
+ free(job->response.memory);
+ job->response.memory = NULL;
+ curl_multi_remove_handle(multi_handle, handle);
+ if (handle) curl_easy_cleanup(handle);
+ job->handle = NULL;
+ break;
+ }
}
}
+ }
curl_multi_cleanup(multi_handle);
if (retries < max_proxy_retries && proxy_count > 0) {
- int any_failed = 0;
- for (int i = 0; i < num_jobs; i++) {
- if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
- any_failed = 1;
- break;
- }
- }
- if (any_failed) {
- retries++;
- goto retry;
+ int any_failed = 0;
+ for (int i = 0; i < num_jobs; i++) {
+ if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
+ any_failed = 1;
+ break;
}
}
+ if (any_failed) {
+ retries++;
+ goto retry;
+ }
+ }
return 0;
}
int scrape_engine(const SearchEngine *engine, const char *query,
- SearchResult **out_results, int max_results) {
+ SearchResult **out_results, int max_results) {
ScrapeJob job = {
- .engine = engine,
- .query = (char *)query,
- .out_results = out_results,
- .max_results = max_results,
- .results_count = 0,
- .page = 1
+ .engine = engine,
+ .query = (char *)query,
+ .out_results = out_results,
+ .max_results = max_results,
+ .results_count = 0,
+ .page = 1
};
scrape_engines_parallel(&job, 1);
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
index 0865825..e33f529 100644
--- a/src/Scraping/Scraping.h
+++ b/src/Scraping/Scraping.h
@@ -11,7 +11,7 @@ typedef struct {
} SearchResult;
typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
- SearchResult **out_results, int max_results);
+ SearchResult **out_results, int max_results);
typedef struct {
const char *name;
@@ -20,8 +20,8 @@ typedef struct {
const char *referer;
const char *page_param;
- int page_multiplier;
- int page_base;
+ int page_multiplier;
+ int page_base;
ParserFunc parser;
} SearchEngine;
@@ -36,7 +36,7 @@ typedef struct {
char *query;
SearchResult **out_results;
int max_results;
- int page;
+ int page;
CURL *handle;
MemoryBuffer response;
int results_count;
@@ -46,7 +46,7 @@ extern const SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT;
int scrape_engine(const SearchEngine *engine, const char *query,
- SearchResult **out_results, int max_results);
+ SearchResult **out_results, int max_results);
int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);
diff --git a/src/Utility/Display.c b/src/Utility/Display.c
index 492e998..9737757 100644
--- a/src/Utility/Display.c
+++ b/src/Utility/Display.c
@@ -5,42 +5,42 @@
#include <strings.h>
char *pretty_display_url(const char *input) {
- if (!input) return NULL;
-
- const char *start = input;
-
- const char *protocol_pos = strstr(input, "://");
- if (protocol_pos) {
- start = protocol_pos + 3;
- }
-
- if (strncasecmp(start, "www.", 4) == 0) {
- start += 4;
- }
-
- size_t input_len = strlen(start);
- char temp[512];
- strncpy(temp, start, sizeof(temp) - 1);
- temp[sizeof(temp) - 1] = '\0';
-
- if (input_len > 0 && temp[input_len - 1] == '/') {
- temp[input_len - 1] = '\0';
- }
-
- char *output = (char *)malloc(strlen(temp) * 3 + 1);
- if (!output) return NULL;
-
- size_t j = 0;
- for (size_t i = 0; temp[i] != '\0'; i++) {
- if (temp[i] == '/') {
- output[j++] = ' ';
- output[j++] = '>';
- output[j++] = ' ';
- } else {
- output[j++] = (char)tolower((unsigned char)temp[i]);
- }
+ if (!input) return NULL;
+
+ const char *start = input;
+
+ const char *protocol_pos = strstr(input, "://");
+ if (protocol_pos) {
+ start = protocol_pos + 3;
+ }
+
+ if (strncasecmp(start, "www.", 4) == 0) {
+ start += 4;
+ }
+
+ size_t input_len = strlen(start);
+ char temp[512];
+ strncpy(temp, start, sizeof(temp) - 1);
+ temp[sizeof(temp) - 1] = '\0';
+
+ if (input_len > 0 && temp[input_len - 1] == '/') {
+ temp[input_len - 1] = '\0';
+ }
+
+ char *output = (char *)malloc(strlen(temp) * 3 + 1);
+ if (!output) return NULL;
+
+ size_t j = 0;
+ for (size_t i = 0; temp[i] != '\0'; i++) {
+ if (temp[i] == '/') {
+ output[j++] = ' ';
+ output[j++] = '>';
+ output[j++] = ' ';
+ } else {
+ output[j++] = (char)tolower((unsigned char)temp[i]);
}
- output[j] = '\0';
+ }
+ output[j] = '\0';
- return output;
+ return output;
}
diff --git a/src/Utility/Utility.c b/src/Utility/Utility.c
index 8e5af92..07fa1da 100644
--- a/src/Utility/Utility.c
+++ b/src/Utility/Utility.c
@@ -1,8 +1,8 @@
#include "Utility.h"
int hex_to_int(char c) {
- if (c >= '0' && c <= '9') return c - '0';
- if (c >= 'a' && c <= 'f') return c - 'a' + 10;
- if (c >= 'A' && c <= 'F') return c - 'A' + 10;
- return -1;
+ if (c >= '0' && c <= '9') return c - '0';
+ if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+ if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+ return -1;
}