diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-03-12 18:05:09 -0400 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-03-12 18:05:09 -0400 |
| commit | 0d65dcd24c8090dcc719be599cd3ef4dc2220e9b (patch) | |
| tree | 4fc3eaf09d7a41b6b96ccee9637b2e8bdff77f6c /src/Infobox | |
| parent | c802a4784ab70e0a7512dac0419727fdefacd75c (diff) | |
| download | omnisearch-0d65dcd24c8090dcc719be599cd3ef4dc2220e9b.tar.gz | |
refactor: put HTTP and XML logic into reusable modules
Diffstat (limited to 'src/Infobox')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/Infobox/Dictionary.c | 74 |
| -rw-r--r-- | src/Infobox/Wikipedia.c | 122 |
2 files changed, 35 insertions, 161 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c index 768c2c6..58d0dfa 100644 --- a/src/Infobox/Dictionary.c +++ b/src/Infobox/Dictionary.c @@ -1,7 +1,8 @@ #include "Dictionary.h" #include "../Cache/Cache.h" -#include "../Proxy/Proxy.h" #include "../Scraping/Scraping.h" +#include "../Utility/HttpClient.h" +#include "../Utility/XmlHelper.h" #include <ctype.h> #include <curl/curl.h> #include <libxml/HTMLparser.h> @@ -52,44 +53,6 @@ static const char *strcasestr_impl(const char *haystack, const char *needle) { return NULL; } -struct MemStruct { - char *memory; - size_t size; -}; - -static size_t WriteCallback(void *contents, size_t size, size_t nmemb, - void *userp) { - size_t realsize = size * nmemb; - struct MemStruct *mem = (struct MemStruct *)userp; - char *ptr = realloc(mem->memory, mem->size + realsize + 1); - if (!ptr) - return 0; - mem->memory = ptr; - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; - return realsize; -} - -static char *xpath_text(xmlDocPtr doc, const char *xpath) { - xmlXPathContextPtr ctx = xmlXPathNewContext(doc); - if (!ctx) - return NULL; - xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx); - xmlXPathFreeContext(ctx); - if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) { - if (obj) - xmlXPathFreeObject(obj); - return NULL; - } - xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]); - char *result = content ? 
strdup((char *)content) : NULL; - if (content) - xmlFree(content); - xmlXPathFreeObject(obj); - return result; -} - static char *build_html(const char *word, const char *pron, const char *pos, const char *def, const char *ex) { char html[4096]; @@ -240,13 +203,7 @@ char *construct_dictionary_url(const char *query) { if (!word) return NULL; - CURL *curl = curl_easy_init(); - if (!curl) { - free(word); - return NULL; - } - - char *escaped = curl_easy_escape(curl, word, 0); + char *escaped = curl_easy_escape(NULL, word, 0); const char *base = "https://dictionary.cambridge.org/dictionary/english/"; char *url = malloc(strlen(base) + strlen(escaped) + 1); if (url) { @@ -255,7 +212,6 @@ char *construct_dictionary_url(const char *query) { } curl_free(escaped); - curl_easy_cleanup(curl); free(word); return url; } @@ -309,28 +265,15 @@ InfoBox fetch_dictionary_data(const char *query) { } free(cache_key); - CURL *curl = curl_easy_init(); - if (!curl) { - free(url); - return info; - } - - struct MemStruct chunk = {malloc(1), 0}; - curl_easy_setopt(curl, CURLOPT_URL, url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0"); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - apply_proxy_settings(curl); - - if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { + HttpResponse resp = http_get(url, "Mozilla/5.0"); + if (resp.memory && resp.size > 0) { cache_key = cache_compute_key(url, 0, "dictionary"); if (cache_key && get_cache_ttl_infobox() > 0) { - cache_set(cache_key, chunk.memory, chunk.size); + cache_set(cache_key, resp.memory, resp.size); } free(cache_key); - htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, + htmlDocPtr doc = htmlReadMemory(resp.memory, resp.size, url, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); if (doc) { @@ -358,8 +301,7 @@ InfoBox fetch_dictionary_data(const char *query) { } 
} - curl_easy_cleanup(curl); - free(chunk.memory); + http_response_free(&resp); free(url); return info; } diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c index ca7238d..b29b678 100644 --- a/src/Infobox/Wikipedia.c +++ b/src/Infobox/Wikipedia.c @@ -1,7 +1,7 @@ #include "Wikipedia.h" #include "../Cache/Cache.h" -#include "../Proxy/Proxy.h" #include "../Scraping/Scraping.h" +#include "../Utility/HttpClient.h" #include <curl/curl.h> #include <libxml/parser.h> #include <libxml/tree.h> @@ -9,11 +9,6 @@ #include <stdlib.h> #include <string.h> -struct WikiMemoryStruct { - char *memory; - size_t size; -}; - static void shorten_summary(char **extract_ptr, int max_chars) { if (!extract_ptr || !*extract_ptr) return; @@ -43,25 +38,6 @@ static void shorten_summary(char **extract_ptr, int max_chars) { } } -static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb, - void *userp) { - size_t realsize = size * nmemb; - struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp; - - char *ptr = realloc(mem->memory, mem->size + realsize + 1); - if (ptr == NULL) { - fprintf(stderr, "Not enough memory (realloc returned NULL)\n"); - return 0; - } - - mem->memory = ptr; - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; - - return realsize; -} - static void extract_wiki_info(xmlNode *node, InfoBox *info) { xmlNode *cur_node = NULL; @@ -113,9 +89,6 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) { } InfoBox fetch_wiki_data(char *api_url) { - CURL *curl_handle; - CURLcode res; - struct WikiMemoryStruct chunk; InfoBox info = {NULL, NULL, NULL, NULL}; if (!api_url) { @@ -144,47 +117,31 @@ InfoBox fetch_wiki_data(char *api_url) { } free(cache_key); - chunk.memory = malloc(1); - chunk.size = 0; - - curl_handle = curl_easy_init(); - - if (curl_handle) { - curl_easy_setopt(curl_handle, CURLOPT_URL, api_url); - curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, - 
WikiWriteMemoryCallback); - curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk); - curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0"); - apply_proxy_settings(curl_handle); - - res = curl_easy_perform(curl_handle); - - if (res == CURLE_OK && chunk.size > 0) { - cache_key = cache_compute_key(api_url, 0, "wikipedia"); - if (cache_key && get_cache_ttl_infobox() > 0) { - cache_set(cache_key, chunk.memory, chunk.size); - } - free(cache_key); - - xmlDocPtr doc = - xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0); - if (doc != NULL) { - xmlNode *root_element = xmlDocGetRootElement(doc); - extract_wiki_info(root_element, &info); - xmlFreeDoc(doc); - } + HttpResponse resp = http_get(api_url, "libcurl-agent/1.0"); + if (resp.memory && resp.size > 0) { + cache_key = cache_compute_key(api_url, 0, "wikipedia"); + if (cache_key && get_cache_ttl_infobox() > 0) { + cache_set(cache_key, resp.memory, resp.size); } + free(cache_key); - curl_easy_cleanup(curl_handle); - free(chunk.memory); + xmlDocPtr doc = + xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0); + if (doc != NULL) { + xmlNode *root_element = xmlDocGetRootElement(doc); + extract_wiki_info(root_element, &info); + xmlFreeDoc(doc); + } } + http_response_free(&resp); return info; } static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) { for (xmlNode *cur = node; cur; cur = cur->next) { - if (cur->type == XML_ELEMENT_NODE && strcmp((const char *)cur->name, target_name) == 0) { + if (cur->type == XML_ELEMENT_NODE && + strcmp((const char *)cur->name, target_name) == 0) { return cur; } xmlNode *found = find_node_recursive(cur->children, target_name); @@ -195,21 +152,15 @@ static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) { } static char *get_first_search_result(const char *search_term) { - CURL *curl = curl_easy_init(); - if (!curl) - return NULL; - - char *escaped_term = curl_easy_escape(curl, search_term, 0); + char 
*escaped_term = curl_easy_escape(NULL, search_term, 0); const char *search_base = "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch="; - const char *search_suffix = - "&format=xml&origin=*&srlimit=1"; + const char *search_suffix = "&format=xml&origin=*&srlimit=1"; char *search_url = malloc(strlen(search_base) + strlen(escaped_term) + - strlen(search_suffix) + 1); + strlen(search_suffix) + 1); if (!search_url) { curl_free(escaped_term); - curl_easy_cleanup(curl); return NULL; } @@ -219,22 +170,13 @@ static char *get_first_search_result(const char *search_term) { curl_free(escaped_term); - struct WikiMemoryStruct chunk = {malloc(1), 0}; - if (!chunk.memory) { - free(search_url); - curl_easy_cleanup(curl); - return NULL; - } - - curl_easy_setopt(curl, CURLOPT_URL, search_url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WikiWriteMemoryCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); - apply_proxy_settings(curl); + HttpResponse resp = http_get(search_url, "libcurl-agent/1.0"); + free(search_url); char *first_title = NULL; - if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { - xmlDocPtr doc = xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0); + if (resp.memory && resp.size > 0) { + xmlDocPtr doc = + xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0); if (doc) { xmlNode *root = xmlDocGetRootElement(doc); xmlNode *search_node = find_node_recursive(root, "search"); @@ -255,10 +197,7 @@ static char *get_first_search_result(const char *search_term) { } } - free(chunk.memory); - free(search_url); - curl_easy_cleanup(curl); - + http_response_free(&resp); return first_title; } @@ -267,13 +206,7 @@ char *construct_wiki_url(const char *search_term) { if (!first_title) return NULL; - CURL *curl = curl_easy_init(); - if (!curl) { - free(first_title); - return NULL; - } - - char *escaped_title = curl_easy_escape(curl, first_title, 0); + char 
*escaped_title = curl_easy_escape(NULL, first_title, 0); const char *base = "https://en.wikipedia.org/w/" "api.php?action=query&prop=extracts|pageimages&exintro&" "explaintext&pithumbsize=400&format=xml&origin=*&titles="; @@ -285,7 +218,6 @@ char *construct_wiki_url(const char *search_term) { } curl_free(escaped_title); - curl_easy_cleanup(curl); free(first_title); return full_url; } |
