about | summary | refs | log | tree | commit | diff
path: root/src/Infobox/Wikipedia.c
diff options
context:
space:
mode:
author: frosty <gabriel@bwaaa.monster> 2026-03-12 18:05:09 -0400
committer: frosty <gabriel@bwaaa.monster> 2026-03-12 18:05:09 -0400
commit: 0d65dcd24c8090dcc719be599cd3ef4dc2220e9b (patch)
tree: 4fc3eaf09d7a41b6b96ccee9637b2e8bdff77f6c /src/Infobox/Wikipedia.c
parent: c802a4784ab70e0a7512dac0419727fdefacd75c (diff)
download: omnisearch-0d65dcd24c8090dcc719be599cd3ef4dc2220e9b.tar.gz
refactor: put HTTP and XML logic into reusable modules
Diffstat (limited to 'src/Infobox/Wikipedia.c')
-rw-r--r-- src/Infobox/Wikipedia.c 122
1 files changed, 27 insertions, 95 deletions
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
index ca7238d..b29b678 100644
--- a/src/Infobox/Wikipedia.c
+++ b/src/Infobox/Wikipedia.c
@@ -1,7 +1,7 @@
#include "Wikipedia.h"
#include "../Cache/Cache.h"
-#include "../Proxy/Proxy.h"
#include "../Scraping/Scraping.h"
+#include "../Utility/HttpClient.h"
#include <curl/curl.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
@@ -9,11 +9,6 @@
#include <stdlib.h>
#include <string.h>
-struct WikiMemoryStruct {
- char *memory;
- size_t size;
-};
-
static void shorten_summary(char **extract_ptr, int max_chars) {
if (!extract_ptr || !*extract_ptr)
return;
@@ -43,25 +38,6 @@ static void shorten_summary(char **extract_ptr, int max_chars) {
}
}
-static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
- void *userp) {
- size_t realsize = size * nmemb;
- struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
-
- char *ptr = realloc(mem->memory, mem->size + realsize + 1);
- if (ptr == NULL) {
- fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
- return 0;
- }
-
- mem->memory = ptr;
- memcpy(&(mem->memory[mem->size]), contents, realsize);
- mem->size += realsize;
- mem->memory[mem->size] = 0;
-
- return realsize;
-}
-
static void extract_wiki_info(xmlNode *node, InfoBox *info) {
xmlNode *cur_node = NULL;
@@ -113,9 +89,6 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
}
InfoBox fetch_wiki_data(char *api_url) {
- CURL *curl_handle;
- CURLcode res;
- struct WikiMemoryStruct chunk;
InfoBox info = {NULL, NULL, NULL, NULL};
if (!api_url) {
@@ -144,47 +117,31 @@ InfoBox fetch_wiki_data(char *api_url) {
}
free(cache_key);
- chunk.memory = malloc(1);
- chunk.size = 0;
-
- curl_handle = curl_easy_init();
-
- if (curl_handle) {
- curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
- curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
- WikiWriteMemoryCallback);
- curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
- curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
- apply_proxy_settings(curl_handle);
-
- res = curl_easy_perform(curl_handle);
-
- if (res == CURLE_OK && chunk.size > 0) {
- cache_key = cache_compute_key(api_url, 0, "wikipedia");
- if (cache_key && get_cache_ttl_infobox() > 0) {
- cache_set(cache_key, chunk.memory, chunk.size);
- }
- free(cache_key);
-
- xmlDocPtr doc =
- xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
- if (doc != NULL) {
- xmlNode *root_element = xmlDocGetRootElement(doc);
- extract_wiki_info(root_element, &info);
- xmlFreeDoc(doc);
- }
+ HttpResponse resp = http_get(api_url, "libcurl-agent/1.0");
+ if (resp.memory && resp.size > 0) {
+ cache_key = cache_compute_key(api_url, 0, "wikipedia");
+ if (cache_key && get_cache_ttl_infobox() > 0) {
+ cache_set(cache_key, resp.memory, resp.size);
}
+ free(cache_key);
- curl_easy_cleanup(curl_handle);
- free(chunk.memory);
+ xmlDocPtr doc =
+ xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0);
+ if (doc != NULL) {
+ xmlNode *root_element = xmlDocGetRootElement(doc);
+ extract_wiki_info(root_element, &info);
+ xmlFreeDoc(doc);
+ }
}
+ http_response_free(&resp);
return info;
}
static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) {
for (xmlNode *cur = node; cur; cur = cur->next) {
- if (cur->type == XML_ELEMENT_NODE && strcmp((const char *)cur->name, target_name) == 0) {
+ if (cur->type == XML_ELEMENT_NODE &&
+ strcmp((const char *)cur->name, target_name) == 0) {
return cur;
}
xmlNode *found = find_node_recursive(cur->children, target_name);
@@ -195,21 +152,15 @@ static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) {
}
static char *get_first_search_result(const char *search_term) {
- CURL *curl = curl_easy_init();
- if (!curl)
- return NULL;
-
- char *escaped_term = curl_easy_escape(curl, search_term, 0);
+ char *escaped_term = curl_easy_escape(NULL, search_term, 0);
const char *search_base =
"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=";
- const char *search_suffix =
- "&format=xml&origin=*&srlimit=1";
+ const char *search_suffix = "&format=xml&origin=*&srlimit=1";
char *search_url = malloc(strlen(search_base) + strlen(escaped_term) +
- strlen(search_suffix) + 1);
+ strlen(search_suffix) + 1);
if (!search_url) {
curl_free(escaped_term);
- curl_easy_cleanup(curl);
return NULL;
}
@@ -219,22 +170,13 @@ static char *get_first_search_result(const char *search_term) {
curl_free(escaped_term);
- struct WikiMemoryStruct chunk = {malloc(1), 0};
- if (!chunk.memory) {
- free(search_url);
- curl_easy_cleanup(curl);
- return NULL;
- }
-
- curl_easy_setopt(curl, CURLOPT_URL, search_url);
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WikiWriteMemoryCallback);
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
- curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
- apply_proxy_settings(curl);
+ HttpResponse resp = http_get(search_url, "libcurl-agent/1.0");
+ free(search_url);
char *first_title = NULL;
- if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
- xmlDocPtr doc = xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
+ if (resp.memory && resp.size > 0) {
+ xmlDocPtr doc =
+ xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0);
if (doc) {
xmlNode *root = xmlDocGetRootElement(doc);
xmlNode *search_node = find_node_recursive(root, "search");
@@ -255,10 +197,7 @@ static char *get_first_search_result(const char *search_term) {
}
}
- free(chunk.memory);
- free(search_url);
- curl_easy_cleanup(curl);
-
+ http_response_free(&resp);
return first_title;
}
@@ -267,13 +206,7 @@ char *construct_wiki_url(const char *search_term) {
if (!first_title)
return NULL;
- CURL *curl = curl_easy_init();
- if (!curl) {
- free(first_title);
- return NULL;
- }
-
- char *escaped_title = curl_easy_escape(curl, first_title, 0);
+ char *escaped_title = curl_easy_escape(NULL, first_title, 0);
const char *base = "https://en.wikipedia.org/w/"
"api.php?action=query&prop=extracts|pageimages&exintro&"
"explaintext&pithumbsize=400&format=xml&origin=*&titles=";
@@ -285,7 +218,6 @@ char *construct_wiki_url(const char *search_term) {
}
curl_free(escaped_title);
- curl_easy_cleanup(curl);
free(first_title);
return full_url;
}