refactor: put HTTP and XML logic into reusable modules

author: frosty <gabriel@bwaaa.monster> 2026-03-12 18:05:09 -0400
committer: frosty <gabriel@bwaaa.monster> 2026-03-12 18:05:09 -0400
commit: 0d65dcd24c8090dcc719be599cd3ef4dc2220e9b (patch)
tree: 4fc3eaf09d7a41b6b96ccee9637b2e8bdff77f6c /src/Infobox
parent: c802a4784ab70e0a7512dac0419727fdefacd75c (diff)
download: omnisearch-0d65dcd24c8090dcc719be599cd3ef4dc2220e9b.tar.gz
2 files changed, 35 insertions, 161 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c
index 768c2c6..58d0dfa 100644
--- a/src/Infobox/Dictionary.c
+++ b/src/Infobox/Dictionary.c
@@ -1,7 +1,8 @@
 #include "Dictionary.h"
 #include "../Cache/Cache.h"
-#include "../Proxy/Proxy.h"
 #include "../Scraping/Scraping.h"
+#include "../Utility/HttpClient.h"
+#include "../Utility/XmlHelper.h"
 #include <ctype.h>
 #include <curl/curl.h>
 #include <libxml/HTMLparser.h>
@@ -52,44 +53,6 @@ static const char *strcasestr_impl(const char *haystack, const char *needle) {
   return NULL;
 }
 
-struct MemStruct {
-  char *memory;
-  size_t size;
-};
-
-static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
-                            void *userp) {
-  size_t realsize = size * nmemb;
-  struct MemStruct *mem = (struct MemStruct *)userp;
-  char *ptr = realloc(mem->memory, mem->size + realsize + 1);
-  if (!ptr)
-    return 0;
-  mem->memory = ptr;
-  memcpy(&(mem->memory[mem->size]), contents, realsize);
-  mem->size += realsize;
-  mem->memory[mem->size] = 0;
-  return realsize;
-}
-
-static char *xpath_text(xmlDocPtr doc, const char *xpath) {
-  xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
-  if (!ctx)
-    return NULL;
-  xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
-  xmlXPathFreeContext(ctx);
-  if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
-    if (obj)
-      xmlXPathFreeObject(obj);
-    return NULL;
-  }
-  xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
-  char *result = content ? strdup((char *)content) : NULL;
-  if (content)
-    xmlFree(content);
-  xmlXPathFreeObject(obj);
-  return result;
-}
-
 static char *build_html(const char *word, const char *pron, const char *pos,
                         const char *def, const char *ex) {
   char html[4096];
@@ -240,13 +203,7 @@ char *construct_dictionary_url(const char *query) {
   if (!word)
     return NULL;
 
-  CURL *curl = curl_easy_init();
-  if (!curl) {
-    free(word);
-    return NULL;
-  }
-
-  char *escaped = curl_easy_escape(curl, word, 0);
+  char *escaped = curl_easy_escape(NULL, word, 0);
   const char *base = "https://dictionary.cambridge.org/dictionary/english/";
   char *url = malloc(strlen(base) + strlen(escaped) + 1);
   if (url) {
@@ -255,7 +212,6 @@ char *construct_dictionary_url(const char *query) {
   }
 
   curl_free(escaped);
-  curl_easy_cleanup(curl);
   free(word);
   return url;
 }
@@ -309,28 +265,15 @@ InfoBox fetch_dictionary_data(const char *query) {
   }
   free(cache_key);
 
-  CURL *curl = curl_easy_init();
-  if (!curl) {
-    free(url);
-    return info;
-  }
-
-  struct MemStruct chunk = {malloc(1), 0};
-  curl_easy_setopt(curl, CURLOPT_URL, url);
-  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
-  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
-  curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
-  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-  apply_proxy_settings(curl);
-
-  if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
+  HttpResponse resp = http_get(url, "Mozilla/5.0");
+  if (resp.memory && resp.size > 0) {
     cache_key = cache_compute_key(url, 0, "dictionary");
     if (cache_key && get_cache_ttl_infobox() > 0) {
-      cache_set(cache_key, chunk.memory, chunk.size);
+      cache_set(cache_key, resp.memory, resp.size);
     }
     free(cache_key);
 
-    htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
+    htmlDocPtr doc = htmlReadMemory(resp.memory, resp.size, url, NULL,
                                     HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |
                                         HTML_PARSE_NOWARNING);
     if (doc) {
@@ -358,8 +301,7 @@ InfoBox fetch_dictionary_data(const char *query) {
     }
   }
 
-  curl_easy_cleanup(curl);
-  free(chunk.memory);
+  http_response_free(&resp);
   free(url);
   return info;
 }
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
index ca7238d..b29b678 100644
--- a/src/Infobox/Wikipedia.c
+++ b/src/Infobox/Wikipedia.c
@@ -1,7 +1,7 @@
 #include "Wikipedia.h"
 #include "../Cache/Cache.h"
-#include "../Proxy/Proxy.h"
 #include "../Scraping/Scraping.h"
+#include "../Utility/HttpClient.h"
 #include <curl/curl.h>
 #include <libxml/parser.h>
 #include <libxml/tree.h>
@@ -9,11 +9,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-struct WikiMemoryStruct {
-  char *memory;
-  size_t size;
-};
-
 static void shorten_summary(char **extract_ptr, int max_chars) {
   if (!extract_ptr || !*extract_ptr)
     return;
@@ -43,25 +38,6 @@ static void shorten_summary(char **extract_ptr, int max_chars) {
   }
 }
 
-static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
-                                      void *userp) {
-  size_t realsize = size * nmemb;
-  struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
-
-  char *ptr = realloc(mem->memory, mem->size + realsize + 1);
-  if (ptr == NULL) {
-    fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
-    return 0;
-  }
-
-  mem->memory = ptr;
-  memcpy(&(mem->memory[mem->size]), contents, realsize);
-  mem->size += realsize;
-  mem->memory[mem->size] = 0;
-
-  return realsize;
-}
-
 static void extract_wiki_info(xmlNode *node, InfoBox *info) {
   xmlNode *cur_node = NULL;
 
@@ -113,9 +89,6 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
 }
 
 InfoBox fetch_wiki_data(char *api_url) {
-  CURL *curl_handle;
-  CURLcode res;
-  struct WikiMemoryStruct chunk;
   InfoBox info = {NULL, NULL, NULL, NULL};
 
   if (!api_url) {
@@ -144,47 +117,31 @@ InfoBox fetch_wiki_data(char *api_url) {
   }
   free(cache_key);
 
-  chunk.memory = malloc(1);
-  chunk.size = 0;
-
-  curl_handle = curl_easy_init();
-
-  if (curl_handle) {
-    curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
-    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
-                     WikiWriteMemoryCallback);
-    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
-    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
-    apply_proxy_settings(curl_handle);
-
-    res = curl_easy_perform(curl_handle);
-
-    if (res == CURLE_OK && chunk.size > 0) {
-      cache_key = cache_compute_key(api_url, 0, "wikipedia");
-      if (cache_key && get_cache_ttl_infobox() > 0) {
-        cache_set(cache_key, chunk.memory, chunk.size);
-      }
-      free(cache_key);
-
-      xmlDocPtr doc =
-          xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
-      if (doc != NULL) {
-        xmlNode *root_element = xmlDocGetRootElement(doc);
-        extract_wiki_info(root_element, &info);
-        xmlFreeDoc(doc);
-      }
+  HttpResponse resp = http_get(api_url, "libcurl-agent/1.0");
+  if (resp.memory && resp.size > 0) {
+    cache_key = cache_compute_key(api_url, 0, "wikipedia");
+    if (cache_key && get_cache_ttl_infobox() > 0) {
+      cache_set(cache_key, resp.memory, resp.size);
     }
+    free(cache_key);
 
-    curl_easy_cleanup(curl_handle);
-    free(chunk.memory);
+    xmlDocPtr doc =
+        xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0);
+    if (doc != NULL) {
+      xmlNode *root_element = xmlDocGetRootElement(doc);
+      extract_wiki_info(root_element, &info);
+      xmlFreeDoc(doc);
+    }
   }
 
+  http_response_free(&resp);
   return info;
 }
 
 static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) {
   for (xmlNode *cur = node; cur; cur = cur->next) {
-    if (cur->type == XML_ELEMENT_NODE && strcmp((const char *)cur->name, target_name) == 0) {
+    if (cur->type == XML_ELEMENT_NODE &&
+        strcmp((const char *)cur->name, target_name) == 0) {
       return cur;
     }
     xmlNode *found = find_node_recursive(cur->children, target_name);
@@ -195,21 +152,15 @@ static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) {
 }
 
 static char *get_first_search_result(const char *search_term) {
-  CURL *curl = curl_easy_init();
-  if (!curl)
-    return NULL;
-
-  char *escaped_term = curl_easy_escape(curl, search_term, 0);
+  char *escaped_term = curl_easy_escape(NULL, search_term, 0);
   const char *search_base =
       "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=";
-  const char *search_suffix =
-      "&format=xml&origin=*&srlimit=1";
+  const char *search_suffix = "&format=xml&origin=*&srlimit=1";
 
   char *search_url = malloc(strlen(search_base) + strlen(escaped_term) +
-                           strlen(search_suffix) + 1);
+                            strlen(search_suffix) + 1);
   if (!search_url) {
     curl_free(escaped_term);
-    curl_easy_cleanup(curl);
     return NULL;
   }
 
@@ -219,22 +170,13 @@ static char *get_first_search_result(const char *search_term) {
 
   curl_free(escaped_term);
 
-  struct WikiMemoryStruct chunk = {malloc(1), 0};
-  if (!chunk.memory) {
-    free(search_url);
-    curl_easy_cleanup(curl);
-    return NULL;
-  }
-
-  curl_easy_setopt(curl, CURLOPT_URL, search_url);
-  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WikiWriteMemoryCallback);
-  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
-  curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
-  apply_proxy_settings(curl);
+  HttpResponse resp = http_get(search_url, "libcurl-agent/1.0");
+  free(search_url);
 
   char *first_title = NULL;
-  if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
-    xmlDocPtr doc = xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
+  if (resp.memory && resp.size > 0) {
+    xmlDocPtr doc =
+        xmlReadMemory(resp.memory, resp.size, "noname.xml", NULL, 0);
     if (doc) {
       xmlNode *root = xmlDocGetRootElement(doc);
       xmlNode *search_node = find_node_recursive(root, "search");
@@ -255,10 +197,7 @@ static char *get_first_search_result(const char *search_term) {
     }
   }
 
-  free(chunk.memory);
-  free(search_url);
-  curl_easy_cleanup(curl);
-
+  http_response_free(&resp);
   return first_title;
 }
 
@@ -267,13 +206,7 @@ char *construct_wiki_url(const char *search_term) {
   if (!first_title)
     return NULL;
 
-  CURL *curl = curl_easy_init();
-  if (!curl) {
-    free(first_title);
-    return NULL;
-  }
-
-  char *escaped_title = curl_easy_escape(curl, first_title, 0);
+  char *escaped_title = curl_easy_escape(NULL, first_title, 0);
   const char *base = "https://en.wikipedia.org/w/"
                      "api.php?action=query&prop=extracts|pageimages&exintro&"
                      "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
@@ -285,7 +218,6 @@ char *construct_wiki_url(const char *search_term) {
   }
 
   curl_free(escaped_title);
-  curl_easy_cleanup(curl);
   free(first_title);
   return full_url;
 }
author	frosty <gabriel@bwaaa.monster>	2026-03-12 18:05:09 -0400
committer	frosty <gabriel@bwaaa.monster>	2026-03-12 18:05:09 -0400
commit	0d65dcd24c8090dcc719be599cd3ef4dc2220e9b (patch)
tree	4fc3eaf09d7a41b6b96ccee9637b2e8bdff77f6c /src/Infobox
parent	c802a4784ab70e0a7512dac0419727fdefacd75c (diff)
download	omnisearch-0d65dcd24c8090dcc719be599cd3ef4dc2220e9b.tar.gz