aboutsummaryrefslogtreecommitdiff
path: root/src/Infobox/Wikipedia.c
diff options
context:
space:
mode:
authorfrosty <frosty@illegalfirearms.store>2026-01-06 23:46:24 -0500
committerfrosty <frosty@illegalfirearms.store>2026-01-06 23:46:24 -0500
commitf3aa7ca0bc2ef7c286609e8f87d07cc2568093af (patch)
tree269352af1238b4dd7c3e2e023f71a27b858cdb34 /src/Infobox/Wikipedia.c
rebase(d)
Diffstat (limited to 'src/Infobox/Wikipedia.c')
-rw-r--r--src/Infobox/Wikipedia.c165
1 files changed, 165 insertions, 0 deletions
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
new file mode 100644
index 0000000..ed4645f
--- /dev/null
+++ b/src/Infobox/Wikipedia.c
@@ -0,0 +1,165 @@
+#include "Wikipedia.h"
+#include <curl/curl.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+struct WikiMemoryStruct {
+ char *memory;
+ size_t size;
+};
+
+static void shorten_summary(char **extract_ptr, int max_chars) {
+ if (!extract_ptr || !*extract_ptr) return;
+
+ char *text = *extract_ptr;
+ int len = strlen(text);
+
+ if (len <= max_chars) return;
+
+ int end_pos = max_chars;
+ for (int i = max_chars; i > (max_chars / 2); i--) {
+ if (text[i] == '.' || text[i] == '!' || text[i] == '?') {
+ end_pos = i + 1;
+ break;
+ }
+ }
+
+ char *new_text = (char *)malloc(end_pos + 4);
+
+ if (new_text) {
+ strncpy(new_text, text, end_pos);
+ new_text[end_pos] = '\0';
+ strcat(new_text, "...");
+ free(*extract_ptr);
+ *extract_ptr = new_text;
+ }
+}
+
+static size_t WikiWriteMemoryCallback(void *contents, size_t size, size_t nmemb,
+ void *userp) {
+ size_t realsize = size * nmemb;
+ struct WikiMemoryStruct *mem = (struct WikiMemoryStruct *)userp;
+
+ char *ptr = realloc(mem->memory, mem->size + realsize + 1);
+ if (ptr == NULL) {
+ fprintf(stderr, "Not enough memory (realloc returned NULL)\n");
+ return 0;
+ }
+
+ mem->memory = ptr;
+ memcpy(&(mem->memory[mem->size]), contents, realsize);
+ mem->size += realsize;
+ mem->memory[mem->size] = 0;
+
+ return realsize;
+}
+
+static void extract_wiki_info(xmlNode *node, InfoBox *info) {
+ xmlNode *cur_node = NULL;
+
+ for (cur_node = node; cur_node; cur_node = cur_node->next) {
+ if (cur_node->type == XML_ELEMENT_NODE) {
+ if (strcmp((const char *)cur_node->name, "page") == 0) {
+ xmlChar *title = xmlGetProp(cur_node, (const xmlChar *)"title");
+ if (title) {
+ info->title = strdup((const char *)title);
+
+ const char *base_article_url = "https://en.wikipedia.org/wiki/";
+ char *formatted_title = strdup((const char *)title);
+ for (int i = 0; formatted_title[i]; i++) {
+ if (formatted_title[i] == ' ') formatted_title[i] = '_';
+ }
+
+ info->url =
+ malloc(strlen(base_article_url) + strlen(formatted_title) + 1);
+ if (info->url) {
+ strcpy(info->url, base_article_url);
+ strcat(info->url, formatted_title);
+ }
+ free(formatted_title);
+ xmlFree(title);
+ }
+ }
+
+ if (strcmp((const char *)cur_node->name, "thumbnail") == 0) {
+ xmlChar *source = xmlGetProp(cur_node, (const xmlChar *)"source");
+ if (source) {
+ info->thumbnail_url = strdup((const char *)source);
+ xmlFree(source);
+ }
+ }
+
+ if (strcmp((const char *)cur_node->name, "extract") == 0) {
+ xmlChar *content = xmlNodeGetContent(cur_node);
+ if (content) {
+ info->extract = strdup((const char *)content);
+
+ shorten_summary(&(info->extract), 300);
+ xmlFree(content);
+ }
+ }
+ }
+ extract_wiki_info(cur_node->children, info);
+ }
+}
+
+InfoBox fetch_wiki_data(char *api_url) {
+ CURL *curl_handle;
+ CURLcode res;
+ struct WikiMemoryStruct chunk;
+ InfoBox info = {NULL, NULL, NULL, NULL};
+
+ chunk.memory = malloc(1);
+ chunk.size = 0;
+
+ curl_handle = curl_easy_init();
+
+ if (curl_handle) {
+ curl_easy_setopt(curl_handle, CURLOPT_URL, api_url);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION,
+ WikiWriteMemoryCallback);
+ curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
+ curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
+
+ res = curl_easy_perform(curl_handle);
+
+ if (res == CURLE_OK) {
+ xmlDocPtr doc =
+ xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
+ if (doc != NULL) {
+ xmlNode *root_element = xmlDocGetRootElement(doc);
+ extract_wiki_info(root_element, &info);
+ xmlFreeDoc(doc);
+ }
+ }
+
+ curl_easy_cleanup(curl_handle);
+ free(chunk.memory);
+ }
+
+ return info;
+}
+
+char *construct_wiki_url(const char *search_term) {
+ CURL *curl = curl_easy_init();
+ if (!curl) return NULL;
+
+ char *escaped_term = curl_easy_escape(curl, search_term, 0);
+ const char *base =
+ "https://en.wikipedia.org/w/"
+ "api.php?action=query&prop=extracts|pageimages&exintro&"
+ "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
+
+ char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
+ if (full_url) {
+ strcpy(full_url, base);
+ strcat(full_url, escaped_term);
+ }
+
+ curl_free(escaped_term);
+ curl_easy_cleanup(curl);
+ return full_url;
+}