diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-03-11 16:59:51 -0400 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-03-11 16:59:51 -0400 |
| commit | c802a4784ab70e0a7512dac0419727fdefacd75c (patch) | |
| tree | 789a70161ddb12faaaba9d2a39ae43e507c7f76d /src | |
| parent | ab7244b0f7c7dd5d268a9b5520d28770988f22e7 (diff) | |
| download | omnisearch-c802a4784ab70e0a7512dac0419727fdefacd75c.tar.gz | |
feature: improved wikipedia infobox by searching first
Diffstat (limited to 'src')
| -rw-r--r-- | src/Infobox/Wikipedia.c | 95 |
1 files changed, 91 insertions, 4 deletions
```diff
diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c
index d22fd89..ca7238d 100644
--- a/src/Infobox/Wikipedia.c
+++ b/src/Infobox/Wikipedia.c
@@ -182,23 +182,110 @@ InfoBox fetch_wiki_data(char *api_url) {
   return info;
 }
 
-char *construct_wiki_url(const char *search_term) {
+static xmlNode *find_node_recursive(xmlNode *node, const char *target_name) {
+  for (xmlNode *cur = node; cur; cur = cur->next) {
+    if (cur->type == XML_ELEMENT_NODE && strcmp((const char *)cur->name, target_name) == 0) {
+      return cur;
+    }
+    xmlNode *found = find_node_recursive(cur->children, target_name);
+    if (found)
+      return found;
+  }
+  return NULL;
+}
+
+static char *get_first_search_result(const char *search_term) {
   CURL *curl = curl_easy_init();
   if (!curl)
     return NULL;
 
   char *escaped_term = curl_easy_escape(curl, search_term, 0);
+  const char *search_base =
+      "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=";
+  const char *search_suffix =
+      "&format=xml&origin=*&srlimit=1";
+
+  char *search_url = malloc(strlen(search_base) + strlen(escaped_term) +
+                            strlen(search_suffix) + 1);
+  if (!search_url) {
+    curl_free(escaped_term);
+    curl_easy_cleanup(curl);
+    return NULL;
+  }
+
+  strcpy(search_url, search_base);
+  strcat(search_url, escaped_term);
+  strcat(search_url, search_suffix);
+
+  curl_free(escaped_term);
+
+  struct WikiMemoryStruct chunk = {malloc(1), 0};
+  if (!chunk.memory) {
+    free(search_url);
+    curl_easy_cleanup(curl);
+    return NULL;
+  }
+
+  curl_easy_setopt(curl, CURLOPT_URL, search_url);
+  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WikiWriteMemoryCallback);
+  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
+  curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
+  apply_proxy_settings(curl);
+
+  char *first_title = NULL;
+  if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
+    xmlDocPtr doc = xmlReadMemory(chunk.memory, chunk.size, "noname.xml", NULL, 0);
+    if (doc) {
+      xmlNode *root = xmlDocGetRootElement(doc);
+      xmlNode *search_node = find_node_recursive(root, "search");
+      if (search_node) {
+        for (xmlNode *sr = search_node->children; sr; sr = sr->next) {
+          if (sr->type == XML_ELEMENT_NODE &&
+              strcmp((const char *)sr->name, "p") == 0) {
+            xmlChar *title = xmlGetProp(sr, (const xmlChar *)"title");
+            if (title) {
+              first_title = strdup((const char *)title);
+              xmlFree(title);
+              break;
+            }
+          }
+        }
+      }
+      xmlFreeDoc(doc);
+    }
+  }
+
+  free(chunk.memory);
+  free(search_url);
+  curl_easy_cleanup(curl);
+
+  return first_title;
+}
+
+char *construct_wiki_url(const char *search_term) {
+  char *first_title = get_first_search_result(search_term);
+  if (!first_title)
+    return NULL;
+
+  CURL *curl = curl_easy_init();
+  if (!curl) {
+    free(first_title);
+    return NULL;
+  }
+
+  char *escaped_title = curl_easy_escape(curl, first_title, 0);
   const char *base = "https://en.wikipedia.org/w/"
                      "api.php?action=query&prop=extracts|pageimages&exintro&"
                      "explaintext&pithumbsize=400&format=xml&origin=*&titles=";
 
-  char *full_url = malloc(strlen(base) + strlen(escaped_term) + 1);
+  char *full_url = malloc(strlen(base) + strlen(escaped_title) + 1);
   if (full_url) {
     strcpy(full_url, base);
-    strcat(full_url, escaped_term);
+    strcat(full_url, escaped_title);
   }
 
-  curl_free(escaped_term);
+  curl_free(escaped_title);
   curl_easy_cleanup(curl);
+  free(first_title);
   return full_url;
 }
```
