about summary refs log tree commit diff
path: root/src/Infobox/Dictionary.c
diff options
context:
space:
mode:
author frosty <gabriel@bwaaa.monster> 2026-03-10 02:32:51 -0400
committer frosty <gabriel@bwaaa.monster> 2026-03-10 02:32:51 -0400
commit a11bf8bb6c1baaef51c25d441f5348567280967b (patch)
tree f7ae4435e157dac9b024af0611fbdfa3ab8870e8 /src/Infobox/Dictionary.c
parent 24cec7a35072ac50e1fe902a925e69330866dbca (diff)
download omnisearch-a11bf8bb6c1baaef51c25d441f5348567280967b.tar.gz
those who commit (HEAD, master)
Diffstat (limited to 'src/Infobox/Dictionary.c')
-rw-r--r-- src/Infobox/Dictionary.c 197
1 files changed, 132 insertions, 65 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c
index 1900c21..053b0f2 100644
--- a/src/Infobox/Dictionary.c
+++ b/src/Infobox/Dictionary.c
@@ -1,6 +1,7 @@
#include "Dictionary.h"
#include "../Proxy/Proxy.h"
#include "../Scraping/Scraping.h"
+#include <ctype.h>
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
@@ -8,42 +9,60 @@
#include <stdlib.h>
#include <string.h>
#include <strings.h>
-#include <ctype.h>
-static const char *PREFIXES[] = {
- "what is the definition of ", "what's the definition of ",
- "what is the meaning of ", "what's the meaning of ",
- "what does the word ", "definition of ", "meaning of ", "def of ",
- "define ", "definition ", "define:", "def ", "def:",
- "what does ", "what is ", "what's ", "whats ",
- "meaning ", "dictionary ", "dict ", NULL
-};
+static const char *PREFIXES[] = {"what is the definition of ",
+ "what's the definition of ",
+ "what is the meaning of ",
+ "what's the meaning of ",
+ "what does the word ",
+ "definition of ",
+ "meaning of ",
+ "def of ",
+ "define ",
+ "definition ",
+ "define:",
+ "def ",
+ "def:",
+ "what does ",
+ "what is ",
+ "what's ",
+ "whats ",
+ "meaning ",
+ "dictionary ",
+ "dict ",
+ NULL};
static const char *SUFFIXES[] = {
- " definition", " def", " meaning", " mean", " means",
- " dictionary", " dict", " define", " defined",
- " definition?", " def?", " meaning?", " mean?", " means?",
- " in english", " in english?", NULL
-};
+ " definition", " def", " meaning", " mean", " means",
+ " dictionary", " dict", " define", " defined", " definition?",
+ " def?", " meaning?", " mean?", " means?", " in english",
+ " in english?", NULL};
static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
static const char *strcasestr_impl(const char *haystack, const char *needle) {
- if (!haystack || !needle || !*needle) return haystack;
+ if (!haystack || !needle || !*needle)
+ return haystack;
size_t len = strlen(needle);
for (const char *h = haystack; *h; h++) {
- if (strncasecmp(h, needle, len) == 0) return h;
+ if (strncasecmp(h, needle, len) == 0)
+ return h;
}
return NULL;
}
-struct MemStruct { char *memory; size_t size; };
+struct MemStruct {
+ char *memory;
+ size_t size;
+};
-static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
+static size_t WriteCallback(void *contents, size_t size, size_t nmemb,
+ void *userp) {
size_t realsize = size * nmemb;
struct MemStruct *mem = (struct MemStruct *)userp;
char *ptr = realloc(mem->memory, mem->size + realsize + 1);
- if (!ptr) return 0;
+ if (!ptr)
+ return 0;
mem->memory = ptr;
memcpy(&(mem->memory[mem->size]), contents, realsize);
mem->size += realsize;
@@ -53,40 +72,57 @@ static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *use
static char *xpath_text(xmlDocPtr doc, const char *xpath) {
xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
- if (!ctx) return NULL;
+ if (!ctx)
+ return NULL;
xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
xmlXPathFreeContext(ctx);
if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
- if (obj) xmlXPathFreeObject(obj);
+ if (obj)
+ xmlXPathFreeObject(obj);
return NULL;
}
xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
char *result = content ? strdup((char *)content) : NULL;
- if (content) xmlFree(content);
+ if (content)
+ xmlFree(content);
xmlXPathFreeObject(obj);
return result;
}
-static char *build_html(const char *word, const char *pron, const char *pos,
- const char *def, const char *ex) {
+static char *build_html(const char *word, const char *pron, const char *pos,
+ const char *def, const char *ex) {
char html[4096];
- int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>");
- if (word) n += snprintf(html + n, sizeof(html) - n,
- "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
- if (pron) n += snprintf(html + n, sizeof(html) - n,
- "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
- if (pos) n += snprintf(html + n, sizeof(html) - n,
- "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
- if (def) n += snprintf(html + n, sizeof(html) - n,
- "<div style='margin-bottom: 8px;'>%s</div>", def);
- if (ex) n += snprintf(html + n, sizeof(html) - n,
- "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
+ int n = snprintf(html, sizeof(html),
+ "<div class='dict-container' style='line-height: 1.6;'>");
+ if (word)
+ n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-size: 1.3em; font-weight: bold; "
+ "margin-bottom: 4px;'>%s</div>",
+ word);
+ if (pron)
+ n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>",
+ pron);
+ if (pos)
+ n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-style: italic; color: #888; margin-bottom: "
+ "8px;'>%s</div>",
+ pos);
+ if (def)
+ n += snprintf(html + n, sizeof(html) - n,
+ "<div style='margin-bottom: 8px;'>%s</div>", def);
+ if (ex)
+ n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #555; font-style: italic; margin-top: "
+ "8px;'>\"%s\"</div>",
+ ex);
snprintf(html + n, sizeof(html) - n, "</div>");
return strdup(html);
}
static char *extract_word(const char *query) {
- if (!query) return NULL;
+ if (!query)
+ return NULL;
const char *start = query;
@@ -98,9 +134,11 @@ static char *extract_word(const char *query) {
}
}
- while (*start == ' ') start++;
+ while (*start == ' ')
+ start++;
char *word = strdup(start);
- if (!word) return NULL;
+ if (!word)
+ return NULL;
int changed = 1;
while (changed) {
@@ -130,29 +168,37 @@ static char *extract_word(const char *query) {
}
size_t len = strlen(word);
- while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
- word[len-1] == '!' || word[len-1] == '.')) {
+ while (len > 0 && (word[len - 1] == ' ' || word[len - 1] == '?' ||
+ word[len - 1] == '!' || word[len - 1] == '.')) {
word[--len] = '\0';
}
- if (len == 0) { free(word); return NULL; }
+ if (len == 0) {
+ free(word);
+ return NULL;
+ }
- for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
+ for (size_t i = 0; i < len; i++)
+ word[i] = tolower((unsigned char)word[i]);
char *space = strchr(word, ' ');
- if (space) *space = '\0';
+ if (space)
+ *space = '\0';
return word;
}
int is_dictionary_query(const char *query) {
- if (!query) return 0;
+ if (!query)
+ return 0;
for (int i = 0; PREFIXES[i]; i++) {
size_t len = strlen(PREFIXES[i]);
if (strncasecmp(query, PREFIXES[i], len) == 0) {
const char *after = query + len;
- while (*after == ' ') after++;
- if (*after != '\0') return 1;
+ while (*after == ' ')
+ after++;
+ if (*after != '\0')
+ return 1;
}
}
@@ -160,23 +206,29 @@ int is_dictionary_query(const char *query) {
const char *pos = strcasestr_impl(query, SUFFIXES[i]);
if (pos) {
const char *after = pos + strlen(SUFFIXES[i]);
- while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
- if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
+ while (*after == ' ' || *after == '?' || *after == '!' || *after == '.')
+ after++;
+ if (*after == '\0' && pos > query && (pos - query) < 100)
+ return 1;
}
}
- if (strncasecmp(query, "what is ", 8) == 0 ||
- strncasecmp(query, "what's ", 7) == 0 ||
- strncasecmp(query, "whats ", 6) == 0) {
- const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
- strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
- const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
- "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
+ if (strncasecmp(query, "what is ", 8) == 0 ||
+ strncasecmp(query, "what's ", 7) == 0 ||
+ strncasecmp(query, "whats ", 6) == 0) {
+ const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8
+ : strncasecmp(query, "what's ", 7) == 0 ? 7
+ : 6);
+ const char *articles[] = {"the ", "your ", "my ", "his ", "her ",
+ "their ", "our ", "this ", "that ", "these ",
+ "those ", "a ", "an ", NULL};
for (int i = 0; articles[i]; i++) {
- if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
+ if (strncasecmp(word, articles[i], strlen(articles[i])) == 0)
+ return 0;
}
const char *space = strchr(word, ' ');
- if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
+ if (!space || *(space + 1) == '\0' || *(space + 1) == '?')
+ return 1;
}
return 0;
@@ -184,10 +236,14 @@ int is_dictionary_query(const char *query) {
char *construct_dictionary_url(const char *query) {
char *word = extract_word(query);
- if (!word) return NULL;
+ if (!word)
+ return NULL;
CURL *curl = curl_easy_init();
- if (!curl) { free(word); return NULL; }
+ if (!curl) {
+ free(word);
+ return NULL;
+ }
char *escaped = curl_easy_escape(curl, word, 0);
const char *base = "https://dictionary.cambridge.org/dictionary/english/";
@@ -207,10 +263,14 @@ InfoBox fetch_dictionary_data(const char *query) {
InfoBox info = {NULL, NULL, NULL, NULL};
char *url = construct_dictionary_url(query);
- if (!url) return info;
+ if (!url)
+ return info;
CURL *curl = curl_easy_init();
- if (!curl) { free(url); return info; }
+ if (!curl) {
+ free(url);
+ return info;
+ }
struct MemStruct chunk = {malloc(1), 0};
curl_easy_setopt(curl, CURLOPT_URL, url);
@@ -222,10 +282,13 @@ InfoBox fetch_dictionary_data(const char *query) {
if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
- HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |
+ HTML_PARSE_NOWARNING);
if (doc) {
char *word = xpath_text(doc, "//span[@class='hw dhw']");
- char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
+ char *pron = xpath_text(
+ doc,
+ "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
char *pos = xpath_text(doc, "//span[@class='pos dpos']");
char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
@@ -237,7 +300,11 @@ InfoBox fetch_dictionary_data(const char *query) {
info.url = strdup(url);
}
- free(word); free(pron); free(pos); free(def); free(ex);
+ free(word);
+ free(pron);
+ free(pos);
+ free(def);
+ free(ex);
xmlFreeDoc(doc);
}
}
@@ -246,4 +313,4 @@ InfoBox fetch_dictionary_data(const char *query) {
free(chunk.memory);
free(url);
return info;
-}
\ No newline at end of file
+}