diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-03-10 02:32:51 -0400 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-03-10 02:32:51 -0400 |
| commit | a11bf8bb6c1baaef51c25d441f5348567280967b (patch) | |
| tree | f7ae4435e157dac9b024af0611fbdfa3ab8870e8 /src/Infobox/Dictionary.c | |
| parent | 24cec7a35072ac50e1fe902a925e69330866dbca (diff) | |
| download | omnisearch-a11bf8bb6c1baaef51c25d441f5348567280967b.tar.gz | |
Diffstat (limited to 'src/Infobox/Dictionary.c')
| -rw-r--r-- | src/Infobox/Dictionary.c | 197 |
1 files changed, 132 insertions, 65 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c index 1900c21..053b0f2 100644 --- a/src/Infobox/Dictionary.c +++ b/src/Infobox/Dictionary.c @@ -1,6 +1,7 @@ #include "Dictionary.h" #include "../Proxy/Proxy.h" #include "../Scraping/Scraping.h" +#include <ctype.h> #include <curl/curl.h> #include <libxml/HTMLparser.h> #include <libxml/xpath.h> @@ -8,42 +9,60 @@ #include <stdlib.h> #include <string.h> #include <strings.h> -#include <ctype.h> -static const char *PREFIXES[] = { - "what is the definition of ", "what's the definition of ", - "what is the meaning of ", "what's the meaning of ", - "what does the word ", "definition of ", "meaning of ", "def of ", - "define ", "definition ", "define:", "def ", "def:", - "what does ", "what is ", "what's ", "whats ", - "meaning ", "dictionary ", "dict ", NULL -}; +static const char *PREFIXES[] = {"what is the definition of ", + "what's the definition of ", + "what is the meaning of ", + "what's the meaning of ", + "what does the word ", + "definition of ", + "meaning of ", + "def of ", + "define ", + "definition ", + "define:", + "def ", + "def:", + "what does ", + "what is ", + "what's ", + "whats ", + "meaning ", + "dictionary ", + "dict ", + NULL}; static const char *SUFFIXES[] = { - " definition", " def", " meaning", " mean", " means", - " dictionary", " dict", " define", " defined", - " definition?", " def?", " meaning?", " mean?", " means?", - " in english", " in english?", NULL -}; + " definition", " def", " meaning", " mean", " means", + " dictionary", " dict", " define", " defined", " definition?", + " def?", " meaning?", " mean?", " means?", " in english", + " in english?", NULL}; static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL}; static const char *strcasestr_impl(const char *haystack, const char *needle) { - if (!haystack || !needle || !*needle) return haystack; + if (!haystack || !needle || !*needle) + return haystack; size_t len = strlen(needle); for (const char *h = haystack; *h; h++) { - if (strncasecmp(h, needle, len) == 0) return h; + if (strncasecmp(h, needle, len) == 0) + return h; } return NULL; } -struct MemStruct { char *memory; size_t size; }; +struct MemStruct { + char *memory; + size_t size; +}; -static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) { +static size_t WriteCallback(void *contents, size_t size, size_t nmemb, + void *userp) { size_t realsize = size * nmemb; struct MemStruct *mem = (struct MemStruct *)userp; char *ptr = realloc(mem->memory, mem->size + realsize + 1); - if (!ptr) return 0; + if (!ptr) + return 0; mem->memory = ptr; memcpy(&(mem->memory[mem->size]), contents, realsize); mem->size += realsize; @@ -53,40 +72,57 @@ static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *use static char *xpath_text(xmlDocPtr doc, const char *xpath) { xmlXPathContextPtr ctx = xmlXPathNewContext(doc); - if (!ctx) return NULL; + if (!ctx) + return NULL; xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx); xmlXPathFreeContext(ctx); if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) { - if (obj) xmlXPathFreeObject(obj); + if (obj) + xmlXPathFreeObject(obj); return NULL; } xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]); char *result = content ? strdup((char *)content) : NULL; - if (content) xmlFree(content); + if (content) + xmlFree(content); xmlXPathFreeObject(obj); return result; } -static char *build_html(const char *word, const char *pron, const char *pos, - const char *def, const char *ex) { +static char *build_html(const char *word, const char *pron, const char *pos, + const char *def, const char *ex) { char html[4096]; - int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>"); - if (word) n += snprintf(html + n, sizeof(html) - n, - "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word); - if (pron) n += snprintf(html + n, sizeof(html) - n, - "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron); - if (pos) n += snprintf(html + n, sizeof(html) - n, - "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos); - if (def) n += snprintf(html + n, sizeof(html) - n, - "<div style='margin-bottom: 8px;'>%s</div>", def); - if (ex) n += snprintf(html + n, sizeof(html) - n, - "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex); + int n = snprintf(html, sizeof(html), + "<div class='dict-container' style='line-height: 1.6;'>"); + if (word) + n += snprintf(html + n, sizeof(html) - n, + "<div style='font-size: 1.3em; font-weight: bold; " + "margin-bottom: 4px;'>%s</div>", + word); + if (pron) + n += snprintf(html + n, sizeof(html) - n, + "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", + pron); + if (pos) + n += snprintf(html + n, sizeof(html) - n, + "<div style='font-style: italic; color: #888; margin-bottom: " + "8px;'>%s</div>", + pos); + if (def) + n += snprintf(html + n, sizeof(html) - n, + "<div style='margin-bottom: 8px;'>%s</div>", def); + if (ex) + n += snprintf(html + n, sizeof(html) - n, + "<div style='color: #555; font-style: italic; margin-top: " + "8px;'>\"%s\"</div>", + ex); snprintf(html + n, sizeof(html) - n, "</div>"); return strdup(html); } static char *extract_word(const char *query) { - if (!query) return NULL; + if (!query) + return NULL; const char *start = query; @@ -98,9 +134,11 @@ static char *extract_word(const char *query) { } } - while (*start == ' ') start++; + while (*start == ' ') + start++; char *word = strdup(start); - if (!word) return NULL; + if (!word) + return NULL; int changed = 1; while (changed) { @@ -130,29 +168,37 @@ static char *extract_word(const char *query) { } size_t len = strlen(word); - while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' || - word[len-1] == '!' || word[len-1] == '.')) { + while (len > 0 && (word[len - 1] == ' ' || word[len - 1] == '?' || + word[len - 1] == '!' || word[len - 1] == '.')) { word[--len] = '\0'; } - if (len == 0) { free(word); return NULL; } + if (len == 0) { + free(word); + return NULL; + } - for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]); + for (size_t i = 0; i < len; i++) + word[i] = tolower((unsigned char)word[i]); char *space = strchr(word, ' '); - if (space) *space = '\0'; + if (space) + *space = '\0'; return word; } int is_dictionary_query(const char *query) { - if (!query) return 0; + if (!query) + return 0; for (int i = 0; PREFIXES[i]; i++) { size_t len = strlen(PREFIXES[i]); if (strncasecmp(query, PREFIXES[i], len) == 0) { const char *after = query + len; - while (*after == ' ') after++; - if (*after != '\0') return 1; + while (*after == ' ') + after++; + if (*after != '\0') + return 1; } } @@ -160,23 +206,29 @@ int is_dictionary_query(const char *query) { const char *pos = strcasestr_impl(query, SUFFIXES[i]); if (pos) { const char *after = pos + strlen(SUFFIXES[i]); - while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++; - if (*after == '\0' && pos > query && (pos - query) < 100) return 1; + while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') + after++; + if (*after == '\0' && pos > query && (pos - query) < 100) + return 1; } } - if (strncasecmp(query, "what is ", 8) == 0 || - strncasecmp(query, "what's ", 7) == 0 || - strncasecmp(query, "whats ", 6) == 0) { - const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 : - strncasecmp(query, "what's ", 7) == 0 ? 7 : 6); - const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ", - "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL}; + if (strncasecmp(query, "what is ", 8) == 0 || + strncasecmp(query, "what's ", 7) == 0 || + strncasecmp(query, "whats ", 6) == 0) { + const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 + : strncasecmp(query, "what's ", 7) == 0 ? 7 + : 6); + const char *articles[] = {"the ", "your ", "my ", "his ", "her ", + "their ", "our ", "this ", "that ", "these ", + "those ", "a ", "an ", NULL}; for (int i = 0; articles[i]; i++) { - if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0; + if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) + return 0; } const char *space = strchr(word, ' '); - if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1; + if (!space || *(space + 1) == '\0' || *(space + 1) == '?') + return 1; } return 0; @@ -184,10 +236,14 @@ int is_dictionary_query(const char *query) { char *construct_dictionary_url(const char *query) { char *word = extract_word(query); - if (!word) return NULL; + if (!word) + return NULL; CURL *curl = curl_easy_init(); - if (!curl) { free(word); return NULL; } + if (!curl) { + free(word); + return NULL; + } char *escaped = curl_easy_escape(curl, word, 0); const char *base = "https://dictionary.cambridge.org/dictionary/english/"; @@ -207,10 +263,14 @@ InfoBox fetch_dictionary_data(const char *query) { InfoBox info = {NULL, NULL, NULL, NULL}; char *url = construct_dictionary_url(query); - if (!url) return info; + if (!url) + return info; CURL *curl = curl_easy_init(); - if (!curl) { free(url); return info; } + if (!curl) { + free(url); + return info; + } struct MemStruct chunk = {malloc(1), 0}; curl_easy_setopt(curl, CURLOPT_URL, url); @@ -222,10 +282,13 @@ InfoBox fetch_dictionary_data(const char *query) { if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, - HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); + HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | + HTML_PARSE_NOWARNING); if (doc) { char *word = xpath_text(doc, "//span[@class='hw dhw']"); - char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); + char *pron = xpath_text( + doc, + "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); char *pos = xpath_text(doc, "//span[@class='pos dpos']"); char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]"); char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]"); @@ -237,7 +300,11 @@ InfoBox fetch_dictionary_data(const char *query) { info.url = strdup(url); } - free(word); free(pron); free(pos); free(def); free(ex); + free(word); + free(pron); + free(pos); + free(def); + free(ex); xmlFreeDoc(doc); } } @@ -246,4 +313,4 @@ InfoBox fetch_dictionary_data(const char *query) { free(chunk.memory); free(url); return info; -}
\ No newline at end of file +} |
