diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-03-05 04:50:32 +0000 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-03-05 04:50:32 +0000 |
| commit | 24cec7a35072ac50e1fe902a925e69330866dbca (patch) | |
| tree | f60bb7c38b0b952e074f9b8f8d1cef758d7f8ec8 /src/Infobox/Dictionary.c | |
| parent | 5ed5a6ecc749e8b557ea62f04a444e203f4e48f6 (diff) | |
| download | omnisearch-24cec7a35072ac50e1fe902a925e69330866dbca.tar.gz | |
made formatting more consistent
Diffstat (limited to 'src/Infobox/Dictionary.c')
| -rw-r--r-- | src/Infobox/Dictionary.c | 376 |
1 files changed, 188 insertions, 188 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c index ca4e5cd..1900c21 100644 --- a/src/Infobox/Dictionary.c +++ b/src/Infobox/Dictionary.c @@ -11,239 +11,239 @@ #include <ctype.h> static const char *PREFIXES[] = { - "what is the definition of ", "what's the definition of ", - "what is the meaning of ", "what's the meaning of ", - "what does the word ", "definition of ", "meaning of ", "def of ", - "define ", "definition ", "define:", "def ", "def:", - "what does ", "what is ", "what's ", "whats ", - "meaning ", "dictionary ", "dict ", NULL + "what is the definition of ", "what's the definition of ", + "what is the meaning of ", "what's the meaning of ", + "what does the word ", "definition of ", "meaning of ", "def of ", + "define ", "definition ", "define:", "def ", "def:", + "what does ", "what is ", "what's ", "whats ", + "meaning ", "dictionary ", "dict ", NULL }; static const char *SUFFIXES[] = { - " definition", " def", " meaning", " mean", " means", - " dictionary", " dict", " define", " defined", - " definition?", " def?", " meaning?", " mean?", " means?", - " in english", " in english?", NULL + " definition", " def", " meaning", " mean", " means", + " dictionary", " dict", " define", " defined", + " definition?", " def?", " meaning?", " mean?", " means?", + " in english", " in english?", NULL }; static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL}; static const char *strcasestr_impl(const char *haystack, const char *needle) { - if (!haystack || !needle || !*needle) return haystack; - size_t len = strlen(needle); - for (const char *h = haystack; *h; h++) { - if (strncasecmp(h, needle, len) == 0) return h; - } - return NULL; + if (!haystack || !needle || !*needle) return haystack; + size_t len = strlen(needle); + for (const char *h = haystack; *h; h++) { + if (strncasecmp(h, needle, len) == 0) return h; + } + return NULL; } struct MemStruct { char *memory; size_t size; }; static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) { - size_t realsize = size * nmemb; - struct MemStruct *mem = (struct MemStruct *)userp; - char *ptr = realloc(mem->memory, mem->size + realsize + 1); - if (!ptr) return 0; - mem->memory = ptr; - memcpy(&(mem->memory[mem->size]), contents, realsize); - mem->size += realsize; - mem->memory[mem->size] = 0; - return realsize; + size_t realsize = size * nmemb; + struct MemStruct *mem = (struct MemStruct *)userp; + char *ptr = realloc(mem->memory, mem->size + realsize + 1); + if (!ptr) return 0; + mem->memory = ptr; + memcpy(&(mem->memory[mem->size]), contents, realsize); + mem->size += realsize; + mem->memory[mem->size] = 0; + return realsize; } static char *xpath_text(xmlDocPtr doc, const char *xpath) { - xmlXPathContextPtr ctx = xmlXPathNewContext(doc); - if (!ctx) return NULL; - xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx); - xmlXPathFreeContext(ctx); - if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) { - if (obj) xmlXPathFreeObject(obj); - return NULL; - } - xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]); - char *result = content ? strdup((char *)content) : NULL; - if (content) xmlFree(content); - xmlXPathFreeObject(obj); - return result; + xmlXPathContextPtr ctx = xmlXPathNewContext(doc); + if (!ctx) return NULL; + xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx); + xmlXPathFreeContext(ctx); + if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) { + if (obj) xmlXPathFreeObject(obj); + return NULL; + } + xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]); + char *result = content ? strdup((char *)content) : NULL; + if (content) xmlFree(content); + xmlXPathFreeObject(obj); + return result; } static char *build_html(const char *word, const char *pron, const char *pos, - const char *def, const char *ex) { - char html[4096]; - int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>"); - if (word) n += snprintf(html + n, sizeof(html) - n, - "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word); - if (pron) n += snprintf(html + n, sizeof(html) - n, - "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron); - if (pos) n += snprintf(html + n, sizeof(html) - n, - "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos); - if (def) n += snprintf(html + n, sizeof(html) - n, - "<div style='margin-bottom: 8px;'>%s</div>", def); - if (ex) n += snprintf(html + n, sizeof(html) - n, - "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex); - snprintf(html + n, sizeof(html) - n, "</div>"); - return strdup(html); + const char *def, const char *ex) { + char html[4096]; + int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>"); + if (word) n += snprintf(html + n, sizeof(html) - n, + "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word); + if (pron) n += snprintf(html + n, sizeof(html) - n, + "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron); + if (pos) n += snprintf(html + n, sizeof(html) - n, + "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos); + if (def) n += snprintf(html + n, sizeof(html) - n, + "<div style='margin-bottom: 8px;'>%s</div>", def); + if (ex) n += snprintf(html + n, sizeof(html) - n, + "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex); + snprintf(html + n, sizeof(html) - n, "</div>"); + return strdup(html); } static char *extract_word(const char *query) { - if (!query) return NULL; + if (!query) return NULL; - const char *start = query; + const char *start = query; - for (int i = 0; PREFIXES[i]; i++) { - size_t len = strlen(PREFIXES[i]); - if (strncasecmp(start, PREFIXES[i], len) == 0) { - start += len; - break; - } + for (int i = 0; PREFIXES[i]; i++) { + size_t len = strlen(PREFIXES[i]); + if (strncasecmp(start, PREFIXES[i], len) == 0) { + start += len; + break; } - - while (*start == ' ') start++; - char *word = strdup(start); - if (!word) return NULL; - - int changed = 1; - while (changed) { - changed = 0; - for (int i = 0; SKIP_WORDS[i]; i++) { - size_t len = strlen(SKIP_WORDS[i]); - if (strncasecmp(word, SKIP_WORDS[i], len) == 0) { - memmove(word, word + len, strlen(word + len) + 1); - changed = 1; - break; - } - } + } + + while (*start == ' ') start++; + char *word = strdup(start); + if (!word) return NULL; + + int changed = 1; + while (changed) { + changed = 0; + for (int i = 0; SKIP_WORDS[i]; i++) { + size_t len = strlen(SKIP_WORDS[i]); + if (strncasecmp(word, SKIP_WORDS[i], len) == 0) { + memmove(word, word + len, strlen(word + len) + 1); + changed = 1; + break; + } } + } - changed = 1; - while (changed) { - changed = 0; - for (int i = 0; SUFFIXES[i]; i++) { - const char *found = strcasestr_impl(word, SUFFIXES[i]); - if (found) { - char *pos = word + (found - word); - *pos = '\0'; - changed = 1; - break; - } - } + changed = 1; + while (changed) { + changed = 0; + for (int i = 0; SUFFIXES[i]; i++) { + const char *found = strcasestr_impl(word, SUFFIXES[i]); + if (found) { + char *pos = word + (found - word); + *pos = '\0'; + changed = 1; + break; + } } + } - size_t len = strlen(word); - while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' || - word[len-1] == '!' || word[len-1] == '.')) { - word[--len] = '\0'; - } + size_t len = strlen(word); + while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' || + word[len-1] == '!' || word[len-1] == '.')) { + word[--len] = '\0'; + } - if (len == 0) { free(word); return NULL; } + if (len == 0) { free(word); return NULL; } - for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]); - char *space = strchr(word, ' '); - if (space) *space = '\0'; + for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]); + char *space = strchr(word, ' '); + if (space) *space = '\0'; - return word; + return word; } int is_dictionary_query(const char *query) { - if (!query) return 0; - - for (int i = 0; PREFIXES[i]; i++) { - size_t len = strlen(PREFIXES[i]); - if (strncasecmp(query, PREFIXES[i], len) == 0) { - const char *after = query + len; - while (*after == ' ') after++; - if (*after != '\0') return 1; - } + if (!query) return 0; + + for (int i = 0; PREFIXES[i]; i++) { + size_t len = strlen(PREFIXES[i]); + if (strncasecmp(query, PREFIXES[i], len) == 0) { + const char *after = query + len; + while (*after == ' ') after++; + if (*after != '\0') return 1; } - - for (int i = 0; SUFFIXES[i]; i++) { - const char *pos = strcasestr_impl(query, SUFFIXES[i]); - if (pos) { - const char *after = pos + strlen(SUFFIXES[i]); - while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++; - if (*after == '\0' && pos > query && (pos - query) < 100) return 1; - } + } + + for (int i = 0; SUFFIXES[i]; i++) { + const char *pos = strcasestr_impl(query, SUFFIXES[i]); + if (pos) { + const char *after = pos + strlen(SUFFIXES[i]); + while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++; + if (*after == '\0' && pos > query && (pos - query) < 100) return 1; } - - if (strncasecmp(query, "what is ", 8) == 0 || - strncasecmp(query, "what's ", 7) == 0 || - strncasecmp(query, "whats ", 6) == 0) { - const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 : - strncasecmp(query, "what's ", 7) == 0 ? 7 : 6); - const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ", - "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL}; - for (int i = 0; articles[i]; i++) { - if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0; - } - const char *space = strchr(word, ' '); - if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1; + } + + if (strncasecmp(query, "what is ", 8) == 0 || + strncasecmp(query, "what's ", 7) == 0 || + strncasecmp(query, "whats ", 6) == 0) { + const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 : + strncasecmp(query, "what's ", 7) == 0 ? 7 : 6); + const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ", + "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL}; + for (int i = 0; articles[i]; i++) { + if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0; } + const char *space = strchr(word, ' '); + if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1; + } - return 0; + return 0; } char *construct_dictionary_url(const char *query) { - char *word = extract_word(query); - if (!word) return NULL; - - CURL *curl = curl_easy_init(); - if (!curl) { free(word); return NULL; } - - char *escaped = curl_easy_escape(curl, word, 0); - const char *base = "https://dictionary.cambridge.org/dictionary/english/"; - char *url = malloc(strlen(base) + strlen(escaped) + 1); - if (url) { - strcpy(url, base); - strcat(url, escaped); - } - - curl_free(escaped); - curl_easy_cleanup(curl); - free(word); - return url; + char *word = extract_word(query); + if (!word) return NULL; + + CURL *curl = curl_easy_init(); + if (!curl) { free(word); return NULL; } + + char *escaped = curl_easy_escape(curl, word, 0); + const char *base = "https://dictionary.cambridge.org/dictionary/english/"; + char *url = malloc(strlen(base) + strlen(escaped) + 1); + if (url) { + strcpy(url, base); + strcat(url, escaped); + } + + curl_free(escaped); + curl_easy_cleanup(curl); + free(word); + return url; } InfoBox fetch_dictionary_data(const char *query) { - InfoBox info = {NULL, NULL, NULL, NULL}; - - char *url = construct_dictionary_url(query); - if (!url) return info; - - CURL *curl = curl_easy_init(); - if (!curl) { free(url); return info; } - - struct MemStruct chunk = {malloc(1), 0}; - curl_easy_setopt(curl, CURLOPT_URL, url); - curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk); - curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0"); - curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - apply_proxy_settings(curl); - - if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { - htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, - HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); - if (doc) { - char *word = xpath_text(doc, "//span[@class='hw dhw']"); - char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); - char *pos = xpath_text(doc, "//span[@class='pos dpos']"); - char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]"); - char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]"); - - if (word && def) { - info.title = strdup("Dictionary"); - info.extract = build_html(word, pron, pos, def, ex); - info.thumbnail_url = strdup("/static/dictionary.jpg"); - info.url = strdup(url); - } - - free(word); free(pron); free(pos); free(def); free(ex); - xmlFreeDoc(doc); - } + InfoBox info = {NULL, NULL, NULL, NULL}; + + char *url = construct_dictionary_url(query); + if (!url) return info; + + CURL *curl = curl_easy_init(); + if (!curl) { free(url); return info; } + + struct MemStruct chunk = {malloc(1), 0}; + curl_easy_setopt(curl, CURLOPT_URL, url); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0"); + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); + apply_proxy_settings(curl); + + if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) { + htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL, + HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING); + if (doc) { + char *word = xpath_text(doc, "//span[@class='hw dhw']"); + char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']"); + char *pos = xpath_text(doc, "//span[@class='pos dpos']"); + char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]"); + char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]"); + + if (word && def) { + info.title = strdup("Dictionary"); + info.extract = build_html(word, pron, pos, def, ex); + info.thumbnail_url = strdup("/static/dictionary.jpg"); + info.url = strdup(url); + } + + free(word); free(pron); free(pos); free(def); free(ex); + xmlFreeDoc(doc); } + } - curl_easy_cleanup(curl); - free(chunk.memory); - free(url); - return info; + curl_easy_cleanup(curl); + free(chunk.memory); + free(url); + return info; }
\ No newline at end of file |
