aboutsummaryrefslogtreecommitdiff
path: root/src/Infobox/Dictionary.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/Infobox/Dictionary.c')
-rw-r--r--src/Infobox/Dictionary.c246
1 files changed, 246 insertions, 0 deletions
diff --git a/src/Infobox/Dictionary.c b/src/Infobox/Dictionary.c
new file mode 100644
index 0000000..a835899
--- /dev/null
+++ b/src/Infobox/Dictionary.c
@@ -0,0 +1,246 @@
+#include "Dictionary.h"
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+
+static const char *PREFIXES[] = {
+ "what is the definition of ", "what's the definition of ",
+ "what is the meaning of ", "what's the meaning of ",
+ "what does the word ", "definition of ", "meaning of ", "def of ",
+ "define ", "definition ", "define:", "def ", "def:",
+ "what does ", "what is ", "what's ", "whats ",
+ "meaning ", "dictionary ", "dict ", NULL
+};
+
+static const char *SUFFIXES[] = {
+ " definition", " def", " meaning", " mean", " means",
+ " dictionary", " dict", " define", " defined",
+ " definition?", " def?", " meaning?", " mean?", " means?",
+ " in english", " in english?", NULL
+};
+
+static const char *SKIP_WORDS[] = {"of ", "the ", "a ", "an ", NULL};
+
+static const char *strcasestr_impl(const char *haystack, const char *needle) {
+ if (!haystack || !needle || !*needle) return haystack;
+ size_t len = strlen(needle);
+ for (const char *h = haystack; *h; h++) {
+ if (strncasecmp(h, needle, len) == 0) return h;
+ }
+ return NULL;
+}
+
+struct MemStruct { char *memory; size_t size; };
+
+static size_t WriteCallback(void *contents, size_t size, size_t nmemb, void *userp) {
+ size_t realsize = size * nmemb;
+ struct MemStruct *mem = (struct MemStruct *)userp;
+ char *ptr = realloc(mem->memory, mem->size + realsize + 1);
+ if (!ptr) return 0;
+ mem->memory = ptr;
+ memcpy(&(mem->memory[mem->size]), contents, realsize);
+ mem->size += realsize;
+ mem->memory[mem->size] = 0;
+ return realsize;
+}
+
+static char *xpath_text(xmlDocPtr doc, const char *xpath) {
+ xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
+ if (!ctx) return NULL;
+ xmlXPathObjectPtr obj = xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
+ xmlXPathFreeContext(ctx);
+ if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
+ if (obj) xmlXPathFreeObject(obj);
+ return NULL;
+ }
+ xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
+ char *result = content ? strdup((char *)content) : NULL;
+ if (content) xmlFree(content);
+ xmlXPathFreeObject(obj);
+ return result;
+}
+
+static char *build_html(const char *word, const char *pron, const char *pos,
+ const char *def, const char *ex) {
+ char html[4096];
+ int n = snprintf(html, sizeof(html), "<div class='dict-container' style='line-height: 1.6;'>");
+ if (word) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-size: 1.3em; font-weight: bold; margin-bottom: 4px;'>%s</div>", word);
+ if (pron) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #666; margin-bottom: 8px;'>/%s/</div>", pron);
+ if (pos) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='font-style: italic; color: #888; margin-bottom: 8px;'>%s</div>", pos);
+ if (def) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='margin-bottom: 8px;'>%s</div>", def);
+ if (ex) n += snprintf(html + n, sizeof(html) - n,
+ "<div style='color: #555; font-style: italic; margin-top: 8px;'>\"%s\"</div>", ex);
+ snprintf(html + n, sizeof(html) - n, "</div>");
+ return strdup(html);
+}
+
+static char *extract_word(const char *query) {
+ if (!query) return NULL;
+
+ const char *start = query;
+
+ for (int i = 0; PREFIXES[i]; i++) {
+ size_t len = strlen(PREFIXES[i]);
+ if (strncasecmp(start, PREFIXES[i], len) == 0) {
+ start += len;
+ break;
+ }
+ }
+
+ while (*start == ' ') start++;
+ char *word = strdup(start);
+ if (!word) return NULL;
+
+ int changed = 1;
+ while (changed) {
+ changed = 0;
+ for (int i = 0; SKIP_WORDS[i]; i++) {
+ size_t len = strlen(SKIP_WORDS[i]);
+ if (strncasecmp(word, SKIP_WORDS[i], len) == 0) {
+ memmove(word, word + len, strlen(word + len) + 1);
+ changed = 1;
+ break;
+ }
+ }
+ }
+
+ changed = 1;
+ while (changed) {
+ changed = 0;
+ for (int i = 0; SUFFIXES[i]; i++) {
+ const char *found = strcasestr_impl(word, SUFFIXES[i]);
+ if (found) {
+ char *pos = word + (found - word);
+ *pos = '\0';
+ changed = 1;
+ break;
+ }
+ }
+ }
+
+ size_t len = strlen(word);
+ while (len > 0 && (word[len-1] == ' ' || word[len-1] == '?' ||
+ word[len-1] == '!' || word[len-1] == '.')) {
+ word[--len] = '\0';
+ }
+
+ if (len == 0) { free(word); return NULL; }
+
+ for (size_t i = 0; i < len; i++) word[i] = tolower((unsigned char)word[i]);
+ char *space = strchr(word, ' ');
+ if (space) *space = '\0';
+
+ return word;
+}
+
+int is_dictionary_query(const char *query) {
+ if (!query) return 0;
+
+ for (int i = 0; PREFIXES[i]; i++) {
+ size_t len = strlen(PREFIXES[i]);
+ if (strncasecmp(query, PREFIXES[i], len) == 0) {
+ const char *after = query + len;
+ while (*after == ' ') after++;
+ if (*after != '\0') return 1;
+ }
+ }
+
+ for (int i = 0; SUFFIXES[i]; i++) {
+ const char *pos = strcasestr_impl(query, SUFFIXES[i]);
+ if (pos) {
+ const char *after = pos + strlen(SUFFIXES[i]);
+ while (*after == ' ' || *after == '?' || *after == '!' || *after == '.') after++;
+ if (*after == '\0' && pos > query && (pos - query) < 100) return 1;
+ }
+ }
+
+ if (strncasecmp(query, "what is ", 8) == 0 ||
+ strncasecmp(query, "what's ", 7) == 0 ||
+ strncasecmp(query, "whats ", 6) == 0) {
+ const char *word = query + (strncasecmp(query, "what is ", 8) == 0 ? 8 :
+ strncasecmp(query, "what's ", 7) == 0 ? 7 : 6);
+ const char *articles[] = {"the ", "your ", "my ", "his ", "her ", "their ",
+ "our ", "this ", "that ", "these ", "those ", "a ", "an ", NULL};
+ for (int i = 0; articles[i]; i++) {
+ if (strncasecmp(word, articles[i], strlen(articles[i])) == 0) return 0;
+ }
+ const char *space = strchr(word, ' ');
+ if (!space || *(space + 1) == '\0' || *(space + 1) == '?') return 1;
+ }
+
+ return 0;
+}
+
+char *construct_dictionary_url(const char *query) {
+ char *word = extract_word(query);
+ if (!word) return NULL;
+
+ CURL *curl = curl_easy_init();
+ if (!curl) { free(word); return NULL; }
+
+ char *escaped = curl_easy_escape(curl, word, 0);
+ const char *base = "https://dictionary.cambridge.org/dictionary/english/";
+ char *url = malloc(strlen(base) + strlen(escaped) + 1);
+ if (url) {
+ strcpy(url, base);
+ strcat(url, escaped);
+ }
+
+ curl_free(escaped);
+ curl_easy_cleanup(curl);
+ free(word);
+ return url;
+}
+
+InfoBox fetch_dictionary_data(const char *query) {
+ InfoBox info = {NULL, NULL, NULL, NULL};
+
+ char *url = construct_dictionary_url(query);
+ if (!url) return info;
+
+ CURL *curl = curl_easy_init();
+ if (!curl) { free(url); return info; }
+
+ struct MemStruct chunk = {malloc(1), 0};
+ curl_easy_setopt(curl, CURLOPT_URL, url);
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback);
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, &chunk);
+ curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5.0");
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+
+ if (curl_easy_perform(curl) == CURLE_OK && chunk.size > 0) {
+ htmlDocPtr doc = htmlReadMemory(chunk.memory, chunk.size, url, NULL,
+ HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+ if (doc) {
+ char *word = xpath_text(doc, "//span[@class='hw dhw']");
+ char *pron = xpath_text(doc, "//span[@class='us dpron-i']//span[@class='ipa dipa lpr-2 lpl-1']");
+ char *pos = xpath_text(doc, "//span[@class='pos dpos']");
+ char *def = xpath_text(doc, "(//div[@class='def ddef_d db'])[1]");
+ char *ex = xpath_text(doc, "(//span[@class='eg deg'])[1]");
+
+ if (word && def) {
+ info.title = strdup("Dictionary");
+ info.extract = build_html(word, pron, pos, def, ex);
+ info.thumbnail_url = strdup("/static/dictionary.jpg");
+ info.url = strdup(url);
+ }
+
+ free(word); free(pron); free(pos); free(def); free(ex);
+ xmlFreeDoc(doc);
+ }
+ }
+
+ curl_easy_cleanup(curl);
+ free(chunk.memory);
+ free(url);
+ return info;
+} \ No newline at end of file