From b280ab6bcdf6c9bae46a7a21b7138d46d953dd71 Mon Sep 17 00:00:00 2001
From: frosty
Date: Mon, 23 Feb 2026 00:57:21 -0500
Subject: oopsies

---
Reviewer note: this copy was rebuilt from a whitespace-collapsed rendering in
which `#include <...>` targets and the markup inside string literals were
lost; both are reconstructed and marked NOTE(review) in the code. The index
hashes below are those of the original commit and do not match the revised
contents.

 src/Routes/Home.c   |  27 +++
 src/Routes/Home.h   |   9 +
 src/Routes/Images.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/Routes/Images.h |   9 +
 src/Routes/Search.c | 381 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/Routes/Search.h |   9 +
 6 files changed, 802 insertions(+)
 create mode 100644 src/Routes/Home.c
 create mode 100644 src/Routes/Home.h
 create mode 100644 src/Routes/Images.c
 create mode 100644 src/Routes/Images.h
 create mode 100644 src/Routes/Search.c
 create mode 100644 src/Routes/Search.h

(limited to 'src/Routes')

diff --git a/src/Routes/Home.c b/src/Routes/Home.c
new file mode 100644
index 0000000..81370ba
--- /dev/null
+++ b/src/Routes/Home.c
@@ -0,0 +1,27 @@
+#include "Home.h"
+#include <stdlib.h>
+
+/*
+ * GET handler for the landing page: renders "home.html" with a fresh
+ * template context and sends it. `params` is unused.
+ *
+ * Returns 0 on success, -1 if the template could not be rendered.
+ */
+int home_handler(UrlParams *params) {
+  (void)params;
+  TemplateContext ctx = new_context();
+  int rc = 0;
+
+  char *rendered_html = render_template("home.html", &ctx);
+  if (rendered_html) {
+    send_response(rendered_html);
+    free(rendered_html);
+  } else {
+    /* Mirrors images_handler: never hand a NULL body to send_response. */
+    send_response("<h1>Error rendering page</h1>");
+    rc = -1;
+  }
+
+  free_context(&ctx);
+  return rc;
+}
diff --git a/src/Routes/Home.h b/src/Routes/Home.h
new file mode 100644
index 0000000..5d01ab3
--- /dev/null
+++ b/src/Routes/Home.h
@@ -0,0 +1,9 @@
+#ifndef HOME_H
+#define HOME_H
+
+#include <beaker.h> /* NOTE(review): name lost in extraction -- confirm */
+
+/* Route handler for the landing page; see Home.c. */
+int home_handler(UrlParams *params);
+
+#endif
diff --git a/src/Routes/Images.c b/src/Routes/Images.c
new file mode 100644
index 0000000..67ae94c
--- /dev/null
+++ b/src/Routes/Images.c
@@ -0,0 +1,367 @@
+#include "Images.h"
+#include "../Utility/Unescape.h"
+
+/*
+ * NOTE(review): the <...> header names in this file were lost in extraction
+ * and are reconstructed from the symbols used below; the original listed one
+ * more header than could be inferred. Confirm against the repository.
+ */
+#include <curl/curl.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+  MAX_IMAGES = 32,    /* most results handed to the template */
+  IMAGE_FIELDS = 4,   /* thumbnail URL, title, source page URL, full URL */
+  URL_BUF_SIZE = 1024 /* room for the Bing URL plus the escaped query */
+};
+
+/* Growable buffer that ImageWriteCallback keeps NUL-terminated. */
+struct MemoryBlock {
+  char *response;
+  size_t size;
+};
+
+/* DOM nodes of interest inside one result block (div.item). */
+typedef struct {
+  xmlNodePtr img;   /* <img> found under a.thumb > div.cico */
+  xmlNodePtr thumb; /* <a> whose class contains "thumb" */
+  xmlNodePtr tit;   /* <a> whose class contains "tit" */
+  xmlNodePtr des;   /* <div class="des"> found under div.meta */
+} ImageItemNodes;
+
+/*
+ * libcurl write callback: appends size * nmemb bytes from `data` to the
+ * MemoryBlock given via CURLOPT_WRITEDATA and re-terminates it. Returns the
+ * number of bytes stored; the 0 returned on allocation failure makes libcurl
+ * abort the transfer.
+ */
+static size_t ImageWriteCallback(void *data, size_t size, size_t nmemb,
+                                 void *userp) {
+  size_t realsize = size * nmemb;
+  struct MemoryBlock *mem = (struct MemoryBlock *)userp;
+
+  /* Grow through a temporary so the old buffer survives a failed realloc. */
+  char *ptr = realloc(mem->response, mem->size + realsize + 1);
+  if (ptr == NULL) {
+    return 0;
+  }
+
+  mem->response = ptr;
+  memcpy(&(mem->response[mem->size]), data, realsize);
+  mem->size += realsize;
+  mem->response[mem->size] = 0;
+  return realsize;
+}
+
+/*
+ * Downloads `url` (redirects followed, 10 second timeout) and returns the
+ * body as a heap-allocated, NUL-terminated string the caller must free().
+ * Returns NULL if allocation, curl initialisation or the transfer fails.
+ */
+static char *fetch_images_html(const char *url) {
+  struct MemoryBlock chunk = {.response = malloc(1), .size = 0};
+  if (!chunk.response) {
+    return NULL;
+  }
+  /* An empty body never reaches the callback; keep the result a valid "". */
+  chunk.response[0] = '\0';
+
+  CURL *curl_handle = curl_easy_init();
+  if (!curl_handle) {
+    free(chunk.response);
+    return NULL;
+  }
+
+  curl_easy_setopt(curl_handle, CURLOPT_URL, url);
+  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, ImageWriteCallback);
+  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
+  curl_easy_setopt(
+      curl_handle, CURLOPT_USERAGENT,
+      "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
+  curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
+  curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, 10L);
+
+  CURLcode res = curl_easy_perform(curl_handle);
+  curl_easy_cleanup(curl_handle);
+  if (res != CURLE_OK) {
+    free(chunk.response);
+    return NULL;
+  }
+
+  return chunk.response;
+}
+
+/* Returns 1 when `node`'s tag name is exactly `name`. */
+static int is_named(xmlNodePtr node, const char *name) {
+  return xmlStrcmp(node->name, (const xmlChar *)name) == 0;
+}
+
+/* Returns 1 when `node` has a class attribute exactly equal to `value`. */
+static int class_equals(xmlNodePtr node, const char *value) {
+  xmlChar *cls = xmlGetProp(node, (const xmlChar *)"class");
+  int match = cls && xmlStrcmp(cls, (const xmlChar *)value) == 0;
+  if (cls) xmlFree(cls);
+  return match;
+}
+
+/* Returns 1 when `node` has a class attribute containing `needle`. */
+static int class_contains(xmlNodePtr node, const char *needle) {
+  xmlChar *cls = xmlGetProp(node, (const xmlChar *)"class");
+  int match = cls && xmlStrstr(cls, (const xmlChar *)needle) != NULL;
+  if (cls) xmlFree(cls);
+  return match;
+}
+
+/* Records the first <img> of each div.cico under `thumb_link` in out->img. */
+static void locate_thumb_image(xmlNodePtr thumb_link, ImageItemNodes *out) {
+  for (xmlNodePtr div = thumb_link->children; div; div = div->next) {
+    if (!is_named(div, "div") || !class_equals(div, "cico")) continue;
+
+    for (xmlNodePtr n = div->children; n; n = n->next) {
+      if (is_named(n, "img")) {
+        out->img = n;
+        break;
+      }
+    }
+  }
+}
+
+/* Records div.des and a[class*=tit] children of a div.meta node. */
+static void scan_meta(xmlNodePtr meta, ImageItemNodes *out) {
+  for (xmlNodePtr n = meta->children; n; n = n->next) {
+    if (is_named(n, "div")) {
+      if (class_equals(n, "des")) out->des = n;
+    } else if (is_named(n, "a")) {
+      if (class_contains(n, "tit")) out->tit = n;
+    }
+  }
+}
+
+/* Walks the element children of one div.item and collects the nodes used. */
+static ImageItemNodes locate_item_nodes(xmlNodePtr item) {
+  ImageItemNodes found = {0};
+
+  for (xmlNodePtr child = item->children; child; child = child->next) {
+    if (child->type != XML_ELEMENT_NODE) continue;
+
+    if (is_named(child, "a")) {
+      if (class_contains(child, "thumb")) {
+        found.thumb = child;
+        locate_thumb_image(child, &found);
+      } else if (class_contains(child, "tit")) {
+        found.tit = child;
+      }
+    } else if (is_named(child, "div") && class_equals(child, "meta")) {
+      scan_meta(child, &found);
+    }
+  }
+
+  return found;
+}
+
+/*
+ * Builds one IMAGE_FIELDS-long row of heap copies for the template:
+ * [0] thumbnail URL, [1] title or "Image", [2] source page URL or "#",
+ * [3] full-size URL or "#". Returns NULL (nothing leaked) if any allocation
+ * fails.
+ */
+static char **make_image_row(const xmlChar *thumb_src, const xmlChar *title,
+                             const xmlChar *site_url,
+                             const xmlChar *full_url) {
+  char **row = malloc(sizeof *row * IMAGE_FIELDS);
+  if (!row) return NULL;
+
+  row[0] = strdup((const char *)thumb_src);
+  row[1] = strdup(title ? (const char *)title : "Image");
+  row[2] = strdup(site_url ? (const char *)site_url : "#");
+  row[3] = strdup(full_url ? (const char *)full_url : "#");
+
+  if (!row[0] || !row[1] || !row[2] || !row[3]) {
+    for (int i = 0; i < IMAGE_FIELDS; i++) free(row[i]);
+    free(row);
+    return NULL;
+  }
+  return row;
+}
+
+/* Frees `count` rows built by make_image_row, then the matrix itself. */
+static void free_image_matrix(char ***matrix, int count) {
+  if (!matrix) return;
+  for (int i = 0; i < count; i++) {
+    for (int j = 0; j < IMAGE_FIELDS; j++) free(matrix[i][j]);
+    free(matrix[i]);
+  }
+  free(matrix);
+}
+
+/*
+ * GET handler for image search. Reads the "q" parameter, fetches the Bing
+ * image results page for it, extracts up to MAX_IMAGES entries and renders
+ * "images.html" with "query" and "images" set in the template context.
+ *
+ * Returns 0 once the template stage was reached (an error page is sent if
+ * rendering itself fails) and -1 after sending an error for a missing query
+ * or a curl, fetch, parse or allocation failure.
+ *
+ * NOTE(review): the markup inside the error strings was stripped in
+ * extraction; <h1> is a reconstruction -- confirm against the repository.
+ */
+int images_handler(UrlParams *params) {
+  TemplateContext ctx = new_context();
+  char *raw_query = "";
+  char *display_query = NULL;
+  char *encoded_query = NULL;
+  char *html = NULL;
+  char *rendered = NULL;
+  char url[URL_BUF_SIZE];
+  int url_len;
+  CURL *escaper = NULL;
+  htmlDocPtr doc = NULL;
+  xmlXPathContextPtr xpathCtx = NULL;
+  xmlXPathObjectPtr xpathObj = NULL;
+  char ***image_matrix = NULL;
+  int *inner_counts = NULL;
+  int image_count = 0;
+  int rc = -1;
+
+  if (params) {
+    for (int i = 0; i < params->count; i++) {
+      if (strcmp(params->params[i].key, "q") == 0) {
+        raw_query = params->params[i].value;
+        break;
+      }
+    }
+  }
+
+  if (!raw_query || raw_query[0] == '\0') {
+    send_response("<h1>No query provided</h1>");
+    goto cleanup;
+  }
+
+  display_query = url_decode_query(raw_query);
+  /* Show the raw text instead of passing NULL if decoding failed. */
+  context_set(&ctx, "query", display_query ? display_query : raw_query);
+
+  /* This handle is only used for escaping; the fetch creates its own. */
+  escaper = curl_easy_init();
+  if (!escaper) {
+    send_response("<h1>Error initializing curl</h1>");
+    goto cleanup;
+  }
+  encoded_query = curl_easy_escape(escaper, raw_query, 0);
+  curl_easy_cleanup(escaper);
+  if (!encoded_query) {
+    send_response("<h1>Error encoding query</h1>");
+    goto cleanup;
+  }
+
+  url_len = snprintf(url, sizeof(url),
+                     "https://www.bing.com/images/search?q=%s", encoded_query);
+  if (url_len < 0 || (size_t)url_len >= sizeof(url)) {
+    /* A truncated URL would silently search for a different query. */
+    send_response("<h1>Query too long</h1>");
+    goto cleanup;
+  }
+  fprintf(stderr, "[DEBUG] Fetching URL: %s\n", url);
+
+  html = fetch_images_html(url);
+  if (!html) {
+    fprintf(stderr, "[DEBUG] Failed to fetch HTML\n");
+    send_response("<h1>Error fetching images</h1>");
+    goto cleanup;
+  }
+
+  doc = htmlReadMemory(html, (int)strlen(html), NULL, NULL,
+                       HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
+  if (doc) {
+    xpathCtx = xmlXPathNewContext(doc);
+  }
+  if (!doc || !xpathCtx) {
+    /* Always answer the request, even when parsing cannot start. */
+    send_response("<h1>Error parsing image results</h1>");
+    goto cleanup;
+  }
+
+  xpathObj =
+      xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
+
+  if (xpathObj && xpathObj->nodesetval) {
+    int nodes = xpathObj->nodesetval->nodeNr;
+    fprintf(stderr, "[DEBUG] Found %d image items\n", nodes);
+
+    int capacity = (nodes < MAX_IMAGES) ? nodes : MAX_IMAGES;
+    if (capacity > 0) {
+      image_matrix = calloc((size_t)capacity, sizeof *image_matrix);
+      inner_counts = calloc((size_t)capacity, sizeof *inner_counts);
+      if (!image_matrix || !inner_counts) {
+        send_response("<h1>Error rendering image results</h1>");
+        goto cleanup;
+      }
+    }
+
+    for (int i = 0; i < nodes && image_count < capacity; i++) {
+      ImageItemNodes found =
+          locate_item_nodes(xpathObj->nodesetval->nodeTab[i]);
+
+      xmlChar *iurl =
+          found.img ? xmlGetProp(found.img, (const xmlChar *)"src") : NULL;
+      xmlChar *full_url =
+          found.thumb ? xmlGetProp(found.thumb, (const xmlChar *)"href") : NULL;
+      /* Prefer the description text; fall back to the title link's text. */
+      xmlNodePtr title_node = found.des ? found.des : found.tit;
+      xmlChar *title = title_node ? xmlNodeGetContent(title_node) : NULL;
+      xmlChar *rurl =
+          found.tit ? xmlGetProp(found.tit, (const xmlChar *)"href") : NULL;
+
+      fprintf(stderr,
+              "[DEBUG] Image %d: thumb=%s, full=%s, title=%s, site=%s\n",
+              image_count, iurl ? (char *)iurl : "nil",
+              full_url ? (char *)full_url : "nil",
+              title ? (char *)title : "nil", rurl ? (char *)rurl : "nil");
+
+      /* Entries without a thumbnail are skipped. */
+      if (iurl && iurl[0] != '\0') {
+        char **row = make_image_row(iurl, title, rurl, full_url);
+        if (row) {
+          image_matrix[image_count] = row;
+          inner_counts[image_count] = IMAGE_FIELDS;
+          image_count++;
+        }
+      }
+
+      if (iurl) xmlFree(iurl);
+      if (title) xmlFree(title);
+      if (rurl) xmlFree(rurl);
+      if (full_url) xmlFree(full_url);
+    }
+  }
+
+  context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
+                              inner_counts);
+
+  rendered = render_template("images.html", &ctx);
+  if (rendered) {
+    send_response(rendered);
+  } else {
+    send_response("<h1>Error rendering image results</h1>");
+  }
+  rc = 0;
+
+cleanup:
+  free(rendered);
+  free_image_matrix(image_matrix, image_count);
+  free(inner_counts);
+  if (xpathObj) xmlXPathFreeObject(xpathObj);
+  if (xpathCtx) xmlXPathFreeContext(xpathCtx);
+  if (doc) xmlFreeDoc(doc);
+  free(html);
+  /* curl_easy_escape() memory must go back through curl_free(). */
+  curl_free(encoded_query);
+  free(display_query);
+  free_context(&ctx);
+  return rc;
+}
diff --git a/src/Routes/Images.h b/src/Routes/Images.h
new file mode 100644
index 0000000..86f4a31
--- /dev/null
+++ b/src/Routes/Images.h
@@ -0,0 +1,9 @@
+#ifndef IMAGES_HANDLER_H
+#define IMAGES_HANDLER_H
+
+#include <beaker.h> /* NOTE(review): name lost in extraction -- confirm */
+
+/* Route handler for image search; see Images.c. */
+int images_handler(UrlParams *params);
+
+#endif
diff --git a/src/Routes/Search.c b/src/Routes/Search.c
new file mode 100644
index 0000000..4e8c7ad
--- /dev/null
+++ b/src/Routes/Search.c
@@ -0,0 +1,381 @@
+#include "Search.h"
+#include "../Infobox/Wikipedia.h"
+#include "../Infobox/Calculator.h"
+#include "../Infobox/Dictionary.h"
+#include "../Scraping/Scraping.h"
+#include "../Utility/Display.h"
+#include "../Utility/Unescape.h"
+
+/*
+ * NOTE(review): the <...> header names in this file were lost in extraction
+ * and are reconstructed from the symbols used below. <limits.h> is new (page
+ * bound); one original header could not be inferred. Confirm against the
+ * repository.
+ */
+#include <ctype.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+  INFOBOX_FIELDS = 4,     /* title, thumbnail URL, extract, URL */
+  RESULT_FIELDS = 4,      /* URL, display URL, title, snippet */
+  RESULTS_PER_ENGINE = 10 /* max_results requested from each engine */
+};
+
+/* Input and output of one infobox worker thread. */
+typedef struct {
+  const char *query;
+  InfoBox result;
+  int success;
+} InfoBoxThreadData;
+
+/*
+ * Thread entry: fetches the Wikipedia infobox for data->query. success is set
+ * only when a title and an extract longer than 10 characters came back.
+ *
+ * NOTE(review): when that check fails the result is never passed to
+ * free_infobox (the caller frees only on success) -- confirm fetch_wiki_data
+ * cannot return a partially filled InfoBox.
+ */
+static void *wiki_thread_func(void *arg) {
+  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
+  char *dynamic_url = construct_wiki_url(data->query);
+  if (dynamic_url) {
+    data->result = fetch_wiki_data(dynamic_url);
+    data->success =
+        (data->result.title != NULL && data->result.extract != NULL &&
+         strlen(data->result.extract) > 10);
+    free(dynamic_url);
+  } else {
+    data->success = 0;
+  }
+  return NULL;
+}
+
+/*
+ * Heuristic for "looks like arithmetic": the query must contain at least one
+ * digit and either an operator (+ - * / = ^) or a '.'.
+ */
+static int is_calculator_query(const char *query) {
+  if (!query) return 0;
+
+  int has_digit = 0;
+  int has_operator = 0;
+
+  for (const char *p = query; *p; p++) {
+    /* <ctype.h> needs an unsigned char value; a lone '.' is not a digit. */
+    if (isdigit((unsigned char)*p)) {
+      has_digit = 1;
+    }
+    if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '=' ||
+        *p == '^') {
+      has_operator = 1;
+    }
+  }
+
+  return has_digit && (has_operator || strchr(query, '.') != NULL);
+}
+
+/* Thread entry: evaluates data->query when it passes is_calculator_query. */
+static void *calc_thread_func(void *arg) {
+  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
+
+  if (is_calculator_query(data->query)) {
+    data->result = fetch_calc_data((char *)data->query);
+    data->success =
+        (data->result.title != NULL && data->result.extract != NULL);
+  } else {
+    data->success = 0;
+  }
+
+  return NULL;
+}
+
+/* Thread entry: looks data->query up when is_dictionary_query accepts it. */
+static void *dict_thread_func(void *arg) {
+  InfoBoxThreadData *data = (InfoBoxThreadData *)arg;
+
+  if (is_dictionary_query(data->query)) {
+    data->result = fetch_dictionary_data(data->query);
+    data->success =
+        (data->result.title != NULL && data->result.extract != NULL);
+  } else {
+    data->success = 0;
+  }
+
+  return NULL;
+}
+
+/*
+ * Appends a row that borrows infobox's strings (title, thumbnail_url,
+ * extract, url) to *collection and records its length in *inner_counts.
+ * Returns the new row count, or current_count unchanged if memory runs out;
+ * both arrays remain valid either way.
+ */
+static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
+                                     int **inner_counts, int current_count) {
+  size_t wanted = (size_t)current_count + 1;
+
+  /* Grow through temporaries so a failed realloc cannot lose the old block. */
+  char ***rows = realloc(*collection, sizeof *rows * wanted);
+  if (!rows) return current_count;
+  *collection = rows;
+
+  int *counts = realloc(*inner_counts, sizeof *counts * wanted);
+  if (!counts) return current_count;
+  *inner_counts = counts;
+
+  char **row = malloc(sizeof *row * INFOBOX_FIELDS);
+  if (!row) return current_count;
+
+  row[0] = infobox->title;
+  row[1] = infobox->thumbnail_url;
+  row[2] = infobox->extract;
+  row[3] = infobox->url;
+  rows[current_count] = row;
+  counts[current_count] = INFOBOX_FIELDS;
+
+  return current_count + 1;
+}
+
+/*
+ * Frees the rows made by add_infobox_to_collection plus both vectors. The
+ * strings inside each row are borrowed from an InfoBox and are not freed.
+ */
+static void free_infobox_collection(char ***collection, int *inner_counts,
+                                    int count) {
+  for (int i = 0; i < count; i++) free(collection[i]);
+  free(collection);
+  free(inner_counts);
+}
+
+/*
+ * Parses a "p" parameter. Returns the page number, or 0 when `text` is NULL,
+ * not a positive number, or too large for page + 1 to fit in an int.
+ */
+static int parse_page_number(const char *text) {
+  if (!text) return 0;
+
+  long value = strtol(text, NULL, 10);
+  if (value < 1 || value >= INT_MAX) return 0;
+  return (int)value;
+}
+
+/* Returns 1 if `url` equals column 0 of one of the first `count` rows. */
+static int url_already_listed(char ***rows, int count, const char *url) {
+  for (int k = 0; k < count; k++) {
+    if (strcmp(rows[k][0], url) == 0) return 1;
+  }
+  return 0;
+}
+
+/*
+ * Builds one RESULT_FIELDS-long row of heap copies for the template:
+ * [0] URL, [1] display URL, [2] title or "Untitled", [3] snippet or "".
+ * res->url must not be NULL. Returns NULL (nothing leaked) if any
+ * allocation fails.
+ */
+static char **make_result_row(const SearchResult *res) {
+  char **row = malloc(sizeof *row * RESULT_FIELDS);
+  if (!row) return NULL;
+
+  char *pretty_url = pretty_display_url(res->url);
+  row[0] = strdup(res->url);
+  /* Fall back to the plain URL if no display form could be produced. */
+  row[1] = strdup(pretty_url ? pretty_url : res->url);
+  row[2] = strdup(res->title ? res->title : "Untitled");
+  row[3] = strdup(res->snippet ? res->snippet : "");
+  free(pretty_url);
+
+  if (!row[0] || !row[1] || !row[2] || !row[3]) {
+    for (int i = 0; i < RESULT_FIELDS; i++) free(row[i]);
+    free(row);
+    return NULL;
+  }
+  return row;
+}
+
+/*
+ * GET handler for web search. Reads "q" (query) and "p" (page, default 1),
+ * runs every engine in ENGINE_REGISTRY through scrape_engines_parallel and,
+ * on page 1, the Wikipedia, calculator and dictionary infobox threads. It
+ * then renders "results.html" with "query", "page", "prev_page",
+ * "next_page", "infoboxes" (if any) and "results" (duplicate URLs removed).
+ *
+ * Returns -1 after sending an error when the query is missing, 0 otherwise.
+ *
+ * NOTE(review): the markup inside the error strings was stripped in
+ * extraction; <h1> is a reconstruction -- confirm against the repository.
+ */
+int results_handler(UrlParams *params) {
+  TemplateContext ctx = new_context();
+  char *raw_query = "";
+  int page = 1;
+
+  if (params) {
+    for (int i = 0; i < params->count; i++) {
+      if (strcmp(params->params[i].key, "q") == 0) {
+        raw_query = params->params[i].value;
+      } else if (strcmp(params->params[i].key, "p") == 0) {
+        int parsed = parse_page_number(params->params[i].value);
+        if (parsed > 1) page = parsed;
+      }
+    }
+  }
+
+  if (!raw_query || raw_query[0] == '\0') {
+    send_response("<h1>No query provided</h1>");
+    free_context(&ctx);
+    return -1;
+  }
+
+  context_set(&ctx, "query", raw_query);
+
+  char page_str[16], prev_str[16], next_str[16];
+  snprintf(page_str, sizeof(page_str), "%d", page);
+  snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
+  snprintf(next_str, sizeof(next_str), "%d", page + 1);
+  context_set(&ctx, "page", page_str);
+  context_set(&ctx, "prev_page", prev_str);
+  context_set(&ctx, "next_page", next_str);
+
+  pthread_t wiki_tid, calc_tid, dict_tid;
+  int wiki_started = 0, calc_started = 0, dict_started = 0;
+  InfoBoxThreadData wiki_data = {.query = raw_query, .success = 0};
+  InfoBoxThreadData calc_data = {.query = raw_query, .success = 0};
+  InfoBoxThreadData dict_data = {.query = raw_query, .success = 0};
+
+  if (page == 1) {
+    /* Remember which threads started: joining an unstarted one is invalid. */
+    wiki_started =
+        pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data) == 0;
+    calc_started =
+        pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data) == 0;
+    dict_started =
+        pthread_create(&dict_tid, NULL, dict_thread_func, &dict_data) == 0;
+  }
+
+  ScrapeJob jobs[ENGINE_COUNT];
+  SearchResult *all_results[ENGINE_COUNT];
+
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    all_results[i] = NULL;
+    jobs[i].engine = &ENGINE_REGISTRY[i];
+    jobs[i].query = raw_query;
+    jobs[i].out_results = &all_results[i];
+    jobs[i].max_results = RESULTS_PER_ENGINE;
+    jobs[i].results_count = 0;
+    jobs[i].page = page;
+  }
+
+  scrape_engines_parallel(jobs, ENGINE_COUNT);
+
+  if (wiki_started) pthread_join(wiki_tid, NULL);
+  if (calc_started) pthread_join(calc_tid, NULL);
+  if (dict_started) pthread_join(dict_tid, NULL);
+
+  char ***infobox_matrix = NULL;
+  int *infobox_inner_counts = NULL;
+  int infobox_count = 0;
+
+  /* success stays 0 for every infobox unless its thread ran on page 1. */
+  if (dict_data.success) {
+    infobox_count =
+        add_infobox_to_collection(&dict_data.result, &infobox_matrix,
+                                  &infobox_inner_counts, infobox_count);
+  }
+  if (calc_data.success) {
+    infobox_count =
+        add_infobox_to_collection(&calc_data.result, &infobox_matrix,
+                                  &infobox_inner_counts, infobox_count);
+  }
+  if (wiki_data.success) {
+    infobox_count =
+        add_infobox_to_collection(&wiki_data.result, &infobox_matrix,
+                                  &infobox_inner_counts, infobox_count);
+  }
+
+  if (infobox_count > 0) {
+    context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
+                                infobox_count, infobox_inner_counts);
+  }
+
+  int total_results = 0;
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    total_results += jobs[i].results_count;
+  }
+
+  char ***results_matrix = NULL;
+  int *results_inner_counts = NULL;
+  int unique_count = 0;
+
+  if (total_results > 0) {
+    results_matrix = malloc(sizeof *results_matrix * (size_t)total_results);
+    results_inner_counts =
+        malloc(sizeof *results_inner_counts * (size_t)total_results);
+    if (!results_matrix || !results_inner_counts) {
+      /* Out of memory: render without results but still free the scrape. */
+      free(results_matrix);
+      free(results_inner_counts);
+      results_matrix = NULL;
+      results_inner_counts = NULL;
+    }
+  }
+
+  for (int i = 0; i < ENGINE_COUNT; i++) {
+    int count = all_results[i] ? jobs[i].results_count : 0;
+
+    for (int j = 0; j < count; j++) {
+      SearchResult *res = &all_results[i][j];
+
+      /* Keep the first occurrence of each URL; skip entries without one. */
+      if (results_matrix && res->url &&
+          !url_already_listed(results_matrix, unique_count, res->url)) {
+        char **row = make_result_row(res);
+        if (row) {
+          results_matrix[unique_count] = row;
+          results_inner_counts[unique_count] = RESULT_FIELDS;
+          unique_count++;
+        }
+      }
+
+      free(res->url);
+      free(res->title);
+      free(res->snippet);
+    }
+    /* Released on every path, including when no engine returned results. */
+    free(all_results[i]);
+  }
+
+  if (results_matrix) {
+    context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count,
+                                results_inner_counts);
+  }
+
+  char *html = render_template("results.html", &ctx);
+  if (html) {
+    send_response(html);
+    free(html);
+  } else {
+    send_response("<h1>Error rendering search results</h1>");
+  }
+
+  for (int i = 0; i < unique_count; i++) {
+    for (int j = 0; j < RESULT_FIELDS; j++) free(results_matrix[i][j]);
+    free(results_matrix[i]);
+  }
+  free(results_matrix);
+  free(results_inner_counts);
+
+  /* Rows are freed only now so nothing dangles while the template renders. */
+  free_infobox_collection(infobox_matrix, infobox_inner_counts, infobox_count);
+  if (wiki_data.success) free_infobox(&wiki_data.result);
+  if (calc_data.success) free_infobox(&calc_data.result);
+  if (dict_data.success) free_infobox(&dict_data.result);
+  free_context(&ctx);
+
+  return 0;
+}
diff --git a/src/Routes/Search.h b/src/Routes/Search.h
new file mode 100644
index 0000000..c6bc146
--- /dev/null
+++ b/src/Routes/Search.h
@@ -0,0 +1,9 @@
+#ifndef SEARCH_HANDLER_H
+#define SEARCH_HANDLER_H
+
+#include <beaker.h> /* NOTE(review): name lost in extraction -- confirm */
+
+/* Route handler for web search; see Search.c. */
+int results_handler(UrlParams *params);
+
+#endif
-- 
cgit v1.2.3