#include "Search.h"
#include "../Infobox/Calculator.h"
#include "../Infobox/CurrencyConversion.h"
#include "../Infobox/Dictionary.h"
#include "../Infobox/UnitConversion.h"
#include "../Infobox/Wikipedia.h"
#include "../Scraping/Scraping.h"
#include "../Utility/Display.h"
#include "../Utility/Unescape.h"
/*
 * NOTE(review): the names of the six system headers were missing from the
 * text under review. The list below is what this file visibly uses; restore
 * any other header (for example the one declaring TemplateContext /
 * send_response) if Search.h does not already pull it in.
 */
#include <ctype.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Columns per infobox row: title, thumbnail URL, extract, URL. */
#define INFOBOX_FIELD_COUNT 4
/* Columns per search-result row: URL, display URL, title, snippet. */
#define RESULT_FIELD_COUNT 4
#define MAX_RESULTS_PER_ENGINE 10

/* Per-thread argument/result slot for one infobox handler. */
typedef struct {
    const char *query;
    InfoBox result;
    /* On entry: index into handlers[]. On exit: 1 if result is usable, else 0. */
    int success;
} InfoBoxThreadData;

/* One infobox provider: a query predicate plus the function that fetches data. */
typedef struct {
    int (*check_fn)(const char *query);
    InfoBox (*fetch_fn)(char *query);
    char *(*url_construct_fn)(const char *query);
} InfoBoxHandler;

/* Builds the wiki URL for the query, fetches it, and releases the URL. */
static InfoBox fetch_wiki_wrapper(char *query)
{
    char *url = construct_wiki_url(query);
    if (!url) {
        return (InfoBox){NULL};
    }
    InfoBox result = fetch_wiki_data(url);
    free(url);
    return result;
}

/* Predicate that accepts every query (used for the fallback handler). */
static int always_true(const char *query)
{
    (void)query;
    return 1;
}

static InfoBox fetch_dict_wrapper(char *query)
{
    return fetch_dictionary_data(query);
}

static InfoBox fetch_calc_wrapper(char *query)
{
    return fetch_calc_data(query);
}

static InfoBox fetch_unit_wrapper(char *query)
{
    return fetch_unit_conv_data(query);
}

static InfoBox fetch_currency_wrapper(char *query)
{
    return fetch_currency_data(query);
}

/* True for the arithmetic operator characters recognised in a query. */
static int is_math_operator(char c)
{
    return c == '+' || c == '-' || c == '*' || c == '/' || c == '^';
}

/* True for a decimal digit or '.'. */
static int is_number_char(char c)
{
    /* <ctype.h> functions require a value representable as unsigned char. */
    return isdigit((unsigned char)c) || c == '.';
}

/*
 * Returns 1 when the query contains an operator (+ - * / ^) whose nearest
 * non-space neighbour on the left or on the right is a digit or '.',
 * otherwise 0. A NULL query yields 0.
 */
static int is_calculator_query(const char *query)
{
    if (!query) {
        return 0;
    }

    size_t len = strlen(query);
    for (size_t i = 0; i < len; i++) {
        if (!is_math_operator(query[i])) {
            continue;
        }

        /* Skip spaces to the left of the operator. */
        size_t left = i;
        while (left > 0 && query[left - 1] == ' ') {
            left--;
        }
        if (left > 0 && is_number_char(query[left - 1])) {
            return 1;
        }

        /* Skip spaces to the right of the operator. */
        size_t right = i + 1;
        while (right < len && query[right] == ' ') {
            right++;
        }
        if (right < len && is_number_char(query[right])) {
            return 1;
        }
    }
    return 0;
}

/* Handlers are tried independently; the last one accepts every query. */
static const InfoBoxHandler handlers[] = {
    {is_dictionary_query, fetch_dict_wrapper, NULL},
    {is_calculator_query, fetch_calc_wrapper, NULL},
    {is_unit_conv_query, fetch_unit_wrapper, NULL},
    {is_currency_query, fetch_currency_wrapper, NULL},
    {always_true, fetch_wiki_wrapper, construct_wiki_url},
};

enum { HANDLER_COUNT = sizeof(handlers) / sizeof(handlers[0]) };

/*
 * Thread entry point. `arg` is an InfoBoxThreadData whose `success` field
 * carries the handler index on entry. On return `success` is 1 only when the
 * handler accepted the query and produced a title plus an extract longer than
 * 10 characters; in every other case it is 0.
 */
static void *infobox_thread_func(void *arg)
{
    InfoBoxThreadData *data = arg;
    int handler_idx = data->success;

    /* Clear first so every early return reports failure, not the index. */
    data->success = 0;
    if (handler_idx < 0 || handler_idx >= HANDLER_COUNT) {
        return NULL;
    }

    const InfoBoxHandler *h = &handlers[handler_idx];
    if (h->check_fn && !h->check_fn(data->query)) {
        return NULL;
    }

    data->result = h->fetch_fn((char *)data->query);
    /*
     * NOTE(review): a result rejected here is never passed to free_infobox(),
     * so any fields the fetcher did allocate appear to leak - confirm
     * free_infobox() is safe on partial results and release them here.
     */
    data->success = (data->result.title != NULL &&
                     data->result.extract != NULL &&
                     strlen(data->result.extract) > 10);
    return NULL;
}

/*
 * Appends a copy of `infobox` as a new INFOBOX_FIELD_COUNT-column row.
 *
 * `*collection` and `*inner_counts` are grown by one element. Returns the new
 * row count, or `current_count` unchanged if an allocation fails (the existing
 * rows stay valid and owned by the caller). NULL fields are stored as NULL.
 */
static int add_infobox_to_collection(InfoBox *infobox, char ****collection,
                                     int **inner_counts, int current_count)
{
    size_t new_len = (size_t)current_count + 1;

    /* Grow through temporaries so a failed realloc does not lose the block. */
    char ***rows = realloc(*collection, new_len * sizeof *rows);
    if (!rows) {
        return current_count;
    }
    *collection = rows;

    int *counts = realloc(*inner_counts, new_len * sizeof *counts);
    if (!counts) {
        return current_count;
    }
    *inner_counts = counts;

    char **row = malloc(INFOBOX_FIELD_COUNT * sizeof *row);
    if (!row) {
        return current_count;
    }
    row[0] = infobox->title ? strdup(infobox->title) : NULL;
    row[1] = infobox->thumbnail_url ? strdup(infobox->thumbnail_url) : NULL;
    row[2] = infobox->extract ? strdup(infobox->extract) : NULL;
    row[3] = infobox->url ? strdup(infobox->url) : NULL;

    rows[current_count] = row;
    counts[current_count] = INFOBOX_FIELD_COUNT;
    return current_count + 1;
}

/* Frees a rows x cols matrix of heap strings; NULL matrix/rows are ignored. */
static void free_string_matrix(char ***matrix, int rows, int cols)
{
    if (!matrix) {
        return;
    }
    for (int i = 0; i < rows; i++) {
        if (matrix[i]) {
            for (int j = 0; j < cols; j++) {
                free(matrix[i][j]);
            }
        }
        free(matrix[i]);
    }
    free(matrix);
}

/*
 * Parses the "p" parameter. Returns the page number when it is greater than 1
 * and fits in an int with room for page + 1; otherwise returns 1.
 */
static int parse_page_param(const char *text)
{
    if (!text) {
        return 1;
    }
    long parsed = strtol(text, NULL, 10);
    /* Strictly below INT_MAX so the later page + 1 cannot overflow. */
    if (parsed > 1 && parsed < INT_MAX) {
        return (int)parsed;
    }
    return 1;
}

/* Returns 1 if `url` equals column 0 of any of the first `count` rows. */
static int url_already_listed(char ***matrix, int count, const char *url)
{
    for (int k = 0; k < count; k++) {
        if (strcmp(matrix[k][0], url) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Builds one result row {url, display url, title, snippet} from `r`, whose url
 * must be non-NULL. Missing title/snippet become "Untitled" / "". Returns NULL
 * if any allocation fails; nothing is leaked in that case.
 */
static char **build_result_row(SearchResult *r)
{
    char **row = calloc(RESULT_FIELD_COUNT, sizeof *row);
    if (!row) {
        return NULL;
    }

    char *pretty_url = pretty_display_url(r->url);
    row[0] = strdup(r->url);
    /* Take ownership of the display string; fall back to the raw URL. */
    row[1] = pretty_url ? pretty_url : strdup(r->url);
    row[2] = strdup(r->title ? r->title : "Untitled");
    row[3] = strdup(r->snippet ? r->snippet : "");

    for (int k = 0; k < RESULT_FIELD_COUNT; k++) {
        if (!row[k]) {
            for (int m = 0; m < RESULT_FIELD_COUNT; m++) {
                free(row[m]);
            }
            free(row);
            return NULL;
        }
    }
    return row;
}

/*
 * Merges the per-engine results into one matrix, dropping entries whose URL
 * was already seen (first occurrence wins, engines in index order).
 *
 * Every SearchResult string and every all_results[i] array is freed here,
 * whether or not any rows were produced. On return *out_matrix/*out_counts are
 * either both NULL (return value 0) or hold the returned number of rows.
 */
static int collect_unique_results(const ScrapeJob *jobs,
                                  SearchResult **all_results, int job_count,
                                  char ****out_matrix, int **out_counts)
{
    int total = 0;
    for (int i = 0; i < job_count; i++) {
        if (all_results[i] && jobs[i].results_count > 0) {
            total += jobs[i].results_count;
        }
    }

    char ***matrix = NULL;
    int *counts = NULL;
    if (total > 0) {
        matrix = calloc((size_t)total, sizeof *matrix);
        counts = calloc((size_t)total, sizeof *counts);
        if (!matrix || !counts) {
            free(matrix);
            free(counts);
            matrix = NULL;
            counts = NULL;
        }
    }

    int unique = 0;
    for (int i = 0; i < job_count; i++) {
        SearchResult *list = all_results[i];
        int n = list ? jobs[i].results_count : 0;

        for (int j = 0; j < n; j++) {
            SearchResult *r = &list[j];
            char **row = NULL;

            if (matrix && r->url &&
                !url_already_listed(matrix, unique, r->url)) {
                row = build_result_row(r);
            }
            if (row) {
                matrix[unique] = row;
                counts[unique] = RESULT_FIELD_COUNT;
                unique++;
            }

            free(r->url);
            free(r->title);
            free(r->snippet);
        }

        free(list);
        all_results[i] = NULL;
    }

    if (unique == 0) {
        free(matrix);
        free(counts);
        matrix = NULL;
        counts = NULL;
    }
    *out_matrix = matrix;
    *out_counts = counts;
    return unique;
}

/*
 * Handles a search request.
 *
 * Reads "q" (query) and "p" (page, default 1) from `params`, runs the scrape
 * engines, and - on page 1 only - runs every infobox handler in its own thread
 * while scraping is in progress. Infoboxes and de-duplicated results are put
 * into the template context and "results.html" is rendered and sent.
 *
 * Returns 0 after rendering, or -1 when no query was supplied (a short
 * message is sent instead).
 */
int results_handler(UrlParams *params)
{
    TemplateContext ctx = new_context();
    char *raw_query = "";
    int page = 1;

    if (params) {
        for (int i = 0; i < params->count; i++) {
            if (strcmp(params->params[i].key, "q") == 0) {
                raw_query = params->params[i].value;
            } else if (strcmp(params->params[i].key, "p") == 0) {
                int parsed = parse_page_param(params->params[i].value);
                if (parsed > 1) {
                    page = parsed;
                }
            }
        }
    }

    if (!raw_query || raw_query[0] == '\0') {
        /*
         * NOTE(review): this literal reached review with raw line breaks and
         * apparently stripped markup; the visible bytes are kept as escapes.
         * Confirm the intended text against version control.
         */
        send_response("\n\nNo query provided\n\n");
        free_context(&ctx);
        return -1;
    }

    context_set(&ctx, "query", raw_query);

    char page_str[16], prev_str[16], next_str[16];
    snprintf(page_str, sizeof(page_str), "%d", page);
    snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
    snprintf(next_str, sizeof(next_str), "%d", page + 1);
    context_set(&ctx, "page", page_str);
    context_set(&ctx, "prev_page", prev_str);
    context_set(&ctx, "next_page", next_str);

    /* Infobox lookups run only for the first page. */
    pthread_t infobox_threads[HANDLER_COUNT];
    int thread_started[HANDLER_COUNT] = {0};
    InfoBoxThreadData infobox_data[HANDLER_COUNT];

    for (int i = 0; i < HANDLER_COUNT; i++) {
        infobox_data[i].query = raw_query;
        infobox_data[i].success = i; /* handler index, consumed by the thread */
        infobox_data[i].result = (InfoBox){NULL};

        if (page == 1 &&
            pthread_create(&infobox_threads[i], NULL, infobox_thread_func,
                           &infobox_data[i]) == 0) {
            thread_started[i] = 1;
        } else {
            /* No thread will run: the slot must not look like a success. */
            infobox_data[i].success = 0;
        }
    }

    ScrapeJob jobs[ENGINE_COUNT];
    SearchResult *all_results[ENGINE_COUNT];

    for (int i = 0; i < ENGINE_COUNT; i++) {
        all_results[i] = NULL;
        jobs[i].engine = &ENGINE_REGISTRY[i];
        jobs[i].query = raw_query;
        jobs[i].out_results = &all_results[i];
        jobs[i].max_results = MAX_RESULTS_PER_ENGINE;
        jobs[i].results_count = 0;
        jobs[i].page = page;
        jobs[i].handle = NULL;
        jobs[i].response.memory = NULL;
        jobs[i].response.size = 0;
        jobs[i].response.capacity = 0;
    }

    scrape_engines_parallel(jobs, ENGINE_COUNT);

    /* Join only the threads that were actually created. */
    for (int i = 0; i < HANDLER_COUNT; i++) {
        if (thread_started[i]) {
            pthread_join(infobox_threads[i], NULL);
        }
    }

    char ***infobox_matrix = NULL;
    int *infobox_inner_counts = NULL;
    int infobox_count = 0;

    for (int i = 0; i < HANDLER_COUNT; i++) {
        if (infobox_data[i].success) {
            infobox_count = add_infobox_to_collection(
                &infobox_data[i].result, &infobox_matrix,
                &infobox_inner_counts, infobox_count);
        }
    }

    if (infobox_count > 0) {
        context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix,
                                    infobox_count, infobox_inner_counts);
    }
    /* Freed unconditionally: a failed append can leave storage with 0 rows. */
    free_string_matrix(infobox_matrix, infobox_count, INFOBOX_FIELD_COUNT);
    free(infobox_inner_counts);

    char ***results_matrix = NULL;
    int *results_inner_counts = NULL;
    int unique_count = collect_unique_results(jobs, all_results, ENGINE_COUNT,
                                              &results_matrix,
                                              &results_inner_counts);

    if (unique_count > 0) {
        context_set_array_of_arrays(&ctx, "results", results_matrix,
                                    unique_count, results_inner_counts);
    }

    char *html = render_template("results.html", &ctx);
    if (html) {
        send_response(html);
        free(html);
    }

    free_string_matrix(results_matrix, unique_count, RESULT_FIELD_COUNT);
    free(results_inner_counts);

    for (int i = 0; i < HANDLER_COUNT; i++) {
        if (infobox_data[i].success) {
            free_infobox(&infobox_data[i].result);
        }
    }

    free_context(&ctx);
    return 0;
}