diff options
Diffstat (limited to 'src/Routes/Search.c')
| -rw-r--r-- | src/Routes/Search.c | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/src/Routes/Search.c b/src/Routes/Search.c new file mode 100644 index 0000000..110e6f7 --- /dev/null +++ b/src/Routes/Search.c @@ -0,0 +1,273 @@ +#include "Search.h" +#include "../Infobox/Wikipedia.h" +#include "../Infobox/Calculator.h" +#include "../Scraping/Scraping.h" +#include "../Utility/Display.h" +#include "../Utility/Unescape.h" +#include <ctype.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +typedef struct { + const SearchEngine *engine; + const char *query; + SearchResult *results; + int count; +} EngineThreadData; + +static void *scrape_thread_func(void *arg) { + EngineThreadData *data = (EngineThreadData *)arg; + data->count = scrape_engine(data->engine, data->query, &data->results, 10); + return NULL; +} + +typedef struct { + const char *query; + InfoBox result; + int success; +} InfoBoxThreadData; + +static void *wiki_thread_func(void *arg) { + InfoBoxThreadData *data = (InfoBoxThreadData *)arg; + char *dynamic_url = construct_wiki_url(data->query); + if (dynamic_url) { + data->result = fetch_wiki_data(dynamic_url); + data->success = + (data->result.title != NULL && data->result.extract != NULL && + strlen(data->result.extract) > 10); + free(dynamic_url); + } else { + data->success = 0; + } + return NULL; +} + +static int is_calculator_query(const char *query) { + if (!query) return 0; + + int has_digit = 0; + int has_operator = 0; + + for (const char *p = query; *p; p++) { + if (isdigit(*p) || *p == '.') { + has_digit = 1; + } + if (*p == '+' || *p == '-' || *p == '*' || *p == '/' || *p == '=' || + *p == '^') { + has_operator = 1; + } + } + + return has_digit && (has_operator || strchr(query, '.')); +} + +static void *calc_thread_func(void *arg) { + InfoBoxThreadData *data = (InfoBoxThreadData *)arg; + + if (is_calculator_query(data->query)) { + data->result = fetch_calc_data((char *)data->query); + data->success = + (data->result.title != NULL && data->result.extract != NULL); + } else { + data->success = 0; + } + + return NULL; +} + +static int add_infobox_to_collection(InfoBox *infobox, char ****collection, + int **inner_counts, int current_count) { + *collection = + (char ***)realloc(*collection, sizeof(char **) * (current_count + 1)); + *inner_counts = + (int *)realloc(*inner_counts, sizeof(int) * (current_count + 1)); + + (*collection)[current_count] = (char **)malloc(sizeof(char *) * 4); + (*collection)[current_count][0] = infobox->title; + (*collection)[current_count][1] = infobox->thumbnail_url; + (*collection)[current_count][2] = infobox->extract; + (*collection)[current_count][3] = infobox->url; + (*inner_counts)[current_count] = 4; + + return current_count + 1; +} + +int results_handler(UrlParams *params) { + TemplateContext ctx = new_context(); + char *raw_query = ""; + + if (params) { + for (int i = 0; i < params->count; i++) { + if (strcmp(params->params[i].key, "q") == 0) { + raw_query = params->params[i].value; + break; + } + } + } + + char *encoded_query = strdup(raw_query); + + char *display_query = url_decode_query(raw_query); + LOG_INFO("Processing search request for query: '%s'", display_query); + context_set(&ctx, "query", display_query); + + if (!encoded_query || strlen(encoded_query) == 0) { + LOG_ERROR("Empty search query provided."); + send_response("<h1>No query provided</h1>"); + if (encoded_query) free(encoded_query); + if (display_query) free(display_query); + free_context(&ctx); + return -1; + } + + pthread_t wiki_tid, calc_tid; + InfoBoxThreadData wiki_data = {.query = display_query, .success = 0}; + InfoBoxThreadData calc_data = {.query = display_query, .success = 0}; + + pthread_create(&wiki_tid, NULL, wiki_thread_func, &wiki_data); + pthread_create(&calc_tid, NULL, calc_thread_func, &calc_data); + + pthread_t engine_tids[ENGINE_COUNT]; + EngineThreadData engine_data[ENGINE_COUNT]; + + for (int i = 0; i < ENGINE_COUNT; i++) { + engine_data[i].engine = &ENGINE_REGISTRY[i]; + engine_data[i].query = encoded_query; + + engine_data[i].results = NULL; + engine_data[i].count = 0; + pthread_create(&engine_tids[i], NULL, scrape_thread_func, &engine_data[i]); + } + + pthread_join(wiki_tid, NULL); + pthread_join(calc_tid, NULL); + + char ***infobox_matrix = NULL; + int *infobox_inner_counts = NULL; + int infobox_count = 0; + + if (calc_data.success) { + infobox_count = + add_infobox_to_collection(&calc_data.result, &infobox_matrix, + &infobox_inner_counts, infobox_count); + } + + if (wiki_data.success) { + infobox_count = + add_infobox_to_collection(&wiki_data.result, &infobox_matrix, + &infobox_inner_counts, infobox_count); + } + + if (infobox_count > 0) { + context_set_array_of_arrays(&ctx, "infoboxes", infobox_matrix, + infobox_count, infobox_inner_counts); + free(infobox_matrix); + free(infobox_inner_counts); + } else { + context_set_array_of_arrays(&ctx, "infoboxes", NULL, 0, NULL); + } + + int total_results = 0; + for (int i = 0; i < ENGINE_COUNT; i++) { + pthread_join(engine_tids[i], NULL); + total_results += engine_data[i].count; + } + + if (total_results > 0) { + char ***results_matrix = (char ***)malloc(sizeof(char **) * total_results); + int *results_inner_counts = (int *)malloc(sizeof(int) * total_results); + char **seen_urls = (char **)malloc(sizeof(char *) * total_results); + int unique_count = 0; + + for (int i = 0; i < ENGINE_COUNT; i++) { + for (int j = 0; j < engine_data[i].count; j++) { + char *raw_url = engine_data[i].results[j].url; + char *clean_url = unescape_search_url(raw_url); + char *display_url = clean_url ? clean_url : raw_url; + + int is_duplicate = 0; + for (int k = 0; k < unique_count; k++) { + if (strcmp(seen_urls[k], display_url) == 0) { + is_duplicate = 1; + break; + } + } + + if (is_duplicate) { + if (clean_url) free(clean_url); + free(engine_data[i].results[j].url); + free(engine_data[i].results[j].title); + free(engine_data[i].results[j].snippet); + continue; + } + + seen_urls[unique_count] = strdup(display_url); + results_matrix[unique_count] = (char **)malloc(sizeof(char *) * 4); + char *pretty_url = pretty_display_url(display_url); + + results_matrix[unique_count][0] = strdup(display_url); + results_matrix[unique_count][1] = strdup(pretty_url); + results_matrix[unique_count][2] = + engine_data[i].results[j].title + ? strdup(engine_data[i].results[j].title) + : strdup("Untitled"); + results_matrix[unique_count][3] = + engine_data[i].results[j].snippet + ? strdup(engine_data[i].results[j].snippet) + : strdup(""); + + results_inner_counts[unique_count] = 4; + + free(pretty_url); + free(engine_data[i].results[j].url); + free(engine_data[i].results[j].title); + free(engine_data[i].results[j].snippet); + if (clean_url) free(clean_url); + + unique_count++; + } + free(engine_data[i].results); + } + + context_set_array_of_arrays(&ctx, "results", results_matrix, unique_count, + results_inner_counts); + + char *html = render_template("results.html", &ctx); + if (html) { + send_response(html); + free(html); + } + + for (int i = 0; i < unique_count; i++) { + for (int j = 0; j < 4; j++) free(results_matrix[i][j]); + free(results_matrix[i]); + free(seen_urls[i]); + } + free(seen_urls); + free(results_matrix); + free(results_inner_counts); + } else { + char *html = render_template("results.html", &ctx); + if (html) { + send_response(html); + free(html); + } + } + + if (wiki_data.success) { + free_infobox(&wiki_data.result); + } + + if (calc_data.success) { + free_infobox(&calc_data.result); + } + + free(encoded_query); + free(display_query); + free_context(&ctx); + + return 0; +} |
