From c7b95d05715a45c7790aa8a7e4b0b61bac2e4208 Mon Sep 17 00:00:00 2001
From: frosty
Date: Tue, 17 Mar 2026 13:51:12 -0400
Subject: fix: refactored scraping components

---
 src/Scraping/Scraping.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'src/Scraping/Scraping.h')

diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
index f1ad2c4..1439118 100644
--- a/src/Scraping/Scraping.h
+++ b/src/Scraping/Scraping.h
@@ -3,6 +3,7 @@
 
 #include
 #include
+#include /* NOTE(review): header name missing from this copy of the patch; presumably <libxml/xpath.h>, given the xmlXPath* types added below - confirm against the repository */
 
 typedef struct {
   char *url;
@@ -45,6 +46,25 @@ typedef struct {
 extern const SearchEngine ENGINE_REGISTRY[];
 extern const int ENGINE_COUNT;
 
+size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
+                             void *userp); /* NOTE(review): parameter list has the shape of a libcurl CURLOPT_WRITEFUNCTION callback; presumably that is its use - confirm */
+const char *get_random_user_agent(void); /* NOTE(review): returns const char *; presumably a string the caller must not free - confirm */
+void configure_curl_handle(CURL *curl, const char *full_url,
+                           MemoryBuffer *chunk, struct curl_slist *headers); /* NOTE(review): which curl options are set is not visible here; chunk is presumably the response buffer - confirm */
+char *build_search_url(const char *base_url, const char *page_param,
+                       int page_multiplier, int page_base,
+                       const char *encoded_query, int page); /* NOTE(review): returns non-const char *; presumably heap-allocated and caller-owned - confirm who frees and the failure value */
+struct curl_slist *build_request_headers(const char *host_header,
+                                         const char *referer); /* NOTE(review): presumably the caller releases the list with curl_slist_free_all() - confirm */
+void http_delay(void); /* NOTE(review): presumably pauses between requests; duration and mechanism not visible here */
+
+xmlXPathContextPtr create_xpath_context(xmlDocPtr doc); /* NOTE(review): presumably wraps xmlXPathNewContext(doc); failure value to be confirmed */
+void free_xpath_objects(xmlXPathContextPtr ctx, xmlXPathObjectPtr obj); /* NOTE(review): presumably frees both arguments; confirm whether NULL is tolerated */
+SearchResult *alloc_results_array(int capacity, int max_results); /* NOTE(review): how capacity and max_results interact is not visible here - confirm and document */
+void assign_result(SearchResult *result, char *url, char *title, char *snippet,
+                   int unescape); /* NOTE(review): strings are non-const; ownership (copied vs. taken) and the meaning of unescape are not visible here - confirm */
+void free_xml_node_list(char *title, char *url, char *snippet); /* NOTE(review): takes three char * strings rather than a node list; presumably frees them (free vs. xmlFree to be confirmed) */
+
 int scrape_engine(const SearchEngine *engine, const char *query,
                   SearchResult **out_results, int max_results);
 
-- 
cgit v1.2.3