diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-03-17 13:51:12 -0400 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-03-17 13:51:12 -0400 |
| commit | c7b95d05715a45c7790aa8a7e4b0b61bac2e4208 (patch) | |
| tree | b0b511b4cc6610949cdde5a6a220724a31c617fd /src/Scraping/Scraping.h | |
| parent | 8c6632502ff992e80051910451421c55894ed9d8 (diff) | |
| download | omnisearch-c7b95d05715a45c7790aa8a7e4b0b61bac2e4208.tar.gz | |
fix: refactor scraping components
Diffstat (limited to 'src/Scraping/Scraping.h')
| -rw-r--r-- | src/Scraping/Scraping.h | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h index f1ad2c4..1439118 100644 --- a/src/Scraping/Scraping.h +++ b/src/Scraping/Scraping.h @@ -3,6 +3,7 @@ #include <curl/curl.h> #include <libxml/HTMLparser.h> +#include <libxml/xpath.h> typedef struct { char *url; @@ -45,6 +46,25 @@ typedef struct { extern const SearchEngine ENGINE_REGISTRY[]; extern const int ENGINE_COUNT; +size_t write_memory_callback(void *contents, size_t size, size_t nmemb, + void *userp); +const char *get_random_user_agent(void); +void configure_curl_handle(CURL *curl, const char *full_url, + MemoryBuffer *chunk, struct curl_slist *headers); +char *build_search_url(const char *base_url, const char *page_param, + int page_multiplier, int page_base, + const char *encoded_query, int page); +struct curl_slist *build_request_headers(const char *host_header, + const char *referer); +void http_delay(void); + +xmlXPathContextPtr create_xpath_context(xmlDocPtr doc); +void free_xpath_objects(xmlXPathContextPtr ctx, xmlXPathObjectPtr obj); +SearchResult *alloc_results_array(int capacity, int max_results); +void assign_result(SearchResult *result, char *url, char *title, char *snippet, + int unescape); +void free_xml_node_list(char *title, char *url, char *snippet); + int scrape_engine(const SearchEngine *engine, const char *query, SearchResult **out_results, int max_results); |
