about summary refs log tree commit diff
path: root/src/Scraping/Scraping.h
diff options
context:
space:
mode:
author frosty <gabriel@bwaaa.monster> 2026-03-17 13:51:12 -0400
committer frosty <gabriel@bwaaa.monster> 2026-03-17 13:51:12 -0400
commit c7b95d05715a45c7790aa8a7e4b0b61bac2e4208 (patch)
tree b0b511b4cc6610949cdde5a6a220724a31c617fd /src/Scraping/Scraping.h
parent 8c6632502ff992e80051910451421c55894ed9d8 (diff)
download omnisearch-c7b95d05715a45c7790aa8a7e4b0b61bac2e4208.tar.gz
fix: refactored scraping components
Diffstat (limited to 'src/Scraping/Scraping.h')
-rw-r--r-- src/Scraping/Scraping.h 20
1 files changed, 20 insertions, 0 deletions
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
index f1ad2c4..1439118 100644
--- a/src/Scraping/Scraping.h
+++ b/src/Scraping/Scraping.h
@@ -3,6 +3,7 @@
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
typedef struct {
char *url;
@@ -45,6 +46,25 @@ typedef struct {
extern const SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT;
+size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
+ void *userp);
+const char *get_random_user_agent(void);
+void configure_curl_handle(CURL *curl, const char *full_url,
+ MemoryBuffer *chunk, struct curl_slist *headers);
+char *build_search_url(const char *base_url, const char *page_param,
+ int page_multiplier, int page_base,
+ const char *encoded_query, int page);
+struct curl_slist *build_request_headers(const char *host_header,
+ const char *referer);
+void http_delay(void);
+
+xmlXPathContextPtr create_xpath_context(xmlDocPtr doc);
+void free_xpath_objects(xmlXPathContextPtr ctx, xmlXPathObjectPtr obj);
+SearchResult *alloc_results_array(int capacity, int max_results);
+void assign_result(SearchResult *result, char *url, char *title, char *snippet,
+ int unescape);
+void free_xml_node_list(char *title, char *url, char *snippet);
+
int scrape_engine(const SearchEngine *engine, const char *query,
SearchResult **out_results, int max_results);