diff options
| author | frosty <gabriel@bwaaa.monster> | 2026-02-23 00:57:21 -0500 |
|---|---|---|
| committer | frosty <gabriel@bwaaa.monster> | 2026-02-23 00:57:21 -0500 |
| commit | b280ab6bcdf6c9bae46a7a21b7138d46d953dd71 (patch) | |
| tree | 39cea490a82a1ae5072c19d1d050d90b881d1a15 /src/Scraping/Scraping.h | |
| download | omnisearch-b280ab6bcdf6c9bae46a7a21b7138d46d953dd71.tar.gz | |
oopsies
Diffstat (limited to 'src/Scraping/Scraping.h')
| -rw-r--r-- | src/Scraping/Scraping.h | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h
new file mode 100644
index 0000000..d8a3b13
--- /dev/null
+++ b/src/Scraping/Scraping.h
@@ -0,0 +1,116 @@
+#ifndef SCRAPING_H
+#define SCRAPING_H
+
+/*
+ * Shared declarations for the scraping layer: logging macros, the result,
+ * engine, buffer and job records, and the scrape entry points.
+ */
+
+#include <stddef.h> /* size_t, used by MemoryBuffer */
+#include <stdio.h>  /* fprintf and stderr, used by the LOG_* macros */
+
+#include <libxml/HTMLparser.h>
+#include <curl/curl.h>
+
+/*
+ * Logging helpers. Each expands to a single fprintf() call that writes
+ * "[LEVEL] ", the printf-style message and a newline to stderr.
+ * `msg` must be a string literal: it is joined to the prefix and "\n" by
+ * adjacent-literal concatenation. `##__VA_ARGS__` is a GNU extension
+ * (GCC, Clang) that removes the trailing comma when no extra arguments
+ * are passed.
+ */
+#define LOG_INFO(msg, ...) fprintf(stderr, "[INFO] " msg "\n", ##__VA_ARGS__)
+#define LOG_WARN(msg, ...) fprintf(stderr, "[WARN] " msg "\n", ##__VA_ARGS__)
+#define LOG_DEBUG(msg, ...) fprintf(stderr, "[DEBUG] " msg "\n", ##__VA_ARGS__)
+#define LOG_ERROR(msg, ...) fprintf(stderr, "[ERROR] " msg "\n", ##__VA_ARGS__)
+
+/*
+ * One search hit: URL, title and snippet text.
+ * NOTE(review): who allocates and frees these strings is not visible in
+ * this header; confirm against the parsers and callers.
+ */
+typedef struct {
+  char *url;
+  char *title;
+  char *snippet;
+} SearchResult;
+
+/*
+ * Signature of an engine-specific result parser. It receives the engine
+ * name, a libxml2 document, an out-pointer for the result array and a
+ * result limit.
+ * NOTE(review): the int return is presumably the number of results written
+ * to *out_results; confirm against the parser implementations.
+ */
+typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
+                          SearchResult **out_results, int max_results);
+
+/*
+ * Static description of one search engine, including its ParserFunc.
+ * NOTE(review): the page_* fields presumably control how a page number is
+ * encoded in the request; confirm against the request-building code.
+ */
+typedef struct {
+  const char *name;
+  const char *base_url;
+  const char *host_header;
+  const char *referer;
+
+  const char *page_param;
+  int page_multiplier;
+  int page_base;
+  ParserFunc parser;
+} SearchEngine;
+
+/*
+ * Byte buffer with separate size and capacity counters.
+ * NOTE(review): presumably filled by a curl write callback; confirm.
+ */
+typedef struct {
+  char *memory;
+  size_t size;
+  size_t capacity;
+} MemoryBuffer;
+
+/*
+ * One element of the array passed to scrape_engines_parallel(): an engine
+ * and query together with a curl handle, response buffer and result slots.
+ * NOTE(review): which fields the caller fills in and which the scraper
+ * writes is not visible in this header; confirm against the implementation.
+ */
+typedef struct {
+  const SearchEngine *engine;
+  char *query;
+  SearchResult **out_results;
+  int max_results;
+  int page;
+  CURL *handle;
+  MemoryBuffer response;
+  int results_count;
+} ScrapeJob;
+
+/*
+ * Engine table and count; declared here, defined outside this header.
+ * NOTE(review): ENGINE_COUNT is presumably the length of ENGINE_REGISTRY;
+ * confirm at the definition.
+ */
+extern const SearchEngine ENGINE_REGISTRY[];
+extern const int ENGINE_COUNT;
+
+/*
+ * Presumably scrapes one engine for `query`, limited to `max_results`.
+ * NOTE(review): the return-value convention and ownership of *out_results
+ * are not visible here; confirm against the implementation.
+ */
+int scrape_engine(const SearchEngine *engine, const char *query,
+                  SearchResult **out_results, int max_results);
+
+/*
+ * Processes the `num_jobs` entries of the `jobs` array.
+ * NOTE(review): the name suggests the jobs run concurrently; the
+ * return-value convention is not visible here; confirm.
+ */
+int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs);
+
+#endif /* SCRAPING_H */
