From 9f2cd561286784fd000eb8a00f1f80db3185062c Mon Sep 17 00:00:00 2001 From: frosty Date: Fri, 27 Feb 2026 18:32:23 -0500 Subject: added proxying --- src/Scraping/Scraping.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'src/Scraping/Scraping.c') diff --git a/src/Scraping/Scraping.c b/src/Scraping/Scraping.c index 42e05d6..5b1b5d6 100644 --- a/src/Scraping/Scraping.c +++ b/src/Scraping/Scraping.c @@ -1,4 +1,5 @@ #include "Scraping.h" +#include "../Proxy/Proxy.h" #include "../Utility/Unescape.h" #include #include @@ -329,9 +330,14 @@ static void configure_curl_handle(CURL *curl, const char *full_url, curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, ""); + + apply_proxy_settings(curl); } int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) { + int retries = 0; + +retry: CURLM *multi_handle = curl_multi_init(); if (!multi_handle) { return -1; @@ -339,6 +345,15 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) { for (int i = 0; i < num_jobs; i++) { ScrapeJob *job = &jobs[i]; + + if (job->handle) { + curl_easy_cleanup(job->handle); + job->handle = NULL; + } + if (job->response.memory) { + free(job->response.memory); + } + job->handle = curl_easy_init(); if (!job->handle) { continue; @@ -406,7 +421,7 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) { CURL *handle = msg->easy_handle; for (int i = 0; i < num_jobs; i++) { - if (jobs[i].handle == handle) { + if (jobs[i].handle && jobs[i].handle == handle) { ScrapeJob *job = &jobs[i]; long response_code; @@ -431,8 +446,10 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) { if (headers) curl_slist_free_all(headers); free(job->response.memory); + job->response.memory = NULL; curl_multi_remove_handle(multi_handle, handle); - curl_easy_cleanup(handle); + if (handle) curl_easy_cleanup(handle); + job->handle = NULL; break; } } @@ -440,6 +457,21 @@ int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) { } curl_multi_cleanup(multi_handle); + + if (retries < max_proxy_retries && proxy_count > 0) { + int any_failed = 0; + for (int i = 0; i < num_jobs; i++) { + if (jobs[i].results_count == 0 && jobs[i].response.size == 0) { + any_failed = 1; + break; + } + } + if (any_failed) { + retries++; + goto retry; + } + } + return 0; } -- cgit v1.2.3