diff options
Diffstat (limited to 'src/Utility')
| -rw-r--r-- | src/Utility/HttpClient.c | 126 | ||||
| -rw-r--r-- | src/Utility/HttpClient.h | 24 | ||||
| -rw-r--r-- | src/Utility/XmlHelper.c | 95 | ||||
| -rw-r--r-- | src/Utility/XmlHelper.h | 22 |
4 files changed, 267 insertions, 0 deletions
diff --git a/src/Utility/HttpClient.c b/src/Utility/HttpClient.c
new file mode 100644
index 0000000..150b228
--- /dev/null
+++ b/src/Utility/HttpClient.c
@@ -0,0 +1,126 @@
+#include "HttpClient.h"
+#include "../Proxy/Proxy.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Initial (and minimum) size, in bytes, of a response buffer. */
+enum { HTTP_INITIAL_CAPACITY = 16384 };
+
+/*
+ * libcurl write callback: appends the size * nmemb bytes at `contents` to the
+ * HttpResponse passed through CURLOPT_WRITEDATA (`userp`).
+ *
+ * The buffer grows by doubling and is kept NUL-terminated after every chunk.
+ * Returns the number of bytes consumed; returning 0 for a non-empty chunk
+ * (size overflow or failed realloc) makes libcurl abort the transfer.
+ */
+static size_t write_callback(void *contents, size_t size, size_t nmemb,
+                             void *userp) {
+  size_t realsize = size * nmemb;
+  HttpResponse *mem = userp;
+
+  /* Reject chunks whose size plus terminator would overflow size_t. */
+  if (realsize >= SIZE_MAX - mem->size) {
+    return 0;
+  }
+  size_t needed = mem->size + realsize + 1;
+
+  if (needed > mem->capacity) {
+    size_t new_cap =
+        mem->capacity == 0 ? HTTP_INITIAL_CAPACITY : mem->capacity;
+    while (new_cap < needed) {
+      /* Doubling past SIZE_MAX would wrap around and never terminate;
+       * fall back to the exact size instead. */
+      if (new_cap > SIZE_MAX / 2) {
+        new_cap = needed;
+        break;
+      }
+      new_cap *= 2;
+    }
+
+    /* Keep the old pointer on failure so the owner can still free it. */
+    char *ptr = realloc(mem->memory, new_cap);
+    if (!ptr) {
+      return 0;
+    }
+    mem->memory = ptr;
+    mem->capacity = new_cap;
+  }
+
+  memcpy(mem->memory + mem->size, contents, realsize);
+  mem->size += realsize;
+  mem->memory[mem->size] = '\0';
+
+  return realsize;
+}
+
+/*
+ * Performs a blocking HTTP GET of `url` and returns the response body.
+ *
+ * `user_agent` may be NULL, in which case "libcurl-agent/1.0" is sent.
+ * Redirects are followed and the transfer is limited to 15 seconds. Proxy
+ * configuration is delegated to apply_proxy_settings() from Proxy.h.
+ * CURLOPT_FAILONERROR is not set, so an HTTP error status still counts as a
+ * successful transfer and its body is returned.
+ *
+ * On success `memory` is a NUL-terminated heap buffer holding `size` body
+ * bytes (an empty string when no body was received); release it with
+ * http_response_free(). On failure (NULL url, allocation failure, or a curl
+ * error) `memory` is NULL and `size` and `capacity` are 0.
+ */
+HttpResponse http_get(const char *url, const char *user_agent) {
+  HttpResponse resp = {.memory = NULL, .size = 0, .capacity = 0};
+
+  if (!url) {
+    return resp;
+  }
+
+  resp.memory = malloc(HTTP_INITIAL_CAPACITY);
+  if (!resp.memory) {
+    return resp;
+  }
+  resp.capacity = HTTP_INITIAL_CAPACITY;
+  /* write_callback never runs for an empty body, so terminate the buffer up
+   * front; otherwise callers would read uninitialised bytes as a string. */
+  resp.memory[0] = '\0';
+
+  CURL *curl = curl_easy_init();
+  if (!curl) {
+    http_response_free(&resp);
+    return resp;
+  }
+
+  curl_easy_setopt(curl, CURLOPT_URL, url);
+  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
+  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &resp);
+  curl_easy_setopt(curl, CURLOPT_USERAGENT,
+                   user_agent ? user_agent : "libcurl-agent/1.0");
+  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+  curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);
+  apply_proxy_settings(curl);
+
+  CURLcode res = curl_easy_perform(curl);
+  curl_easy_cleanup(curl);
+
+  if (res != CURLE_OK) {
+    /* Drop any partial body so failure is always signalled by NULL. */
+    http_response_free(&resp);
+  }
+
+  return resp;
+}
+
+/*
+ * Releases the buffer owned by `resp` and resets every field to zero/NULL.
+ * A NULL `resp` is ignored, and calling this twice on a response is safe.
+ */
+void http_response_free(HttpResponse *resp) {
+  if (!resp) {
+    return;
+  }
+  free(resp->memory);
+  resp->memory = NULL;
+  resp->size = 0;
+  resp->capacity = 0;
+}
diff --git a/src/Utility/HttpClient.h b/src/Utility/HttpClient.h
new file mode 100644
index 0000000..6eb002c
--- /dev/null
+++ b/src/Utility/HttpClient.h
@@ -0,0 +1,24 @@
+#ifndef HTTPCLIENT_H
+#define HTTPCLIENT_H
+
+#include <curl/curl.h>
+#include <stddef.h>
+
+/* Growable, NUL-terminated byte buffer holding an HTTP response body. */
+typedef struct {
+  char *memory;    /* heap buffer, or NULL when empty/failed */
+  size_t size;     /* body bytes stored, excluding the terminator */
+  size_t capacity; /* bytes allocated for memory */
+} HttpResponse;
+
+/*
+ * Blocking HTTP GET. `user_agent` may be NULL to use the built-in default.
+ * Returns a response whose `memory` is NULL on failure; otherwise the caller
+ * must release it with http_response_free().
+ */
+HttpResponse http_get(const char *url, const char *user_agent);
+
+/* Frees the buffer in `resp` and zeroes its fields. NULL is accepted. */
+void http_response_free(HttpResponse *resp);
+
+#endif
diff --git a/src/Utility/XmlHelper.c b/src/Utility/XmlHelper.c
new file mode 100644
index 0000000..4fed96a
--- /dev/null
+++ b/src/Utility/XmlHelper.c
@@ -0,0 +1,95 @@
+#include "XmlHelper.h"
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Allocates a zero-initialised array of min(count, max_results) SearchResult
+ * entries. Returns NULL when either argument is not positive or when the
+ * allocation fails. Release the array with xml_result_free().
+ */
+SearchResult *xml_result_alloc(int count, int max_results) {
+  if (count <= 0 || max_results <= 0) {
+    return NULL;
+  }
+  int actual = (count < max_results) ? count : max_results;
+  return calloc((size_t)actual, sizeof(SearchResult));
+}
+
+/*
+ * Frees the url, title and snippet strings of the first `count` entries and
+ * then the array itself. `count` must not exceed the number of entries that
+ * were allocated. A NULL `results` is a no-op.
+ */
+void xml_result_free(SearchResult *results, int count) {
+  if (!results) {
+    return;
+  }
+  for (int i = 0; i < count; i++) {
+    free(results[i].url);
+    free(results[i].title);
+    free(results[i].snippet);
+  }
+  free(results);
+}
+
+/*
+ * NULL-safe wrapper around xmlXPathEvalExpression(). The caller owns the
+ * returned object and releases it with xmlXPathFreeObject().
+ */
+xmlXPathObjectPtr xml_xpath_eval(xmlXPathContextPtr ctx, const char *xpath) {
+  if (!ctx || !xpath) {
+    return NULL;
+  }
+  return xmlXPathEvalExpression((const xmlChar *)xpath, ctx);
+}
+
+/*
+ * Returns the text content of `node`, or NULL when `node` is NULL.
+ *
+ * The string comes straight from xmlNodeGetContent(), so it must be released
+ * with xmlFree() rather than free() -- unlike the result of xpath_text().
+ */
+char *xml_node_content(xmlNodePtr node) {
+  if (!node) {
+    return NULL;
+  }
+  return (char *)xmlNodeGetContent(node);
+}
+
+/*
+ * Evaluates `xpath` against `doc` and returns a copy of the text content of
+ * the first matching node.
+ *
+ * Returns NULL when an argument is NULL, the expression fails or matches no
+ * node, the node has no content, or memory runs out. The result is allocated
+ * with strdup(); release it with free().
+ */
+char *xpath_text(xmlDocPtr doc, const char *xpath) {
+  if (!doc || !xpath) {
+    return NULL;
+  }
+
+  xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
+  if (!ctx) {
+    return NULL;
+  }
+
+  xmlXPathObjectPtr obj = xml_xpath_eval(ctx, xpath);
+  xmlXPathFreeContext(ctx);
+  if (!obj) {
+    return NULL;
+  }
+
+  char *result = NULL;
+  if (obj->nodesetval && obj->nodesetval->nodeNr > 0) {
+    xmlChar *content = xmlNodeGetContent(obj->nodesetval->nodeTab[0]);
+    if (content) {
+      /* Copy into malloc-owned memory so callers can use plain free(). */
+      result = strdup((char *)content);
+      xmlFree(content);
+    }
+  }
+
+  xmlXPathFreeObject(obj);
+  return result;
+}
diff --git a/src/Utility/XmlHelper.h b/src/Utility/XmlHelper.h
new file mode 100644
index 0000000..95cbcd6
--- /dev/null
+++ b/src/Utility/XmlHelper.h
@@ -0,0 +1,22 @@
+#ifndef XMLHELPER_H
+#define XMLHELPER_H
+
+#include "../Scraping/Scraping.h"
+#include <libxml/xpath.h>
+
+/* Allocates min(count, max_results) zeroed results; NULL on bad input/OOM. */
+SearchResult *xml_result_alloc(int count, int max_results);
+
+/* Frees the strings of the first `count` results, then the array itself. */
+void xml_result_free(SearchResult *results, int count);
+
+/* NULL-safe XPath evaluation; free the result with xmlXPathFreeObject(). */
+xmlXPathObjectPtr xml_xpath_eval(xmlXPathContextPtr ctx, const char *xpath);
+
+/* Text content of `node`; the result must be released with xmlFree(). */
+char *xml_node_content(xmlNodePtr node);
+
+/* Copy of the first matching node's text; release it with free(). */
+char *xpath_text(xmlDocPtr doc, const char *xpath);
+
+#endif
