1 files changed, 197 insertions, 93 deletions
diff --git a/src/routing.c b/src/routing.c
index 156e843..1910203 100644
--- a/src/routing.c
+++ b/src/routing.c
@@ -26,98 +26,6 @@ void set_handler(const char *path, RequestHandler handler) {
   }
 }
 
-char *parse_request_url(const char *request_line, UrlParams *params) {
-  char method[16];                     
-  char path_with_query_buffer[MAX_PATH_LEN]; 
-  char http_version[16];               
-
-  if (sscanf(request_line, "%15s %255s %15s", method, path_with_query_buffer,
-             http_version) != 3) {
-    fprintf(stderr, "[ERROR] parse_request_url: Malformed request line: '%s'\n",
-            request_line);
-    return NULL;
-  }
-
-  params->count = 0; 
-
-  char *working_url_copy = strdup(path_with_query_buffer);
-  if (working_url_copy == NULL) {
-    perror("Failed to allocate memory for working URL copy");
-    return NULL;
-  }
-
-  char *query_start = strchr(working_url_copy, '?'); 
-  char *path_only_for_return; 
-
-  if (query_start != NULL) {
-    *query_start = '\0'; 
-    path_only_for_return = working_url_copy; 
-    char *query_string = query_start + 1;    
-
-    char *token;          
-    char *saveptr_query;  
-
-    token = strtok_r(query_string, "&", &saveptr_query);
-    while (token != NULL && params->count < MAX_URL_PARAMS) {
-      char *equals_sign = strchr(token, '='); 
-      if (equals_sign != NULL) {
-        size_t key_len = equals_sign - token; 
-
-        strncpy(params->params[params->count].key, token, key_len);
-        params->params[params->count].key[key_len] = '\0'; 
-        strncpy(params->params[params->count].value, equals_sign + 1,
-                MAX_VALUE_LEN - 1);
-        params->params[params->count].value[MAX_VALUE_LEN - 1] = '\0'; 
-        params->count++; 
-      }
-      token = strtok_r(NULL, "&", &saveptr_query); 
-    }
-  } else {
-
-    path_only_for_return = working_url_copy;
-  }
-
-  char *final_requested_path = strdup(path_only_for_return);
-  if (final_requested_path == NULL) {
-    perror("Failed to allocate final requested path");
-    free(working_url_copy); 
-    return NULL;
-  }
-
-  char *segment;          
-  int segment_index = 0;  
-  char *saveptr_path;     
-
-  segment = strtok_r(working_url_copy, "/", &saveptr_path);
-  while (segment != NULL) {
-
-    if (segment_index > 0) {
-      if (params->count < MAX_URL_PARAMS) {
-        char param_key[MAX_KEY_LEN];
-
-        snprintf(param_key, sizeof(param_key), "param%d", segment_index - 1);
-
-        strncpy(params->params[params->count].key, param_key, MAX_KEY_LEN - 1);
-        params->params[params->count].key[MAX_KEY_LEN - 1] = '\0';
-        strncpy(params->params[params->count].value, segment,
-                MAX_VALUE_LEN - 1);
-        params->params[params->count].value[MAX_VALUE_LEN - 1] = '\0';
-        params->count++;
-      } else {
-        fprintf(stderr,
-                "[WARNING] parse_request_url: Max URL parameters reached. Skipping path segment '%s'.\n",
-                segment);
-      }
-    }
-    segment_index++;
-    segment = strtok_r(NULL, "/", &saveptr_path); 
-  }
-
-  free(working_url_copy); 
-
-  return final_requested_path; 
-}
-
 const char *get_mime_type(const char *file_path) {
 
   const char *ext = strrchr(file_path, '.');
@@ -152,6 +60,201 @@ const char *get_mime_type(const char *file_path) {
   return "application/octet-stream";
 }
 
+/* Canonicalize `path` into `canonical` (capacity max_len bytes, terminator
+ * included): collapses repeated '/', drops "." and resolves ".." against the
+ * preceding component. The result starts with '/' ("/" alone when empty).
+ * Returns 0 on success; -1 on bad arguments, allocation failure, a ".." that
+ * would climb above the root, or a result that does not fit. Components are
+ * never truncated or dropped. On failure `canonical` is unspecified. */
+static int canonicalize_path(char *canonical, const char *path, size_t max_len) {
+    /* "/" plus its terminator is the smallest possible result. */
+    if (!canonical || !path || max_len < 2) {
+        return -1;
+    }
+    char *path_copy = strdup(path); /* strtok_r writes into its input */
+    if (!path_copy) {
+        return -1;
+    }
+    int rc = -1;
+    size_t len = 0; /* length of canonical so far; avoids strlen rescans */
+    char *saveptr = NULL;
+    canonical[0] = '\0';
+    /* strtok_r never yields empty tokens, so "//" collapses by itself. */
+    for (char *token = strtok_r(path_copy, "/", &saveptr); token;
+         token = strtok_r(NULL, "/", &saveptr)) {
+        if (strcmp(token, ".") == 0) {
+            continue;
+        }
+        if (strcmp(token, "..") == 0) {
+            if (len == 0) {
+                fprintf(stderr, "[SECURITY] Path traversal attempt: %s\n", path);
+                goto out;
+            }
+            /* Drop the last component; index 0 is always '/' when len > 0. */
+            while (canonical[--len] != '/') {}
+            canonical[len] = '\0';
+            continue;
+        }
+        size_t token_len = strlen(token);
+        if (len + token_len + 2 > max_len) { /* '/' + token + terminator */
+            fprintf(stderr, "[ERROR] Canonical path too long\n");
+            goto out;
+        }
+        canonical[len++] = '/';
+        memcpy(canonical + len, token, token_len + 1);
+        len += token_len;
+    }
+    if (len == 0) {
+        strcpy(canonical, "/");
+    }
+    rc = 0;
+out:
+    free(path_copy);
+    return rc;
+}
+
+/* Returns true when `component` is acceptable as a single path segment:
+ * non-NULL, free of ".." and "//" sequences, and free of control bytes
+ * below 0x20 other than tab. */
+static bool is_safe_path_component(const char *component) {
+    if (!component || strstr(component, "..") || strstr(component, "//")) {
+        return false;
+    }
+    for (size_t i = 0; component[i]; i++) {
+        /* Compare as unsigned: plain char may be signed, which would make
+         * bytes >= 0x80 (e.g. UTF-8) look negative and be rejected. */
+        if ((unsigned char)component[i] < 32 && component[i] != '\t') {
+            return false;
+        }
+    }
+    return true;
+}
+
+/* Percent-decode `src` into `dst` (capacity dst_size incl. terminator); '+'
+ * becomes ' ', output is silently cut at dst_size - 1 bytes. Returns 0, or -1
+ * on bad arguments or an escape that is not '%' + two hex digits or is %00. */
+static int url_decode(char *dst, const char *src, size_t dst_size) {
+    if (!dst || !src || dst_size == 0) { /* dst_size - 1 would wrap */
+        return -1;
+    }
+    size_t i = 0, j = 0;
+    while (src[i] && j < dst_size - 1) {
+        if (src[i] == '%') {
+            if (!src[i + 1] || !src[i + 2]) {
+                fprintf(stderr, "[SECURITY] Incomplete URL encoding at end\n");
+                return -1;
+            }
+            int value = 0; /* parsed by hand: strtol also takes "+1", "-1", " 1" */
+            for (int k = 1; k <= 2; k++) {
+                char c = src[i + k];
+                int digit = (c >= '0' && c <= '9') ? c - '0' : (c >= 'a' && c <= 'f') ? c - 'a' + 10
+                          : (c >= 'A' && c <= 'F') ? c - 'A' + 10 : -1;
+                if (digit < 0) {
+                    fprintf(stderr, "[SECURITY] Invalid URL encoding: %%%c%c\n", src[i + 1], src[i + 2]);
+                    return -1;
+                }
+                value = value * 16 + digit;
+            }
+            if (value == 0) {
+                fprintf(stderr, "[SECURITY] Null byte in URL encoding\n");
+                return -1;
+            }
+            dst[j++] = (char)value;
+            i += 3;
+        } else {
+            dst[j++] = (src[i] == '+') ? ' ' : src[i];
+            i++;
+        }
+    }
+    dst[j] = '\0';
+    return 0;
+}
+
+/* Parse an HTTP request line ("METHOD target VERSION") into a canonical path
+ * plus query parameters.
+ *
+ * The target is split at the first raw '?' before any decoding, so encoded
+ * delimiters (%3F, %26, %3D) stay data and each byte is decoded exactly once.
+ * The path part is percent-decoded, canonicalized and validated component by
+ * component; each "key=value" pair of the query is decoded into `params`
+ * (pairs without '=' or with invalid encoding are skipped, at most
+ * MAX_URL_PARAMS are stored).
+ *
+ * Returns a heap-allocated canonical path that the caller must free(), or
+ * NULL on malformed input, a rejected path or allocation failure. */
+char *parse_request_url(const char *request_line, UrlParams *params) {
+    char method[16];
+    char raw_path_with_query[MAX_PATH_LEN];
+    char http_version[16];
+
+    if (!request_line || !params) {
+        return NULL;
+    }
+    /* NOTE(review): "%255s" assumes MAX_PATH_LEN >= 256 -- confirm. */
+    if (sscanf(request_line, "%15s %255s %15s", method, raw_path_with_query,
+               http_version) != 3) {
+        fprintf(stderr, "[ERROR] parse_request_url: Malformed request line\n");
+        return NULL;
+    }
+
+    params->count = 0;
+    /* Cut the query off while the target is still encoded. */
+    char *query_string = strchr(raw_path_with_query, '?');
+    if (query_string) {
+        *query_string++ = '\0';
+    }
+
+    char decoded_path[MAX_PATH_LEN];
+    if (url_decode(decoded_path, raw_path_with_query, sizeof(decoded_path)) != 0) {
+        fprintf(stderr, "[SECURITY] Invalid URL encoding in request\n");
+        return NULL;
+    }
+
+    char canonical_path[MAX_PATH_LEN];
+    if (canonicalize_path(canonical_path, decoded_path, sizeof(canonical_path)) != 0) {
+        fprintf(stderr, "[SECURITY] Path canonicalization failed\n");
+        return NULL;
+    }
+
+    char *final_path = strdup(canonical_path);
+    if (!final_path) {
+        perror("Failed to allocate memory for URL copy");
+        return NULL;
+    }
+
+    /* The result is already copied, so canonical_path can be tokenized in
+     * place; validation therefore can never be skipped by a failed strdup. */
+    char *saveptr = NULL;
+    for (char *token = strtok_r(canonical_path, "/", &saveptr); token;
+         token = strtok_r(NULL, "/", &saveptr)) {
+        if (!is_safe_path_component(token)) {
+            fprintf(stderr, "[SECURITY] Unsafe path component: %s\n", token);
+            free(final_path);
+            return NULL;
+        }
+    }
+
+    if (!query_string) {
+        return final_path;
+    }
+    for (char *token = strtok_r(query_string, "&", &saveptr);
+         token && params->count < MAX_URL_PARAMS;
+         token = strtok_r(NULL, "&", &saveptr)) {
+        char *equals = strchr(token, '=');
+        if (!equals) {
+            continue;
+        }
+        /* Terminate the key at '=' so decoding cannot run into the value,
+         * and bound each decode by its destination, not by the key length. */
+        *equals = '\0';
+        if (url_decode(params->params[params->count].key, token, MAX_KEY_LEN) == 0 &&
+            url_decode(params->params[params->count].value, equals + 1, MAX_VALUE_LEN) == 0) {
+            params->count++;
+        }
+    }
+    return final_path;
+}
+
 bool serve_static_file(const char *request_path_relative_to_static) {
   char full_static_path[MAX_PATH_LEN]; 
 
@@ -223,4 +326,5 @@ bool serve_static_file(const char *request_path_relative_to_static) {
 
   fclose(fp); 
   return true; 
-}
-\ No newline at end of file
+}
+