Ada 3.4.4
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern.cc
Go to the documentation of this file.
1#include <fuzzer/FuzzedDataProvider.h>
2
3#include <memory>
4#include <string>
5
6#include "ada.cpp"
7#include "ada.h"
8
9using regex_provider = ada::url_pattern_regex::std_regex_provider;
10
/**
 * Calls every read-only accessor of a parsed URL pattern and discards the
 * returned values, so that each getter is executed under the fuzzer.
 *
 * The pattern is taken by const reference: this function only reads from it,
 * and accepting it by value made a full copy of the pattern object on every
 * call.
 *
 * @param result parsed pattern; must provide the eight get_*() component
 *        accessors plus ignore_case() and has_regexp_groups(), all callable
 *        on a const object.
 */
template <typename pattern_type>
void exercise_result(const pattern_type& result) {
  // The casts to void make it explicit that the return values are
  // intentionally discarded.
  (void)result.get_protocol();
  (void)result.get_username();
  (void)result.get_password();
  (void)result.get_hostname();
  (void)result.get_port();
  (void)result.get_pathname();
  (void)result.get_search();
  (void)result.get_hash();
  (void)result.ignore_case();
  (void)result.has_regexp_groups();
}
23
24// Exercise exec() and test() on a parsed url_pattern with an ASCII input.
25// We restrict inputs to ASCII to avoid catastrophic regex backtracking.
26static void exercise_exec_and_test(ada::url_pattern<regex_provider>& pattern,
27 const std::string& test_input,
28 const std::string& test_base) {
29 // test() with string input
30 std::string_view test_view(test_input.data(), test_input.size());
31 auto test_result = pattern.test(test_view, nullptr);
32 (void)test_result;
33
34 // test() with base URL
35 if (!test_base.empty()) {
36 std::string_view base_view(test_base.data(), test_base.size());
37 auto test_result_with_base = pattern.test(test_view, &base_view);
38 (void)test_result_with_base;
39 }
40
41 // exec() with string input - returns match groups
42 auto exec_result = pattern.exec(test_view, nullptr);
43 if (exec_result && exec_result->has_value()) {
44 const ada::url_pattern_result& match = **exec_result;
45 volatile size_t len = 0;
46 // Exercise .input and .groups on every component result.
47 auto exercise_component =
48 [&len](const ada::url_pattern_component_result& c) {
49 len += c.input.size();
50 for (const auto& [k, v] : c.groups) {
51 len += k.size();
52 if (v.has_value()) {
53 len += v->size();
54 }
55 }
56 };
57 exercise_component(match.protocol);
58 exercise_component(match.username);
59 exercise_component(match.password);
60 exercise_component(match.hostname);
61 exercise_component(match.port);
62 exercise_component(match.pathname);
63 exercise_component(match.search);
64 exercise_component(match.hash);
65 (void)len;
66 }
67
68 // exec() with base URL — exercise result groups, same as no-base case
69 if (!test_base.empty()) {
70 std::string_view base_view(test_base.data(), test_base.size());
71 auto exec_with_base = pattern.exec(test_view, &base_view);
72 if (exec_with_base && exec_with_base->has_value()) {
73 const ada::url_pattern_result& match = **exec_with_base;
74 volatile size_t len = 0;
75 auto exercise_component =
76 [&len](const ada::url_pattern_component_result& c) {
77 len += c.input.size();
78 for (const auto& [k, v] : c.groups) {
79 len += k.size();
80 if (v.has_value()) len += v->size();
81 }
82 };
83 exercise_component(match.protocol);
84 exercise_component(match.username);
85 exercise_component(match.password);
86 exercise_component(match.hostname);
87 exercise_component(match.port);
88 exercise_component(match.pathname);
89 exercise_component(match.search);
90 exercise_component(match.hash);
91 (void)len;
92 }
93 }
94
95 // test() with url_pattern_init input
96 ada::url_pattern_init init_input{};
97 init_input.pathname = test_input;
98 auto test_with_init = pattern.test(init_input, nullptr);
99 (void)test_with_init;
100
101 // test_components() — tests each URL component individually
102 {
103 // Split test_input into components by parsing it, then calling
104 // test_components() with the individual string pieces.
105 std::string_view sv(test_input.data(), test_input.size());
106 auto parsed = ada::parse<ada::url_aggregator>(sv);
107 if (parsed) {
108 volatile bool tc = pattern.test_components(
109 std::string(parsed->get_protocol()),
110 std::string(parsed->get_username()),
111 std::string(parsed->get_password()),
112 std::string(parsed->get_hostname()), std::string(parsed->get_port()),
113 std::string(parsed->get_pathname()),
114 std::string(parsed->get_search()), std::string(parsed->get_hash()));
115 (void)tc;
116 }
117 }
118
119 // match() - internal method that exec() uses
120 auto match_result = pattern.match(test_view, nullptr);
121 (void)match_result;
122}
123
124extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
125 auto to_ascii = [](const std::string& source) -> std::string {
126 std::string result;
127 result.reserve(source.size());
128 for (char c : source) {
129 result.push_back(static_cast<unsigned char>(c) % 128);
130 }
131 return result;
132 };
133 FuzzedDataProvider fdp(data, size);
134 // We do not want to trigger arbitrary regex matching.
135 std::string source_1 = "/" + to_ascii(fdp.ConsumeRandomLengthString(50)) +
136 "/" + to_ascii(fdp.ConsumeRandomLengthString(50));
137 std::string base_source_1 = "/" +
138 to_ascii(fdp.ConsumeRandomLengthString(50)) +
139 "/" + to_ascii(fdp.ConsumeRandomLengthString(50));
140
141 std::string source_2 = "https://ada-url.com/*";
142 std::string base_source_2 = "https://ada-url.com";
143
144 // Additional test input for exec/test calls (also ASCII-only)
145 std::string test_input = "https://" +
146 to_ascii(fdp.ConsumeRandomLengthString(30)) + "/" +
147 to_ascii(fdp.ConsumeRandomLengthString(20));
148 std::string test_base = "https://ada-url.com";
149
150 std::array<std::pair<std::string, std::string>, 2> sources = {{
151 {source_1, base_source_1},
152 {source_2, base_source_2},
153 }};
154
155 for (const auto& [source, base_source] : sources) {
156 // Without base or options
157 auto result =
158 ada::parse_url_pattern<regex_provider>(source, nullptr, nullptr);
159 if (result) {
160 exercise_result(*result);
161 exercise_exec_and_test(*result, test_input, test_base);
162 }
163
164 // Testing with base_url
165 std::string_view base_source_view(base_source.data(), base_source.length());
166 auto result_with_base = ada::parse_url_pattern<regex_provider>(
167 source, &base_source_view, nullptr);
168 if (result_with_base) {
169 exercise_result(*result_with_base);
170 exercise_exec_and_test(*result_with_base, test_input, test_base);
171 }
172
173 // Testing with base_url and options
174 ada::url_pattern_options options{.ignore_case = fdp.ConsumeBool()};
175 auto result_with_base_and_options = ada::parse_url_pattern<regex_provider>(
176 source, &base_source_view, &options);
177 if (result_with_base_and_options) {
178 exercise_result(*result_with_base_and_options);
179 exercise_exec_and_test(*result_with_base_and_options, test_input,
180 test_base);
181 }
182
183 // Testing with url_pattern_init and base url.
184 int field_index = fdp.ConsumeIntegralInRange(0, 7);
185 std::string random_value = to_ascii(fdp.ConsumeRandomLengthString(50));
186 ada::url_pattern_init init{};
187 switch (field_index) {
188 case 0:
189 init.protocol = random_value;
190 break;
191 case 1:
192 init.username = random_value;
193 break;
194 case 2:
195 init.password = random_value;
196 break;
197 case 3:
198 init.hostname = random_value;
199 break;
200 case 4:
201 init.port = random_value;
202 break;
203 case 5:
204 init.pathname = random_value;
205 break;
206 case 6:
207 init.search = random_value;
208 break;
209 case 7:
210 init.hash = random_value;
211 break;
212 }
213 auto result_with_init = ada::parse_url_pattern<regex_provider>(
214 init, &base_source_view, nullptr);
215 if (result_with_init) {
216 exercise_result(*result_with_init);
217 exercise_exec_and_test(*result_with_init, test_input, test_base);
218 }
219
220 // Testing url_pattern_init with ALL fields populated simultaneously
221 ada::url_pattern_init init_all{};
222 init_all.protocol = to_ascii(fdp.ConsumeRandomLengthString(10));
223 init_all.username = to_ascii(fdp.ConsumeRandomLengthString(10));
224 init_all.password = to_ascii(fdp.ConsumeRandomLengthString(10));
225 init_all.hostname = to_ascii(fdp.ConsumeRandomLengthString(20));
226 init_all.port = to_ascii(fdp.ConsumeRandomLengthString(5));
227 init_all.pathname = "/" + to_ascii(fdp.ConsumeRandomLengthString(20));
228 init_all.search = to_ascii(fdp.ConsumeRandomLengthString(10));
229 init_all.hash = to_ascii(fdp.ConsumeRandomLengthString(10));
230 auto result_with_init_all =
231 ada::parse_url_pattern<regex_provider>(init_all, nullptr, nullptr);
232 if (result_with_init_all) {
233 exercise_result(*result_with_init_all);
234 exercise_exec_and_test(*result_with_init_all, test_input, test_base);
235 }
236 }
237
238 return 0;
239}
Main header for the Ada URL parser library.
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
void exercise_result(auto result)
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
static void exercise_exec_and_test(ada::url_pattern< regex_provider > &pattern, const std::string &test_input, const std::string &test_base)
ada::url_pattern_regex::std_regex_provider regex_provider
Definition url_pattern.cc:9