url/lib.rs
1// Copyright 2013-2015 The rust-url developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9/*!
10
11rust-url is an implementation of the [URL Standard](http://url.spec.whatwg.org/)
12for the [Rust](http://rust-lang.org/) programming language.
13
14
15# URL parsing and data structures
16
17First, URL parsing may fail for various reasons and therefore returns a `Result`.
18
19```
20use url::{Url, ParseError};
21
22assert!(Url::parse("http://[:::1]") == Err(ParseError::InvalidIpv6Address))
23```
24
25Let’s parse a valid URL and look at its components.
26
27```
28use url::{Url, Host, Position};
29# use url::ParseError;
30# fn run() -> Result<(), ParseError> {
31let issue_list_url = Url::parse(
32    "https://github.com/rust-lang/rust/issues?labels=E-easy&state=open"
33)?;
34
35
36assert!(issue_list_url.scheme() == "https");
37assert!(issue_list_url.username() == "");
38assert!(issue_list_url.password() == None);
39assert!(issue_list_url.host_str() == Some("github.com"));
40assert!(issue_list_url.host() == Some(Host::Domain("github.com")));
41assert!(issue_list_url.port() == None);
42assert!(issue_list_url.path() == "/rust-lang/rust/issues");
43assert!(issue_list_url.path_segments().map(|c| c.collect::<Vec<_>>()) ==
44        Some(vec!["rust-lang", "rust", "issues"]));
45assert!(issue_list_url.query() == Some("labels=E-easy&state=open"));
46assert!(&issue_list_url[Position::BeforePath..] == "/rust-lang/rust/issues?labels=E-easy&state=open");
47assert!(issue_list_url.fragment() == None);
48assert!(!issue_list_url.cannot_be_a_base());
49# Ok(())
50# }
51# run().unwrap();
52```
53
54Some URLs are said to be *cannot-be-a-base*:
55they don’t have a username, password, host, or port,
56and their "path" is an arbitrary string rather than slash-separated segments:
57
58```
59use url::Url;
60# use url::ParseError;
61
62# fn run() -> Result<(), ParseError> {
63let data_url = Url::parse("data:text/plain,Hello?World#")?;
64
65assert!(data_url.cannot_be_a_base());
66assert!(data_url.scheme() == "data");
67assert!(data_url.path() == "text/plain,Hello");
68assert!(data_url.path_segments().is_none());
69assert!(data_url.query() == Some("World"));
70assert!(data_url.fragment() == Some(""));
71# Ok(())
72# }
73# run().unwrap();
74```
75
76## Default Features
77
78Versions `<= 2.5.2` of the crate have no default features. Versions `> 2.5.2` have the default feature 'std'.
79If you are upgrading across this boundary and you have specified `default-features = false`, then
80you will need to add the 'std' feature or the 'alloc' feature to your dependency.
81The 'std' feature has the same behavior as the previous versions. The 'alloc' feature
82provides no_std support.
83
84## Serde
85
86Enable the `serde` feature to include `Deserialize` and `Serialize` implementations for `url::Url`.
87
88# Base URL
89
90Many contexts allow URL *references* that can be relative to a *base URL*:
91
92```html
93<link rel="stylesheet" href="../main.css">
94```
95
96Since parsed URLs are absolute, giving a base is required for parsing relative URLs:
97
98```
99use url::{Url, ParseError};
100
101assert!(Url::parse("../main.css") == Err(ParseError::RelativeUrlWithoutBase))
102```
103
Use the `join` method on a `Url` to resolve a relative reference against it as the base URL:
105
106```
107use url::Url;
108# use url::ParseError;
109
110# fn run() -> Result<(), ParseError> {
111let this_document = Url::parse("http://servo.github.io/rust-url/url/index.html")?;
112let css_url = this_document.join("../main.css")?;
113assert_eq!(css_url.as_str(), "http://servo.github.io/rust-url/main.css");
114# Ok(())
115# }
116# run().unwrap();
117```
118
119# Feature: `serde`
120
121If you enable the `serde` feature, [`Url`](struct.Url.html) will implement
122[`serde::Serialize`](https://docs.rs/serde/1/serde/trait.Serialize.html) and
123[`serde::Deserialize`](https://docs.rs/serde/1/serde/trait.Deserialize.html).
124See [serde documentation](https://serde.rs) for more information.
125
126```toml
127url = { version = "2", features = ["serde"] }
128```
129
130# Feature: `debugger_visualizer`
131
132If you enable the `debugger_visualizer` feature, the `url` crate will include
133a [natvis file](https://docs.microsoft.com/en-us/visualstudio/debugger/create-custom-views-of-native-objects)
134for [Visual Studio](https://www.visualstudio.com/) that allows you to view
135[`Url`](struct.Url.html) objects in the debugger.
136
137This feature requires Rust 1.71 or later.
138
139```toml
140url = { version = "2", features = ["debugger_visualizer"] }
141```
142
143*/
144
145#![no_std]
146#![doc(html_root_url = "https://docs.rs/url/2.5.4")]
147#![cfg_attr(
148    feature = "debugger_visualizer",
149    debugger_visualizer(natvis_file = "../../debug_metadata/url.natvis")
150)]
151
152pub use form_urlencoded;
153
154// For forwards compatibility
155#[cfg(feature = "std")]
156extern crate std;
157
158#[macro_use]
159extern crate alloc;
160
161#[cfg(feature = "serde")]
162extern crate serde;
163
164use crate::host::HostInternal;
165
166use crate::net::IpAddr;
167#[cfg(feature = "std")]
168#[cfg(any(
169    unix,
170    windows,
171    target_os = "redox",
172    target_os = "wasi",
173    target_os = "hermit"
174))]
175use crate::net::{SocketAddr, ToSocketAddrs};
176use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
177use alloc::borrow::ToOwned;
178use alloc::str;
179use alloc::string::{String, ToString};
180use core::borrow::Borrow;
181use core::convert::TryFrom;
182use core::fmt::Write;
183use core::ops::{Range, RangeFrom, RangeTo};
184use core::{cmp, fmt, hash, mem};
185use percent_encoding::utf8_percent_encode;
186#[cfg(feature = "std")]
187#[cfg(any(
188    unix,
189    windows,
190    target_os = "redox",
191    target_os = "wasi",
192    target_os = "hermit"
193))]
194use std::io;
195#[cfg(feature = "std")]
196use std::path::{Path, PathBuf};
197
/// Networking types used by this crate when the `std` feature is enabled.
///
/// Re-exports everything from `std::net` so the rest of the crate can name these
/// types through `crate::net` regardless of the active feature set.
#[cfg(feature = "std")]
pub(crate) mod net {
    pub use std::net::*;
}
/// Networking types used by this crate when the `std` feature is disabled.
///
/// Re-exports everything from `core::net` under the same `crate::net` path as the
/// `std` variant above.
// NOTE(review): this variant was originally labelled "nightly" — presumably from when
// `core::net` was still unstable; confirm against the crate's minimum supported Rust version.
#[cfg(not(feature = "std"))]
pub(crate) mod net {
    pub use core::net::*;
}
208
209pub use crate::host::Host;
210pub use crate::origin::{OpaqueOrigin, Origin};
211pub use crate::parser::{ParseError, SyntaxViolation};
212pub use crate::path_segments::PathSegmentsMut;
213pub use crate::slicing::Position;
214pub use form_urlencoded::EncodingOverride;
215
216mod host;
217mod origin;
218mod parser;
219mod path_segments;
220mod slicing;
221
222#[doc(hidden)]
223pub mod quirks;
224
/// A parsed URL record.
///
/// The URL is stored as one serialized string plus a set of offsets that delimit
/// each component inside that string.
#[derive(Clone)]
pub struct Url {
    /// The complete serialized URL. The offsets below are byte indices into this string.
    ///
    /// Syntax in pseudo-BNF:
    ///
    ///   url = scheme ":" [ hierarchical | non-hierarchical ] [ "?" query ]? [ "#" fragment ]?
    ///   non-hierarchical = non-hierarchical-path
    ///   non-hierarchical-path = /* Does not start with "/" */
    ///   hierarchical = authority? hierarchical-path
    ///   authority = "//" userinfo? host [ ":" port ]?
    ///   userinfo = username [ ":" password ]? "@"
    ///   hierarchical-path = [ "/" path-segment ]+
    serialization: String,

    // Components
    //
    // For URLs without an authority, `username_end`, `host_start` and `host_end`
    // all equal `scheme_end + 1` (see `check_invariants`).
    scheme_end: u32,   // Index of the ':' that ends the scheme
    username_end: u32, // Index of ':' (if a password is given) or '@' (if not)
    // `host_start..host_end` is the host text inside `serialization`.
    host_start: u32,
    host_end: u32,
    // Parsed form of the host: none, domain, IPv4 or IPv6.
    host: HostInternal,
    // `None` when no port is serialized, i.e. when `path_start == host_end`.
    port: Option<u16>,
    path_start: u32,             // Index of the initial '/', if any
    query_start: Option<u32>,    // Index of the '?' itself, unlike Position::QueryStart
    fragment_start: Option<u32>, // Index of the '#' itself, unlike Position::FragmentStart
}
250
/// Full configuration for the URL parser.
///
/// Obtain a default value from [`Url::options`], adjust it with the builder-style
/// methods, then call [`ParseOptions::parse`].
#[derive(Copy, Clone)]
#[must_use]
pub struct ParseOptions<'a> {
    // Base URL handed to the parser; `None` means no base is available.
    base_url: Option<&'a Url>,
    // Forwarded to the parser as its query encoding override.
    encoding_override: EncodingOverride<'a>,
    // Optional callback forwarded to the parser for reporting `SyntaxViolation`s.
    violation_fn: Option<&'a dyn Fn(SyntaxViolation)>,
}
259
260impl<'a> ParseOptions<'a> {
261    /// Change the base URL
262    ///
263    /// See the notes of [`Url::join`] for more details about how this base is considered
264    /// when parsing.
265    pub fn base_url(mut self, new: Option<&'a Url>) -> Self {
266        self.base_url = new;
267        self
268    }
269
270    /// Override the character encoding of query strings.
271    /// This is a legacy concept only relevant for HTML.
272    pub fn encoding_override(mut self, new: EncodingOverride<'a>) -> Self {
273        self.encoding_override = new;
274        self
275    }
276
277    /// Call the provided function or closure for a non-fatal `SyntaxViolation`
278    /// when it occurs during parsing. Note that since the provided function is
279    /// `Fn`, the caller might need to utilize _interior mutability_, such as with
280    /// a `RefCell`, to collect the violations.
281    ///
282    /// ## Example
283    /// ```
284    /// use std::cell::RefCell;
285    /// use url::{Url, SyntaxViolation};
286    /// # use url::ParseError;
287    /// # fn run() -> Result<(), url::ParseError> {
288    /// let violations = RefCell::new(Vec::new());
289    /// let url = Url::options()
290    ///     .syntax_violation_callback(Some(&|v| violations.borrow_mut().push(v)))
291    ///     .parse("https:////example.com")?;
292    /// assert_eq!(url.as_str(), "https://example.com/");
293    /// assert_eq!(violations.into_inner(),
294    ///            vec!(SyntaxViolation::ExpectedDoubleSlash));
295    /// # Ok(())
296    /// # }
297    /// # run().unwrap();
298    /// ```
299    pub fn syntax_violation_callback(mut self, new: Option<&'a dyn Fn(SyntaxViolation)>) -> Self {
300        self.violation_fn = new;
301        self
302    }
303
304    /// Parse an URL string with the configuration so far.
305    pub fn parse(self, input: &str) -> Result<Url, crate::ParseError> {
306        Parser {
307            serialization: String::with_capacity(input.len()),
308            base_url: self.base_url,
309            query_encoding_override: self.encoding_override,
310            violation_fn: self.violation_fn,
311            context: Context::UrlParser,
312        }
313        .parse_url(input)
314    }
315}
316
317impl Url {
318    /// Parse an absolute URL from a string.
319    ///
320    /// # Examples
321    ///
322    /// ```rust
323    /// use url::Url;
324    /// # use url::ParseError;
325    ///
326    /// # fn run() -> Result<(), ParseError> {
327    /// let url = Url::parse("https://example.net")?;
328    /// # Ok(())
329    /// # }
330    /// # run().unwrap();
331    /// ```
332    ///
333    /// # Errors
334    ///
335    /// If the function can not parse an absolute URL from the given string,
336    /// a [`ParseError`] variant will be returned.
337    ///
338    /// [`ParseError`]: enum.ParseError.html
339    #[inline]
340    pub fn parse(input: &str) -> Result<Url, crate::ParseError> {
341        Url::options().parse(input)
342    }
343
344    /// Parse an absolute URL from a string and add params to its query string.
345    ///
346    /// Existing params are not removed.
347    ///
348    /// # Examples
349    ///
350    /// ```rust
351    /// use url::Url;
352    /// # use url::ParseError;
353    ///
354    /// # fn run() -> Result<(), ParseError> {
355    /// let url = Url::parse_with_params("https://example.net?dont=clobberme",
356    ///                                  &[("lang", "rust"), ("browser", "servo")])?;
357    /// assert_eq!("https://example.net/?dont=clobberme&lang=rust&browser=servo", url.as_str());
358    /// # Ok(())
359    /// # }
360    /// # run().unwrap();
361    /// ```
362    ///
363    /// # Errors
364    ///
365    /// If the function can not parse an absolute URL from the given string,
366    /// a [`ParseError`] variant will be returned.
367    ///
368    /// [`ParseError`]: enum.ParseError.html
369    #[inline]
370    pub fn parse_with_params<I, K, V>(input: &str, iter: I) -> Result<Url, crate::ParseError>
371    where
372        I: IntoIterator,
373        I::Item: Borrow<(K, V)>,
374        K: AsRef<str>,
375        V: AsRef<str>,
376    {
377        let mut url = Url::options().parse(input);
378
379        if let Ok(ref mut url) = url {
380            url.query_pairs_mut().extend_pairs(iter);
381        }
382
383        url
384    }
385
386    /// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
387    fn strip_trailing_spaces_from_opaque_path(&mut self) {
388        if !self.cannot_be_a_base() {
389            return;
390        }
391
392        if self.fragment_start.is_some() {
393            return;
394        }
395
396        if self.query_start.is_some() {
397            return;
398        }
399
400        let trailing_space_count = self
401            .serialization
402            .chars()
403            .rev()
404            .take_while(|c| *c == ' ')
405            .count();
406
407        let start = self.serialization.len() - trailing_space_count;
408
409        self.serialization.truncate(start);
410    }
411
412    /// Parse a string as an URL, with this URL as the base URL.
413    ///
414    /// The inverse of this is [`make_relative`].
415    ///
416    /// # Notes
417    ///
418    /// - A trailing slash is significant.
419    ///   Without it, the last path component is considered to be a “file” name
420    ///   to be removed to get at the “directory” that is used as the base.
421    /// - A [scheme relative special URL](https://url.spec.whatwg.org/#scheme-relative-special-url-string)
422    ///   as input replaces everything in the base URL after the scheme.
423    /// - An absolute URL (with a scheme) as input replaces the whole base URL (even the scheme).
424    ///
425    /// # Examples
426    ///
427    /// ```rust
428    /// use url::Url;
429    /// # use url::ParseError;
430    ///
431    /// // Base without a trailing slash
432    /// # fn run() -> Result<(), ParseError> {
433    /// let base = Url::parse("https://example.net/a/b.html")?;
434    /// let url = base.join("c.png")?;
435    /// assert_eq!(url.as_str(), "https://example.net/a/c.png");  // Not /a/b.html/c.png
436    ///
437    /// // Base with a trailing slash
438    /// let base = Url::parse("https://example.net/a/b/")?;
439    /// let url = base.join("c.png")?;
440    /// assert_eq!(url.as_str(), "https://example.net/a/b/c.png");
441    ///
442    /// // Input as scheme relative special URL
443    /// let base = Url::parse("https://alice.com/a")?;
444    /// let url = base.join("//eve.com/b")?;
445    /// assert_eq!(url.as_str(), "https://eve.com/b");
446    ///
447    /// // Input as absolute URL
448    /// let base = Url::parse("https://alice.com/a")?;
449    /// let url = base.join("http://eve.com/b")?;
450    /// assert_eq!(url.as_str(), "http://eve.com/b");  // http instead of https
451
452    /// # Ok(())
453    /// # }
454    /// # run().unwrap();
455    /// ```
456    ///
457    /// # Errors
458    ///
459    /// If the function can not parse an URL from the given string
460    /// with this URL as the base URL, a [`ParseError`] variant will be returned.
461    ///
462    /// [`ParseError`]: enum.ParseError.html
463    /// [`make_relative`]: #method.make_relative
464    #[inline]
465    pub fn join(&self, input: &str) -> Result<Url, crate::ParseError> {
466        Url::options().base_url(Some(self)).parse(input)
467    }
468
469    /// Creates a relative URL if possible, with this URL as the base URL.
470    ///
471    /// This is the inverse of [`join`].
472    ///
473    /// # Examples
474    ///
475    /// ```rust
476    /// use url::Url;
477    /// # use url::ParseError;
478    ///
479    /// # fn run() -> Result<(), ParseError> {
480    /// let base = Url::parse("https://example.net/a/b.html")?;
481    /// let url = Url::parse("https://example.net/a/c.png")?;
482    /// let relative = base.make_relative(&url);
483    /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
484    ///
485    /// let base = Url::parse("https://example.net/a/b/")?;
486    /// let url = Url::parse("https://example.net/a/b/c.png")?;
487    /// let relative = base.make_relative(&url);
488    /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("c.png"));
489    ///
490    /// let base = Url::parse("https://example.net/a/b/")?;
491    /// let url = Url::parse("https://example.net/a/d/c.png")?;
492    /// let relative = base.make_relative(&url);
493    /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("../d/c.png"));
494    ///
495    /// let base = Url::parse("https://example.net/a/b.html?c=d")?;
496    /// let url = Url::parse("https://example.net/a/b.html?e=f")?;
497    /// let relative = base.make_relative(&url);
498    /// assert_eq!(relative.as_ref().map(|s| s.as_str()), Some("?e=f"));
499    /// # Ok(())
500    /// # }
501    /// # run().unwrap();
502    /// ```
503    ///
504    /// # Errors
505    ///
506    /// If this URL can't be a base for the given URL, `None` is returned.
507    /// This is for example the case if the scheme, host or port are not the same.
508    ///
509    /// [`join`]: #method.join
510    pub fn make_relative(&self, url: &Url) -> Option<String> {
511        if self.cannot_be_a_base() {
512            return None;
513        }
514
515        // Scheme, host and port need to be the same
516        if self.scheme() != url.scheme() || self.host() != url.host() || self.port() != url.port() {
517            return None;
518        }
519
520        // We ignore username/password at this point
521
522        // The path has to be transformed
523        let mut relative = String::new();
524
525        // Extract the filename of both URIs, these need to be handled separately
526        fn extract_path_filename(s: &str) -> (&str, &str) {
527            let last_slash_idx = s.rfind('/').unwrap_or(0);
528            let (path, filename) = s.split_at(last_slash_idx);
529            if filename.is_empty() {
530                (path, "")
531            } else {
532                (path, &filename[1..])
533            }
534        }
535
536        let (base_path, base_filename) = extract_path_filename(self.path());
537        let (url_path, url_filename) = extract_path_filename(url.path());
538
539        let mut base_path = base_path.split('/').peekable();
540        let mut url_path = url_path.split('/').peekable();
541
542        // Skip over the common prefix
543        while base_path.peek().is_some() && base_path.peek() == url_path.peek() {
544            base_path.next();
545            url_path.next();
546        }
547
548        // Add `..` segments for the remainder of the base path
549        for base_path_segment in base_path {
550            // Skip empty last segments
551            if base_path_segment.is_empty() {
552                break;
553            }
554
555            if !relative.is_empty() {
556                relative.push('/');
557            }
558
559            relative.push_str("..");
560        }
561
562        // Append the remainder of the other URI
563        for url_path_segment in url_path {
564            if !relative.is_empty() {
565                relative.push('/');
566            }
567
568            relative.push_str(url_path_segment);
569        }
570
571        // Add the filename if they are not the same
572        if !relative.is_empty() || base_filename != url_filename {
573            // If the URIs filename is empty this means that it was a directory
574            // so we'll have to append a '/'.
575            //
576            // Otherwise append it directly as the new filename.
577            if url_filename.is_empty() {
578                relative.push('/');
579            } else {
580                if !relative.is_empty() {
581                    relative.push('/');
582                }
583                relative.push_str(url_filename);
584            }
585        }
586
587        // Query and fragment are only taken from the other URI
588        if let Some(query) = url.query() {
589            relative.push('?');
590            relative.push_str(query);
591        }
592
593        if let Some(fragment) = url.fragment() {
594            relative.push('#');
595            relative.push_str(fragment);
596        }
597
598        Some(relative)
599    }
600
601    /// Return a default `ParseOptions` that can fully configure the URL parser.
602    ///
603    /// # Examples
604    ///
605    /// Get default `ParseOptions`, then change base url
606    ///
607    /// ```rust
608    /// use url::Url;
609    /// # use url::ParseError;
610    /// # fn run() -> Result<(), ParseError> {
611    /// let options = Url::options();
612    /// let api = Url::parse("https://api.example.com")?;
613    /// let base_url = options.base_url(Some(&api));
614    /// let version_url = base_url.parse("version.json")?;
615    /// assert_eq!(version_url.as_str(), "https://api.example.com/version.json");
616    /// # Ok(())
617    /// # }
618    /// # run().unwrap();
619    /// ```
620    pub fn options<'a>() -> ParseOptions<'a> {
621        ParseOptions {
622            base_url: None,
623            encoding_override: None,
624            violation_fn: None,
625        }
626    }
627
628    /// Return the serialization of this URL.
629    ///
630    /// This is fast since that serialization is already stored in the `Url` struct.
631    ///
632    /// # Examples
633    ///
634    /// ```rust
635    /// use url::Url;
636    /// # use url::ParseError;
637    ///
638    /// # fn run() -> Result<(), ParseError> {
639    /// let url_str = "https://example.net/";
640    /// let url = Url::parse(url_str)?;
641    /// assert_eq!(url.as_str(), url_str);
642    /// # Ok(())
643    /// # }
644    /// # run().unwrap();
645    /// ```
646    #[inline]
647    pub fn as_str(&self) -> &str {
648        &self.serialization
649    }
650
651    /// Return the serialization of this URL.
652    ///
653    /// This consumes the `Url` and takes ownership of the `String` stored in it.
654    ///
655    /// # Examples
656    ///
657    /// ```rust
658    /// use url::Url;
659    /// # use url::ParseError;
660    ///
661    /// # fn run() -> Result<(), ParseError> {
662    /// let url_str = "https://example.net/";
663    /// let url = Url::parse(url_str)?;
664    /// assert_eq!(String::from(url), url_str);
665    /// # Ok(())
666    /// # }
667    /// # run().unwrap();
668    /// ```
669    #[inline]
670    #[deprecated(since = "2.3.0", note = "use Into<String>")]
671    pub fn into_string(self) -> String {
672        self.into()
673    }
674
    /// For internal testing, not part of the public API.
    ///
    /// Methods of the `Url` struct assume a number of invariants.
    /// This checks each of these invariants and returns an `Err` describing the first
    /// one that is not met (the failed expression and the serialized URL).
    /// This is for testing rust-url itself.
    ///
    /// # Panics
    ///
    /// Panics if the text in the port position does not parse as a `u16`, or if the
    /// URL's own serialization fails to parse again.
    #[doc(hidden)]
    pub fn check_invariants(&self) -> Result<(), String> {
        // Local replacement for `assert!`: instead of panicking, a failed check
        // returns early from `check_invariants` with an error message.
        macro_rules! assert {
            ($x: expr) => {
                if !$x {
                    return Err(format!(
                        "!( {} ) for URL {:?}",
                        stringify!($x),
                        self.serialization
                    ));
                }
            };
        }

        // Local replacement for `assert_eq!`, with the same early-return behavior.
        // Both operands are evaluated exactly once.
        macro_rules! assert_eq {
            ($a: expr, $b: expr) => {
                {
                    let a = $a;
                    let b = $b;
                    if a != b {
                        return Err(format!("{:?} != {:?} ({} != {}) for URL {:?}",
                                           a, b, stringify!($a), stringify!($b),
                                           self.serialization))
                    }
                }
            }
        }

        // Scheme: non-empty, starts with an ASCII letter, continues with
        // ASCII letters, digits, '+', '-' or '.', and is terminated by ':'.
        assert!(self.scheme_end >= 1);
        assert!(self.byte_at(0).is_ascii_alphabetic());
        assert!(self
            .slice(1..self.scheme_end)
            .chars()
            .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.')));
        assert_eq!(self.byte_at(self.scheme_end), b':');

        if self.slice(self.scheme_end + 1..).starts_with("//") {
            // URL with authority
            if self.username_end != self.serialization.len() as u32 {
                match self.byte_at(self.username_end) {
                    // A password follows the username; '@' must directly precede the host.
                    b':' => {
                        assert!(self.host_start >= self.username_end + 2);
                        assert_eq!(self.byte_at(self.host_start - 1), b'@');
                    }
                    // Username without password: the host starts right after '@'.
                    b'@' => assert!(self.host_start == self.username_end + 1),
                    // No userinfo at all: `username_end` sits right after "://".
                    _ => assert_eq!(self.username_end, self.scheme_end + 3),
                }
            }
            assert!(self.host_start >= self.username_end);
            assert!(self.host_end >= self.host_start);
            // The serialized host text must agree with the parsed `host` value.
            let host_str = self.slice(self.host_start..self.host_end);
            match self.host {
                HostInternal::None => assert_eq!(host_str, ""),
                HostInternal::Ipv4(address) => assert_eq!(host_str, address.to_string()),
                HostInternal::Ipv6(address) => {
                    let h: Host<String> = Host::Ipv6(address);
                    assert_eq!(host_str, h.to_string())
                }
                HostInternal::Domain => {
                    // Special schemes never have an empty domain.
                    if SchemeType::from(self.scheme()).is_special() {
                        assert!(!host_str.is_empty())
                    }
                }
            }
            if self.path_start == self.host_end {
                // Nothing between host and path, so no port is serialized.
                assert_eq!(self.port, None);
            } else {
                // ":<port>" sits between the host and the path and must match `self.port`.
                assert_eq!(self.byte_at(self.host_end), b':');
                let port_str = self.slice(self.host_end + 1..self.path_start);
                assert_eq!(
                    self.port,
                    Some(port_str.parse::<u16>().expect("Couldn't parse port?"))
                );
            }
            // After the authority comes the end of the string, a path, a query or a fragment.
            assert!(
                self.path_start as usize == self.serialization.len()
                    || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?')
            );
        } else {
            // Anarchist URL (no authority)
            // All authority-related offsets collapse to the position right after ':'.
            assert_eq!(self.username_end, self.scheme_end + 1);
            assert_eq!(self.host_start, self.scheme_end + 1);
            assert_eq!(self.host_end, self.scheme_end + 1);
            assert_eq!(self.host, HostInternal::None);
            assert_eq!(self.port, None);
            if self.path().starts_with("//") {
                // special case when first path segment is empty
                // The serialization then carries "/." between the ':' and the path.
                assert_eq!(self.byte_at(self.scheme_end + 1), b'/');
                assert_eq!(self.byte_at(self.scheme_end + 2), b'.');
                assert_eq!(self.path_start, self.scheme_end + 3);
            } else {
                assert_eq!(self.path_start, self.scheme_end + 1);
            }
        }
        // Query and fragment offsets point at their '?' / '#' delimiter,
        // lie at or after the path start, and the fragment comes after the query.
        if let Some(start) = self.query_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'?');
        }
        if let Some(start) = self.fragment_start {
            assert!(start >= self.path_start);
            assert_eq!(self.byte_at(start), b'#');
        }
        if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) {
            assert!(fragment_start > query_start);
        }

        // Round-trip check: re-parsing the serialization must reproduce every field.
        let other = Url::parse(self.as_str()).expect("Failed to parse myself?");
        assert_eq!(&self.serialization, &other.serialization);
        assert_eq!(self.scheme_end, other.scheme_end);
        assert_eq!(self.username_end, other.username_end);
        assert_eq!(self.host_start, other.host_start);
        assert_eq!(self.host_end, other.host_end);
        assert!(
            self.host == other.host ||
                // XXX No host round-trips to empty host.
                // See https://github.com/whatwg/url/issues/79
                (self.host_str(), other.host_str()) == (None, Some(""))
        );
        assert_eq!(self.port, other.port);
        assert_eq!(self.path_start, other.path_start);
        assert_eq!(self.query_start, other.query_start);
        assert_eq!(self.fragment_start, other.fragment_start);
        Ok(())
    }
804
805    /// Return the origin of this URL (<https://url.spec.whatwg.org/#origin>)
806    ///
807    /// Note: this returns an opaque origin for `file:` URLs, which causes
808    /// `url.origin() != url.origin()`.
809    ///
810    /// # Examples
811    ///
812    /// URL with `ftp` scheme:
813    ///
814    /// ```rust
815    /// use url::{Host, Origin, Url};
816    /// # use url::ParseError;
817    ///
818    /// # fn run() -> Result<(), ParseError> {
819    /// let url = Url::parse("ftp://example.com/foo")?;
820    /// assert_eq!(url.origin(),
821    ///            Origin::Tuple("ftp".into(),
822    ///                          Host::Domain("example.com".into()),
823    ///                          21));
824    /// # Ok(())
825    /// # }
826    /// # run().unwrap();
827    /// ```
828    ///
829    /// URL with `blob` scheme:
830    ///
831    /// ```rust
832    /// use url::{Host, Origin, Url};
833    /// # use url::ParseError;
834    ///
835    /// # fn run() -> Result<(), ParseError> {
836    /// let url = Url::parse("blob:https://example.com/foo")?;
837    /// assert_eq!(url.origin(),
838    ///            Origin::Tuple("https".into(),
839    ///                          Host::Domain("example.com".into()),
840    ///                          443));
841    /// # Ok(())
842    /// # }
843    /// # run().unwrap();
844    /// ```
845    ///
846    /// URL with `file` scheme:
847    ///
848    /// ```rust
849    /// use url::{Host, Origin, Url};
850    /// # use url::ParseError;
851    ///
852    /// # fn run() -> Result<(), ParseError> {
853    /// let url = Url::parse("file:///tmp/foo")?;
854    /// assert!(!url.origin().is_tuple());
855    ///
856    /// let other_url = Url::parse("file:///tmp/foo")?;
857    /// assert!(url.origin() != other_url.origin());
858    /// # Ok(())
859    /// # }
860    /// # run().unwrap();
861    /// ```
862    ///
863    /// URL with other scheme:
864    ///
865    /// ```rust
866    /// use url::{Host, Origin, Url};
867    /// # use url::ParseError;
868    ///
869    /// # fn run() -> Result<(), ParseError> {
870    /// let url = Url::parse("foo:bar")?;
871    /// assert!(!url.origin().is_tuple());
872    /// # Ok(())
873    /// # }
874    /// # run().unwrap();
875    /// ```
876    #[inline]
877    pub fn origin(&self) -> Origin {
878        origin::url_origin(self)
879    }
880
881    /// Return the scheme of this URL, lower-cased, as an ASCII string without the ':' delimiter.
882    ///
883    /// # Examples
884    ///
885    /// ```
886    /// use url::Url;
887    /// # use url::ParseError;
888    ///
889    /// # fn run() -> Result<(), ParseError> {
890    /// let url = Url::parse("file:///tmp/foo")?;
891    /// assert_eq!(url.scheme(), "file");
892    /// # Ok(())
893    /// # }
894    /// # run().unwrap();
895    /// ```
896    #[inline]
897    pub fn scheme(&self) -> &str {
898        self.slice(..self.scheme_end)
899    }
900
901    /// Return whether the URL is special (has a special scheme)
902    ///
903    /// # Examples
904    ///
905    /// ```
906    /// use url::Url;
907    /// # use url::ParseError;
908    ///
909    /// # fn run() -> Result<(), ParseError> {
910    /// assert!(Url::parse("http:///tmp/foo")?.is_special());
911    /// assert!(Url::parse("file:///tmp/foo")?.is_special());
912    /// assert!(!Url::parse("moz:///tmp/foo")?.is_special());
913    /// # Ok(())
914    /// # }
915    /// # run().unwrap();
916    /// ```
917    pub fn is_special(&self) -> bool {
918        let scheme_type = SchemeType::from(self.scheme());
919        scheme_type.is_special()
920    }
921
922    /// Return whether the URL has an 'authority',
923    /// which can contain a username, password, host, and port number.
924    ///
925    /// URLs that do *not* are either path-only like `unix:/run/foo.socket`
926    /// or cannot-be-a-base like `data:text/plain,Stuff`.
927    ///
928    /// See also the `authority` method.
929    ///
930    /// # Examples
931    ///
932    /// ```
933    /// use url::Url;
934    /// # use url::ParseError;
935    ///
936    /// # fn run() -> Result<(), ParseError> {
937    /// let url = Url::parse("ftp://rms@example.com")?;
938    /// assert!(url.has_authority());
939    ///
940    /// let url = Url::parse("unix:/run/foo.socket")?;
941    /// assert!(!url.has_authority());
942    ///
943    /// let url = Url::parse("data:text/plain,Stuff")?;
944    /// assert!(!url.has_authority());
945    /// # Ok(())
946    /// # }
947    /// # run().unwrap();
948    /// ```
949    #[inline]
950    pub fn has_authority(&self) -> bool {
951        debug_assert!(self.byte_at(self.scheme_end) == b':');
952        self.slice(self.scheme_end..).starts_with("://")
953    }
954
955    /// Return the authority of this URL as an ASCII string.
956    ///
957    /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
958    /// of a special URL, or percent encoded for non-special URLs.
959    /// IPv6 addresses are given between `[` and `]` brackets.
960    /// Ports are omitted if they match the well known port of a special URL.
961    ///
962    /// Username and password are percent-encoded.
963    ///
964    /// See also the `has_authority` method.
965    ///
966    /// # Examples
967    ///
968    /// ```
969    /// use url::Url;
970    /// # use url::ParseError;
971    ///
972    /// # fn run() -> Result<(), ParseError> {
973    /// let url = Url::parse("unix:/run/foo.socket")?;
974    /// assert_eq!(url.authority(), "");
975    /// let url = Url::parse("file:///tmp/foo")?;
976    /// assert_eq!(url.authority(), "");
977    /// let url = Url::parse("https://user:password@example.com/tmp/foo")?;
978    /// assert_eq!(url.authority(), "user:password@example.com");
979    /// let url = Url::parse("irc://àlex.рф.example.com:6667/foo")?;
980    /// assert_eq!(url.authority(), "%C3%A0lex.%D1%80%D1%84.example.com:6667");
981    /// let url = Url::parse("http://àlex.рф.example.com:80/foo")?;
982    /// assert_eq!(url.authority(), "xn--lex-8ka.xn--p1ai.example.com");
983    /// # Ok(())
984    /// # }
985    /// # run().unwrap();
986    /// ```
987    pub fn authority(&self) -> &str {
988        let scheme_separator_len = "://".len() as u32;
989        if self.has_authority() && self.path_start > self.scheme_end + scheme_separator_len {
990            self.slice(self.scheme_end + scheme_separator_len..self.path_start)
991        } else {
992            ""
993        }
994    }
995
996    /// Return whether this URL is a cannot-be-a-base URL,
997    /// meaning that parsing a relative URL string with this URL as the base will return an error.
998    ///
999    /// This is the case if the scheme and `:` delimiter are not followed by a `/` slash,
1000    /// as is typically the case of `data:` and `mailto:` URLs.
1001    ///
1002    /// # Examples
1003    ///
1004    /// ```
1005    /// use url::Url;
1006    /// # use url::ParseError;
1007    ///
1008    /// # fn run() -> Result<(), ParseError> {
1009    /// let url = Url::parse("ftp://rms@example.com")?;
1010    /// assert!(!url.cannot_be_a_base());
1011    ///
1012    /// let url = Url::parse("unix:/run/foo.socket")?;
1013    /// assert!(!url.cannot_be_a_base());
1014    ///
1015    /// let url = Url::parse("data:text/plain,Stuff")?;
1016    /// assert!(url.cannot_be_a_base());
1017    /// # Ok(())
1018    /// # }
1019    /// # run().unwrap();
1020    /// ```
1021    #[inline]
1022    pub fn cannot_be_a_base(&self) -> bool {
1023        !self.slice(self.scheme_end + 1..).starts_with('/')
1024    }
1025
1026    /// Return the username for this URL (typically the empty string)
1027    /// as a percent-encoded ASCII string.
1028    ///
1029    /// # Examples
1030    ///
1031    /// ```
1032    /// use url::Url;
1033    /// # use url::ParseError;
1034    ///
1035    /// # fn run() -> Result<(), ParseError> {
1036    /// let url = Url::parse("ftp://rms@example.com")?;
1037    /// assert_eq!(url.username(), "rms");
1038    ///
1039    /// let url = Url::parse("ftp://:secret123@example.com")?;
1040    /// assert_eq!(url.username(), "");
1041    ///
1042    /// let url = Url::parse("https://example.com")?;
1043    /// assert_eq!(url.username(), "");
1044    /// # Ok(())
1045    /// # }
1046    /// # run().unwrap();
1047    /// ```
1048    pub fn username(&self) -> &str {
1049        let scheme_separator_len = "://".len() as u32;
1050        if self.has_authority() && self.username_end > self.scheme_end + scheme_separator_len {
1051            self.slice(self.scheme_end + scheme_separator_len..self.username_end)
1052        } else {
1053            ""
1054        }
1055    }
1056
1057    /// Return the password for this URL, if any, as a percent-encoded ASCII string.
1058    ///
1059    /// # Examples
1060    ///
1061    /// ```
1062    /// use url::Url;
1063    /// # use url::ParseError;
1064    ///
1065    /// # fn run() -> Result<(), ParseError> {
1066    /// let url = Url::parse("ftp://rms:secret123@example.com")?;
1067    /// assert_eq!(url.password(), Some("secret123"));
1068    ///
1069    /// let url = Url::parse("ftp://:secret123@example.com")?;
1070    /// assert_eq!(url.password(), Some("secret123"));
1071    ///
1072    /// let url = Url::parse("ftp://rms@example.com")?;
1073    /// assert_eq!(url.password(), None);
1074    ///
1075    /// let url = Url::parse("https://example.com")?;
1076    /// assert_eq!(url.password(), None);
1077    /// # Ok(())
1078    /// # }
1079    /// # run().unwrap();
1080    /// ```
1081    pub fn password(&self) -> Option<&str> {
1082        // This ':' is not the one marking a port number since a host can not be empty.
1083        // (Except for file: URLs, which do not have port numbers.)
1084        if self.has_authority()
1085            && self.username_end != self.serialization.len() as u32
1086            && self.byte_at(self.username_end) == b':'
1087        {
1088            debug_assert!(self.byte_at(self.host_start - 1) == b'@');
1089            Some(self.slice(self.username_end + 1..self.host_start - 1))
1090        } else {
1091            None
1092        }
1093    }
1094
1095    /// Equivalent to `url.host().is_some()`.
1096    ///
1097    /// # Examples
1098    ///
1099    /// ```
1100    /// use url::Url;
1101    /// # use url::ParseError;
1102    ///
1103    /// # fn run() -> Result<(), ParseError> {
1104    /// let url = Url::parse("ftp://rms@example.com")?;
1105    /// assert!(url.has_host());
1106    ///
1107    /// let url = Url::parse("unix:/run/foo.socket")?;
1108    /// assert!(!url.has_host());
1109    ///
1110    /// let url = Url::parse("data:text/plain,Stuff")?;
1111    /// assert!(!url.has_host());
1112    /// # Ok(())
1113    /// # }
1114    /// # run().unwrap();
1115    /// ```
1116    pub fn has_host(&self) -> bool {
1117        !matches!(self.host, HostInternal::None)
1118    }
1119
1120    /// Return the string representation of the host (domain or IP address) for this URL, if any.
1121    ///
1122    /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1123    /// of a special URL, or percent encoded for non-special URLs.
1124    /// IPv6 addresses are given between `[` and `]` brackets.
1125    ///
1126    /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1127    /// don’t have a host.
1128    ///
1129    /// See also the `host` method.
1130    ///
1131    /// # Examples
1132    ///
1133    /// ```
1134    /// use url::Url;
1135    /// # use url::ParseError;
1136    ///
1137    /// # fn run() -> Result<(), ParseError> {
1138    /// let url = Url::parse("https://127.0.0.1/index.html")?;
1139    /// assert_eq!(url.host_str(), Some("127.0.0.1"));
1140    ///
1141    /// let url = Url::parse("ftp://rms@example.com")?;
1142    /// assert_eq!(url.host_str(), Some("example.com"));
1143    ///
1144    /// let url = Url::parse("unix:/run/foo.socket")?;
1145    /// assert_eq!(url.host_str(), None);
1146    ///
1147    /// let url = Url::parse("data:text/plain,Stuff")?;
1148    /// assert_eq!(url.host_str(), None);
1149    /// # Ok(())
1150    /// # }
1151    /// # run().unwrap();
1152    /// ```
1153    pub fn host_str(&self) -> Option<&str> {
1154        if self.has_host() {
1155            Some(self.slice(self.host_start..self.host_end))
1156        } else {
1157            None
1158        }
1159    }
1160
1161    /// Return the parsed representation of the host for this URL.
1162    /// Non-ASCII domain labels are punycode-encoded per IDNA if this is the host
1163    /// of a special URL, or percent encoded for non-special URLs.
1164    ///
1165    /// Cannot-be-a-base URLs (typical of `data:` and `mailto:`) and some `file:` URLs
1166    /// don’t have a host.
1167    ///
1168    /// See also the `host_str` method.
1169    ///
1170    /// # Examples
1171    ///
1172    /// ```
1173    /// use url::Url;
1174    /// # use url::ParseError;
1175    ///
1176    /// # fn run() -> Result<(), ParseError> {
1177    /// let url = Url::parse("https://127.0.0.1/index.html")?;
1178    /// assert!(url.host().is_some());
1179    ///
1180    /// let url = Url::parse("ftp://rms@example.com")?;
1181    /// assert!(url.host().is_some());
1182    ///
1183    /// let url = Url::parse("unix:/run/foo.socket")?;
1184    /// assert!(url.host().is_none());
1185    ///
1186    /// let url = Url::parse("data:text/plain,Stuff")?;
1187    /// assert!(url.host().is_none());
1188    /// # Ok(())
1189    /// # }
1190    /// # run().unwrap();
1191    /// ```
1192    pub fn host(&self) -> Option<Host<&str>> {
1193        match self.host {
1194            HostInternal::None => None,
1195            HostInternal::Domain => Some(Host::Domain(self.slice(self.host_start..self.host_end))),
1196            HostInternal::Ipv4(address) => Some(Host::Ipv4(address)),
1197            HostInternal::Ipv6(address) => Some(Host::Ipv6(address)),
1198        }
1199    }
1200
1201    /// If this URL has a host and it is a domain name (not an IP address), return it.
1202    /// Non-ASCII domains are punycode-encoded per IDNA if this is the host
1203    /// of a special URL, or percent encoded for non-special URLs.
1204    ///
1205    /// # Examples
1206    ///
1207    /// ```
1208    /// use url::Url;
1209    /// # use url::ParseError;
1210    ///
1211    /// # fn run() -> Result<(), ParseError> {
1212    /// let url = Url::parse("https://127.0.0.1/")?;
1213    /// assert_eq!(url.domain(), None);
1214    ///
1215    /// let url = Url::parse("mailto:rms@example.net")?;
1216    /// assert_eq!(url.domain(), None);
1217    ///
1218    /// let url = Url::parse("https://example.com/")?;
1219    /// assert_eq!(url.domain(), Some("example.com"));
1220    /// # Ok(())
1221    /// # }
1222    /// # run().unwrap();
1223    /// ```
1224    pub fn domain(&self) -> Option<&str> {
1225        match self.host {
1226            HostInternal::Domain => Some(self.slice(self.host_start..self.host_end)),
1227            _ => None,
1228        }
1229    }
1230
1231    /// Return the port number for this URL, if any.
1232    ///
1233    /// Note that default port numbers are never reflected by the serialization,
1234    /// use the `port_or_known_default()` method if you want a default port number returned.
1235    ///
1236    /// # Examples
1237    ///
1238    /// ```
1239    /// use url::Url;
1240    /// # use url::ParseError;
1241    ///
1242    /// # fn run() -> Result<(), ParseError> {
1243    /// let url = Url::parse("https://example.com")?;
1244    /// assert_eq!(url.port(), None);
1245    ///
1246    /// let url = Url::parse("https://example.com:443/")?;
1247    /// assert_eq!(url.port(), None);
1248    ///
1249    /// let url = Url::parse("ssh://example.com:22")?;
1250    /// assert_eq!(url.port(), Some(22));
1251    /// # Ok(())
1252    /// # }
1253    /// # run().unwrap();
1254    /// ```
1255    #[inline]
1256    pub fn port(&self) -> Option<u16> {
1257        self.port
1258    }
1259
1260    /// Return the port number for this URL, or the default port number if it is known.
1261    ///
1262    /// This method only knows the default port number
1263    /// of the `http`, `https`, `ws`, `wss` and `ftp` schemes.
1264    ///
1265    /// For URLs in these schemes, this method always returns `Some(_)`.
1266    /// For other schemes, it is the same as `Url::port()`.
1267    ///
1268    /// # Examples
1269    ///
1270    /// ```
1271    /// use url::Url;
1272    /// # use url::ParseError;
1273    ///
1274    /// # fn run() -> Result<(), ParseError> {
1275    /// let url = Url::parse("foo://example.com")?;
1276    /// assert_eq!(url.port_or_known_default(), None);
1277    ///
1278    /// let url = Url::parse("foo://example.com:1456")?;
1279    /// assert_eq!(url.port_or_known_default(), Some(1456));
1280    ///
1281    /// let url = Url::parse("https://example.com")?;
1282    /// assert_eq!(url.port_or_known_default(), Some(443));
1283    /// # Ok(())
1284    /// # }
1285    /// # run().unwrap();
1286    /// ```
1287    #[inline]
1288    pub fn port_or_known_default(&self) -> Option<u16> {
1289        self.port.or_else(|| parser::default_port(self.scheme()))
1290    }
1291
1292    /// Resolve a URL’s host and port number to `SocketAddr`.
1293    ///
1294    /// If the URL has the default port number of a scheme that is unknown to this library,
1295    /// `default_port_number` provides an opportunity to provide the actual port number.
1296    /// In non-example code this should be implemented either simply as `|| None`,
1297    /// or by matching on the URL’s `.scheme()`.
1298    ///
1299    /// If the host is a domain, it is resolved using the standard library’s DNS support.
1300    ///
1301    /// # Examples
1302    ///
1303    /// ```no_run
1304    /// let url = url::Url::parse("https://example.net/").unwrap();
1305    /// let addrs = url.socket_addrs(|| None).unwrap();
1306    /// std::net::TcpStream::connect(&*addrs)
1307    /// # ;
1308    /// ```
1309    ///
1310    /// ```
1311    /// /// With application-specific known default port numbers
1312    /// fn socket_addrs(url: url::Url) -> std::io::Result<Vec<std::net::SocketAddr>> {
1313    ///     url.socket_addrs(|| match url.scheme() {
1314    ///         "socks5" | "socks5h" => Some(1080),
1315    ///         _ => None,
1316    ///     })
1317    /// }
1318    /// ```
1319    #[cfg(feature = "std")]
1320    #[cfg(any(
1321        unix,
1322        windows,
1323        target_os = "redox",
1324        target_os = "wasi",
1325        target_os = "hermit"
1326    ))]
1327    pub fn socket_addrs(
1328        &self,
1329        default_port_number: impl Fn() -> Option<u16>,
1330    ) -> io::Result<alloc::vec::Vec<SocketAddr>> {
1331        // Note: trying to avoid the Vec allocation by returning `impl AsRef<[SocketAddr]>`
1332        // causes borrowck issues because the return value borrows `default_port_number`:
1333        //
1334        // https://github.com/rust-lang/rfcs/blob/master/text/1951-expand-impl-trait.md#scoping-for-type-and-lifetime-parameters
1335        //
1336        // > This RFC proposes that *all* type parameters are considered in scope
1337        // > for `impl Trait` in return position
1338
1339        fn io_result<T>(opt: Option<T>, message: &str) -> io::Result<T> {
1340            opt.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, message))
1341        }
1342
1343        let host = io_result(self.host(), "No host name in the URL")?;
1344        let port = io_result(
1345            self.port_or_known_default().or_else(default_port_number),
1346            "No port number in the URL",
1347        )?;
1348        Ok(match host {
1349            Host::Domain(domain) => (domain, port).to_socket_addrs()?.collect(),
1350            Host::Ipv4(ip) => vec![(ip, port).into()],
1351            Host::Ipv6(ip) => vec![(ip, port).into()],
1352        })
1353    }
1354
1355    /// Return the path for this URL, as a percent-encoded ASCII string.
1356    /// For cannot-be-a-base URLs, this is an arbitrary string that doesn’t start with '/'.
1357    /// For other URLs, this starts with a '/' slash
1358    /// and continues with slash-separated path segments.
1359    ///
1360    /// # Examples
1361    ///
1362    /// ```rust
1363    /// use url::{Url, ParseError};
1364    ///
1365    /// # fn run() -> Result<(), ParseError> {
1366    /// let url = Url::parse("https://example.com/api/versions?page=2")?;
1367    /// assert_eq!(url.path(), "/api/versions");
1368    ///
1369    /// let url = Url::parse("https://example.com")?;
1370    /// assert_eq!(url.path(), "/");
1371    ///
1372    /// let url = Url::parse("https://example.com/countries/việt nam")?;
1373    /// assert_eq!(url.path(), "/countries/vi%E1%BB%87t%20nam");
1374    /// # Ok(())
1375    /// # }
1376    /// # run().unwrap();
1377    /// ```
1378    pub fn path(&self) -> &str {
1379        match (self.query_start, self.fragment_start) {
1380            (None, None) => self.slice(self.path_start..),
1381            (Some(next_component_start), _) | (None, Some(next_component_start)) => {
1382                self.slice(self.path_start..next_component_start)
1383            }
1384        }
1385    }
1386
1387    /// Unless this URL is cannot-be-a-base,
1388    /// return an iterator of '/' slash-separated path segments,
1389    /// each as a percent-encoded ASCII string.
1390    ///
1391    /// Return `None` for cannot-be-a-base URLs.
1392    ///
1393    /// When `Some` is returned, the iterator always contains at least one string
1394    /// (which may be empty).
1395    ///
1396    /// # Examples
1397    ///
1398    /// ```
1399    /// use url::Url;
1400    ///
1401    /// # #[cfg(feature = "std")]
1402    /// # use std::error::Error;
1403    /// # #[cfg(not(feature = "std"))]
1404    /// # use core::error::Error;
1405    ///
1406    /// # fn run() -> Result<(), Box<dyn Error>> {
1407    /// let url = Url::parse("https://example.com/foo/bar")?;
1408    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1409    /// assert_eq!(path_segments.next(), Some("foo"));
1410    /// assert_eq!(path_segments.next(), Some("bar"));
1411    /// assert_eq!(path_segments.next(), None);
1412    ///
1413    /// let url = Url::parse("https://example.com")?;
1414    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1415    /// assert_eq!(path_segments.next(), Some(""));
1416    /// assert_eq!(path_segments.next(), None);
1417    ///
1418    /// let url = Url::parse("data:text/plain,HelloWorld")?;
1419    /// assert!(url.path_segments().is_none());
1420    ///
1421    /// let url = Url::parse("https://example.com/countries/việt nam")?;
1422    /// let mut path_segments = url.path_segments().ok_or_else(|| "cannot be base")?;
1423    /// assert_eq!(path_segments.next(), Some("countries"));
1424    /// assert_eq!(path_segments.next(), Some("vi%E1%BB%87t%20nam"));
1425    /// # Ok(())
1426    /// # }
1427    /// # run().unwrap();
1428    /// ```
1429    pub fn path_segments(&self) -> Option<str::Split<'_, char>> {
1430        let path = self.path();
1431        path.strip_prefix('/').map(|remainder| remainder.split('/'))
1432    }
1433
1434    /// Return this URL’s query string, if any, as a percent-encoded ASCII string.
1435    ///
1436    /// # Examples
1437    ///
1438    /// ```rust
1439    /// use url::Url;
1440    /// # use url::ParseError;
1441    ///
    /// # fn run() -> Result<(), ParseError> {
1443    /// let url = Url::parse("https://example.com/products?page=2")?;
1444    /// let query = url.query();
1445    /// assert_eq!(query, Some("page=2"));
1446    ///
1447    /// let url = Url::parse("https://example.com/products")?;
1448    /// let query = url.query();
1449    /// assert!(query.is_none());
1450    ///
1451    /// let url = Url::parse("https://example.com/?country=español")?;
1452    /// let query = url.query();
1453    /// assert_eq!(query, Some("country=espa%C3%B1ol"));
1454    /// # Ok(())
1455    /// # }
1456    /// # run().unwrap();
1457    /// ```
1458    pub fn query(&self) -> Option<&str> {
1459        match (self.query_start, self.fragment_start) {
1460            (None, _) => None,
1461            (Some(query_start), None) => {
1462                debug_assert!(self.byte_at(query_start) == b'?');
1463                Some(self.slice(query_start + 1..))
1464            }
1465            (Some(query_start), Some(fragment_start)) => {
1466                debug_assert!(self.byte_at(query_start) == b'?');
1467                Some(self.slice(query_start + 1..fragment_start))
1468            }
1469        }
1470    }
1471
1472    /// Parse the URL’s query string, if any, as `application/x-www-form-urlencoded`
1473    /// and return an iterator of (key, value) pairs.
1474    ///
1475    /// # Examples
1476    ///
1477    /// ```rust
1478    /// use std::borrow::Cow;
1479    ///
1480    /// use url::Url;
1481    /// # use url::ParseError;
1482    ///
1483    /// # fn run() -> Result<(), ParseError> {
1484    /// let url = Url::parse("https://example.com/products?page=2&sort=desc")?;
1485    /// let mut pairs = url.query_pairs();
1486    ///
1487    /// assert_eq!(pairs.count(), 2);
1488    ///
1489    /// assert_eq!(pairs.next(), Some((Cow::Borrowed("page"), Cow::Borrowed("2"))));
1490    /// assert_eq!(pairs.next(), Some((Cow::Borrowed("sort"), Cow::Borrowed("desc"))));
1491    /// # Ok(())
1492    /// # }
1493    /// # run().unwrap();
    /// ```
1496    #[inline]
1497    pub fn query_pairs(&self) -> form_urlencoded::Parse<'_> {
1498        form_urlencoded::parse(self.query().unwrap_or("").as_bytes())
1499    }
1500
1501    /// Return this URL’s fragment identifier, if any.
1502    ///
1503    /// A fragment is the part of the URL after the `#` symbol.
1504    /// The fragment is optional and, if present, contains a fragment identifier
1505    /// that identifies a secondary resource, such as a section heading
1506    /// of a document.
1507    ///
    /// In HTML, the fragment identifier is usually the `id` attribute of an element
1509    /// that is scrolled to on load. Browsers typically will not send the fragment portion
1510    /// of a URL to the server.
1511    ///
1512    /// **Note:** the parser did *not* percent-encode this component,
1513    /// but the input may have been percent-encoded already.
1514    ///
1515    /// # Examples
1516    ///
1517    /// ```rust
1518    /// use url::Url;
1519    /// # use url::ParseError;
1520    ///
1521    /// # fn run() -> Result<(), ParseError> {
1522    /// let url = Url::parse("https://example.com/data.csv#row=4")?;
1523    ///
1524    /// assert_eq!(url.fragment(), Some("row=4"));
1525    ///
1526    /// let url = Url::parse("https://example.com/data.csv#cell=4,1-6,2")?;
1527    ///
1528    /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1529    /// # Ok(())
1530    /// # }
1531    /// # run().unwrap();
1532    /// ```
1533    pub fn fragment(&self) -> Option<&str> {
1534        self.fragment_start.map(|start| {
1535            debug_assert!(self.byte_at(start) == b'#');
1536            self.slice(start + 1..)
1537        })
1538    }
1539
1540    fn mutate<F: FnOnce(&mut Parser<'_>) -> R, R>(&mut self, f: F) -> R {
1541        let mut parser = Parser::for_setter(mem::take(&mut self.serialization));
1542        let result = f(&mut parser);
1543        self.serialization = parser.serialization;
1544        result
1545    }
1546
1547    /// Change this URL’s fragment identifier.
1548    ///
1549    /// # Examples
1550    ///
1551    /// ```rust
1552    /// use url::Url;
1553    /// # use url::ParseError;
1554    ///
1555    /// # fn run() -> Result<(), ParseError> {
1556    /// let mut url = Url::parse("https://example.com/data.csv")?;
    /// assert_eq!(url.as_str(), "https://example.com/data.csv");
    ///
1559    /// url.set_fragment(Some("cell=4,1-6,2"));
1560    /// assert_eq!(url.as_str(), "https://example.com/data.csv#cell=4,1-6,2");
1561    /// assert_eq!(url.fragment(), Some("cell=4,1-6,2"));
1562    ///
1563    /// url.set_fragment(None);
1564    /// assert_eq!(url.as_str(), "https://example.com/data.csv");
1565    /// assert!(url.fragment().is_none());
1566    /// # Ok(())
1567    /// # }
1568    /// # run().unwrap();
1569    /// ```
1570    pub fn set_fragment(&mut self, fragment: Option<&str>) {
1571        // Remove any previous fragment
1572        if let Some(start) = self.fragment_start {
1573            debug_assert!(self.byte_at(start) == b'#');
1574            self.serialization.truncate(start as usize);
1575        }
1576        // Write the new one
1577        if let Some(input) = fragment {
1578            self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1579            self.serialization.push('#');
1580            self.mutate(|parser| parser.parse_fragment(parser::Input::new_no_trim(input)))
1581        } else {
1582            self.fragment_start = None;
1583            self.strip_trailing_spaces_from_opaque_path();
1584        }
1585    }
1586
1587    fn take_fragment(&mut self) -> Option<String> {
1588        self.fragment_start.take().map(|start| {
1589            debug_assert!(self.byte_at(start) == b'#');
1590            let fragment = self.slice(start + 1..).to_owned();
1591            self.serialization.truncate(start as usize);
1592            fragment
1593        })
1594    }
1595
1596    fn restore_already_parsed_fragment(&mut self, fragment: Option<String>) {
1597        if let Some(ref fragment) = fragment {
1598            assert!(self.fragment_start.is_none());
1599            self.fragment_start = Some(to_u32(self.serialization.len()).unwrap());
1600            self.serialization.push('#');
1601            self.serialization.push_str(fragment);
1602        }
1603    }
1604
1605    /// Change this URL’s query string. If `query` is `None`, this URL's
1606    /// query string will be cleared.
1607    ///
1608    /// # Examples
1609    ///
1610    /// ```rust
1611    /// use url::Url;
1612    /// # use url::ParseError;
1613    ///
1614    /// # fn run() -> Result<(), ParseError> {
1615    /// let mut url = Url::parse("https://example.com/products")?;
1616    /// assert_eq!(url.as_str(), "https://example.com/products");
1617    ///
1618    /// url.set_query(Some("page=2"));
1619    /// assert_eq!(url.as_str(), "https://example.com/products?page=2");
1620    /// assert_eq!(url.query(), Some("page=2"));
1621    /// # Ok(())
1622    /// # }
1623    /// # run().unwrap();
1624    /// ```
1625    pub fn set_query(&mut self, query: Option<&str>) {
1626        let fragment = self.take_fragment();
1627
1628        // Remove any previous query
1629        if let Some(start) = self.query_start.take() {
1630            debug_assert!(self.byte_at(start) == b'?');
1631            self.serialization.truncate(start as usize);
1632        }
1633        // Write the new query, if any
1634        if let Some(input) = query {
1635            self.query_start = Some(to_u32(self.serialization.len()).unwrap());
1636            self.serialization.push('?');
1637            let scheme_type = SchemeType::from(self.scheme());
1638            let scheme_end = self.scheme_end;
1639            self.mutate(|parser| {
1640                let vfn = parser.violation_fn;
1641                parser.parse_query(
1642                    scheme_type,
1643                    scheme_end,
1644                    parser::Input::new_trim_tab_and_newlines(input, vfn),
1645                )
1646            });
1647        } else {
1648            self.query_start = None;
1649            if fragment.is_none() {
1650                self.strip_trailing_spaces_from_opaque_path();
1651            }
1652        }
1653
1654        self.restore_already_parsed_fragment(fragment);
1655    }
1656
1657    /// Manipulate this URL’s query string, viewed as a sequence of name/value pairs
1658    /// in `application/x-www-form-urlencoded` syntax.
1659    ///
1660    /// The return value has a method-chaining API:
1661    ///
1662    /// ```rust
1663    /// # use url::{Url, ParseError};
1664    ///
1665    /// # fn run() -> Result<(), ParseError> {
1666    /// let mut url = Url::parse("https://example.net?lang=fr#nav")?;
1667    /// assert_eq!(url.query(), Some("lang=fr"));
1668    ///
1669    /// url.query_pairs_mut().append_pair("foo", "bar");
1670    /// assert_eq!(url.query(), Some("lang=fr&foo=bar"));
1671    /// assert_eq!(url.as_str(), "https://example.net/?lang=fr&foo=bar#nav");
1672    ///
1673    /// url.query_pairs_mut()
1674    ///     .clear()
1675    ///     .append_pair("foo", "bar & baz")
1676    ///     .append_pair("saisons", "\u{00C9}t\u{00E9}+hiver");
1677    /// assert_eq!(url.query(), Some("foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver"));
1678    /// assert_eq!(url.as_str(),
1679    ///            "https://example.net/?foo=bar+%26+baz&saisons=%C3%89t%C3%A9%2Bhiver#nav");
1680    /// # Ok(())
1681    /// # }
1682    /// # run().unwrap();
1683    /// ```
1684    ///
1685    /// Note: `url.query_pairs_mut().clear();` is equivalent to `url.set_query(Some(""))`,
1686    /// not `url.set_query(None)`.
1687    ///
1688    /// The state of `Url` is unspecified if this return value is leaked without being dropped.
1689    pub fn query_pairs_mut(&mut self) -> form_urlencoded::Serializer<'_, UrlQuery<'_>> {
1690        let fragment = self.take_fragment();
1691
1692        let query_start;
1693        if let Some(start) = self.query_start {
1694            debug_assert!(self.byte_at(start) == b'?');
1695            query_start = start as usize;
1696        } else {
1697            query_start = self.serialization.len();
1698            self.query_start = Some(to_u32(query_start).unwrap());
1699            self.serialization.push('?');
1700        }
1701
1702        let query = UrlQuery {
1703            url: Some(self),
1704            fragment,
1705        };
1706        form_urlencoded::Serializer::for_suffix(query, query_start + "?".len())
1707    }
1708
1709    fn take_after_path(&mut self) -> String {
1710        match (self.query_start, self.fragment_start) {
1711            (Some(i), _) | (None, Some(i)) => {
1712                let after_path = self.slice(i..).to_owned();
1713                self.serialization.truncate(i as usize);
1714                after_path
1715            }
1716            (None, None) => String::new(),
1717        }
1718    }
1719
1720    /// Change this URL’s path.
1721    ///
1722    /// # Examples
1723    ///
1724    /// ```rust
1725    /// use url::Url;
1726    /// # use url::ParseError;
1727    ///
1728    /// # fn run() -> Result<(), ParseError> {
1729    /// let mut url = Url::parse("https://example.com")?;
1730    /// url.set_path("api/comments");
1731    /// assert_eq!(url.as_str(), "https://example.com/api/comments");
1732    /// assert_eq!(url.path(), "/api/comments");
1733    ///
1734    /// let mut url = Url::parse("https://example.com/api")?;
1735    /// url.set_path("data/report.csv");
1736    /// assert_eq!(url.as_str(), "https://example.com/data/report.csv");
1737    /// assert_eq!(url.path(), "/data/report.csv");
1738    ///
1739    /// // `set_path` percent-encodes the given string if it's not already percent-encoded.
1740    /// let mut url = Url::parse("https://example.com")?;
1741    /// url.set_path("api/some comments");
1742    /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1743    /// assert_eq!(url.path(), "/api/some%20comments");
1744    ///
1745    /// // `set_path` will not double percent-encode the string if it's already percent-encoded.
1746    /// let mut url = Url::parse("https://example.com")?;
1747    /// url.set_path("api/some%20comments");
1748    /// assert_eq!(url.as_str(), "https://example.com/api/some%20comments");
1749    /// assert_eq!(url.path(), "/api/some%20comments");
1750    ///
1751    /// # Ok(())
1752    /// # }
1753    /// # run().unwrap();
1754    /// ```
1755    pub fn set_path(&mut self, mut path: &str) {
1756        let after_path = self.take_after_path();
1757        let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
1758        let cannot_be_a_base = self.cannot_be_a_base();
1759        let scheme_type = SchemeType::from(self.scheme());
1760        self.serialization.truncate(self.path_start as usize);
1761        self.mutate(|parser| {
1762            if cannot_be_a_base {
1763                if path.starts_with('/') {
1764                    parser.serialization.push_str("%2F");
1765                    path = &path[1..];
1766                }
1767                parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
1768            } else {
1769                let mut has_host = true; // FIXME
1770                parser.parse_path_start(
1771                    scheme_type,
1772                    &mut has_host,
1773                    parser::Input::new_no_trim(path),
1774                );
1775            }
1776        });
1777        self.restore_after_path(old_after_path_pos, &after_path);
1778    }
1779
1780    /// Return an object with methods to manipulate this URL’s path segments.
1781    ///
1782    /// Return `Err(())` if this URL is cannot-be-a-base.
1783    #[allow(clippy::result_unit_err)]
1784    pub fn path_segments_mut(&mut self) -> Result<PathSegmentsMut<'_>, ()> {
1785        if self.cannot_be_a_base() {
1786            Err(())
1787        } else {
1788            Ok(path_segments::new(self))
1789        }
1790    }
1791
1792    fn restore_after_path(&mut self, old_after_path_position: u32, after_path: &str) {
1793        let new_after_path_position = to_u32(self.serialization.len()).unwrap();
1794        let adjust = |index: &mut u32| {
1795            *index -= old_after_path_position;
1796            *index += new_after_path_position;
1797        };
1798        if let Some(ref mut index) = self.query_start {
1799            adjust(index)
1800        }
1801        if let Some(ref mut index) = self.fragment_start {
1802            adjust(index)
1803        }
1804        self.serialization.push_str(after_path)
1805    }
1806
1807    /// Change this URL’s port number.
1808    ///
1809    /// Note that default port numbers are not reflected in the serialization.
1810    ///
1811    /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme;
1812    /// do nothing and return `Err`.
1813    ///
1814    /// # Examples
1815    ///
1816    /// ```
1817    /// use url::Url;
1818    ///
1819    /// # #[cfg(feature = "std")]
1820    /// # use std::error::Error;
1821    /// # #[cfg(not(feature = "std"))]
1822    /// # use core::error::Error;
1823    ///
1824    /// # fn run() -> Result<(), Box<dyn Error>> {
1825    /// let mut url = Url::parse("ssh://example.net:2048/")?;
1826    ///
1827    /// url.set_port(Some(4096)).map_err(|_| "cannot be base")?;
1828    /// assert_eq!(url.as_str(), "ssh://example.net:4096/");
1829    ///
1830    /// url.set_port(None).map_err(|_| "cannot be base")?;
1831    /// assert_eq!(url.as_str(), "ssh://example.net/");
1832    /// # Ok(())
1833    /// # }
1834    /// # run().unwrap();
1835    /// ```
1836    ///
1837    /// Known default port numbers are not reflected:
1838    ///
1839    /// ```rust
1840    /// use url::Url;
1841    ///
1842    /// # #[cfg(feature = "std")]
1843    /// # use std::error::Error;
1844    /// # #[cfg(not(feature = "std"))]
1845    /// # use core::error::Error;
1846    ///
1847    /// # fn run() -> Result<(), Box<dyn Error>> {
1848    /// let mut url = Url::parse("https://example.org/")?;
1849    ///
1850    /// url.set_port(Some(443)).map_err(|_| "cannot be base")?;
1851    /// assert!(url.port().is_none());
1852    /// # Ok(())
1853    /// # }
1854    /// # run().unwrap();
1855    /// ```
1856    ///
1857    /// Cannot set port for cannot-be-a-base URLs:
1858    ///
1859    /// ```
1860    /// use url::Url;
1861    /// # use url::ParseError;
1862    ///
1863    /// # fn run() -> Result<(), ParseError> {
1864    /// let mut url = Url::parse("mailto:rms@example.net")?;
1865    ///
1866    /// let result = url.set_port(Some(80));
1867    /// assert!(result.is_err());
1868    ///
1869    /// let result = url.set_port(None);
1870    /// assert!(result.is_err());
1871    /// # Ok(())
1872    /// # }
1873    /// # run().unwrap();
1874    /// ```
1875    #[allow(clippy::result_unit_err)]
1876    pub fn set_port(&mut self, mut port: Option<u16>) -> Result<(), ()> {
1877        // has_host implies !cannot_be_a_base
1878        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
1879            return Err(());
1880        }
1881        if port.is_some() && port == parser::default_port(self.scheme()) {
1882            port = None
1883        }
1884        self.set_port_internal(port);
1885        Ok(())
1886    }
1887
1888    fn set_port_internal(&mut self, port: Option<u16>) {
1889        match (self.port, port) {
1890            (None, None) => {}
1891            (Some(_), None) => {
1892                self.serialization
1893                    .drain(self.host_end as usize..self.path_start as usize);
1894                let offset = self.path_start - self.host_end;
1895                self.path_start = self.host_end;
1896                if let Some(ref mut index) = self.query_start {
1897                    *index -= offset
1898                }
1899                if let Some(ref mut index) = self.fragment_start {
1900                    *index -= offset
1901                }
1902            }
1903            (Some(old), Some(new)) if old == new => {}
1904            (_, Some(new)) => {
1905                let path_and_after = self.slice(self.path_start..).to_owned();
1906                self.serialization.truncate(self.host_end as usize);
1907                write!(&mut self.serialization, ":{}", new).unwrap();
1908                let old_path_start = self.path_start;
1909                let new_path_start = to_u32(self.serialization.len()).unwrap();
1910                self.path_start = new_path_start;
1911                let adjust = |index: &mut u32| {
1912                    *index -= old_path_start;
1913                    *index += new_path_start;
1914                };
1915                if let Some(ref mut index) = self.query_start {
1916                    adjust(index)
1917                }
1918                if let Some(ref mut index) = self.fragment_start {
1919                    adjust(index)
1920                }
1921                self.serialization.push_str(&path_and_after);
1922            }
1923        }
1924        self.port = port;
1925    }
1926
1927    /// Change this URL’s host.
1928    ///
1929    /// Removing the host (calling this with `None`)
1930    /// will also remove any username, password, and port number.
1931    ///
1932    /// # Examples
1933    ///
1934    /// Change host:
1935    ///
1936    /// ```
1937    /// use url::Url;
1938    /// # use url::ParseError;
1939    ///
1940    /// # fn run() -> Result<(), ParseError> {
1941    /// let mut url = Url::parse("https://example.net")?;
1942    /// let result = url.set_host(Some("rust-lang.org"));
1943    /// assert!(result.is_ok());
1944    /// assert_eq!(url.as_str(), "https://rust-lang.org/");
1945    /// # Ok(())
1946    /// # }
1947    /// # run().unwrap();
1948    /// ```
1949    ///
1950    /// Remove host:
1951    ///
1952    /// ```
1953    /// use url::Url;
1954    /// # use url::ParseError;
1955    ///
1956    /// # fn run() -> Result<(), ParseError> {
1957    /// let mut url = Url::parse("foo://example.net")?;
1958    /// let result = url.set_host(None);
1959    /// assert!(result.is_ok());
1960    /// assert_eq!(url.as_str(), "foo:/");
1961    /// # Ok(())
1962    /// # }
1963    /// # run().unwrap();
1964    /// ```
1965    ///
1966    /// Cannot remove host for 'special' schemes (e.g. `http`):
1967    ///
1968    /// ```
1969    /// use url::Url;
1970    /// # use url::ParseError;
1971    ///
1972    /// # fn run() -> Result<(), ParseError> {
1973    /// let mut url = Url::parse("https://example.net")?;
1974    /// let result = url.set_host(None);
1975    /// assert!(result.is_err());
1976    /// assert_eq!(url.as_str(), "https://example.net/");
1977    /// # Ok(())
1978    /// # }
1979    /// # run().unwrap();
1980    /// ```
1981    ///
1982    /// Cannot change or remove host for cannot-be-a-base URLs:
1983    ///
1984    /// ```
1985    /// use url::Url;
1986    /// # use url::ParseError;
1987    ///
1988    /// # fn run() -> Result<(), ParseError> {
1989    /// let mut url = Url::parse("mailto:rms@example.net")?;
1990    ///
1991    /// let result = url.set_host(Some("rust-lang.org"));
1992    /// assert!(result.is_err());
1993    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1994    ///
1995    /// let result = url.set_host(None);
1996    /// assert!(result.is_err());
1997    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
1998    /// # Ok(())
1999    /// # }
2000    /// # run().unwrap();
2001    /// ```
2002    ///
2003    /// # Errors
2004    ///
2005    /// If this URL is cannot-be-a-base or there is an error parsing the given `host`,
2006    /// a [`ParseError`] variant will be returned.
2007    ///
2008    /// [`ParseError`]: enum.ParseError.html
2009    pub fn set_host(&mut self, host: Option<&str>) -> Result<(), ParseError> {
2010        if self.cannot_be_a_base() {
2011            return Err(ParseError::SetHostOnCannotBeABaseUrl);
2012        }
2013
2014        let scheme_type = SchemeType::from(self.scheme());
2015
2016        if let Some(host) = host {
2017            if host.is_empty() && scheme_type.is_special() && !scheme_type.is_file() {
2018                return Err(ParseError::EmptyHost);
2019            }
2020            let mut host_substr = host;
2021            // Otherwise, if c is U+003A (:) and the [] flag is unset, then
2022            if !host.starts_with('[') || !host.ends_with(']') {
2023                match host.find(':') {
2024                    Some(0) => {
2025                        // If buffer is the empty string, validation error, return failure.
2026                        return Err(ParseError::InvalidDomainCharacter);
2027                    }
2028                    // Let host be the result of host parsing buffer
2029                    Some(colon_index) => {
2030                        host_substr = &host[..colon_index];
2031                    }
2032                    None => {}
2033                }
2034            }
2035            if SchemeType::from(self.scheme()).is_special() {
2036                self.set_host_internal(Host::parse(host_substr)?, None);
2037            } else {
2038                self.set_host_internal(Host::parse_opaque(host_substr)?, None);
2039            }
2040        } else if self.has_host() {
2041            if scheme_type.is_special() && !scheme_type.is_file() {
2042                return Err(ParseError::EmptyHost);
2043            } else if self.serialization.len() == self.path_start as usize {
2044                self.serialization.push('/');
2045            }
2046            debug_assert!(self.byte_at(self.scheme_end) == b':');
2047            debug_assert!(self.byte_at(self.path_start) == b'/');
2048
2049            let new_path_start = if scheme_type.is_file() {
2050                self.scheme_end + 3
2051            } else {
2052                self.scheme_end + 1
2053            };
2054
2055            self.serialization
2056                .drain(new_path_start as usize..self.path_start as usize);
2057            let offset = self.path_start - new_path_start;
2058            self.path_start = new_path_start;
2059            self.username_end = new_path_start;
2060            self.host_start = new_path_start;
2061            self.host_end = new_path_start;
2062            self.port = None;
2063            if let Some(ref mut index) = self.query_start {
2064                *index -= offset
2065            }
2066            if let Some(ref mut index) = self.fragment_start {
2067                *index -= offset
2068            }
2069        }
2070        Ok(())
2071    }
2072
2073    /// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
2074    fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
2075        let old_suffix_pos = if opt_new_port.is_some() {
2076            self.path_start
2077        } else {
2078            self.host_end
2079        };
2080        let suffix = self.slice(old_suffix_pos..).to_owned();
2081        self.serialization.truncate(self.host_start as usize);
2082        if !self.has_authority() {
2083            debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
2084            debug_assert!(self.username_end == self.host_start);
2085            self.serialization.push('/');
2086            self.serialization.push('/');
2087            self.username_end += 2;
2088            self.host_start += 2;
2089        }
2090        write!(&mut self.serialization, "{}", host).unwrap();
2091        self.host_end = to_u32(self.serialization.len()).unwrap();
2092        self.host = host.into();
2093
2094        if let Some(new_port) = opt_new_port {
2095            self.port = new_port;
2096            if let Some(port) = new_port {
2097                write!(&mut self.serialization, ":{}", port).unwrap();
2098            }
2099        }
2100        let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
2101        self.serialization.push_str(&suffix);
2102
2103        let adjust = |index: &mut u32| {
2104            *index -= old_suffix_pos;
2105            *index += new_suffix_pos;
2106        };
2107        adjust(&mut self.path_start);
2108        if let Some(ref mut index) = self.query_start {
2109            adjust(index)
2110        }
2111        if let Some(ref mut index) = self.fragment_start {
2112            adjust(index)
2113        }
2114    }
2115
2116    /// Change this URL’s host to the given IP address.
2117    ///
2118    /// If this URL is cannot-be-a-base, do nothing and return `Err`.
2119    ///
2120    /// Compared to `Url::set_host`, this skips the host parser.
2121    ///
2122    /// # Examples
2123    ///
2124    /// ```rust
2125    /// use url::{Url, ParseError};
2126    ///
2127    /// # fn run() -> Result<(), ParseError> {
2128    /// let mut url = Url::parse("http://example.com")?;
2129    /// url.set_ip_host("127.0.0.1".parse().unwrap());
2130    /// assert_eq!(url.host_str(), Some("127.0.0.1"));
2131    /// assert_eq!(url.as_str(), "http://127.0.0.1/");
2132    /// # Ok(())
2133    /// # }
2134    /// # run().unwrap();
2135    /// ```
2136    ///
2137    /// Cannot change URL's from mailto(cannot-be-base) to ip:
2138    ///
2139    /// ```rust
2140    /// use url::{Url, ParseError};
2141    ///
2142    /// # fn run() -> Result<(), ParseError> {
2143    /// let mut url = Url::parse("mailto:rms@example.com")?;
2144    /// let result = url.set_ip_host("127.0.0.1".parse().unwrap());
2145    ///
2146    /// assert_eq!(url.as_str(), "mailto:rms@example.com");
2147    /// assert!(result.is_err());
2148    /// # Ok(())
2149    /// # }
2150    /// # run().unwrap();
2151    /// ```
2152    ///
2153    #[allow(clippy::result_unit_err)]
2154    pub fn set_ip_host(&mut self, address: IpAddr) -> Result<(), ()> {
2155        if self.cannot_be_a_base() {
2156            return Err(());
2157        }
2158
2159        let address = match address {
2160            IpAddr::V4(address) => Host::Ipv4(address),
2161            IpAddr::V6(address) => Host::Ipv6(address),
2162        };
2163        self.set_host_internal(address, None);
2164        Ok(())
2165    }
2166
2167    /// Change this URL’s password.
2168    ///
2169    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2170    ///
2171    /// # Examples
2172    ///
2173    /// ```rust
2174    /// use url::{Url, ParseError};
2175    ///
2176    /// # fn run() -> Result<(), ParseError> {
2177    /// let mut url = Url::parse("mailto:rmz@example.com")?;
2178    /// let result = url.set_password(Some("secret_password"));
2179    /// assert!(result.is_err());
2180    ///
2181    /// let mut url = Url::parse("ftp://user1:secret1@example.com")?;
2182    /// let result = url.set_password(Some("secret_password"));
2183    /// assert_eq!(url.password(), Some("secret_password"));
2184    ///
2185    /// let mut url = Url::parse("ftp://user2:@example.com")?;
2186    /// let result = url.set_password(Some("secret2"));
2187    /// assert!(result.is_ok());
2188    /// assert_eq!(url.password(), Some("secret2"));
2189    /// # Ok(())
2190    /// # }
2191    /// # run().unwrap();
2192    /// ```
2193    #[allow(clippy::result_unit_err)]
2194    pub fn set_password(&mut self, password: Option<&str>) -> Result<(), ()> {
2195        // has_host implies !cannot_be_a_base
2196        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2197            return Err(());
2198        }
2199        let password = password.unwrap_or_default();
2200        if !password.is_empty() {
2201            let host_and_after = self.slice(self.host_start..).to_owned();
2202            self.serialization.truncate(self.username_end as usize);
2203            self.serialization.push(':');
2204            self.serialization
2205                .extend(utf8_percent_encode(password, USERINFO));
2206            self.serialization.push('@');
2207
2208            let old_host_start = self.host_start;
2209            let new_host_start = to_u32(self.serialization.len()).unwrap();
2210            let adjust = |index: &mut u32| {
2211                *index -= old_host_start;
2212                *index += new_host_start;
2213            };
2214            self.host_start = new_host_start;
2215            adjust(&mut self.host_end);
2216            adjust(&mut self.path_start);
2217            if let Some(ref mut index) = self.query_start {
2218                adjust(index)
2219            }
2220            if let Some(ref mut index) = self.fragment_start {
2221                adjust(index)
2222            }
2223
2224            self.serialization.push_str(&host_and_after);
2225        } else if self.byte_at(self.username_end) == b':' {
2226            // If there is a password to remove
2227            let has_username_or_password = self.byte_at(self.host_start - 1) == b'@';
2228            debug_assert!(has_username_or_password);
2229            let username_start = self.scheme_end + 3;
2230            let empty_username = username_start == self.username_end;
2231            let start = self.username_end; // Remove the ':'
2232            let end = if empty_username {
2233                self.host_start // Remove the '@' as well
2234            } else {
2235                self.host_start - 1 // Keep the '@' to separate the username from the host
2236            };
2237            self.serialization.drain(start as usize..end as usize);
2238            let offset = end - start;
2239            self.host_start -= offset;
2240            self.host_end -= offset;
2241            self.path_start -= offset;
2242            if let Some(ref mut index) = self.query_start {
2243                *index -= offset
2244            }
2245            if let Some(ref mut index) = self.fragment_start {
2246                *index -= offset
2247            }
2248        }
2249        Ok(())
2250    }
2251
2252    /// Change this URL’s username.
2253    ///
2254    /// If this URL is cannot-be-a-base or does not have a host, do nothing and return `Err`.
2255    /// # Examples
2256    ///
2257    /// Cannot setup username from mailto(cannot-be-base)
2258    ///
2259    /// ```rust
2260    /// use url::{Url, ParseError};
2261    ///
2262    /// # fn run() -> Result<(), ParseError> {
2263    /// let mut url = Url::parse("mailto:rmz@example.com")?;
2264    /// let result = url.set_username("user1");
2265    /// assert_eq!(url.as_str(), "mailto:rmz@example.com");
2266    /// assert!(result.is_err());
2267    /// # Ok(())
2268    /// # }
2269    /// # run().unwrap();
2270    /// ```
2271    ///
2272    /// Setup username to user1
2273    ///
2274    /// ```rust
2275    /// use url::{Url, ParseError};
2276    ///
2277    /// # fn run() -> Result<(), ParseError> {
2278    /// let mut url = Url::parse("ftp://:secre1@example.com/")?;
2279    /// let result = url.set_username("user1");
2280    /// assert!(result.is_ok());
2281    /// assert_eq!(url.username(), "user1");
2282    /// assert_eq!(url.as_str(), "ftp://user1:secre1@example.com/");
2283    /// # Ok(())
2284    /// # }
2285    /// # run().unwrap();
2286    /// ```
2287    #[allow(clippy::result_unit_err)]
2288    pub fn set_username(&mut self, username: &str) -> Result<(), ()> {
2289        // has_host implies !cannot_be_a_base
2290        if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" {
2291            return Err(());
2292        }
2293        let username_start = self.scheme_end + 3;
2294        debug_assert!(self.slice(self.scheme_end..username_start) == "://");
2295        if self.slice(username_start..self.username_end) == username {
2296            return Ok(());
2297        }
2298        let after_username = self.slice(self.username_end..).to_owned();
2299        self.serialization.truncate(username_start as usize);
2300        self.serialization
2301            .extend(utf8_percent_encode(username, USERINFO));
2302
2303        let mut removed_bytes = self.username_end;
2304        self.username_end = to_u32(self.serialization.len()).unwrap();
2305        let mut added_bytes = self.username_end;
2306
2307        let new_username_is_empty = self.username_end == username_start;
2308        match (new_username_is_empty, after_username.chars().next()) {
2309            (true, Some('@')) => {
2310                removed_bytes += 1;
2311                self.serialization.push_str(&after_username[1..]);
2312            }
2313            (false, Some('@')) | (_, Some(':')) | (true, _) => {
2314                self.serialization.push_str(&after_username);
2315            }
2316            (false, _) => {
2317                added_bytes += 1;
2318                self.serialization.push('@');
2319                self.serialization.push_str(&after_username);
2320            }
2321        }
2322
2323        let adjust = |index: &mut u32| {
2324            *index -= removed_bytes;
2325            *index += added_bytes;
2326        };
2327        adjust(&mut self.host_start);
2328        adjust(&mut self.host_end);
2329        adjust(&mut self.path_start);
2330        if let Some(ref mut index) = self.query_start {
2331            adjust(index)
2332        }
2333        if let Some(ref mut index) = self.fragment_start {
2334            adjust(index)
2335        }
2336        Ok(())
2337    }
2338
2339    /// Change this URL’s scheme.
2340    ///
2341    /// Do nothing and return `Err` under the following circumstances:
2342    ///
2343    /// * If the new scheme is not in `[a-zA-Z][a-zA-Z0-9+.-]+`
2344    /// * If this URL is cannot-be-a-base and the new scheme is one of
2345    ///   `http`, `https`, `ws`, `wss` or `ftp`
2346    /// * If either the old or new scheme is `http`, `https`, `ws`,
2347    ///   `wss` or `ftp` and the other is not one of these
2348    /// * If the new scheme is `file` and this URL includes credentials
2349    ///   or has a non-null port
2350    /// * If this URL's scheme is `file` and its host is empty or null
2351    ///
2352    /// See also [the URL specification's section on legal scheme state
2353    /// overrides](https://url.spec.whatwg.org/#scheme-state).
2354    ///
2355    /// # Examples
2356    ///
2357    /// Change the URL’s scheme from `https` to `http`:
2358    ///
2359    /// ```
2360    /// use url::Url;
2361    /// # use url::ParseError;
2362    ///
2363    /// # fn run() -> Result<(), ParseError> {
2364    /// let mut url = Url::parse("https://example.net")?;
2365    /// let result = url.set_scheme("http");
2366    /// assert_eq!(url.as_str(), "http://example.net/");
2367    /// assert!(result.is_ok());
2368    /// # Ok(())
2369    /// # }
2370    /// # run().unwrap();
2371    /// ```
2372    /// Change the URL’s scheme from `foo` to `bar`:
2373    ///
2374    /// ```
2375    /// use url::Url;
2376    /// # use url::ParseError;
2377    ///
2378    /// # fn run() -> Result<(), ParseError> {
2379    /// let mut url = Url::parse("foo://example.net")?;
2380    /// let result = url.set_scheme("bar");
2381    /// assert_eq!(url.as_str(), "bar://example.net");
2382    /// assert!(result.is_ok());
2383    /// # Ok(())
2384    /// # }
2385    /// # run().unwrap();
2386    /// ```
2387    ///
2388    /// Cannot change URL’s scheme from `https` to `foõ`:
2389    ///
2390    /// ```
2391    /// use url::Url;
2392    /// # use url::ParseError;
2393    ///
2394    /// # fn run() -> Result<(), ParseError> {
2395    /// let mut url = Url::parse("https://example.net")?;
2396    /// let result = url.set_scheme("foõ");
2397    /// assert_eq!(url.as_str(), "https://example.net/");
2398    /// assert!(result.is_err());
2399    /// # Ok(())
2400    /// # }
2401    /// # run().unwrap();
2402    /// ```
2403    ///
2404    /// Cannot change URL’s scheme from `mailto` (cannot-be-a-base) to `https`:
2405    ///
2406    /// ```
2407    /// use url::Url;
2408    /// # use url::ParseError;
2409    ///
2410    /// # fn run() -> Result<(), ParseError> {
2411    /// let mut url = Url::parse("mailto:rms@example.net")?;
2412    /// let result = url.set_scheme("https");
2413    /// assert_eq!(url.as_str(), "mailto:rms@example.net");
2414    /// assert!(result.is_err());
2415    /// # Ok(())
2416    /// # }
2417    /// # run().unwrap();
2418    /// ```
2419    /// Cannot change the URL’s scheme from `foo` to `https`:
2420    ///
2421    /// ```
2422    /// use url::Url;
2423    /// # use url::ParseError;
2424    ///
2425    /// # fn run() -> Result<(), ParseError> {
2426    /// let mut url = Url::parse("foo://example.net")?;
2427    /// let result = url.set_scheme("https");
2428    /// assert_eq!(url.as_str(), "foo://example.net");
2429    /// assert!(result.is_err());
2430    /// # Ok(())
2431    /// # }
2432    /// # run().unwrap();
2433    /// ```
2434    /// Cannot change the URL’s scheme from `http` to `foo`:
2435    ///
2436    /// ```
2437    /// use url::Url;
2438    /// # use url::ParseError;
2439    ///
2440    /// # fn run() -> Result<(), ParseError> {
2441    /// let mut url = Url::parse("http://example.net")?;
2442    /// let result = url.set_scheme("foo");
2443    /// assert_eq!(url.as_str(), "http://example.net/");
2444    /// assert!(result.is_err());
2445    /// # Ok(())
2446    /// # }
2447    /// # run().unwrap();
2448    /// ```
2449    #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)]
2450    pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> {
2451        let mut parser = Parser::for_setter(String::new());
2452        let remaining = parser.parse_scheme(parser::Input::new_no_trim(scheme))?;
2453        let new_scheme_type = SchemeType::from(&parser.serialization);
2454        let old_scheme_type = SchemeType::from(self.scheme());
2455        // If url’s scheme is a special scheme and buffer is not a special scheme, then return.
2456        if (new_scheme_type.is_special() && !old_scheme_type.is_special()) ||
2457            // If url’s scheme is not a special scheme and buffer is a special scheme, then return.
2458            (!new_scheme_type.is_special() && old_scheme_type.is_special()) ||
2459            // If url includes credentials or has a non-null port, and buffer is "file", then return.
2460            // If url’s scheme is "file" and its host is an empty host or null, then return.
2461            (new_scheme_type.is_file() && self.has_authority())
2462        {
2463            return Err(());
2464        }
2465
2466        if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) {
2467            return Err(());
2468        }
2469        let old_scheme_end = self.scheme_end;
2470        let new_scheme_end = to_u32(parser.serialization.len()).unwrap();
2471        let adjust = |index: &mut u32| {
2472            *index -= old_scheme_end;
2473            *index += new_scheme_end;
2474        };
2475
2476        self.scheme_end = new_scheme_end;
2477        adjust(&mut self.username_end);
2478        adjust(&mut self.host_start);
2479        adjust(&mut self.host_end);
2480        adjust(&mut self.path_start);
2481        if let Some(ref mut index) = self.query_start {
2482            adjust(index)
2483        }
2484        if let Some(ref mut index) = self.fragment_start {
2485            adjust(index)
2486        }
2487
2488        parser.serialization.push_str(self.slice(old_scheme_end..));
2489        self.serialization = parser.serialization;
2490
2491        // Update the port so it can be removed
2492        // If it is the scheme's default
2493        // we don't mind it silently failing
2494        // if there was no port in the first place
2495        let previous_port = self.port();
2496        let _ = self.set_port(previous_port);
2497
2498        Ok(())
2499    }
2500
2501    /// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
2502    ///
2503    /// This returns `Err` if the given path is not absolute or,
2504    /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2505    ///
2506    /// # Examples
2507    ///
2508    /// On Unix-like platforms:
2509    ///
2510    /// ```
2511    /// # if cfg!(unix) {
2512    /// use url::Url;
2513    ///
2514    /// # fn run() -> Result<(), ()> {
2515    /// let url = Url::from_file_path("/tmp/foo.txt")?;
2516    /// assert_eq!(url.as_str(), "file:///tmp/foo.txt");
2517    ///
2518    /// let url = Url::from_file_path("../foo.txt");
2519    /// assert!(url.is_err());
2520    ///
2521    /// let url = Url::from_file_path("https://google.com/");
2522    /// assert!(url.is_err());
2523    /// # Ok(())
2524    /// # }
2525    /// # run().unwrap();
2526    /// # }
2527    /// ```
2528    ///
2529    /// This method is only available if the `std` Cargo feature is enabled.
2530    #[cfg(all(
2531        feature = "std",
2532        any(
2533            unix,
2534            windows,
2535            target_os = "redox",
2536            target_os = "wasi",
2537            target_os = "hermit"
2538        )
2539    ))]
2540    #[allow(clippy::result_unit_err)]
2541    pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
2542        let mut serialization = "file://".to_owned();
2543        let host_start = serialization.len() as u32;
2544        let (host_end, host) = path_to_file_url_segments(path.as_ref(), &mut serialization)?;
2545        Ok(Url {
2546            serialization,
2547            scheme_end: "file".len() as u32,
2548            username_end: host_start,
2549            host_start,
2550            host_end,
2551            host,
2552            port: None,
2553            path_start: host_end,
2554            query_start: None,
2555            fragment_start: None,
2556        })
2557    }
2558
2559    /// Convert a directory name as `std::path::Path` into an URL in the `file` scheme.
2560    ///
2561    /// This returns `Err` if the given path is not absolute or,
2562    /// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
2563    ///
2564    /// Compared to `from_file_path`, this ensure that URL’s the path has a trailing slash
2565    /// so that the entire path is considered when using this URL as a base URL.
2566    ///
2567    /// For example:
2568    ///
2569    /// * `"index.html"` parsed with `Url::from_directory_path(Path::new("/var/www"))`
2570    ///   as the base URL is `file:///var/www/index.html`
2571    /// * `"index.html"` parsed with `Url::from_file_path(Path::new("/var/www"))`
2572    ///   as the base URL is `file:///var/index.html`, which might not be what was intended.
2573    ///
2574    /// Note that `std::path` does not consider trailing slashes significant
2575    /// and usually does not include them (e.g. in `Path::parent()`).
2576    ///
2577    /// This method is only available if the `std` Cargo feature is enabled.
2578    #[cfg(all(
2579        feature = "std",
2580        any(
2581            unix,
2582            windows,
2583            target_os = "redox",
2584            target_os = "wasi",
2585            target_os = "hermit"
2586        )
2587    ))]
2588    #[allow(clippy::result_unit_err)]
2589    pub fn from_directory_path<P: AsRef<std::path::Path>>(path: P) -> Result<Url, ()> {
2590        let mut url = Url::from_file_path(path)?;
2591        if !url.serialization.ends_with('/') {
2592            url.serialization.push('/')
2593        }
2594        Ok(url)
2595    }
2596
2597    /// Serialize with Serde using the internal representation of the `Url` struct.
2598    ///
2599    /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2600    /// for speed, compared to the `Deserialize` trait impl.
2601    ///
2602    /// This method is only available if the `serde` Cargo feature is enabled.
2603    #[cfg(feature = "serde")]
2604    #[deny(unused)]
2605    pub fn serialize_internal<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
2606    where
2607        S: serde::Serializer,
2608    {
2609        use serde::Serialize;
2610        // Destructuring first lets us ensure that adding or removing fields forces this method
2611        // to be updated
2612        let Url {
2613            ref serialization,
2614            ref scheme_end,
2615            ref username_end,
2616            ref host_start,
2617            ref host_end,
2618            ref host,
2619            ref port,
2620            ref path_start,
2621            ref query_start,
2622            ref fragment_start,
2623        } = *self;
2624        (
2625            serialization,
2626            scheme_end,
2627            username_end,
2628            host_start,
2629            host_end,
2630            host,
2631            port,
2632            path_start,
2633            query_start,
2634            fragment_start,
2635        )
2636            .serialize(serializer)
2637    }
2638
2639    /// Serialize with Serde using the internal representation of the `Url` struct.
2640    ///
2641    /// The corresponding `deserialize_internal` method sacrifices some invariant-checking
2642    /// for speed, compared to the `Deserialize` trait impl.
2643    ///
2644    /// This method is only available if the `serde` Cargo feature is enabled.
2645    #[cfg(feature = "serde")]
2646    #[deny(unused)]
2647    pub fn deserialize_internal<'de, D>(deserializer: D) -> Result<Self, D::Error>
2648    where
2649        D: serde::Deserializer<'de>,
2650    {
2651        use serde::de::{Deserialize, Error};
2652        let (
2653            serialization,
2654            scheme_end,
2655            username_end,
2656            host_start,
2657            host_end,
2658            host,
2659            port,
2660            path_start,
2661            query_start,
2662            fragment_start,
2663        ) = Deserialize::deserialize(deserializer)?;
2664        let url = Url {
2665            serialization,
2666            scheme_end,
2667            username_end,
2668            host_start,
2669            host_end,
2670            host,
2671            port,
2672            path_start,
2673            query_start,
2674            fragment_start,
2675        };
2676        if cfg!(debug_assertions) {
2677            url.check_invariants()
2678                .map_err(|reason| Error::custom(reason))?
2679        }
2680        Ok(url)
2681    }
2682
2683    /// Assuming the URL is in the `file` scheme or similar,
2684    /// convert its path to an absolute `std::path::Path`.
2685    ///
2686    /// **Note:** This does not actually check the URL’s `scheme`,
2687    /// and may give nonsensical results for other schemes.
2688    /// It is the user’s responsibility to check the URL’s scheme before calling this.
2689    ///
2690    /// ```
2691    /// # use url::Url;
2692    /// # let url = Url::parse("file:///etc/passwd").unwrap();
2693    /// let path = url.to_file_path();
2694    /// ```
2695    ///
2696    /// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
2697    /// `file:` URLs may have a non-local host),
2698    /// or if `Path::new_opt()` returns `None`.
2699    /// (That is, if the percent-decoded path contains a NUL byte or,
2700    /// for a Windows path, is not UTF-8.)
2701    ///
2702    /// This method is only available if the `std` Cargo feature is enabled.
2703    #[inline]
2704    #[cfg(all(
2705        feature = "std",
2706        any(
2707            unix,
2708            windows,
2709            target_os = "redox",
2710            target_os = "wasi",
2711            target_os = "hermit"
2712        )
2713    ))]
2714    #[allow(clippy::result_unit_err)]
2715    pub fn to_file_path(&self) -> Result<PathBuf, ()> {
2716        if let Some(segments) = self.path_segments() {
2717            let host = match self.host() {
2718                None | Some(Host::Domain("localhost")) => None,
2719                Some(_) if cfg!(windows) && self.scheme() == "file" => {
2720                    Some(&self.serialization[self.host_start as usize..self.host_end as usize])
2721                }
2722                _ => return Err(()),
2723            };
2724
2725            return file_url_segments_to_pathbuf(host, segments);
2726        }
2727        Err(())
2728    }
2729
2730    // Private helper methods:
2731
2732    #[inline]
2733    fn slice<R>(&self, range: R) -> &str
2734    where
2735        R: RangeArg,
2736    {
2737        range.slice_of(&self.serialization)
2738    }
2739
2740    #[inline]
2741    fn byte_at(&self, i: u32) -> u8 {
2742        self.serialization.as_bytes()[i as usize]
2743    }
2744}
2745
2746/// Parse a string as an URL, without a base URL or encoding override.
2747impl str::FromStr for Url {
2748    type Err = ParseError;
2749
2750    #[inline]
2751    fn from_str(input: &str) -> Result<Url, crate::ParseError> {
2752        Url::parse(input)
2753    }
2754}
2755
2756impl<'a> TryFrom<&'a str> for Url {
2757    type Error = ParseError;
2758
2759    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
2760        Url::parse(s)
2761    }
2762}
2763
2764/// Display the serialization of this URL.
2765impl fmt::Display for Url {
2766    #[inline]
2767    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
2768        fmt::Display::fmt(&self.serialization, formatter)
2769    }
2770}
2771
2772/// String conversion.
2773impl From<Url> for String {
2774    fn from(value: Url) -> String {
2775        value.serialization
2776    }
2777}
2778
2779/// Debug the serialization of this URL.
2780impl fmt::Debug for Url {
2781    #[inline]
2782    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
2783        formatter
2784            .debug_struct("Url")
2785            .field("scheme", &self.scheme())
2786            .field("cannot_be_a_base", &self.cannot_be_a_base())
2787            .field("username", &self.username())
2788            .field("password", &self.password())
2789            .field("host", &self.host())
2790            .field("port", &self.port())
2791            .field("path", &self.path())
2792            .field("query", &self.query())
2793            .field("fragment", &self.fragment())
2794            .finish()
2795    }
2796}
2797
/// URLs compare like their serialization.
///
/// Marker impl only: the comparison itself lives in the `PartialEq` impl,
/// which compares the `serialization` strings.
impl Eq for Url {}
2800
2801/// URLs compare like their serialization.
2802impl PartialEq for Url {
2803    #[inline]
2804    fn eq(&self, other: &Self) -> bool {
2805        self.serialization == other.serialization
2806    }
2807}
2808
2809/// URLs compare like their serialization.
2810impl Ord for Url {
2811    #[inline]
2812    fn cmp(&self, other: &Self) -> cmp::Ordering {
2813        self.serialization.cmp(&other.serialization)
2814    }
2815}
2816
2817/// URLs compare like their serialization.
2818impl PartialOrd for Url {
2819    #[inline]
2820    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
2821        Some(self.cmp(other))
2822    }
2823}
2824
2825/// URLs hash like their serialization.
2826impl hash::Hash for Url {
2827    #[inline]
2828    fn hash<H>(&self, state: &mut H)
2829    where
2830        H: hash::Hasher,
2831    {
2832        hash::Hash::hash(&self.serialization, state)
2833    }
2834}
2835
2836/// Return the serialization of this URL.
2837impl AsRef<str> for Url {
2838    #[inline]
2839    fn as_ref(&self) -> &str {
2840        &self.serialization
2841    }
2842}
2843
/// Internal abstraction over the `u32`-indexed range types accepted by `Url::slice`.
trait RangeArg {
    /// Returns the part of `s` selected by this range, interpreting the bounds as
    /// byte offsets. Panics like ordinary string slicing if the bounds are invalid.
    fn slice_of<'a>(&self, s: &'a str) -> &'a str;
}

/// `start..end`
impl RangeArg for Range<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let (from, to) = (self.start as usize, self.end as usize);
        &s[from..to]
    }
}

/// `start..`
impl RangeArg for RangeFrom<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let from = self.start as usize;
        &s[from..]
    }
}

/// `..end`
impl RangeArg for RangeTo<u32> {
    #[inline]
    fn slice_of<'a>(&self, s: &'a str) -> &'a str {
        let to = self.end as usize;
        &s[..to]
    }
}
2868
/// Serializes this URL into a `serde` stream.
///
/// The URL is written as a single string, the one returned by `as_str`.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl serde::Serialize for Url {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text = self.as_str();
        serializer.serialize_str(text)
    }
}
2881
/// Deserializes this URL from a `serde` stream.
///
/// The input is expected to be a string, which is parsed with `Url::parse`;
/// a parse failure is reported as a custom error naming both the parse error
/// and the offending string.
///
/// This implementation is only available if the `serde` Cargo feature is enabled.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for Url {
    fn deserialize<D>(deserializer: D) -> Result<Url, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        /// Visitor that turns a visited string into a `Url`.
        struct UrlVisitor;

        impl<'de> Visitor<'de> for UrlVisitor {
            type Value = Url;

            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                formatter.write_str("a string representing an URL")
            }

            fn visit_str<E: Error>(self, s: &str) -> Result<Self::Value, E> {
                match Url::parse(s) {
                    Ok(url) => Ok(url),
                    Err(err) => Err(E::custom(format!("{}: {:?}", err, s))),
                }
            }
        }

        deserializer.deserialize_str(UrlVisitor)
    }
}
2913
/// Appends the URL path for `path` to `serialization` (Unix-like targets).
///
/// Every component after the root is percent-encoded and written as `/component`;
/// a path consisting only of the root is written as a single `/`.
/// On WASI, components are converted lossily to UTF-8 before being encoded.
///
/// Returns the offset at which the host ends (the length of `serialization` on
/// entry) together with `HostInternal::None`, or `Err` if `path` is not absolute.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use parser::SPECIAL_PATH_SEGMENT;
    use percent_encoding::percent_encode;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;

    if !path.is_absolute() {
        return Err(());
    }

    // No host is written here, so the host ends where the path is about to start.
    let path_start = serialization.len();
    let host_end = to_u32(path_start).unwrap();

    // skip the root component
    for component in path.components().skip(1) {
        let name = component.as_os_str();
        serialization.push('/');
        #[cfg(not(target_os = "wasi"))]
        serialization.extend(percent_encode(name.as_bytes(), SPECIAL_PATH_SEGMENT));
        #[cfg(target_os = "wasi")]
        serialization.extend(percent_encode(
            name.to_string_lossy().as_bytes(),
            SPECIAL_PATH_SEGMENT,
        ));
    }

    // Each component contributes at least its leading `/`, so an unchanged length
    // means there were no components after the root.
    if serialization.len() == path_start {
        // An URL’s path must not be empty.
        serialization.push('/');
    }
    Ok((host_end, HostInternal::None))
}
2954
/// Windows flavour of `path_to_file_url_segments`: forwards to
/// `path_to_file_url_segments_windows`, which holds the actual implementation.
#[cfg(all(feature = "std", windows))]
fn path_to_file_url_segments(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    path_to_file_url_segments_windows(path, serialization)
}
2962
/// Appends the host (if any) and path for a Windows `path` to `serialization`.
///
/// * A disk prefix (`C:` or its verbatim form) yields no host and a path starting
///   with `/C:`.
/// * A UNC prefix (or its verbatim form) yields the server as host and the share as
///   first path segment.
///
/// The remaining components are percent-encoded and appended as `/component`.
/// Returns the offset at which the host ends and the internal host representation.
/// Fails if the path is not absolute, has any other kind of prefix (or none),
/// has a server name that does not parse as a host, or contains a server, share or
/// component that is not valid Unicode.
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn path_to_file_url_segments_windows(
    path: &Path,
    serialization: &mut String,
) -> Result<(u32, HostInternal), ()> {
    use crate::parser::PATH_SEGMENT;
    use percent_encoding::percent_encode;
    use std::path::{Component, Prefix};

    if !path.is_absolute() {
        return Err(());
    }
    let mut components = path.components();

    // Offset just past the `/` that a disk prefix writes, i.e. where the drive
    // letter starts in that case.
    let drive_start = serialization.len() + 1;

    let (host_end, host_internal): (u32, HostInternal) = match components.next() {
        Some(Component::Prefix(ref prefix)) => match prefix.kind() {
            Prefix::Disk(letter) | Prefix::VerbatimDisk(letter) => {
                let end = to_u32(serialization.len()).unwrap();
                serialization.push('/');
                serialization.push(letter as char);
                serialization.push(':');
                (end, HostInternal::None)
            }
            Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
                let server = server.to_str().ok_or(())?;
                let host = Host::parse(server).map_err(|_| ())?;
                write!(serialization, "{}", host).unwrap();
                let end = to_u32(serialization.len()).unwrap();
                serialization.push('/');
                let share = share.to_str().ok_or(())?;
                serialization.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
                (end, host.into())
            }
            _ => return Err(()),
        },
        _ => return Err(()),
    };

    let mut saw_segment = false;
    for component in components.filter(|c| *c != Component::RootDir) {
        saw_segment = true;
        // FIXME: somehow work with non-unicode?
        let segment = component.as_os_str().to_str().ok_or(())?;

        serialization.push('/');
        serialization.extend(percent_encode(segment.as_bytes(), PATH_SEGMENT));
    }

    // A windows drive letter must end with a slash.
    if serialization.len() > drive_start
        && parser::is_windows_drive_letter(&serialization[drive_start..])
        && !saw_segment
    {
        serialization.push('/');
    }

    Ok((host_end, host_internal))
}
3029
/// Builds an absolute path from the path segments of a `file` URL (Unix-like targets).
///
/// Each segment is percent-decoded and appended as `/segment`; on Redox the result
/// is additionally prefixed with `file:`. When the final decoded segment is exactly
/// a Windows drive letter (an ASCII letter followed by `:` or `|`), a trailing `/`
/// is appended.
///
/// Returns `Err` if `host` is `Some`, or, on WASI, if the decoded bytes are not
/// valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    if host.is_some() {
        return Err(());
    }

    let mut bytes = if cfg!(target_os = "redox") {
        b"file:".to_vec()
    } else {
        Vec::new()
    };

    // Remember where the final segment begins so that the drive-letter check below
    // looks at that segment as a whole rather than at the last two bytes of the path.
    let mut last_segment_start = bytes.len();
    for segment in segments {
        bytes.push(b'/');
        last_segment_start = bytes.len();
        bytes.extend(percent_decode(segment.as_bytes()));
    }

    // A windows drive letter must end with a slash.
    // Only a segment that *is* a drive letter qualifies: a name that merely ends in
    // a letter followed by `:` (e.g. `notes:`) must not gain a trailing slash.
    let last_segment = &bytes[last_segment_start..];
    let is_drive_letter = last_segment.len() == 2
        && last_segment[0].is_ascii_alphabetic()
        && matches!(last_segment[1], b':' | b'|');
    if is_drive_letter {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(bytes)
        .map(PathBuf::from)
        .map_err(|_| ())?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
3085
/// Windows flavour of `file_url_segments_to_pathbuf`: forwards to
/// `file_url_segments_to_pathbuf_windows`, which holds the actual implementation.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
3093
/// Builds a Windows path from the host and path segments of a `file` URL.
///
/// With a host, the result starts with `\\host`. Without one, the first segment
/// must be a drive letter, written either as `C:` or with the colon
/// percent-encoded (`C%3A` / `C%3a`). Every following segment is percent-decoded
/// and appended after a `\`.
///
/// Returns `Err` if the drive letter is missing or malformed, or if a decoded
/// segment is not valid UTF-8.
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: str::Split<'_, char>,
) -> Result<PathBuf, ()> {
    use percent_encoding::percent_decode;

    let mut string = match host {
        Some(host) => {
            let mut unc = String::from(r"\\");
            unc.push_str(host);
            unc
        }
        None => {
            let first = segments.next().ok_or(())?;
            // Whatever follows, the segment has to begin with an ASCII letter.
            if !first.starts_with(parser::ascii_alpha) {
                return Err(());
            }
            // The letter is a single byte, so the rest starts at byte 1.
            match &first.as_bytes()[1..] {
                [b':'] => first.to_owned(),
                [b'%', b'3', b'a'] | [b'%', b'3', b'A'] => {
                    let mut drive = first[..1].to_owned();
                    drive.push(':');
                    drive
                }
                _ => return Err(()),
            }
        }
    };

    for segment in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded =
            String::from_utf8(percent_decode(segment.as_bytes()).collect()).map_err(|_| ())?;
        string.push_str(&decoded);
    }

    let path = PathBuf::from(string);
    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );
    Ok(path)
}
3148
/// Implementation detail of `Url::query_pairs_mut`. Typically not used directly.
#[derive(Debug)]
pub struct UrlQuery<'a> {
    // The URL being edited. Becomes `None` once `finish` or `drop` has taken it,
    // so that the fragment is restored only once.
    url: Option<&'a mut Url>,
    // Value handed to `Url::restore_already_parsed_fragment` when this is finished
    // or dropped. Presumably the fragment set aside while the query is edited;
    // confirm where `UrlQuery` is constructed.
    fragment: Option<String>,
}
3155
3156// `as_mut_string` string here exposes the internal serialization of an `Url`,
3157// which should not be exposed to users.
3158// We achieve that by not giving users direct access to `UrlQuery`:
3159// * Its fields are private
3160//   (and so can not be constructed with struct literal syntax outside of this crate),
3161// * It has no constructor
3162// * It is only visible (on the type level) to users in the return type of
3163//   `Url::query_pairs_mut` which is `Serializer<UrlQuery>`
3164// * `Serializer` keeps its target in a private field
3165// * Unlike in other `Target` impls, `UrlQuery::finished` does not return `Self`.
3166impl<'a> form_urlencoded::Target for UrlQuery<'a> {
3167    fn as_mut_string(&mut self) -> &mut String {
3168        &mut self.url.as_mut().unwrap().serialization
3169    }
3170
3171    fn finish(mut self) -> &'a mut Url {
3172        let url = self.url.take().unwrap();
3173        url.restore_already_parsed_fragment(self.fragment.take());
3174        url
3175    }
3176
3177    type Finished = &'a mut Url;
3178}
3179
3180impl<'a> Drop for UrlQuery<'a> {
3181    fn drop(&mut self) {
3182        if let Some(url) = self.url.take() {
3183            url.restore_already_parsed_fragment(self.fragment.take())
3184        }
3185    }
3186}