connection_string/
ado.rs

1use std::ops::{Deref, DerefMut};
2use std::str::FromStr;
3use std::{collections::HashMap, fmt};
4
5use crate::{bail, ensure};
6
/// A parsed ADO.net connection string, stored as a map of `key=value` pairs.
///
/// Keys are matched case-insensitively: the parser lowercases every key
/// before storing it. Values are stored as written, since the data source may
/// treat them as case-sensitive. Keys and values may both contain whitespace.
///
/// The underlying map is reachable through `Deref` / `DerefMut`.
///
/// # Limitations
///
/// This parser does not support [Excel connection strings with extended properties](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#connecting-to-excel).
///
/// [Read more](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax)
#[derive(Debug)]
pub struct AdoNetString {
    /// Parsed pairs, keyed by the lowercased property name.
    pairs: HashMap<String, String>,
}
22
23impl Deref for AdoNetString {
24    type Target = HashMap<String, String>;
25
26    fn deref(&self) -> &Self::Target {
27        &self.pairs
28    }
29}
30
31impl DerefMut for AdoNetString {
32    fn deref_mut(&mut self) -> &mut Self::Target {
33        &mut self.pairs
34    }
35}
36
37// NOTE(yosh): Unfortunately we can't parse using `split(';')` because JDBC
38// strings support escaping. This means that `{;}` is valid and we need to write
39// an actual LR parser.
40impl FromStr for AdoNetString {
41    type Err = crate::Error;
42
43    fn from_str(input: &str) -> Result<Self, Self::Err> {
44        let mut lexer = Lexer::tokenize(input)?;
45        let mut pairs = HashMap::new();
46
47        // Iterate over `key=value` pairs.
48        for n in 0.. {
49            // [property=[value][;property=value][;]]
50            //                                       ^
51            if lexer.peek().kind() == &TokenKind::Eof {
52                break;
53            }
54
55            // [property=[value][;property=value][;]]
56            //                   ^
57            if n != 0 {
58                let err = "Key-value pairs must be separated by a `;`";
59                ensure!(lexer.next().kind() == &TokenKind::Semi, err);
60
61                // [property=value[;property=value][;]]
62                //                                  ^
63                if lexer.peek().kind() == &TokenKind::Eof {
64                    break;
65                }
66            }
67
68            // [property=[value][;property=value][;]]
69            //  ^^^^^^^^
70            let key = read_ident(&mut lexer)?;
71            ensure!(!key.is_empty(), "Key must not be empty");
72
73            // [property=[value][;property=value][;]]
74            //          ^
75            let err = "key-value pairs must be joined by a `=`";
76            ensure!(lexer.next().kind() == &TokenKind::Eq, err);
77
78            // [property=[value][;property=value][;]]
79            //           ^^^^^
80            let value = read_ident(&mut lexer)?;
81
82            let key = key.to_lowercase();
83            pairs.insert(key, value);
84        }
85        Ok(Self { pairs })
86    }
87}
88
89impl fmt::Display for AdoNetString {
90    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91        /// Escape all non-alphanumeric characters in a string..
92        fn escape(s: &str) -> String {
93            let mut output = String::with_capacity(s.len());
94            let mut escaping = false;
95            for b in s.chars() {
96                if matches!(b, ':' | '=' | '\\' | '/' | ';' | '{' | '}' | '[' | ']') {
97                    if !escaping {
98                        escaping = true;
99                        output.push('{');
100                    }
101                    output.push(b);
102                } else {
103                    if escaping {
104                        escaping = false;
105                        output.push('}');
106                    }
107                    output.push(b);
108                }
109            }
110            if escaping {
111                output.push('}');
112            }
113            output
114        }
115
116        let total_pairs = self.pairs.len();
117
118        for (i, (k, v)) in self.pairs.iter().enumerate() {
119            write!(f, "{}={}", escape(k.trim()), escape(v.trim()))?;
120
121            if i < total_pairs - 1 {
122                write!(f, ";")?;
123            }
124        }
125
126        Ok(())
127    }
128}
129
130/// Read either a valid key or value from the lexer.
131fn read_ident(lexer: &mut Lexer) -> crate::Result<String> {
132    let mut output = String::new();
133    loop {
134        let Token { kind, .. } = lexer.peek();
135        match kind {
136            TokenKind::Atom(c) => {
137                let _ = lexer.next();
138                output.push(c);
139            }
140            TokenKind::Escaped(seq) => {
141                let _ = lexer.next();
142                output.extend(seq);
143            }
144            TokenKind::Semi => break,
145            TokenKind::Eq => break,
146            TokenKind::Newline => {
147                let _ = lexer.next();
148                continue; // NOTE(yosh): unsure if this is the correct behavior
149            }
150            TokenKind::Whitespace => {
151                let _ = lexer.next();
152                match output.len() {
153                    0 => continue, // ignore leading whitespace
154                    _ => output.push(' '),
155                }
156            }
157            TokenKind::Eof => break,
158        }
159    }
160    output = output.trim_end().to_owned(); // remove trailing whitespace
161    Ok(output)
162}
163
164#[derive(Debug, Clone)]
165struct Token {
166    kind: TokenKind,
167    #[allow(dead_code)] // for future use...
168    loc: Location,
169}
170
171impl Token {
172    /// Create a new instance.
173    fn new(kind: TokenKind, loc: Location) -> Self {
174        Self { kind, loc }
175    }
176
177    fn kind(&self) -> &TokenKind {
178        &self.kind
179    }
180}
181
/// The kinds of token produced by the lexer.
#[derive(Debug, Clone, Eq, PartialEq)]
enum TokenKind {
    /// A `;` pair separator.
    Semi,
    /// A `=` between a key and its value.
    Eq,
    /// A single ordinary character.
    Atom(char),
    /// The contents of a quoted (`"..."`, `'...'`) or braced (`{...}`) literal.
    Escaped(Vec<char>),
    /// A `\n` character.
    Newline,
    /// A single space character.
    Whitespace,
    /// End of input.
    Eof,
}
192
193#[derive(Debug)]
194struct Lexer {
195    tokens: Vec<Token>,
196}
197
198impl Lexer {
199    /// Parse a string into a sequence of tokens.
200    fn tokenize(mut input: &str) -> crate::Result<Self> {
201        let mut tokens = vec![];
202        let mut loc = Location::default();
203        while !input.is_empty() {
204            let old_input = input;
205            let mut chars = input.chars();
206            let kind = match chars.next().unwrap() {
207                '"' => {
208                    let mut buf = Vec::new();
209                    loop {
210                        match chars.next() {
211                            None => bail!("unclosed double quote"),
212                            // When we read a double quote inside a double quote
213                            // we need to lookahead to determine whether it's an
214                            // escape sequence or a closing delimiter.
215                            Some('"') => match lookahead(&chars) {
216                                Some('"') => {
217                                    if buf.is_empty() {
218                                        break;
219                                    }
220                                    let _ = chars.next();
221                                    buf.push('"');
222                                    buf.push('"');
223                                }
224                                Some(_) | None => break,
225                            },
226                            Some(c) if c.is_ascii() => buf.push(c),
227                            _ => bail!("Invalid ado.net token"),
228                        }
229                    }
230                    TokenKind::Escaped(buf)
231                }
232                '\'' => {
233                    let mut buf = Vec::new();
234                    loop {
235                        match chars.next() {
236                            None => bail!("unclosed single quote"),
237                            // When we read a single quote inside a single quote
238                            // we need to lookahead to determine whether it's an
239                            // escape sequence or a closing delimiter.
240                            Some('\'') => match lookahead(&chars) {
241                                Some('\'') => {
242                                    if buf.is_empty() {
243                                        break;
244                                    }
245                                    let _ = chars.next();
246                                    buf.push('\'');
247                                    buf.push('\'');
248                                }
249                                Some(_) | None => break,
250                            },
251                            Some(c) if c.is_ascii() => buf.push(c),
252                            Some(c) => bail!("Invalid ado.net token `{}`", c),
253                        }
254                    }
255                    TokenKind::Escaped(buf)
256                }
257                '{' => {
258                    let mut buf = Vec::new();
259                    // Read alphanumeric ASCII including whitespace until we find a closing curly.
260                    loop {
261                        match chars.next() {
262                            None => bail!("unclosed escape literal"),
263                            Some('}') => break,
264                            Some(c) if c.is_ascii() => buf.push(c),
265                            Some(c) => bail!("Invalid ado.net token `{}`", c),
266                        }
267                    }
268                    TokenKind::Escaped(buf)
269                }
270                ';' => TokenKind::Semi,
271                '=' => TokenKind::Eq,
272                '\n' => TokenKind::Newline,
273                ' ' => TokenKind::Whitespace,
274                char if char.is_ascii() => TokenKind::Atom(char),
275                char => bail!("Invalid character found: {}", char),
276            };
277            tokens.push(Token::new(kind, loc));
278            input = chars.as_str();
279
280            let consumed = old_input.len() - input.len();
281            loc.advance(&old_input[..consumed]);
282        }
283        tokens.reverse();
284        Ok(Self { tokens })
285    }
286
287    /// Get the next token from the queue.
288    #[must_use]
289    pub(crate) fn next(&mut self) -> Token {
290        self.tokens.pop().unwrap_or(Token {
291            kind: TokenKind::Eof,
292            loc: Location::default(),
293        })
294    }
295
296    /// Peek at the next token in the queue.
297    #[must_use]
298    pub(crate) fn peek(&mut self) -> Token {
299        self.tokens.last().cloned().unwrap_or(Token {
300            kind: TokenKind::Eof,
301            loc: Location::default(),
302        })
303    }
304}
305
/// Return the character the iterator would yield next, without advancing it.
///
/// Returns `None` when the iterator is exhausted.
fn lookahead(iter: &std::str::Chars<'_>) -> Option<char> {
    // Cloning a `Chars` is cheap and leaves the caller's iterator untouched.
    iter.clone().next()
}
311
/// Position of a token inside the input string.
#[derive(Copy, Clone, Default, Debug)]
pub(crate) struct Location {
    /// Number of characters that precede the token.
    pub(crate) column: usize,
}

impl Location {
    /// Move the location forward by the number of characters in `text`.
    fn advance(&mut self, text: &str) {
        let width = text.chars().count();
        self.column += width;
    }
}
323
#[cfg(test)]
mod test {
    use super::AdoNetString;

    /// Assert that `key` (compared case-insensitively) maps to `value`.
    fn assert_kv(ado: &AdoNetString, key: &str, value: &str) {
        let found = ado.get(&key.to_lowercase()).map(String::as_str);
        assert_eq!(found, Some(value));
    }

    /// Assert every `(key, value)` entry of `expected` against `ado`.
    fn assert_pairs(ado: &AdoNetString, expected: &[(&str, &str)]) {
        for &(key, value) in expected {
            assert_kv(ado, key, value);
        }
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#windows-authentication-with-sqlclient
    #[test]
    fn windows_auth_with_sql_client() -> crate::Result<()> {
        let input = "Persist Security Info=False;Integrated Security=true;\nInitial Catalog=AdventureWorks;Server=MSSQL1";
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Persist Security Info", "False"),
                ("Integrated Security", "true"),
                ("Server", "MSSQL1"),
                ("Initial Catalog", "AdventureWorks"),
            ],
        );
        Ok(())
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#sql-server-authentication-with-sqlclient
    #[test]
    fn sql_server_auth_with_sql_client() -> crate::Result<()> {
        let input = "Persist Security Info=False;User ID=*****;Password=*****;Initial Catalog=AdventureWorks;Server=MySqlServer";
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Persist Security Info", "False"),
                ("User ID", "*****"),
                ("Password", "*****"),
                ("Initial Catalog", "AdventureWorks"),
                ("Server", "MySqlServer"),
            ],
        );
        Ok(())
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#connect-to-a-named-instance-of-sql-server
    #[test]
    fn connect_to_named_sql_server_instance() -> crate::Result<()> {
        let input = r#"Data Source=MySqlServer\MSSQL1;"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(&ado, &[("Data Source", r#"MySqlServer\MSSQL1"#)]);
        Ok(())
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#oledb-connection-string-syntax
    #[test]
    fn oledb_connection_string_syntax() -> crate::Result<()> {
        // An empty password and a space after the first `;`.
        let input = r#"Provider=Microsoft.Jet.OLEDB.4.0; Data Source=d:\Northwind.mdb;User ID=Admin;Password=;"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Provider", r#"Microsoft.Jet.OLEDB.4.0"#),
                ("Data Source", r#"d:\Northwind.mdb"#),
                ("User ID", r#"Admin"#),
                ("Password", r#""#),
            ],
        );

        // A key that itself contains a `:`.
        let input = r#"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=d:\Northwind.mdb;Jet OLEDB:System Database=d:\NorthwindSystem.mdw;User ID=*****;Password=*****;"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Provider", r#"Microsoft.Jet.OLEDB.4.0"#),
                ("Data Source", r#"d:\Northwind.mdb"#),
                ("Jet OLEDB:System Database", r#"d:\NorthwindSystem.mdw"#),
                ("User ID", r#"*****"#),
                ("Password", r#"*****"#),
            ],
        );
        Ok(())
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#using-datadirectory-to-connect-to-accessjet
    #[test]
    fn connect_to_access_jet() -> crate::Result<()> {
        let input = r#"Provider=Microsoft.Jet.OLEDB.4.0;  
                       Data Source=|DataDirectory|\Northwind.mdb;  
                       Jet OLEDB:System Database=|DataDirectory|\System.mdw;"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Data Source", r#"|DataDirectory|\Northwind.mdb"#),
                ("Provider", r#"Microsoft.Jet.OLEDB.4.0"#),
                ("Jet OLEDB:System Database", r#"|DataDirectory|\System.mdw"#),
            ],
        );
        Ok(())
    }

    // NOTE(yosh): we do not support Excel connection strings yet because the
    // double quote escaping is a small nightmare to parse.
    // // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#connecting-to-excel
    // #[test]
    // fn connect_to_excel() -> crate::Result<()> {
    //     let input = r#"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=D:\MyExcel.xls;Extended Properties=""Excel 8.0;HDR=Yes;IMEX=1"""#;
    //     let ado: AdoNetString = input.parse()?;
    //     assert_kv(&ado, "Provider", r#"Microsoft.Jet.OLEDB.4.0"#);
    //     assert_kv(&ado, "Data Source", r#"D:\MyExcel.xls"#);
    //     assert_kv(
    //         &ado,
    //         "Extended Properties",
    //         r#"""Excel 8.0;HDR=Yes;IMEX=1"""#,
    //     );
    //     Ok(())
    // }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#data-shape-provider-connection-string-syntax
    #[test]
    fn data_shape_provider() -> crate::Result<()> {
        let input = r#"Provider=MSDataShape;Data Provider=SQLOLEDB;Data Source=(local);Initial Catalog=pubs;Integrated Security=SSPI;"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Provider", r#"MSDataShape"#),
                ("Data Provider", r#"SQLOLEDB"#),
                ("Data Source", r#"(local)"#),
                ("Initial Catalog", r#"pubs"#),
                ("Integrated Security", r#"SSPI"#),
            ],
        );
        Ok(())
    }

    // This ODBC example exercises a brace-escaped value that contains a `;`.
    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#odbc-connection-strings
    #[test]
    fn odbc_connection_strings() -> crate::Result<()> {
        let input = r#"Driver={Microsoft Text Driver (*.txt; *.csv)};DBQ=d:\bin"#;
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Driver", r#"Microsoft Text Driver (*.txt; *.csv)"#),
                ("DBQ", r#"d:\bin"#),
            ],
        );
        Ok(())
    }

    // https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/connection-string-syntax#oracle-connection-strings
    #[test]
    fn oracle_connection_strings() -> crate::Result<()> {
        let input = "Data Source=Oracle9i;User ID=*****;Password=*****;";
        let ado = input.parse::<AdoNetString>()?;
        assert_pairs(
            &ado,
            &[
                ("Data Source", "Oracle9i"),
                ("User ID", "*****"),
                ("Password", "*****"),
            ],
        );
        Ok(())
    }

    // Formatting a parsed string must re-escape the `;` inside the value.
    #[test]
    fn display_with_escaping() -> crate::Result<()> {
        let input = "key=val{;}ue";
        let conn = input.parse::<AdoNetString>()?;

        assert_eq!(conn.to_string(), input);

        Ok(())
    }
}