//! ## Grammar
//! Parser for an email address (`addr-spec`) as defined in Section 3.4.1 of [`RFC5322`].
//! This crate implements only a subset of the grammar: it does not support folding white space
//! or comments in an email address, and the grammar rules that are defined only to preserve
//! backwards compatibility are not supported either. The implemented grammar is described below:
//!
//! ```text
//! ; non-terminals
//! addr-spec = local-part AT domain
//! local-part = dot-atom / quoted-string
//! domain = dot-atom / domain-literal
//! domain-literal = OPEN_BRACKET *DTEXT CLOSE_BRACKET
//! atom = 1*ATEXT
//! dot-atom-literal = DOT atom
//! dot-atom = atom *dot-atom-literal
//! quoted-pair = BACKSLASH ESCAPE
//! qcontent = QTEXT / quoted-pair
//! quoted-string = DQUOTE *qcontent DQUOTE
//!
//! ; terminals
//! AT = "@" ; @ character
//! OPEN_BRACKET = "[" ; square bracket open
//! CLOSE_BRACKET = "]" ; square bracket close
//! DTEXT = %d33-90 / %d94-126
//! ; Printable US-ASCII characters not
//! ; including "[", "]" or "\".
//! ATEXT = ALPHA / DIGIT / "!" /
//! "#" / "$" / "%" /
//! "&" / "'" / "*" /
//! "+" / "-" / "/" /
//! "=" / "?" / "^" /
//! "_" / "`" / "{" /
//! "|" / "}" / "~"
//! ; Printable US-ASCII characters not
//! ; including specials. Used for atoms.
//! SPECIALS = "(" / ")" / "<" / ">" / "@" /
//! "[" / "]" / ":" / ";" / "\" /
//! "," / "." / DQUOTE
//! ; Special characters that do not appear in
//! ; atext. Useful for tools that perform
//! ; lexical analysis: each character in
//! ; specials can be used to indicate a
//! ; tokenization point in lexical analysis.
//! BACKSLASH = "\" ; \ (backslash)
//! DOT = "." ; . (dot)
//! ESCAPE = VCHAR / WSP
//! QTEXT = %d33 / %d35-91 / %d93-126
//! ; Printable US-ASCII characters not
//! ; including "\" or the quote character.
//! DQUOTE = %x22 ; " (Double Quote)
//!
//! ; inline
//! VCHAR = %x21-7E ; visible (printing) characters
//! WSP = SP / HTAB ; white space
//! HTAB = %x09 ; horizontal tab
//! SP = %x20 ; space character
//! ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
//! DIGIT = %x30-39 ; 0-9
//! ```
//!
//! [`RFC5322`]: https://datatracker.ietf.org/doc/html/rfc5322#section-3.4.1
//!
//! ## Finite State Machine
//!
//! The above grammar defines a regular language, so we do not need to construct a separate lexer
//! and parser. An email address as defined above can be parsed using a finite automaton (regular
//! expressions would also do). In this crate, we construct a finite state machine (module `fsm`)
//! and either parse the given string into an email address or fail and emit an error.
//!
//! ```
//! use email_parser::Email;
//! let email: Email = "someone@example.com".parse().unwrap();
//! ```
use crate::fsm::{State, FSM};
use std::fmt::{Display, Formatter};
use std::str::FromStr;
use thiserror::Error;
/// Email parsing errors.
///
/// This is the error type produced when parsing a string into an `Email` fails.
#[derive(Error, Debug, Clone)]
pub enum Error {
/// Returned when the state machine yields no state at all for the input, i.e. there was
/// nothing to parse.
#[error("cannot parse empty email id")]
EmptyEmail,
/// Returned when the state reached after consuming the whole input is not a final state,
/// i.e. the input does not match the supported grammar.
#[error("invalid RFC5322 formatted email id")]
InvalidEmail,
}
/// Email parsing is accomplished using a finite state machine, which is defined in this module.
/// A finite automaton has several states and transitions. When the iterator is completely
/// consumed, the given string is a valid email address if the state reached is a final state.
mod fsm;
/// A validated email address; this is the core type of the crate.
///
/// Values are constructed by parsing a string. Both fields are private, so outside this module
/// an `Email` can only come from a successful parse, which means that holding one implies the
/// address was accepted as valid.
///
/// Equality is an exact, case-sensitive comparison of both parts as they were written in the
/// parsed input; no normalisation (such as lower-casing the domain) is applied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Email {
    /// The part of the address before the separating `@`.
    local: String,
    /// The part of the address after the separating `@`.
    domain: String,
}
/// Support parsing from string literal.
impl FromStr for Email {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let m = fsm::Machine::new(s);
let ref state = m.into_iter().last().ok_or(Error::EmptyEmail)?;
let (one, two) = State::is_final(state)
.then(|| s.split_once('@').unwrap())
.ok_or(Error::InvalidEmail)?;
Ok(Self {
local: one.to_owned(),
domain: two.to_owned(),
})
}
}
/// Support formatted output.
impl Display for Email {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
writeln!(f, "{}@{}", self.local, self.domain)
}
}