EmailLexer.php 6.3 KB
<?php

namespace Egulias\EmailValidator;

use Doctrine\Common\Lexer\AbstractLexer;

class EmailLexer extends AbstractLexer
{
    //ASCII values
    const C_DEL              = 127;
    const C_NUL              = 0;
    const S_AT               = 64;
    const S_BACKSLASH        = 92;
    const S_DOT              = 46;
    const S_DQUOTE           = 34;
    const S_SQUOTE           = 39;
    const S_BACKTICK         = 96;
    const S_OPENPARENTHESIS  = 49;
    const S_CLOSEPARENTHESIS = 261;
    const S_OPENBRACKET      = 262;
    const S_CLOSEBRACKET     = 263;
    const S_HYPHEN           = 264;
    const S_COLON            = 265;
    const S_DOUBLECOLON      = 266;
    const S_SP               = 267;
    const S_HTAB             = 268;
    const S_CR               = 269;
    const S_LF               = 270;
    const S_IPV6TAG          = 271;
    const S_LOWERTHAN        = 272;
    const S_GREATERTHAN      = 273;
    const S_COMMA            = 274;
    const S_SEMICOLON        = 275;
    const S_OPENQBRACKET     = 276;
    const S_CLOSEQBRACKET    = 277;
    const S_SLASH            = 278;
    const S_EMPTY            = null;
    const GENERIC            = 300;
    const CRLF               = 301;
    const INVALID            = 302;
    const ASCII_INVALID_FROM = 127;
    const ASCII_INVALID_TO   = 199;

    /**
     * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
     *
     * @var array
     */
    protected $charValue = array(
        '('    => self::S_OPENPARENTHESIS,
        ')'    => self::S_CLOSEPARENTHESIS,
        '<'    => self::S_LOWERTHAN,
        '>'    => self::S_GREATERTHAN,
        '['    => self::S_OPENBRACKET,
        ']'    => self::S_CLOSEBRACKET,
        ':'    => self::S_COLON,
        ';'    => self::S_SEMICOLON,
        '@'    => self::S_AT,
        '\\'   => self::S_BACKSLASH,
        '/'    => self::S_SLASH,
        ','    => self::S_COMMA,
        '.'    => self::S_DOT,
        "'"    => self::S_SQUOTE,
        "`"    => self::S_BACKTICK,
        '"'    => self::S_DQUOTE,
        '-'    => self::S_HYPHEN,
        '::'   => self::S_DOUBLECOLON,
        ' '    => self::S_SP,
        "\t"   => self::S_HTAB,
        "\r"   => self::S_CR,
        "\n"   => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '{'    => self::S_OPENQBRACKET,
        '}'    => self::S_CLOSEQBRACKET,
        ''     => self::S_EMPTY,
        '\0'   => self::C_NUL,
    );

    /**
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}|array<empty, empty>
     */
    protected $previous = [];

    /**
     * The last matched/seen token.
     *
     * @var array
     *
     * @psalm-var array{value:string, type:null|int, position:int}
     */
    public $token;

    /**
     * The next token in the input.
     *
     * @var array|null
     */
    public $lookahead;

    /**
     * @psalm-var array{value:'', type:null, position:0}
     */
    private static $nullToken = [
        'value' => '',
        'type' => null,
        'position' => 0,
    ];

    public function __construct()
    {
        $this->previous = $this->token = self::$nullToken;
        $this->lookahead = null;
    }

    /**
     * @return void
     */
    public function reset()
    {
        $this->hasInvalidTokens = false;
        parent::reset();
        $this->previous = $this->token = self::$nullToken;
    }

    /**
     * @return bool
     */
    public function hasInvalidTokens()
    {
        return $this->hasInvalidTokens;
    }

    /**
     * @param int $type
     * @throws \UnexpectedValueException
     * @return boolean
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type)
    {
        $search = clone $this;
        $search->skipUntil($type);

        if (!$search->lookahead) {
            throw new \UnexpectedValueException($type . ' not found');
        }
        return true;
    }

    /**
     * getPrevious
     *
     * @return array
     */
    public function getPrevious()
    {
        return $this->previous;
    }

    /**
     * moveNext
     *
     * @return boolean
     */
    public function moveNext()
    {
        $this->previous = $this->token;
        $hasNext = parent::moveNext();
        $this->token = $this->token ?: self::$nullToken;

        return $hasNext;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns()
    {
        return array(
            '[a-zA-Z_]+[46]?', //ASCII and domain literal
            '[^\x00-\x7F]',  //UTF-8
            '[0-9]+',
            '\r\n',
            '::',
            '\s+?',
            '.',
            );
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns()
    {
        return array('[\xA0-\xff]+');
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return integer
     */
    protected function getType(&$value)
    {
        if ($this->isNullType($value)) {
            return self::C_NUL;
        }

        if ($this->isValid($value)) {
            return $this->charValue[$value];
        }

        if ($this->isUTF8Invalid($value)) {
            $this->hasInvalidTokens = true;
            return self::INVALID;
        }

        return  self::GENERIC;
    }

    /**
     * @param string $value
     *
     * @return bool
     */
    protected function isValid($value)
    {
        if (isset($this->charValue[$value])) {
            return true;
        }

        return false;
    }

    /**
     * @param string $value
     * @return bool
     */
    protected function isNullType($value)
    {
        if ($value === "\0") {
            return true;
        }

        return false;
    }

    /**
     * @param string $value
     * @return bool
     */
    protected function isUTF8Invalid($value)
    {
        if (preg_match('/\p{Cc}+/u', $value)) {
            return true;
        }

        return false;
    }

    /**
     * @return string
     */
    protected function getModifiers()
    {
        return 'iu';
    }
}