! This file is part of toml-f.
! SPDX-Identifier: Apache-2.0 OR MIT
!
! Licensed under either of Apache License, Version 2.0 or MIT license
! at your option; you may not use this file except in compliance with
! the License.
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.

!> Provides tokenization for TOML documents.
!>
!> The lexer provides a way to turn a stream of characters into tokens which
!> are further processed by the parser and turned into actual TOML data structures.
!> In the current structure no knowledge about the character stream is required
!> in the parser to generate the data structures.
!>
!> The validity of all tokens can be guaranteed by the lexer; however, syntax errors
!> and semantic errors are not detected until the parser is run. Identification of
!> invalid tokens and recovery of the tokenization is done on a best effort basis.
!>
!> To avoid overflows in the parser due to deeply nested but unclosed groups, the
!> lexer will always tokenize a complete group to verify it is closed properly.
!> Unclosed groups will lead to the first token of the group getting invalidated,
!> to allow reporting in the parsing phase.
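!>
!> A minimal sketch of driving the lexer by hand (the parser normally does
!> this), assuming the public entities of this module are use-associated:
!>
!>```fortran
!>type(toml_lexer) :: lexer
!>type(toml_token) :: token
!>
!>call new_lexer_from_string(lexer, 'key = "value"')
!>do
!>   call lexer%next(token)
!>   print '(a)', stringify(token)
!>   if (token%kind == token_kind%eof) exit
!>end do
!>```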
module tomlf_de_lexer
   use tomlf_constants, only : tfc, tfi, tfr, TOML_BACKSPACE, TOML_TABULATOR, TOML_NEWLINE, &
      & TOML_CARRIAGE_RETURN, TOML_FORMFEED
   use tomlf_datetime, only : toml_datetime, toml_date, toml_time
   use tomlf_de_abc, only : abstract_lexer
   use tomlf_de_context, only : toml_context
   use tomlf_de_token, only : toml_token, stringify, token_kind, resize
   use tomlf_error, only : toml_error, toml_stat, make_error
   use tomlf_utils, only : read_whole_file, read_whole_line
   implicit none
   private

   public :: toml_lexer, new_lexer_from_file, new_lexer_from_unit, new_lexer_from_string
   public :: toml_token, stringify, token_kind

   !> Possible characters encountered in a lexeme
   type :: enum_char
      character(1, tfc) :: space = tfc_" "
      character(1, tfc) :: hash = tfc_"#"
      character(1, tfc) :: squote = tfc_"'"
      character(3, tfc) :: squote3 = repeat(tfc_"'", 3)
      character(1, tfc) :: dquote = tfc_""""
      character(3, tfc) :: dquote3 = repeat(tfc_"""", 3)
      character(1, tfc) :: backslash = tfc_"\"
      character(1, tfc) :: dot = tfc_"."
      character(1, tfc) :: comma = tfc_","
      character(1, tfc) :: equal = tfc_"="
      character(1, tfc) :: lbrace = tfc_"{"
      character(1, tfc) :: rbrace = tfc_"}"
      character(1, tfc) :: lbracket = tfc_"["
      character(1, tfc) :: rbracket = tfc_"]"
      character(1, tfc) :: newline = achar(10, kind=tfc)
      character(1, tfc) :: formfeed = achar(12, kind=tfc)
      character(1, tfc) :: carriage_return = achar(13, kind=tfc)
      character(1, tfc) :: bspace = achar(8, kind=tfc)
      character(1, tfc) :: tab = achar(9, kind=tfc)
      character(1, tfc) :: plus = tfc_"+"
      character(1, tfc) :: minus = tfc_"-"
      character(12, tfc) :: literal = tfc_"0123456789-_"
   end type enum_char

   !> Actual enumerator for possible characters
   type(enum_char), parameter :: char_kind = enum_char()

   !> Set of characters marking a terminated lexeme, mainly used for values and to
   !> obtain boundaries of invalid tokens.
   character(*, tfc), parameter :: terminated = &
      & char_kind%space//char_kind%tab//char_kind%newline//char_kind%carriage_return//&
      & char_kind%hash//char_kind%rbrace//char_kind%rbracket//char_kind%comma//&
      & char_kind%equal

   !> Scopes to identify the state of the lexer.
   type :: enum_scope
      !> Table scopes allow keypaths; in this scenario only bare keys, strings and
      !> literals are allowed. Furthermore, dots become special characters to separate
      !> the keypaths.
      integer :: table = 1

      !> Terminates a table scope and opens a value scope. Here usual values, like
      !> integers, floats or strings, are allowed.
      integer :: equal = 2

      !> Opens an array scope, similar to the value scope for allowed characters but with
      !> simplified closing rules to allow handling of values and inline tables in arrays.
      integer :: array = 3
   end type enum_scope

   !> Actual enumerator for auxiliary scopes
   type(enum_scope), parameter :: lexer_scope = enum_scope()
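   ! As an illustration of the scope stack: lexing `key = [1, {k = 2}]` pushes
   ! lexer_scope%equal at the `=`, lexer_scope%array at the `[` and
   ! lexer_scope%table at the `{`; each scope is popped again by the matching
   ! closing token (or by a newline or comma for value scopes).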
   !> Item identifying the scope and the corresponding token index
   type :: stack_item
      !> Current scope of the item, can only be removed with matching scope
      integer :: scope

      !> Token index in the buffer of the lexer, used for invalidation of unclosed groups
      integer :: token
   end type stack_item

   !> Reallocate the stack of scopes
   interface resize
      module procedure :: resize_scope
   end interface

   !> Tokenizer for TOML documents.
   type, extends(abstract_lexer) :: toml_lexer
      !> Name of the source file, used for error reporting
      character(len=:), allocatable :: filename

      !> Current internal position in the source chunk
      integer :: pos = 0

      !> Current source chunk, for convenience stored as character array rather than string
      character(:, tfc), allocatable :: chunk

      !> Last scope of the lexer
      integer :: top = 0

      !> Stack of scopes, used to identify the current state of the lexer
      type(stack_item), allocatable :: stack(:)

      !> Index in the buffer queue
      integer :: buffer = 0

      !> Double-ended queue for buffering tokens
      type(toml_context) :: context
   contains
      !> Obtain the next token
      procedure :: next

      !> Extract a string from a token
      procedure :: extract_string

      !> Extract an integer from a token
      procedure :: extract_integer

      !> Extract a float from a token
      procedure :: extract_float

      !> Extract a boolean from a token
      procedure :: extract_bool

      !> Extract a timestamp from a token
      procedure :: extract_datetime

      !> Get information about source
      procedure :: get_info
   end type toml_lexer

contains

!> Create a new instance of a lexer by reading from a file
subroutine new_lexer_from_file(lexer, filename, error)
   !> Instance of the lexer
   type(toml_lexer), intent(out) :: lexer
   !> Name of the file to read from
   character(len=*), intent(in) :: filename
   !> Error code
   type(toml_error), allocatable, intent(out) :: error

   integer :: stat

   lexer%pos = 0
   lexer%filename = filename
   call resize(lexer%stack)
   call read_whole_file(filename, lexer%chunk, stat)

   if (stat /= 0) then
      call make_error(error, "Could not open file '"//filename//"'")
   end if
end subroutine new_lexer_from_file

!> Create a new instance of a lexer by reading from a unit.
!>
!> Currently, only sequential access units can be processed by this constructor.
subroutine new_lexer_from_unit(lexer, io, error)
   !> Instance of the lexer
   type(toml_lexer), intent(out) :: lexer
   !> Unit to read from
   integer, intent(in) :: io
   !> Error code
   type(toml_error), allocatable, intent(out) :: error

   character(:, tfc), allocatable :: source, line
   integer, parameter :: bufsize = 512
   character(bufsize, tfc) :: filename, mode
   integer :: stat

   inquire(unit=io, access=mode, name=filename)
   select case(trim(mode))
   case default
      stat = 1

   case("sequential", "SEQUENTIAL")
      allocate(character(0) :: source)
      do
         call read_whole_line(io, line, stat)
         if (stat > 0) exit
         source = source // line // TOML_NEWLINE
         if (stat < 0) then
            if (is_iostat_end(stat)) stat = 0
            exit
         end if
      end do
      call new_lexer_from_string(lexer, source)
   end select
   if (len_trim(filename) > 0) lexer%filename = trim(filename)

   if (stat /= 0) then
      call make_error(error, "Failed to read from unit")
   end if
end subroutine new_lexer_from_unit

!> Create a new instance of a lexer by reading from a string.
subroutine new_lexer_from_string(lexer, string)
   !> Instance of the lexer
   type(toml_lexer), intent(out) :: lexer
   !> String to read from
   character(*, tfc), intent(in) :: string

   integer :: length

   length = len(string)
   lexer%pos = 0
   lexer%buffer = 0
   allocate(character(length) :: lexer%chunk)
   lexer%chunk(:length) = string
   call resize(lexer%stack)
end subroutine new_lexer_from_string

!> Advance the lexer to the next token.
subroutine next(lexer, token)
   !> Instance of the lexer
   class(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   if (lexer%buffer >= lexer%context%top) then
      call fill_buffer(lexer)
   end if

   lexer%buffer = lexer%buffer + 1
   token = lexer%context%token(lexer%buffer)
end subroutine next

!> Fill the buffer with tokens; this routine will attempt to create as many tokens as
!> necessary to determine whether all opened groups are closed properly.
!>
!> The state of the buffer can be changed while this routine is running; therefore,
!> accessing the buffer concurrently is not allowed.
subroutine fill_buffer(lexer)
   !> Instance of the lexer
   class(toml_lexer), intent(inout) :: lexer

   type(toml_token) :: token
   integer :: stack_top, it

   lexer%buffer = 0
   lexer%context%top = 0
   stack_top = lexer%top

   ! Tokenization will always cover a complete scope
   do while(lexer%top >= stack_top .and. token%kind /= token_kind%eof)
      call next_token(lexer, token)
      call lexer%context%push_back(token)
   end do

   ! Flag all incomplete inline table and array scopes for the parser
   if (lexer%top > stack_top) then
      do it = lexer%top, stack_top + 1, -1
         select case(lexer%stack(it)%scope)
         case(lexer_scope%table, lexer_scope%array)
            lexer%context%token(lexer%stack(it)%token)%kind = token_kind%unclosed
         end select
      end do
   end if
end subroutine fill_buffer
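! As an illustration of the invalidation strategy: for the input `a = [1, 2`
! the array scope opened at `[` is never popped, so fill_buffer rewrites the
! buffered lbracket token to token_kind%unclosed before the parser sees it.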
!> Actually generate the next token, unbuffered version
subroutine next_token(lexer, token)
   !> Instance of the lexer
   class(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   integer :: prev, pos

   ! Consume current token
   lexer%pos = lexer%pos + token%last - token%first + 1
   prev = lexer%pos
   pos = lexer%pos

   ! If lexer is exhausted, return EOF as early as possible
   if (pos > len(lexer%chunk)) then
      call pop(lexer, lexer_scope%equal)
      token = toml_token(token_kind%eof, prev, pos)
      return
   end if

   select case(peek(lexer, pos))
   case(char_kind%hash)
      do while(all(peek(lexer, pos+1) /= [char_kind%carriage_return, char_kind%newline]) &
         & .and. pos <= len(lexer%chunk))
         pos = pos + 1
      end do
      token = toml_token(token_kind%comment, prev, pos)

   case(char_kind%space, char_kind%tab)
      do while(any(match(lexer, pos+1, [char_kind%space, char_kind%tab])) &
         & .and. pos <= len(lexer%chunk))
         pos = pos + 1
      end do
      token = toml_token(token_kind%whitespace, prev, pos)

   case(char_kind%newline)
      call pop(lexer, lexer_scope%equal)
      token = toml_token(token_kind%newline, prev, pos)

   case(char_kind%carriage_return)
      if (match(lexer, pos+1, char_kind%newline)) then
         pos = pos + 1
         call pop(lexer, lexer_scope%equal)
         token = toml_token(token_kind%newline, prev, pos)
      else
         token = toml_token(token_kind%invalid, prev, pos)
      end if

   case(char_kind%dot)
      if (view_scope(lexer) == lexer_scope%table) then
         token = toml_token(token_kind%dot, prev, pos)
      else
         token = toml_token(token_kind%invalid, prev, pos)
      end if

   case(char_kind%comma)
      call pop(lexer, lexer_scope%equal)
      token = toml_token(token_kind%comma, prev, pos)

   case(char_kind%equal)
      token = toml_token(token_kind%equal, prev, pos)
      call push_back(lexer, lexer_scope%equal, lexer%context%top + 1)

   case(char_kind%lbrace)
      token = toml_token(token_kind%lbrace, prev, pos)
      call push_back(lexer, lexer_scope%table, lexer%context%top + 1)

   case(char_kind%rbrace)
      call pop(lexer, lexer_scope%equal)
      call pop(lexer, lexer_scope%table)
      token = toml_token(token_kind%rbrace, prev, pos)

   case(char_kind%lbracket)
      token = toml_token(token_kind%lbracket, prev, pos)
      if (any(view_scope(lexer) == [lexer_scope%equal, lexer_scope%array])) then
         call push_back(lexer, lexer_scope%array, lexer%context%top + 1)
      end if

   case(char_kind%rbracket)
      call pop(lexer, lexer_scope%array)
      token = toml_token(token_kind%rbracket, prev, pos)

   case(char_kind%squote)
      call next_sstring(lexer, token)

   case(char_kind%dquote)
      call next_dstring(lexer, token)

   case default
      if (view_scope(lexer) == lexer_scope%table) then
         call next_keypath(lexer, token)
      else
         call next_literal(lexer, token)
      end if
   end select
end subroutine next_token

!> Process next literal string token, can produce normal literals and multiline literals
subroutine next_sstring(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   character(1, tfc) :: ch
   integer :: prev, pos, it
   logical :: valid

   prev = lexer%pos
   pos = lexer%pos

   if (all(match(lexer, [pos+1, pos+2], char_kind%squote))) then
      pos = pos + 3
      pos = strstr(lexer%chunk(pos:), char_kind%squote3) + pos - 1
      if (pos < prev + 3) then
         token = toml_token(token_kind%invalid, prev, len(lexer%chunk))
         return
      end if

      do it = 1, 2
         if (match(lexer, pos+3, char_kind%squote)) pos = pos + 1
      end do

      valid = .true.
      do it = prev + 3, pos - 1
         ch = peek(lexer, it)
         valid = valid .and. valid_string(ch)
      end do

      token = toml_token(merge(token_kind%mliteral, token_kind%invalid, valid), prev, pos+2)
      return
   end if

   valid = .true.

   do while(pos < len(lexer%chunk))
      pos = pos + 1
      ch = peek(lexer, pos)
      valid = valid .and. valid_string(ch)
      if (ch == char_kind%squote) exit
      if (ch == char_kind%newline) then
         pos = pos - 1
         valid = .false.
         exit
      end if
   end do

   valid = valid .and. peek(lexer, pos) == char_kind%squote .and. pos /= prev
   token = toml_token(merge(token_kind%literal, token_kind%invalid, valid), prev, pos)
end subroutine next_sstring

!> Process next string token, can produce normal string and multiline string tokens
subroutine next_dstring(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   character(1, tfc) :: ch
   character(*, tfc), parameter :: hexnum = "0123456789ABCDEF", valid_escape = "btnfr\"""
   integer :: prev, pos, expect, it, hex
   logical :: escape, valid, space

   prev = lexer%pos
   pos = lexer%pos
   hex = 0

   if (all(match(lexer, [pos+1, pos+2], char_kind%dquote))) then
      pos = pos + 3
      do
         it = strstr(lexer%chunk(pos:), char_kind%dquote3)
         pos = it + pos - 1
         if (pos < prev + 3 .or. it == 0) then
            token = toml_token(token_kind%invalid, prev, len(lexer%chunk))
            return
         end if

         if (match(lexer, pos-1, char_kind%backslash)) then
            pos = pos + 1
            cycle
         end if

         do it = 1, 2
            if (match(lexer, pos+3, char_kind%dquote)) pos = pos + 1
         end do
         exit
      end do

      valid = .true.
      escape = .false.
      space = .false.
      expect = 0

      do it = prev + 3, pos - 1
         ch = peek(lexer, it)

         if (escape) then
            space = verify(ch, char_kind%space//char_kind%tab//&
               & char_kind%carriage_return//char_kind%newline) == 0
         end if

         if (space) then
            escape = .false.
            if (ch == char_kind%newline) then
               if (expect > 0) expect = expect - 1
               space = .false.
               cycle
            end if

            if (verify(ch, char_kind%space//char_kind%tab) == 0 .and. expect == 0) cycle
            if (ch == char_kind%carriage_return) then
               expect = 1
               cycle
            end if

            valid = .false.
            space = .false.
            expect = 0
            cycle
         end if

         valid = valid .and. valid_string(ch)
         if (escape) then
            escape = .false.
            space = .false.
            if (verify(ch, valid_escape) == 0) cycle
            if (ch == "u") then
               expect = 4
               hex = pos + 1
               cycle
            end if
            if (ch == "U") then
               expect = 8
               hex = pos + 1
               cycle
            end if
            valid = .false.
            cycle
         end if

         if (expect > 0) then
            expect = expect - 1
            valid = valid .and. verify(ch, hexnum) == 0
            if (expect == 0) valid = valid .and. verify_ucs(lexer%chunk(hex:pos))
            cycle
         end if

         escape = ch == char_kind%backslash
      end do

      ! Check for any unfinished escape sequences
      valid = valid .and. expect == 0 .and. .not.(escape.or.space)

      token = toml_token(merge(token_kind%mstring, token_kind%invalid, valid), prev, pos+2)
      return
   end if

   valid = .true.
   escape = .false.
   expect = 0

   do while(pos < len(lexer%chunk))
      pos = pos + 1
      ch = peek(lexer, pos)
      valid = valid .and. valid_string(ch)

      if (escape) then
         escape = .false.
         if (verify(ch, valid_escape) == 0) cycle
         if (ch == "u") then
            expect = 4
            hex = pos + 1
            cycle
         end if
         if (ch == "U") then
            expect = 8
            hex = pos + 1
            cycle
         end if
         valid = .false.
         cycle
      end if

      if (expect > 0) then
         expect = expect - 1
         valid = valid .and. verify(ch, hexnum) == 0
         if (expect == 0) valid = valid .and. verify_ucs(lexer%chunk(hex:pos))
         cycle
      end if

      escape = ch == char_kind%backslash
      if (ch == char_kind%dquote) exit
      if (ch == char_kind%newline) then
         pos = pos - 1
         valid = .false.
         exit
      end if
   end do

   valid = valid .and. peek(lexer, pos) == char_kind%dquote .and. pos /= prev
   token = toml_token(merge(token_kind%string, token_kind%invalid, valid), prev, pos)
end subroutine next_dstring

!> Validate characters in string, non-printable characters are invalid in this context
pure function valid_string(ch) result(valid)
   character(1, tfc), intent(in) :: ch
   logical :: valid

   character(1, tfc), parameter :: x00 = achar(int(z"00")), x08 = achar(int(z"08")), &
      & x0b = achar(int(z"0b")), x1f = achar(int(z"1f")), x7f = achar(int(z"7f"))

   valid = &
      & .not.(x00 <= ch .and. ch <= x08) .and. &
      & .not.(x0b <= ch .and. ch <= x1f) .and. &
      & ch /= x7f
end function

!> Process next bare key token, produces keypath tokens.
subroutine next_keypath(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   logical :: valid
   integer :: prev, pos
   character(1, tfc) :: ch

   prev = lexer%pos
   pos = lexer%pos

   ch = peek(lexer, pos)
   valid = (tfc_"A" <= ch .and. ch <= tfc_"Z") &
      & .or. (tfc_"a" <= ch .and. ch <= tfc_"z") &
      & .or. (verify(ch, char_kind%literal) == 0)

   do while(verify(peek(lexer, pos+1), terminated//char_kind%dot) > 0)
      pos = pos + 1
      ch = peek(lexer, pos)

      if (tfc_"A" <= ch .and. ch <= tfc_"Z") cycle
      if (tfc_"a" <= ch .and. ch <= tfc_"z") cycle
      if (verify(ch, char_kind%literal) == 0) cycle

      valid = .false.
      cycle
   end do

   token = toml_token(merge(token_kind%keypath, token_kind%invalid, valid), prev, pos)
end subroutine next_keypath

!> Identify literal values, produces integer, float, boolean, and datetime tokens.
subroutine next_literal(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   integer :: prev, pos
   integer, parameter :: offset(*) = [0, 1, 2, 3, 4, 5]
   character(1, tfc), parameter :: &
      & true(4) = ["t", "r", "u", "e"], false(5) = ["f", "a", "l", "s", "e"]

   prev = lexer%pos
   pos = lexer%pos

   select case(peek(lexer, pos))
   case("t")
      if (match_all(lexer, pos+offset(:4), true) .and. &
         & verify(peek(lexer, pos+4), terminated) == 0) then
         token = toml_token(token_kind%bool, prev, pos+3)
         return
      end if

   case("f")
      if (match_all(lexer, pos+offset(:5), false) .and. &
         & verify(peek(lexer, pos+5), terminated) == 0) then
         token = toml_token(token_kind%bool, prev, pos+4)
         return
      end if

   case default
      call next_datetime(lexer, token)
      if (token%kind == token_kind%datetime) return

      call next_integer(lexer, token)
      if (token%kind == token_kind%int) return

      call next_float(lexer, token)
      if (token%kind == token_kind%float) return
   end select

   ! If the current token is invalid, advance to the next terminator
   do while(verify(peek(lexer, pos+1), terminated) > 0)
      pos = pos + 1
   end do
   token = toml_token(token_kind%invalid, prev, pos)
end subroutine next_literal
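! As an illustration of the radix handling below: `0xdead_beef`, `0o755` and
! `0b1101_0101` all lex as token_kind%int, while a decimal literal with a
! leading zero such as `0123` is rejected as token_kind%invalid.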
!> Process integer tokens and binary, octal, and hexadecimal literals.
subroutine next_integer(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   character(*, tfc), parameter :: toml_base(4) = [&
      & "0123456789abcdefABCDEF", &
      & "0123456789000000000000", &
      & "0123456700000000000000", &
      & "0100000000000000000000"]
   integer, parameter :: b10 = 2, b16 = 1, b8 = 3, b2 = 4

   character(1, tfc) :: ch
   integer :: prev, pos, base
   logical :: underscore, okay

   prev = lexer%pos
   pos = lexer%pos
   okay = .true.
   underscore = .true.
   base = b10

   if (any(match(lexer, pos, ["+", "-"]))) then
      pos = pos + 1
   end if

   if (match(lexer, pos, "0")) then
      select case(peek(lexer, pos+1))
      case("x")
         okay = pos == prev
         base = b16
         pos = pos + 2
      case("o")
         okay = pos == prev
         base = b8
         pos = pos + 2
      case("b")
         okay = pos == prev
         base = b2
         pos = pos + 2
      case(char_kind%space, char_kind%tab, char_kind%newline, char_kind%carriage_return, &
            & char_kind%hash, char_kind%rbrace, char_kind%rbracket, char_kind%comma)
         token = toml_token(token_kind%int, prev, pos)
         return
      case default
         do while(verify(peek(lexer, pos), terminated) > 0)
            pos = pos + 1
         end do
         token = toml_token(token_kind%invalid, prev, pos-1)
         return
      end select
   end if

   do while(pos <= len(lexer%chunk))
      ch = peek(lexer, pos)
      if (ch == "_") then
         if (underscore) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         underscore = .true.
         pos = pos + 1
         cycle
      end if
      if (verify(ch, toml_base(base)) == 0) then
         pos = pos + 1
         underscore = .false.
         cycle
      end if
      okay = okay .and. verify(ch, terminated) == 0
      exit
   end do

   okay = .not.underscore .and. okay
   token = toml_token(merge(token_kind%int, token_kind%invalid, okay), prev, pos-1)
end subroutine next_integer

!> Process float tokens.
subroutine next_float(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   integer :: prev, pos
   logical :: plus_minus, underscore, point, expo, okay, zero, first
   character(1, tfc) :: ch
   integer, parameter :: offset(*) = [0, 1, 2]
   character(1, tfc), parameter :: nan(3) = ["n", "a", "n"], inf(3) = ["i", "n", "f"]

   prev = lexer%pos
   pos = lexer%pos
   point = .false.
   expo = .false.
   zero = .false.
   first = .true.
   underscore = .true.

   plus_minus = any(match(lexer, pos, ["+", "-"]))
   if (plus_minus) pos = pos + 1

   if (match_all(lexer, pos+offset, nan) .and. &
      & verify(peek(lexer, pos+3), terminated) == 0) then
      token = toml_token(token_kind%float, prev, pos+2)
      return
   end if

   if (match_all(lexer, pos+offset, inf) .and. &
      & verify(peek(lexer, pos+3), terminated) == 0) then
      token = toml_token(token_kind%float, prev, pos+2)
      return
   end if

   do while(pos <= len(lexer%chunk))
      ch = peek(lexer, pos)

      if (ch == "_") then
         if (underscore) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         underscore = .true.
         pos = pos + 1
         cycle
      end if

      if (ch == ".") then
         if (point .or. expo .or. underscore) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         zero = .false.
         underscore = .true.
         point = .true.
         pos = pos + 1
         cycle
      end if

      if (ch == "e" .or. ch == "E") then
         if (expo .or. underscore) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         zero = .false.
         underscore = .true.
         expo = .true.
         pos = pos + 1
         cycle
      end if

      if (ch == "+" .or. ch == "-") then
         if (.not.any(match(lexer, pos-1, ["e", "E"]))) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         underscore = .true.
         pos = pos + 1
         cycle
      end if

      if (verify(ch, "0123456789") == 0) then
         if (zero) then
            token = toml_token(token_kind%invalid, prev, pos)
            return
         end if
         zero = first .and. ch == "0"
         first = .false.
         pos = pos + 1
         underscore = .false.
         cycle
      end if

      exit
   end do

   okay = .not.underscore .and. (expo .or. point)
   token = toml_token(merge(token_kind%float, token_kind%invalid, okay), prev, pos-1)
end subroutine next_float
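! Illustrative examples of lexemes accepted below: `1979-05-27` (date),
! `07:32:00` (time), and `1979-05-27T07:32:00.999-07:00` (offset datetime
! with milliseconds).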
!> Find the next datetime expression
subroutine next_datetime(lexer, token)
   !> Instance of the lexer
   type(toml_lexer), intent(inout) :: lexer
   !> Current lexeme
   type(toml_token), intent(inout) :: token

   logical :: has_date, has_time, has_millisec, has_local, okay
   integer :: prev, pos, it
   integer, parameter :: offset(*) = [(it, it = 0, 10)], &
      & offset_date = 10, offset_time = 8, offset_local = 6
   character(*, tfc), parameter :: num = "0123456789"

   prev = lexer%pos
   pos = lexer%pos

   has_date = valid_date(peek(lexer, pos+offset(:offset_date)))
   if (has_date) then
      if (verify(peek(lexer, pos+offset_date), "Tt ") == 0 &
         & .and. pos + offset_date < len(lexer%chunk) &
         & .and. verify(peek(lexer, pos+offset_date+1), num) == 0) then
         pos = pos + offset_date + 1
      end if
   end if

   has_time = valid_time(peek(lexer, pos+offset(:offset_time)))
   if (has_time) then
      pos = pos + offset_time - 1
      if (match(lexer, pos+1, char_kind%dot)) then
         it = 1
         do while(verify(peek(lexer, pos+it+1), num) == 0)
            it = it + 1
         end do
         has_millisec = it > 1
         if (.not.has_millisec) then
            token = toml_token(token_kind%invalid, prev, prev)
            return
         end if
         pos = pos + it
      end if

      has_local = valid_local(peek(lexer, pos+offset(:offset_local)+1))
      if (has_local) then
         if (.not.has_date) then
            token = toml_token(token_kind%invalid, prev, prev)
            return
         end if
         pos = pos + offset_local
      else if (verify(peek(lexer, pos+1), "zZ") == 0) then
         pos = pos + 1
      end if
   end if

   if (.not.(has_time.or.has_date)) then
      token = toml_token(token_kind%invalid, prev, prev)
      return
   end if

   if (.not.has_time.and.has_date) pos = pos + offset_date - 1

   okay = verify(peek(lexer, pos+1), terminated) == 0 .and. pos <= len(lexer%chunk)
   token = toml_token(merge(token_kind%datetime, token_kind%invalid, okay), prev, pos)
end subroutine next_datetime

!> Validate a string as date
pure function valid_date(string) result(valid)
   !> Input string, 10 characters
   character(1, tfc), intent(in) :: string(:)
   !> Valid date
   logical :: valid

   integer :: it, val
   character(*, tfc), parameter :: num = "0123456789"
   integer :: year, month, day, mday
   logical :: leap

   valid = .false.
   if (any(string([5, 8]) /= "-")) return

   year = 0
   do it = 1, 4
      val = scan(num, string(it)) - 1
      if (val < 0) return
      year = year * 10 + val
   end do

   month = 0
   do it = 6, 7
      val = scan(num, string(it)) - 1
      if (val < 0) return
      month = month * 10 + val
   end do

   day = 0
   do it = 9, 10
      val = scan(num, string(it)) - 1
      if (val < 0) return
      day = day * 10 + val
   end do

   mday = 0
   select case(month)
   case(1, 3, 5, 7, 8, 10, 12)
      mday = 31
   case(2)
      leap = mod(year, 4) == 0 .and. (mod(year, 100) /= 0 .or. mod(year, 400) == 0)
      mday = merge(29, 28, leap)
   case(4, 6, 9, 11)
      mday = 30
   end select

   valid = day >= 1 .and. day <= mday
end function valid_date

!> Validate a string as time
function valid_time(string) result(valid)
   !> Input string, 8 characters
   character(1, tfc), intent(in) :: string(:)
   !> Valid time
   logical :: valid

   integer :: it, val
   character(*, tfc), parameter :: num = "0123456789"
   integer :: hour, minute, second

   valid = .false.
   if (any(string([3, 6]) /= ":")) return

   hour = 0
   do it = 1, 2
      val = scan(num, string(it)) - 1
      if (val < 0) return
      hour = hour * 10 + val
   end do

   minute = 0
   do it = 4, 5
      val = scan(num, string(it)) - 1
      if (val < 0) return
      minute = minute * 10 + val
   end do

   second = 0
   do it = 7, 8
      val = scan(num, string(it)) - 1
      if (val < 0) return
      second = second * 10 + val
   end do

   valid = second >= 0 .and. second < 60 &
      & .and. minute >= 0 .and. minute < 60 &
      & .and. hour >= 0 .and. hour < 24
end function valid_time

!> Validate a string as timezone
function valid_local(string) result(valid)
   !> Input string, 6 characters
   character(1, tfc), intent(in) :: string(:)
   !> Valid timezone
   logical :: valid

   integer :: it, val
   character(*, tfc), parameter :: num = "0123456789"
   integer :: hour, minute

   valid = .false.
   if (string(4) /= ":" .or. all(string(1) /= ["+", "-"])) return

   hour = 0
   do it = 2, 3
      val = scan(num, string(it)) - 1
      if (val < 0) return
      hour = hour * 10 + val
   end do

   minute = 0
   do it = 5, 6
      val = scan(num, string(it)) - 1
      if (val < 0) return
      minute = minute * 10 + val
   end do

   valid = minute >= 0 .and. minute < 60 &
      & .and. hour >= 0 .and. hour < 24
end function valid_local

!> Show current character
elemental function peek(lexer, pos) result(ch)
   !> Instance of the lexer
   type(toml_lexer), intent(in) :: lexer
   !> Position to fetch character from
   integer, intent(in) :: pos
   !> Character found
   character(1, tfc) :: ch

   if (pos <= len(lexer%chunk)) then
      ch = lexer%chunk(pos:pos)
   else
      ch = char_kind%space
   end if
end function peek

!> Compare a character
elemental function match(lexer, pos, kind)
   !> Instance of the lexer
   type(toml_lexer), intent(in) :: lexer
   !> Position to fetch character from
   integer, intent(in) :: pos
   !> Character to compare against
   character(1, tfc), intent(in) :: kind
   !> Characters match
   logical :: match

   match = peek(lexer, pos) == kind
end function match

!> Compare a set of characters
pure function match_all(lexer, pos, kind) result(match)
   !> Instance of the lexer
   type(toml_lexer), intent(in) :: lexer
   !> Position to fetch character from
   integer, intent(in) :: pos(:)
   !> Character to compare against
   character(1, tfc), intent(in) :: kind(:)
   !> Characters match
   logical :: match

   match = all(peek(lexer, pos) == kind)
end function match_all
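!> Find the first occurrence of a pattern in a string using the
!> Knuth-Morris-Pratt algorithm; returns 0 if the pattern is not found.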
pure function strstr(string, pattern) result(res)
   character(*, tfc), intent(in) :: string
   character(*, tfc), intent(in) :: pattern
   integer :: lps_array(len(pattern))
   integer :: res, s_i, p_i, length_string, length_pattern

   res = 0
   length_string = len(string)
   length_pattern = len(pattern)

   if (length_pattern > 0 .and. length_pattern <= length_string) then
      lps_array = compute_lps(pattern)

      s_i = 1
      p_i = 1
      do while(s_i <= length_string)
         if (string(s_i:s_i) == pattern(p_i:p_i)) then
            if (p_i == length_pattern) then
               res = s_i - length_pattern + 1
               exit
            end if
            s_i = s_i + 1
            p_i = p_i + 1
         else if (p_i > 1) then
            p_i = lps_array(p_i - 1) + 1
         else
            s_i = s_i + 1
         end if
      end do
   end if

contains

   !> Compute the longest proper prefix-suffix table used by the search loop
   pure function compute_lps(string) result(lps_array)
      character(*, tfc), intent(in) :: string
      integer :: lps_array(len(string))
      integer :: i, j, length_string

      length_string = len(string)

      if (length_string > 0) then
         lps_array(1) = 0

         i = 2
         j = 1
         do while (i <= length_string)
            if (string(j:j) == string(i:i)) then
               lps_array(i) = j
               i = i + 1
               j = j + 1
            else if (j > 1) then
               j = lps_array(j - 1) + 1
            else
               lps_array(i) = 0
               i = i + 1
            end if
         end do
      end if
   end function compute_lps
end function strstr

!> Extract string value of token, works for keypath, string, multiline string, literal,
!> and multiline literal tokens.
subroutine extract_string(lexer, token, string)
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Token to extract string value from
   type(toml_token), intent(in) :: token
   !> String value of token
   character(len=:), allocatable, intent(out) :: string

   integer :: it, length
   logical :: escape, leading_newline
   character(1, tfc) :: ch

   length = token%last - token%first + 1

   select case(token%kind)
   case(token_kind%string)
      string = ""
      escape = .false.
      it = token%first + 1
      do while(it <= token%last - 1)
         ch = peek(lexer, it)
         if (escape) then
            escape = .false.
            select case(ch)
            case("""", "\"); string = string // ch
            case("b"); string = string // TOML_BACKSPACE
            case("t"); string = string // TOML_TABULATOR
            case("n"); string = string // TOML_NEWLINE
            case("r"); string = string // TOML_CARRIAGE_RETURN
            case("f"); string = string // TOML_FORMFEED
            case("u"); string = string // convert_ucs(lexer%chunk(it+1:it+4)); it = it + 5
            case("U"); string = string // convert_ucs(lexer%chunk(it+1:it+8)); it = it + 9
            end select
         else
            escape = ch == char_kind%backslash
            if (.not.escape) string = string // ch
         end if
         it = it + 1
      end do

   case(token_kind%mstring)
      leading_newline = peek(lexer, token%first+3) == char_kind%newline
      string = ""
      escape = .false.
      it = token%first + merge(4, 3, leading_newline)
      do while(it <= token%last - 3)
         ch = peek(lexer, it)
         if (escape) then
            escape = .false.
            select case(ch)
            case("""", "\"); string = string // ch
            case("b"); string = string // TOML_BACKSPACE
            case("t"); string = string // TOML_TABULATOR
            case("n"); string = string // TOML_NEWLINE
            case("r"); string = string // TOML_CARRIAGE_RETURN
            case("f"); string = string // TOML_FORMFEED
            case("u"); string = string // convert_ucs(lexer%chunk(it+1:it+4)); it = it + 5
            case("U"); string = string // convert_ucs(lexer%chunk(it+1:it+8)); it = it + 9
            case(char_kind%space, char_kind%tab, char_kind%carriage_return)
               escape = .true.
            case(char_kind%newline)
               continue
            end select
         else
            escape = ch == char_kind%backslash
            if (.not.escape) string = string // ch
         end if
         it = it + 1
      end do

   case(token_kind%literal)
      allocate(character(length - 2)::string)
      string = lexer%chunk(token%first+1:token%last-1)

   case(token_kind%mliteral)
      leading_newline = peek(lexer, token%first+3) == char_kind%newline
      allocate(character(length - merge(7, 6, leading_newline))::string)
      string = lexer%chunk(token%first+merge(4, 3, leading_newline):token%last-3)

   case(token_kind%keypath)
      allocate(character(length)::string)
      string = lexer%chunk(token%first:token%last)
   end select
end subroutine extract_string

!> Extract integer value of token
subroutine extract_integer(lexer, token, val)
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Token to extract integer value from
   type(toml_token), intent(in) :: token
   !> Integer value of token
   integer(tfi), intent(out) :: val

   integer :: first, base, it, tmp
   logical :: minus
   character(1, tfc) :: ch
   character(*, tfc), parameter :: num = "0123456789abcdef"

   if (token%kind /= token_kind%int) return

   val = 0
   base = 10
   first = token%first

   if (any(peek(lexer, first) == ["+", "-"])) first = first + 1

   if (peek(lexer, first) == "0") then
      select case(peek(lexer, first + 1))
      case("x")
         first = first + 2
         base = 16
      case("o")
         first = first + 2
         base = 8
      case("b")
         first = first + 2
         base = 2
      case default
         return
      end select
   end if

   minus = match(lexer, token%first, char_kind%minus)

   do it = first, token%last
      ch = peek(lexer, it)
      if ("A" <= ch .and. ch <= "Z") ch = achar(iachar(ch) - iachar("A") + iachar("a"))
      tmp = scan(num(:abs(base)), ch) - 1
      if (tmp < 0) cycle
      val = val * base + merge(-tmp, tmp, minus)
   end do
end subroutine extract_integer
!> Extract floating point value of token
subroutine extract_float(lexer, token, val)
   ! Not usable since unsupported with GFortran on some platforms (MacOS/ppc)
   ! use, intrinsic :: ieee_arithmetic, only : ieee_value, ieee_quiet_nan, &
   !    & ieee_positive_inf, ieee_negative_inf
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Token to extract floating point value from
   type(toml_token), intent(in) :: token
   !> Floating point value of token
   real(tfr), intent(out) :: val

   integer :: first, it, ic
   character(len=token%last - token%first + 1) :: buffer
   character(1, tfc) :: ch

   if (token%kind /= token_kind%float) return

   first = token%first
   if (any(peek(lexer, first) == ["+", "-"])) first = first + 1

   if (match(lexer, first, "n")) then
      ! val = ieee_value(val, ieee_quiet_nan)
      buffer = "NaN"
      read(buffer, *, iostat=ic) val
      return
   end if

   if (match(lexer, first, "i")) then
      ! val = ieee_value(val, ieee_positive_inf)
      buffer = "Inf"
      read(buffer, *, iostat=ic) val
      if (match(lexer, token%first, char_kind%minus)) val = -val
      return
   end if

   ! ival = 0
   ! idot = 0
   !
   ! do it = first, token%last
   !    ch = peek(lexer, it)
   !    if (any(ch == [".", "e", "E"])) exit
   !    tmp = scan(num(:base), ch) - 1
   !    if (tmp < 0) cycle
   !    ival = ival * base + tmp
   ! end do
   ! first = it
   !
   ! if (ch == ".") then
   !    idot = 0
   !    do it = first, token%last
   !       ch = peek(lexer, it)
   !       if (any(ch == ["e", "E"])) exit
   !       tmp = scan(num(:base), ch) - 1
   !       if (tmp < 0) cycle
   !       idot = idot + 1
   !       ival = ival * base + tmp
   !    end do
   !    first = it
   ! end if
   !
   ! expo = 0
   ! if (any(ch == ["e", "E"])) then
   !    first = first + 1
   !    do it = first, token%last
   !       ch = peek(lexer, it)
   !       tmp = scan(num(:base), ch) - 1
   !       if (tmp < 0) cycle
   !       expo = expo * base + tmp
   !    end do
   !    if (match(lexer, first, char_kind%minus)) expo = -expo
   ! end if
   ! expo = expo - idot
   ! val = ival * 10.0_tfr ** expo  ! FIXME
   !
   ! if (match(lexer, token%first, char_kind%minus)) val = -val

   ! Strip underscores and defer to the runtime's list-directed read
   ic = 0
   do it = token%first, token%last
      ch = peek(lexer, it)
      if (ch == "_") cycle
      ic = ic + 1
      buffer(ic:ic) = ch
   end do
   read(buffer(:ic), *, iostat=it) val
end subroutine extract_float

!> Extract boolean value of token
subroutine extract_bool(lexer, token, val)
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Token to extract boolean value from
   type(toml_token), intent(in) :: token
   !> Boolean value of token
   logical, intent(out) :: val

   if (token%kind /= token_kind%bool) return

   val = peek(lexer, token%first) == "t"
end subroutine extract_bool

!> Extract datetime value of token
subroutine extract_datetime(lexer, token, val)
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Token to extract datetime value from
   type(toml_token), intent(in) :: token
   !> Datetime value of token
   type(toml_datetime), intent(out) :: val

   if (token%kind /= token_kind%datetime) return

   val = toml_datetime(lexer%chunk(token%first:token%last))
end subroutine extract_datetime

!> Push a new scope onto the lexer stack and record the token
pure subroutine push_back(lexer, scope, token)
   type(toml_lexer), intent(inout) :: lexer
   integer, intent(in) :: scope
   integer, intent(in) :: token

   lexer%top = lexer%top + 1
   if (lexer%top > size(lexer%stack)) call resize(lexer%stack)
   lexer%stack(lexer%top) = stack_item(scope, token)
end subroutine push_back

!> Pop a scope from the lexer stack in case the topmost scope matches the requested scope
subroutine pop(lexer, scope)
   type(toml_lexer), intent(inout) :: lexer
   integer, intent(in) :: scope

   if (lexer%top > 0) then
      if (lexer%stack(lexer%top)%scope == scope) lexer%top = lexer%top - 1
   end if
end subroutine pop

!> Peek at the topmost scope on the lexer stack
pure function view_scope(lexer) result(scope)
   type(toml_lexer), intent(in) :: lexer
   integer :: scope

   if (lexer%top > 0) then
      scope = lexer%stack(lexer%top)%scope
   else
      scope = lexer_scope%table
   end if
end function view_scope

!> Reallocate list of scopes
pure subroutine resize_scope(var, n)
   !> Instance of the array to be resized
   type(stack_item), allocatable, intent(inout) :: var(:)
   !> Dimension of the final array size
   integer, intent(in), optional :: n

   type(stack_item), allocatable :: tmp(:)
   integer :: this_size, new_size
   integer, parameter :: initial_size = 8

   if (allocated(var)) then
      this_size = size(var, 1)
      call move_alloc(var, tmp)
   else
      this_size = initial_size
   end if

   if (present(n)) then
      new_size = n
   else
      new_size = this_size + this_size/2 + 1
   end if

   allocate(var(new_size))

   if (allocated(tmp)) then
      this_size = min(size(tmp, 1), size(var, 1))
      var(:this_size) = tmp(:this_size)
      deallocate(tmp)
   end if
end subroutine resize_scope

!> Extract information about the source
subroutine get_info(lexer, meta, output)
   !> Instance of the lexer
   class(toml_lexer), intent(in) :: lexer
   !> Query about the source
   character(*, tfc), intent(in) :: meta
   !> Metadata about the source
   character(:, tfc), allocatable, intent(out) :: output

   select case(meta)
   case("source")
      output = lexer%chunk // TOML_NEWLINE
   case("filename")
      if (allocated(lexer%filename)) output = lexer%filename
   end select
end subroutine get_info
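!> Convert a string of hexadecimal digits to an integer value; characters
!> that are not hexadecimal digits contribute a zero digit.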
ch <= "Z") ch = achar(iachar(ch) - iachar("A") + iachar("a")) val = val * 16 + max(index(hex_digits, ch) - 1, 0) end do end function hex_to_int function verify_ucs(escape) result(valid) character(*, tfc), intent(in) :: escape logical :: valid integer(tfi) :: code code = hex_to_int(escape) valid = code > 0 .and. code < int(z"7FFFFFFF", tfi) & & .and. (code < int(z"d800", tfi) .or. code > int(z"dfff", tfi)) & & .and. (code < int(z"fffe", tfi) .or. code > int(z"ffff", tfi)) end function verify_ucs function convert_ucs(escape) result(str) character(*, tfc), intent(in) :: escape character(:, tfc), allocatable :: str integer(tfi) :: code code = hex_to_int(escape) select case(code) case(int(z"00000000", tfi):int(z"0000007f", tfi)) str = achar(code, kind=tfc) case(int(z"00000080", tfi):int(z"000007ff", tfi)) str = & achar(ior(int(z"c0", tfi), ishft(code, -6)), kind=tfc) // & achar(ior(int(z"80", tfi), iand(code, int(z"3f", tfi))), kind=tfc) case(int(z"00000800", tfi):int(z"0000ffff", tfi)) str = & achar(ior(int(z"e0", tfi), ishft(code, -12)), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -6), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(code, int(z"3f", tfi))), kind=tfc) case(int(z"00010000", tfi):int(z"001fffff", tfi)) str = & achar(ior(int(z"f0", tfi), ishft(code, -18)), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -12), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -6), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(code, int(z"3f", tfi))), kind=tfc) case(int(z"00200000", tfi):int(z"03ffffff", tfi)) str = & achar(ior(int(z"f8", tfi), ishft(code, -24)), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -18), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -12), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -6), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(code, int(z"3f", tfi))), kind=tfc) case(int(z"04000000", tfi):int(z"7fffffff", tfi)) str = & achar(ior(int(z"fc", tfi), ishft(code, -30)), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -24), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -18), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -12), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(ishft(code, -6), int(z"3f", tfi))), kind=tfc) // & achar(ior(int(z"80", tfi), iand(code, int(z"3f", tfi))), kind=tfc) end select end function convert_ucs end module tomlf_de_lexer