130 lines
4.6 KiB
Ruby
130 lines
4.6 KiB
Ruby
# -*- coding: utf-8; frozen_string_literal: true -*-
|
|
#
|
|
#--
|
|
# Copyright (C) 2009-2019 Thomas Leitner <t_leitner@gmx.at>
|
|
#
|
|
# This file is part of kramdown which is licensed under the MIT.
|
|
#++
|
|
#
|
|
|
|
require 'kramdown/utils'
|
|
require 'kramdown/parser'
|
|
|
|
module Kramdown
|
|
|
|
module Parser
|
|
|
|
# == \Base class for parsers
|
|
#
|
|
# This class serves as base class for parsers. It provides common methods that can/should be
|
|
# used by all parsers, especially by those using StringScanner(Kramdown) for parsing.
|
|
#
|
|
# A parser object is used as a throw-away object, i.e. it is only used for storing the needed
|
|
# state information during parsing. Therefore one can't instantiate a parser object directly but
|
|
# only use the Base::parse method.
|
|
#
|
|
# == Implementing a parser
|
|
#
|
|
# Implementing a new parser is rather easy: just derive a new class from this class and put it
|
|
# in the Kramdown::Parser module -- the latter is needed so that the auto-detection of the new
|
|
# parser works correctly. Then you need to implement the +#parse+ method which has to contain
|
|
# the parsing code.
|
|
#
|
|
# Have a look at the Base::parse, Base::new and Base#parse methods for additional information!
|
|
class Base

  # The hash with the parsing options.
  attr_reader :options

  # The array with the parser warnings.
  attr_reader :warnings

  # The original source string.
  attr_reader :source

  # The root element of the element tree that is created from the source string.
  attr_reader :root

  # Initialize the parser object with the +source+ string and the parsing +options+.
  #
  # The +options+ are passed through Kramdown::Options.merge before being stored. The @root
  # element, the (initially empty) @warnings array and @text_type (specifies the default type
  # for newly created text nodes, initially +:text+) are automatically initialized.
  def initialize(source, options)
    @source = source
    @options = Kramdown::Options.merge(options)
    # Any StandardError raised while asking +source+ for its encoding is swallowed and the
    # encoding is then recorded as +nil+.
    @root = Element.new(:root, nil, nil, encoding: (source.encoding rescue nil), location: 1,
                        options: {}, abbrev_defs: {}, abbrev_attr: {})
    @warnings = []
    @text_type = :text
  end
  # Instances are throw-away objects, so they may only be created through ::parse.
  private_class_method(:new, :allocate)

  # Parse the +source+ string into an element tree, possibly using the parsing +options+, and
  # return a two element array containing the root element of the element tree and the array
  # with the warning messages.
  #
  # Initializes a new instance of the calling class and then calls the +#parse+ method that must
  # be implemented by each subclass.
  def self.parse(source, options = {})
    parser = new(source, options)
    parser.parse
    [parser.root, parser.warnings]
  end

  # Parse the source string into an element tree.
  #
  # The parsing code should parse the source provided in @source and build an element tree the
  # root of which should be @root.
  #
  # This is the only method that has to be implemented by sub-classes! The implementation in
  # this class always raises NotImplementedError.
  def parse
    raise NotImplementedError
  end

  # Add the given warning +text+ to the warning array.
  #
  # Returns the warning array (the result of the append operation).
  def warning(text)
    @warnings << text
    # TODO: add position information
  end

  # Return a copy of the string +source+ that is usable by the parser: it is transcoded to
  # UTF-8, all line endings (<tt>\r\n</tt> as well as a lone <tt>\r</tt>) are unified to +\n+
  # and the result is guaranteed to end with exactly the one new line character that replaces
  # a possibly existing trailing line terminator.
  #
  # Raises a RuntimeError if +source+ is not valid in its current encoding.
  def adapt_source(source)
    unless source.valid_encoding?
      raise "The source text contains invalid characters for the used encoding #{source.encoding}"
    end
    source = source.encode('UTF-8')
    # chomp removes at most one trailing line terminator before the final "\n" is appended.
    source.gsub(/\r\n?/, "\n").chomp + "\n"
  end

  # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
  # +type+ element or creates a new text element with the given +type+.
  #
  # When the last child of +tree+ already has the type +type+, +text+ is appended in-place to
  # the value of that child. Otherwise a new element is only created if +text+ is not empty; its
  # location is taken from the last child if that one has a location, else from +tree+ itself.
  def add_text(text, tree = @tree, type = @text_type)
    last = tree.children.last
    if last && last.type == type
      last.value << text
    elsif !text.empty?
      location = (last && last.options[:location] || tree.options[:location])
      tree.children << Element.new(type, text, nil, location: location)
    end
  end

  # Extract the part of the StringScanner +strscan+ backed string specified by the +range+.
  #
  # The +range+ is interpreted in terms of *bytes* (like the positions reported by a
  # StringScanner), not in terms of characters. The returned string is a new string that has
  # the same encoding as the backing string.
  #
  # The backing string itself is never modified, so this also works when the scanner wraps a
  # frozen string. Returns +nil+ if +range+ lies outside of the backing string.
  def extract_string(range, strscan)
    strscan.string.byteslice(range)
  end

end
|
|
|
|
end
|
|
|
|
end
|