Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
var DefaultExtrator = &Extractor{ TextLineBreak: fmt.Sprintln(), }
DefaultExtrator is the package-level default Extractor; its TextLineBreak is set to the result of fmt.Sprintln().
View Source
var LINEREAK = fmt.Sprintln()
Functions ¶
func GetHTMLContent ¶
GetHTMLContent fetches and cleans the raw HTML from the article.
func GetTextContent ¶
func GetTextContent(articleContent *goquery.Selection, customRender *TextRenderers) string
GetTextContent fetches and cleans the text from the article.
Types ¶
type Article ¶
type Article struct {
URL string `json:"url"`
Meta Metadata `json:"meta"`
Text string `json:"text"`
HTML string `json:"html"`
Images []string `json:"images"`
}
Article is the content of a URL.
func FromReader ¶
FromReader gets readable content from the specified io.Reader.
type Extractor ¶
type Extractor struct {
TextLineBreak string
CustomTextRenderers *TextRenderers
}
Extractor holds the extraction settings: a TextLineBreak string and a CustomTextRenderers set.
func (*Extractor) FromReader ¶
FromReader gets readable content from the specified io.Reader.
func (*Extractor) FromReaderWithSelector ¶
type Metadata ¶
type Metadata struct {
Title string `json:"title"`
Image string `json:"image"`
Excerpt string `json:"excerpt"`
Author string `json:"author"`
MinReadTime int `json:"min_read_time"`
MaxReadTime int `json:"max_read_time"`
}
Metadata is metadata of an article
type TextRenderers ¶
type TextRenderers struct {
LineBreak string
// contains filtered or unexported fields
}
func NewNoobTextRenderers ¶
func NewNoobTextRenderers(lineBreak string) *TextRenderers
func NewTextRenderers ¶
func NewTextRenderers(lineBreak string) *TextRenderers
func (*TextRenderers) Register ¶
func (r *TextRenderers) Register(tag string, before, after RenderFunc) error
func (*TextRenderers) WriteLineBreak ¶
func (r *TextRenderers) WriteLineBreak(buf *bytes.Buffer)
Click to show internal directories.
Click to hide internal directories.
