Add text parser implementation for IP extraction from plain text files
This commit is contained in:
160
parser/text.go
Normal file
160
parser/text.go
Normal file
@@ -0,0 +1,160 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TextExtract is implemented by strategies that pull the IP-related value
// out of a single line of text.
type TextExtract interface {
	// Extract inspects line and returns the extracted value. The boolean
	// reports whether a value could be taken from the line.
	Extract(line string) (value string, ok bool)
}
|
||||
|
||||
// defaultTextExtract is the single-field TextExtract: it splits a line on a
// separator and returns one of the resulting fields.
type defaultTextExtract struct {
	// fieldIndexOne is the zero-based position, after splitting, of the
	// field that holds the IP address.
	fieldIndexOne uint8

	// separator is the string the line is split on.
	separator string
}

// Extract splits line on the configured separator and returns the field at
// fieldIndexOne unchanged (no trimming is applied here).
// It returns "", false when the line has too few fields.
func (d *defaultTextExtract) Extract(line string) (string, bool) {
	idx := int(d.fieldIndexOne)
	if parts := strings.Split(line, d.separator); idx < len(parts) {
		return parts[idx], true
	}

	return "", false
}
|
||||
|
||||
// intervalTextExtract is a TextExtract implementation that builds a
// "start-end" IP address range from two fields of a text line.
type intervalTextExtract struct {
	// fieldIndexOne is the zero-based position, after splitting, of the
	// field used as the start of the range.
	fieldIndexOne uint8

	// fieldIndexTwo is the zero-based position, after splitting, of the
	// field used as the end of the range.
	fieldIndexTwo uint8

	// separator is the string the line is split on.
	separator string
}

// Extract splits line on the configured separator and joins the two selected
// fields as "start-end".
// It returns "", false when the line has too few fields for either index.
func (d *intervalTextExtract) Extract(line string) (string, bool) {
	fields := strings.Split(line, d.separator)
	if len(fields) <= int(d.fieldIndexOne) || len(fields) <= int(d.fieldIndexTwo) {
		return "", false
	}

	// Trim each bound individually: whitespace next to the separator would
	// otherwise end up in the middle of the result ("a - b"), where trimming
	// the joined string afterwards cannot remove it.
	start := strings.TrimSpace(fields[d.fieldIndexOne])
	end := strings.TrimSpace(fields[d.fieldIndexTwo])

	return start + "-" + end, true
}
|
||||
|
||||
// cidrTextExtract is a TextExtract implementation that builds an
// "address/prefix" CIDR string from two fields of a text line.
type cidrTextExtract struct {
	// fieldIndexOne is the zero-based position, after splitting, of the
	// field that contains the IP address.
	fieldIndexOne uint8

	// fieldIndexTwo is the zero-based position, after splitting, of the
	// field that contains the CIDR prefix length.
	fieldIndexTwo uint8

	// separator is the string the line is split on.
	separator string
}

// Extract splits line on the configured separator and joins the two selected
// fields as "address/prefix".
// It returns "", false when the line has too few fields for either index.
func (d *cidrTextExtract) Extract(line string) (string, bool) {
	fields := strings.Split(line, d.separator)
	if len(fields) <= int(d.fieldIndexOne) || len(fields) <= int(d.fieldIndexTwo) {
		return "", false
	}

	// Trim each part individually: whitespace next to the separator would
	// otherwise end up around the slash ("addr / 24"), where trimming the
	// joined string afterwards cannot remove it.
	addr := strings.TrimSpace(fields[d.fieldIndexOne])
	prefix := strings.TrimSpace(fields[d.fieldIndexTwo])

	return addr + "/" + prefix, true
}
|
||||
|
||||
// NewDefaultTextExtract creates a new default TextExtract instance.
|
||||
func NewDefaultTextExtract(fieldIndexOne uint8, separator string) TextExtract {
|
||||
return &defaultTextExtract{
|
||||
fieldIndexOne: fieldIndexOne,
|
||||
separator: separator,
|
||||
}
|
||||
}
|
||||
|
||||
// NewIntervalTextExtract creates a new TextExtract instance that extracts an IP address range.
|
||||
func NewIntervalTextExtract(fieldIndexOne uint8, fieldIndexTwo uint8, separator string) TextExtract {
|
||||
return &intervalTextExtract{
|
||||
fieldIndexOne: fieldIndexOne,
|
||||
fieldIndexTwo: fieldIndexTwo,
|
||||
separator: separator,
|
||||
}
|
||||
}
|
||||
|
||||
// NewCIDRTextExtract creates a new TextExtract instance that extracts an IP address range.
|
||||
func NewCIDRTextExtract(fieldIndexOne uint8, fieldIndexTwo uint8, separator string) TextExtract {
|
||||
return &cidrTextExtract{
|
||||
fieldIndexOne: fieldIndexOne,
|
||||
fieldIndexTwo: fieldIndexTwo,
|
||||
separator: separator,
|
||||
}
|
||||
}
|
||||
|
||||
// textParser is a parser implementation that reads text data from an io.Reader and extracts IP addresses.
|
||||
type textParser struct {
|
||||
textExtract TextExtract
|
||||
}
|
||||
|
||||
// NewText creates a new TextParser instance.
|
||||
func NewText(textExtract TextExtract) (Parser, error) {
|
||||
if textExtract == nil || textExtract.Extract == nil {
|
||||
return nil, fmt.Errorf("text extract is nil")
|
||||
}
|
||||
|
||||
return &textParser{
|
||||
textExtract: textExtract,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Parse reads text data from the given io.Reader and extracts IP addresses.
|
||||
func (p *textParser) Parse(body io.Reader, validator IPValidator, limit uint) (IPs, error) {
|
||||
scanner := bufio.NewScanner(body)
|
||||
|
||||
buf := make([]byte, 0, 64*1024)
|
||||
scanner.Buffer(buf, 1024*1024)
|
||||
|
||||
var ips IPs
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, ";") || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
ip, isFound := p.textExtract.Extract(line)
|
||||
if !isFound {
|
||||
continue
|
||||
}
|
||||
|
||||
ip = strings.TrimSpace(ip)
|
||||
if !validator.IsValid(ip) {
|
||||
continue
|
||||
}
|
||||
|
||||
ips = append(ips, ip)
|
||||
if limit > 0 && uint(len(ips)) >= limit {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, fmt.Errorf("read response: %w", err)
|
||||
}
|
||||
|
||||
return ips, nil
|
||||
}
|
||||
Reference in New Issue
Block a user