Add text parser implementation for IP extraction from plain text files
This commit is contained in:
160
parser/text.go
Normal file
160
parser/text.go
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
package parser
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TextExtract is implemented by types that pull an address expression out of
// a single line of text.
type TextExtract interface {
	// Extract returns the value taken from line together with a boolean that
	// reports whether the line contained the expected field(s). Callers are
	// expected to ignore the string when the boolean is false.
	Extract(line string) (string, bool)
}
|
||||||
|
|
||||||
|
// defaultTextExtract is the TextExtract implementation that returns a single
// field of a separator-delimited line.
type defaultTextExtract struct {
	// fieldIndexOne is the zero-based index of the field that contains the IP address.
	fieldIndexOne uint8

	// separator is the string used to split a line into fields.
	separator string
}

// Extract splits line on the configured separator and returns the field at
// fieldIndexOne exactly as it appears in the line (no trimming is applied).
// The boolean is false, and the string empty, when the line has too few
// fields to contain that index.
func (d *defaultTextExtract) Extract(line string) (string, bool) {
	fields := strings.Split(line, d.separator)

	idx := int(d.fieldIndexOne)
	if idx >= len(fields) {
		return "", false
	}

	return fields[idx], true
}
|
||||||
|
|
||||||
|
// intervalTextExtract is a TextExtract implementation that builds an IP
// address range of the form "start-end" from two fields of a text line.
type intervalTextExtract struct {
	// fieldIndexOne is the zero-based index of the field used as the start of
	// the IP address range.
	fieldIndexOne uint8

	// fieldIndexTwo is the zero-based index of the field used as the end of
	// the IP address range.
	fieldIndexTwo uint8

	// separator is the string used to split a line into fields.
	separator string
}

// Extract splits line on the configured separator and returns the two
// configured fields joined by "-". The boolean is false, and the string
// empty, when the line has too few fields to contain either index.
func (d *intervalTextExtract) Extract(line string) (string, bool) {
	fields := strings.Split(line, d.separator)

	first, second := int(d.fieldIndexOne), int(d.fieldIndexTwo)
	if first >= len(fields) || second >= len(fields) {
		return "", false
	}

	// Trim each field on its own: whitespace next to the separator would
	// otherwise end up in the middle of the joined value, where a later trim
	// of the whole string cannot remove it.
	start := strings.TrimSpace(fields[first])
	end := strings.TrimSpace(fields[second])

	return start + "-" + end, true
}
|
||||||
|
|
||||||
|
// cidrTextExtract is a TextExtract implementation that builds a value of the
// form "address/prefix" from two fields of a text line.
type cidrTextExtract struct {
	// fieldIndexOne is the zero-based index of the field that contains the IP address.
	fieldIndexOne uint8

	// fieldIndexTwo is the zero-based index of the field that contains the
	// CIDR prefix length.
	fieldIndexTwo uint8

	// separator is the string used to split a line into fields.
	separator string
}

// Extract splits line on the configured separator and returns the two
// configured fields joined by "/". The boolean is false, and the string
// empty, when the line has too few fields to contain either index.
func (d *cidrTextExtract) Extract(line string) (string, bool) {
	fields := strings.Split(line, d.separator)

	addrIdx, prefixIdx := int(d.fieldIndexOne), int(d.fieldIndexTwo)
	if addrIdx >= len(fields) || prefixIdx >= len(fields) {
		return "", false
	}

	// Trim each field on its own: whitespace next to the separator would
	// otherwise end up around the "/" where a later trim of the whole string
	// cannot remove it.
	addr := strings.TrimSpace(fields[addrIdx])
	prefix := strings.TrimSpace(fields[prefixIdx])

	return addr + "/" + prefix, true
}
|
||||||
|
|
||||||
|
// NewDefaultTextExtract creates a new default TextExtract instance.
|
||||||
|
func NewDefaultTextExtract(fieldIndexOne uint8, separator string) TextExtract {
|
||||||
|
return &defaultTextExtract{
|
||||||
|
fieldIndexOne: fieldIndexOne,
|
||||||
|
separator: separator,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewIntervalTextExtract creates a new TextExtract instance that extracts an IP address range.
|
||||||
|
func NewIntervalTextExtract(fieldIndexOne uint8, fieldIndexTwo uint8, separator string) TextExtract {
|
||||||
|
return &intervalTextExtract{
|
||||||
|
fieldIndexOne: fieldIndexOne,
|
||||||
|
fieldIndexTwo: fieldIndexTwo,
|
||||||
|
separator: separator,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCIDRTextExtract creates a new TextExtract instance that extracts an IP address range.
|
||||||
|
func NewCIDRTextExtract(fieldIndexOne uint8, fieldIndexTwo uint8, separator string) TextExtract {
|
||||||
|
return &cidrTextExtract{
|
||||||
|
fieldIndexOne: fieldIndexOne,
|
||||||
|
fieldIndexTwo: fieldIndexTwo,
|
||||||
|
separator: separator,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// textParser is a parser implementation that reads text data from an io.Reader and extracts IP addresses.
|
||||||
|
type textParser struct {
|
||||||
|
textExtract TextExtract
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewText creates a new TextParser instance.
|
||||||
|
func NewText(textExtract TextExtract) (Parser, error) {
|
||||||
|
if textExtract == nil || textExtract.Extract == nil {
|
||||||
|
return nil, fmt.Errorf("text extract is nil")
|
||||||
|
}
|
||||||
|
|
||||||
|
return &textParser{
|
||||||
|
textExtract: textExtract,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse reads text data from the given io.Reader and extracts IP addresses.
|
||||||
|
func (p *textParser) Parse(body io.Reader, validator IPValidator, limit uint) (IPs, error) {
|
||||||
|
scanner := bufio.NewScanner(body)
|
||||||
|
|
||||||
|
buf := make([]byte, 0, 64*1024)
|
||||||
|
scanner.Buffer(buf, 1024*1024)
|
||||||
|
|
||||||
|
var ips IPs
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
if line == "" || strings.HasPrefix(line, ";") || strings.HasPrefix(line, "#") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ip, isFound := p.textExtract.Extract(line)
|
||||||
|
if !isFound {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ip = strings.TrimSpace(ip)
|
||||||
|
if !validator.IsValid(ip) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ips = append(ips, ip)
|
||||||
|
if limit > 0 && uint(len(ips)) >= limit {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return nil, fmt.Errorf("read response: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ips, nil
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user