Skip to content

Feature

gffbase.feature.Feature

Feature(seqid: str = '.', source: str = '.', featuretype: str = '.', start='.', end='.', score: str = '.', strand: str = '.', frame: str = '.', attributes=None, extra=None, bin: Optional[int] = None, id: Optional[str] = None, dialect: Optional[dict] = None, file_order: Optional[int] = None, keep_order: bool = False, sort_attribute_values: bool = False)

Backward-compatible public Feature object.

Mirrors the legacy gffutils.Feature constructor and observable behavior: 1-based inclusive coordinates, list-wrapped multi-value attributes, dialect-faithful __str__ round-trip.

Source code in python/gffbase/feature.py
def __init__(
    self,
    seqid: str = ".",
    source: str = ".",
    featuretype: str = ".",
    start=".",
    end=".",
    score: str = ".",
    strand: str = ".",
    frame: str = ".",
    attributes=None,
    extra=None,
    bin: Optional[int] = None,
    id: Optional[str] = None,
    dialect: Optional[dict] = None,
    file_order: Optional[int] = None,
    keep_order: bool = False,
    sort_attribute_values: bool = False,
):
    """Populate a feature record from GFF-style column values.

    ``start`` and ``end`` are passed through ``_coord_to_int``. ``score``,
    ``strand`` and ``frame`` fall back to ``"."`` when given as ``None``,
    and a falsy ``dialect`` is stored as an empty dict.

    ``attributes`` may be:

    * an existing ``_LazyAttributes`` instance, stored as is;
    * a ``bytes``/``bytearray`` blob, kept (as ``bytes``) in
      ``_attributes_blob`` and wrapped in a ``_LazyAttributes``;
    * ``None``, which yields a ``_LazyAttributes`` seeded with ``{}``;
    * anything else, handed to ``_LazyAttributes`` as its initial content.

    ``extra`` may be ``None`` (empty list), tab-separated text given as
    ``str`` or as UTF-8 ``bytes``/``bytearray`` (undecodable bytes are
    replaced), or any other iterable, which is copied into a list.
    """
    self.seqid = seqid
    self.source = source
    self.featuretype = featuretype
    self.start = _coord_to_int(start)
    self.end = _coord_to_int(end)
    self.score = "." if score is None else score
    self.strand = "." if strand is None else strand
    self.frame = "." if frame is None else frame
    self.bin = bin
    self.id = id
    self.dialect = dialect or {}
    self.file_order = file_order
    self.keep_order = keep_order
    self.sort_attribute_values = sort_attribute_values
    self._attributes_blob = None
    self.children = None

    # The dialect's "fmt" entry selects the attribute format; default gff3.
    dialect_fmt = (self.dialect or {}).get("fmt", "gff3")
    if isinstance(attributes, _LazyAttributes):
        lazy = attributes
    elif isinstance(attributes, (bytes, bytearray)):
        # Keep the raw blob on the instance and wrap that same object.
        self._attributes_blob = bytes(attributes)
        lazy = _LazyAttributes(blob=self._attributes_blob, dialect_fmt=dialect_fmt)
    else:
        seed = {} if attributes is None else attributes
        lazy = _LazyAttributes(initial=seed, dialect_fmt=dialect_fmt)
    self.attributes = lazy

    # Binary input is decoded first so it shares the text path below.
    if isinstance(extra, (bytes, bytearray)):
        extra = extra.decode("utf-8", errors="replace")
    if extra is None:
        self.extra = []
    elif isinstance(extra, str):
        # Guard the empty string: "".split("\t") would give [""], not [].
        self.extra = extra.split("\t") if extra else []
    else:
        self.extra = list(extra)

astuple

astuple(encoding=None)

Legacy 12-tuple shape used by the SQLite export path: (id, seqid, source, featuretype, start, end, score, strand, frame, attributes_json, extra_json, bin).

Source code in python/gffbase/feature.py
def astuple(self, encoding=None):
    """Return the legacy 12-tuple used by the SQLite export path.

    The tuple is ``(id, seqid, source, featuretype, start, end, score,
    strand, frame, attributes_json, extra_json, bin)``. Attribute values
    are copied into lists and serialized as compact JSON; an empty
    ``extra`` serializes as ``"[]"``; a ``bin`` of ``None`` is replaced by
    the result of ``self.calc_bin()``.

    ``encoding`` is accepted but not used by this method.
    """
    compact = (",", ":")

    serialisable = {}
    for key, values in self.attributes.items():
        serialisable[key] = list(values)

    core_columns = (
        self.id,
        self.seqid,
        self.source,
        self.featuretype,
        self.start,
        self.end,
        self.score,
        self.strand,
        self.frame,
    )

    attributes_json = json.dumps(serialisable, separators=compact)

    if self.extra:
        extra_json = json.dumps(self.extra, separators=compact)
    else:
        extra_json = "[]"

    bin_value = self.bin
    if bin_value is None:
        bin_value = self.calc_bin()

    return core_columns + (attributes_json, extra_json, bin_value)

sequence

sequence(fasta, use_strand: bool = True) -> str

Extract sequence from a FASTA path or a pyfaidx-style mapping.

Source code in python/gffbase/feature.py
def sequence(self, fasta, use_strand: bool = True) -> str:
    """Extract this feature's sequence from a FASTA path or a mapping.

    Args:
        fasta: Either a filesystem path (``str`` or ``os.PathLike``) to a
            FASTA file, which is opened with the optional ``pyfaidx``
            package and closed again before returning, or an already-open
            pyfaidx-style object supporting ``fasta[seqid][start:end]``.
            A caller-supplied object is never closed here.
        use_strand: When true and ``self.strand`` is ``"-"``, the slice is
            passed through ``_revcomp`` before being returned.

    Returns:
        The sequence covering ``self.start``..``self.end`` as a ``str``.

    Raises:
        ImportError: If a path is given and ``pyfaidx`` is not installed.
    """
    import os

    # Path objects must take the pyfaidx branch too; otherwise they would be
    # indexed like a mapping and fail.
    opened_here = isinstance(fasta, (str, os.PathLike))
    if opened_here:  # pragma: no cover - pyfaidx is optional
        try:
            import pyfaidx  # type: ignore
        except ImportError as e:
            raise ImportError(
                "Feature.sequence(path=...) requires the optional `pyfaidx` package"
            ) from e
        fa = pyfaidx.Fasta(os.fspath(fasta))
    else:
        fa = fasta
    try:
        # Coordinates are used as 1-based inclusive: shift start for slicing.
        seq = str(fa[self.seqid][self.start - 1 : self.end])
    finally:
        # Only release handles this method created itself.
        if opened_here:
            fa.close()
    if use_strand and self.strand == "-":
        seq = _revcomp(seq)
    return seq

ParsedFeature (parser-internal record)

The slotted dataclass the Rust+Python parser emits before features land in the database.

gffbase.feature.ParsedFeature dataclass

ParsedFeature(seqid: str, source: str, featuretype: str, start: Optional[int], end: Optional[int], score: str, strand: str, frame: str, attributes_blob: bytes, attributes_pairs: List[Tuple[str, str, int]] = list(), extra: List[str] = list())

attributes_dict

attributes_dict() -> dict

Materialize attributes as {key: [values...]}. Preserves first-seen key order and multi-value ordering. Defers to attributes_pairs so the Rust and Python parsers remain trivially comparable.

Source code in python/gffbase/feature.py
def attributes_dict(self) -> dict:
    """Group ``attributes_pairs`` into a ``{key: [values...]}`` mapping.

    Keys appear in first-seen order and each key's values keep the order
    in which they occur in ``attributes_pairs``. The third element of each
    pair is ignored here.
    """
    grouped: dict = {}
    for key, value, _position in self.attributes_pairs:
        if key in grouped:
            grouped[key].append(value)
        else:
            grouped[key] = [value]
    return grouped

from_tuple classmethod

from_tuple(tup) -> 'ParsedFeature'

Build from the 11-tuple shape that the Rust extension yields.

Source code in python/gffbase/feature.py
@classmethod
def from_tuple(cls, tup) -> "ParsedFeature":
    """Construct an instance from an 11-element tuple.

    The expected element order is ``(seqid, source, featuretype, start,
    end, score, strand, frame, attributes_blob, attributes_pairs, extra)``.
    The blob is converted to ``bytes`` unless it already is one, each
    pair's third element is converted with ``int``, and ``extra`` is
    copied into a list.
    """
    (
        seqid,
        source,
        featuretype,
        start,
        end,
        score,
        strand,
        frame,
        raw_blob,
        raw_pairs,
        raw_extra,
    ) = tup

    blob_bytes = raw_blob if isinstance(raw_blob, bytes) else bytes(raw_blob)

    typed_pairs = []
    for key, value, index in raw_pairs:
        typed_pairs.append((key, value, int(index)))

    extra_columns = list(raw_extra)

    return cls(
        seqid=seqid,
        source=source,
        featuretype=featuretype,
        start=start,
        end=end,
        score=score,
        strand=strand,
        frame=frame,
        attributes_blob=blob_bytes,
        attributes_pairs=typed_pairs,
        extra=extra_columns,
    )