Skip to content

Extract Configuration

ts
/**
 * Handle to a matched node, handed to custom extractor functions (the
 * function form of `ExtractDescriptor.value`).
 */
interface ExtractNode {
  /**
   * Looks up the attribute called `name` on this node.
   * May return `undefined` (presumably when the attribute is absent; confirm
   * against the implementation).
   */
  attr(name: string): string | undefined;
  /** Returns the node's text as a string. */
  text(): string;
  /**
   * Returns the node's HTML as a string, or `undefined`.
   * NOTE(review): the conditions for `undefined` are not visible here; confirm.
   */
  html(): string | undefined;
}

/**
 * Describes how to extract a single field: which element to match and what
 * to read from it.
 */
type ExtractDescriptor = {
  /** CSS selector for the element to extract from. */
  selector: string;
  /**
   * What to read from the matched element:
   * - string: the name of an attribute
   * - function: a custom extractor that receives an `ExtractNode`
   * - `ExtractConfig` object: a nested configuration
   * - omitted (`undefined`): the element's text
   */
  value?:
    | string // attribute name
    | ((node: ExtractNode) => unknown)
    | ExtractConfig;
};
/**
 * Extraction configuration: maps each key to an extraction rule.
 *
 * A rule is a bare string, a full `ExtractDescriptor`, or either of those
 * wrapped in a single-element array to request an array result.
 *
 * NOTE(review): the bare string is presumably shorthand for a CSS selector
 * whose text is extracted; confirm against the implementation.
 */
type ExtractConfig = Record<
  string,
  string | ExtractDescriptor | [string | ExtractDescriptor]
>;
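  • selector: a CSS selector identifying the element to extract from
  • value: what to extract from the matched element:
    • string: the name of an attribute to read
    • function: a custom extractor that receives an xscrape ExtractNode
    • object: a nested ExtractConfig
    • undefined (omitted): the element's text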

Arrays: to extract an array of values, wrap the selector string or descriptor in [ ].