Official Python SDK for the AAS Studio public REST API.
extract
extract(file, *, file_name="datasheet.pdf", id_prefix=None, api_key=None, provider=None) -> dict
Extract an AAS submodel from a PDF datasheet. Counts as 1 extraction toward the monthly quota.
with open("./datasheets/ur10e.pdf", "rb") as f:
    response = aas.extract(f.read(), file_name="ur10e.pdf", id_prefix="urn:acme:aas")
result = response["result"]
print(result["assetIdShort"]) # 'UR10e'
print(len(result["submodels"])) # 4
print(response["provider"]) # 'gemini'
- PDF must be ≤ 20 MB. Scanned PDFs return UnprocessableError until OCR ships in Phase 3.
- For LLM-cost-sensitive workloads, pass `api_key` — uses your own provider key, bypasses ours.
list_extractions
list_extractions(*, limit=None, cursor=None) -> dict
List the caller's saved extractions, cursor-paginated.
cursor = None
while True:
    page = aas.list_extractions(limit=50, cursor=cursor)
    for e in page["extractions"]:
        print(f"{e['id']} {e['name']} ({e['mode']}, {e['sourceCount']} sources)")
    cursor = page.get("nextCursor")
    if not cursor:
        break
get_extraction
get_extraction(extraction_id: str) -> dict
Fetch a saved extraction's full audit-replay payload.
detail = aas.get_extraction("cmoyalmj3000004jxefn8cicd")
extraction = detail["extraction"]
print(extraction["aas"]) # the AAS itself
print(extraction["meta"]) # merged result + traces + cert + sourceHash
validate
validate(file, *, file_name="aas.xml") -> dict
Validate an AASX/XML file against the AAS schema.
with open("./output.aasx", "rb") as f:
    result = aas.validate(f.read())
if not result["valid"]:
    for err in result["errors"]:
        print(f"Line {err.get('line', '?')}: {err['message']}")
fix
fix(file, *, file_name="aas.xml") -> dict
Best-effort repair of a malformed AAS XML file.
with open("./broken.xml", "rb") as f:
    fixed = aas.fix(f.read())
if fixed["changed"]:
    Path("./fixed.xml").write_text(fixed["xml"])
for w in fixed["warnings"]:
    print("warn:", w)
health
health() -> dict
Service liveness probe. No auth required.
status = aas.health()
print(status["status"]) # 'ok'
merge
merge(sources: list[dict], *, weights: dict | None = None) -> dict
Multi-source consensus voting — merge N ExtractionResults via the weighted per-field algorithm. Pure compute (no LLM cost).
merged = aas.merge(
    sources=[
        {"sourceId": "mfg", "authority": "manufacturer", "result": mfg_result},
        {"sourceId": "dist", "authority": "distributor", "result": dist_result},
    ],
    weights={"distributor": 1.5}, # optional; defaults to manufacturer=2.0 / distributor=1.0 / third-party=0.5
)
for sm in merged["merged"]["submodels"]:
    for el in sm["elements"]:
        if el.get("consensus", {}).get("alternatives"):
            print(el["idShort"], "has", len(el["consensus"]["alternatives"]), "alternatives")
search_datasheets
search_datasheets(query: str, *, max_hits: int | None = None, skip_probe: bool = False) -> dict
Tavily-backed search for product datasheets. Returns hits scored by domain authority + pre-validated as PDFs.
result = aas.search_datasheets("Universal Robots UR10e", max_hits=5)
top = result["hits"][0]
print(top["title"], top["url"], top["authority"], top["isPdf"])