Skip to main content

Setup

Account Setup: Create an account on https://prism.prosights.co.
API Keys: Visit https://prism.prosights.co/app/api-keys to generate an API key.
Installation:
# install from PyPI
pip install recreate_sdk 

Usage

We require a RECREATE_API_KEY and a RECREATE_BASE_URL to be set in order to function properly. Production URL: RECREATE_BASE_URL=https://prism-api.prosights.co Enterprise URL: RECREATE_BASE_URL=https://<firmname>-api.prosights.co If you need help accessing an API key or finding out your base URL, please reach out to support@prosights.co.
import time

from recreate_sdk import RecreateSDK

# Module-level SDK client used by main(). Both arguments are placeholders:
# replace them with your real API key and base URL before running.
client = RecreateSDK(
    api_key="<RECREATE_API_KEY>",
    base_url="<RECREATE_BASE_URL>",
)

def main(poll_interval: float = 2.0, timeout: float = 600.0):
    """Create a recreate from ``base.pdf``, wait for it to finish, and print its DataFrames.

    Args:
        poll_interval: Seconds to sleep between two status requests.
        timeout: Maximum number of seconds to wait for the ``COMPLETED`` status.

    Raises:
        RuntimeError: If the create response does not contain a ``recreate_id``.
        TimeoutError: If the recreate is not ``COMPLETED`` within ``timeout`` seconds.
    """
    # The file handle is only needed for the create call itself.
    # NOTE(review): assumes create() has finished reading the file once it
    # returns a response - confirm against the SDK.
    with open("base.pdf", "rb") as file:
        response = client.enterprise_api.recreate.create(
            file=file,
            selected_pages="1-3",
            is_ppt_process_enabled="true",
            is_xls_process_charts_enabled="true",
            is_xls_process_tables_enabled="true",
            table_clustering_mode="page",
        )
    print(response)

    # Validate explicitly instead of with `assert`, which is stripped under -O.
    recreate_id = response.data.get("recreate_id")
    if not recreate_id:
        raise RuntimeError(f"create() returned no recreate_id: {response}")

    # Poll with a pause between requests and an upper bound on the total wait,
    # so a job that never completes cannot spin this loop forever.
    deadline = time.monotonic() + timeout
    while response.data.get("status", "UNKNOWN") != "COMPLETED":
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"recreate {recreate_id} did not complete within {timeout} seconds"
            )
        time.sleep(poll_interval)
        # Always query with the id captured from create(), rather than
        # re-reading it from the previous status response.
        response = client.enterprise_api.recreate.retrieve_status(
            recreate_id=recreate_id,
        )
        print(response)

    # Fetch the status once more so the final state is printed even when the
    # loop above never ran (i.e. create() already reported COMPLETED).
    response = client.enterprise_api.recreate.retrieve_status(
        recreate_id=recreate_id,
    )

    print(response)
    dfs = client.enterprise_api.recreate.get_dfs(recreate_id=recreate_id)
    for i, df in enumerate(dfs):
        # Header first, so each frame's output is labelled before its contents.
        print(f"DataFrame {i}:")
        print(df.to_string())
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
        print(f"Is empty: {df.empty}")
        if not df.empty:
            print(df.head())
        else:
            print("DataFrame is empty - no rows")
        print("-" * 40)


# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This will create a recreate, poll until completion, and print out dataframes.
EnterpriseAPIResponse(status='SUCCESS', data={'recreate_id': 'cf9b1754-8dd6-4ef1-a3b4-9615411f1363', 'status': 'CREATED'})

EnterpriseAPIResponse(status='SUCCESS', data={'recreate_id': 'cf9b1754-8dd6-4ef1-a3b4-9615411f1363', 'status': 'RECEIVED', 'xls_download_url': None, 'ppt_download_url': None, 'jobs': [{'job_id': 'e2e7df57-c11b-44ab-8d6b-7d21e9e52c61', 'job_type': 'PPT_PROCESS_PDF_JOB', 'page_number': -1, 'num_cells': None, 'status': 'CREATED'}, {'job_id': 'e38629e5-6138-43bd-a55a-39829aef4e2d', 'job_type': 'XLS_PROCESS_TABLES_JOB', 'page_number': 1, 'num_cells': None, 'status': 'CREATED'}, {'job_id': '082a347c-67b9-4610-b746-03f4e7771451', 'job_type': 'XLS_PROCESS_CHARTS_JOB', 'page_number': 1, 'num_cells': None, 'status': 'CREATED'}]})

...
I