Skip to content

Modules

Main Fusion module.

Fusion

Core Fusion class for API access.

Source code in py_src/fusion/fusion.py
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
class Fusion:
    """Core Fusion class for API access."""

    @staticmethod
    def _call_for_dataframe(url: str, session: requests.Session) -> pd.DataFrame:
        """Private function that calls an API endpoint and returns the data as a pandas dataframe.

        Args:
            url (Union[FusionCredentials, Union[str, dict]): URL for an API endpoint with valid parameters.
            session (requests.Session): Specify a proxy if required to access the authentication server. Defaults to {}.

        Returns:
            pandas.DataFrame: a dataframe containing the requested data.
        """
        response = session.get(url)
        response.raise_for_status()
        table = response.json()["resources"]
        ret_df = pd.DataFrame(table).reset_index(drop=True)
        return ret_df

    @staticmethod
    def _call_for_bytes_object(url: str, session: requests.Session) -> BytesIO:
        """Private function that calls an API endpoint and returns the data as a bytes object in memory.

        Args:
            url (Union[FusionCredentials, Union[str, dict]): URL for an API endpoint with valid parameters.
            session (requests.Session): Specify a proxy if required to access the authentication server. Defaults to {}.

        Returns:
            io.BytesIO: in memory file content
        """

        response = session.get(url)
        response.raise_for_status()

        return BytesIO(response.content)

    def __init__(
        self,
        credentials: str | FusionCredentials = "config/client_credentials.json",
        root_url: str = "https://fusion.jpmorgan.com/api/v1/",
        download_folder: str = "downloads",
        log_level: int = logging.ERROR,
        fs: fsspec.filesystem = None,
        log_path: str = ".",
    ) -> None:
        """Constructor to instantiate a new Fusion object.

        Args:
            credentials (Union[str, FusionCredentials]): A path to a credentials file or a fully populated
            FusionCredentials object. Defaults to 'config/client_credentials.json'.
            root_url (_type_, optional): The API root URL.
                Defaults to "https://fusion.jpmorgan.com/api/v1/".
            download_folder (str, optional): The folder path where downloaded data files
                are saved. Defaults to "downloads".
            log_level (int, optional): Set the logging level. Defaults to logging.ERROR.
            fs (fsspec.filesystem): filesystem.
            log_path (str, optional): The folder path where the log is stored.

        Raises:
            ValueError: If `credentials` is neither a str path nor a FusionCredentials object.
        """
        self._default_catalog = "common"

        self.root_url = root_url
        self.download_folder = download_folder
        # Create the download destination up front so later downloads cannot fail on a missing folder.
        Path(download_folder).mkdir(parents=True, exist_ok=True)

        # Clear any handlers from a previous instantiation so log lines are not duplicated.
        if logger.hasHandlers():
            logger.handlers.clear()
        file_handler = logging.FileHandler(filename=f"{log_path}/fusion_sdk.log")
        logging.addLevelName(VERBOSE_LVL, "VERBOSE")
        stdout_handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        # NOTE(review): the formatter is attached only to the stdout handler, so the
        # log file receives unformatted messages — confirm this is intentional.
        stdout_handler.setFormatter(formatter)
        logger.addHandler(stdout_handler)
        logger.addHandler(file_handler)
        logger.setLevel(log_level)

        # Accept either a ready-made credentials object or a path to a credentials JSON file.
        if isinstance(credentials, FusionCredentials):
            self.credentials = credentials
        elif isinstance(credentials, str):
            self.credentials = FusionCredentials.from_file(Path(credentials))
        else:
            raise ValueError("credentials must be a path to a credentials file or FusionCredentials object")

        # Authenticated HTTP session shared by all API wrapper methods.
        self.session = get_session(self.credentials, self.root_url)
        self.fs = fs if fs else get_default_fs()
        # Event data placeholder; None until populated.
        self.events: pd.DataFrame | None = None

    def __repr__(self) -> str:
        """Object representation to list all available methods.

        Returns:
            str: A table (psql format) of the public methods and properties with the
                first line of each docstring.
        """
        # Collect public callables and properties once; the original duplicated this
        # introspection verbatim for the name column and the docstring column.
        method_names = [
            name
            for name in dir(Fusion)
            if callable(getattr(Fusion, name)) and not name.startswith("_")
        ]
        property_names = [p for p in dir(Fusion) if isinstance(getattr(Fusion, p), property)]
        names = method_names + property_names
        # `or ""` guards against members without a docstring, which previously
        # raised AttributeError inside repr().
        summaries = [(getattr(Fusion, name).__doc__ or "").split("\n")[0] for name in names]
        return "Fusion object \nAvailable methods:\n" + tabulate(
            pd.DataFrame([names, summaries]).T.set_index(0),  # type: ignore
            tablefmt="psql",
        )

    @property
    def default_catalog(self) -> str:
        """Returns the default catalog.

        Returns:
            str: The catalog identifier used when an API call does not specify one.
        """
        return self._default_catalog

    @default_catalog.setter
    def default_catalog(self, catalog: str) -> None:
        """Allow the default catalog, which is "common" to be overridden.

        Args:
            catalog (str): The catalog to use as the default

        Returns:
            None
        """
        self._default_catalog = catalog

    def _use_catalog(self, catalog: str | None) -> str:
        """Determine which catalog to use in an API call.

        Args:
            catalog (str): The catalog value passed as an argument to an API function wrapper.

        Returns:
            str: The catalog to use
        """
        if catalog is None:
            return self.default_catalog

        return catalog

    def get_fusion_filesystem(self) -> FusionHTTPFileSystem:
        """Creates Fusion Filesystem.

        Returns: Fusion Filesystem

        """
        client_kwargs = {"root_url": self.root_url, "credentials": self.credentials}
        return FusionHTTPFileSystem(client_kwargs=client_kwargs)

    def list_catalogs(self, output: bool = False) -> pd.DataFrame:
        """Lists the catalogs available to the API account.

        Args:
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each catalog
        """
        url = f"{self.root_url}catalogs/"
        cat_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return cat_df

    def catalog_resources(self, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
        """List the resources contained within the catalog, for example products and datasets.

        Args:
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
           class:`pandas.DataFrame`: A dataframe with a row for each resource within the catalog
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}"
        cat_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return cat_df

    def list_products(
        self,
        contains: str | list[str] | None = None,
        id_contains: bool = False,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Get the products contained in a catalog. A product is a grouping of datasets.

        Args:
            contains (Union[str, list], optional): A string or a list of strings that are product
                identifiers to filter the products list. If a list is provided then it will return
                products whose identifier matches any of the strings. Defaults to None.
            id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
                ignoring description.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each product
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/products"
        full_prod_df: pd.DataFrame = Fusion._call_for_dataframe(url, self.session)

        if contains:
            if isinstance(contains, list):
                contains = "|".join(f"{s}" for s in contains)
            if id_contains:
                filtered_df = full_prod_df[full_prod_df["identifier"].str.contains(contains, case=False)]
            else:
                filtered_df = full_prod_df[
                    full_prod_df["identifier"].str.contains(contains, case=False)
                    | full_prod_df["description"].str.contains(contains, case=False)
                ]
        else:
            filtered_df = full_prod_df

        filtered_df["category"] = filtered_df.category.str.join(", ")
        filtered_df["region"] = filtered_df.region.str.join(", ")
        if not display_all_columns:
            filtered_df = filtered_df[
                filtered_df.columns.intersection(
                    [
                        "identifier",
                        "title",
                        "region",
                        "category",
                        "status",
                        "description",
                    ]
                )
            ]

        if max_results > -1:
            filtered_df = filtered_df[0:max_results]

        if output:
            pass

        return filtered_df

    def list_datasets(  # noqa: PLR0913
        self,
        contains: str | list[str] | None = None,
        id_contains: bool = False,
        product: str | list[str] | None = None,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
        display_all_columns: bool = False,
        status: str | None = None,
        dataset_type: str | None = None,
    ) -> pd.DataFrame:
        """Get the datasets contained in a catalog.

        Args:
            contains (Union[str, list], optional): A string or a list of strings that are dataset
                identifiers to filter the datasets list. If a list is provided then it will return
                datasets whose identifier matches any of the strings. Defaults to None.
            id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
                ignoring description.
            product (Union[str, list], optional): A string or a list of strings that are product
                identifiers to filter the datasets list. Defaults to None.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed
            status (str, optional): filter the datasets by status, default is to show all results.
            dataset_type (str, optional): filter the datasets by type, default is to show all results.

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each dataset.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets"
        ds_df = Fusion._call_for_dataframe(url, self.session)

        if contains:
            if isinstance(contains, list):
                contains = "|".join(f"{s}" for s in contains)
            if id_contains:
                ds_df = ds_df[ds_df["identifier"].str.contains(contains, case=False)]
            else:
                ds_df = ds_df[
                    ds_df["identifier"].str.contains(contains, case=False)
                    | ds_df["description"].str.contains(contains, case=False)
                ]

        if product:
            url = f"{self.root_url}catalogs/{catalog}/productDatasets"
            prd_df = Fusion._call_for_dataframe(url, self.session)
            prd_df = (
                prd_df[prd_df["product"] == product]
                if isinstance(product, str)
                else prd_df[prd_df["product"].isin(product)]
            )
            ds_df = ds_df[ds_df["identifier"].str.lower().isin(prd_df["dataset"].str.lower())].reset_index(drop=True)

        if max_results > -1:
            ds_df = ds_df[0:max_results]

        ds_df["category"] = ds_df.category.str.join(", ")
        ds_df["region"] = ds_df.region.str.join(", ")
        if not display_all_columns:
            cols = [
                "identifier",
                "title",
                "containerType",
                "region",
                "category",
                "coverageStartDate",
                "coverageEndDate",
                "description",
                "status",
                "type",
            ]
            cols = [c for c in cols if c in ds_df.columns]
            ds_df = ds_df[cols]

        if status is not None:
            ds_df = ds_df[ds_df["status"] == status]

        if dataset_type is not None:
            ds_df = ds_df[ds_df["type"] == dataset_type]

        if output:
            pass

        return ds_df

    def dataset_resources(self, dataset: str, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
        """List the resources available for a dataset, currently this will always be a datasetseries.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each resource
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}"
        ds_res_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return ds_res_df

    def list_dataset_attributes(
        self,
        dataset: str,
        catalog: str | None = None,
        output: bool = False,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Returns the list of attributes that are in the dataset.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each attribute
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
        ds_attr_df = Fusion._call_for_dataframe(url, self.session)

        if "index" in ds_attr_df.columns: 
            ds_attr_df = ds_attr_df.sort_values(by="index").reset_index(drop=True)

        if not display_all_columns:
            ds_attr_df = ds_attr_df[
                ds_attr_df.columns.intersection(
                    [
                        "identifier",
                        "title",
                        "dataType",
                        "isDatasetKey",
                        "description",
                        "source",
                    ]
                )
            ]

        if output:
            pass

        return ds_attr_df

    def list_datasetmembers(
        self,
        dataset: str,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
    ) -> pd.DataFrame:
        """List the available members in the dataset series.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each dataset member.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
        ds_members_df = Fusion._call_for_dataframe(url, self.session)

        if max_results > -1:
            ds_members_df = ds_members_df[0:max_results]

        if output:
            pass

        return ds_members_df

    def datasetmember_resources(
        self,
        dataset: str,
        series: str,
        catalog: str | None = None,
        output: bool = False,
    ) -> pd.DataFrame:
        """List the available resources for a datasetseries member.

        Args:
            dataset (str): A dataset identifier
            series (str): The datasetseries identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each datasetseries member resource.
                Currently, this will always be distributions.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}"
        ds_mem_res_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return ds_mem_res_df

    def list_distributions(
        self,
        dataset: str,
        series: str,
        catalog: str | None = None,
        output: bool = False,
    ) -> pd.DataFrame:
        """List the available distributions (downloadable instances of the dataset with a format type).

        Args:
            dataset (str): A dataset identifier
            series (str): The datasetseries identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each distribution.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}/distributions"
        distros_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return distros_df

    def _resolve_distro_tuples(
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
    ) -> list[tuple[str, str, str, str]]:
        """Resolve distribution tuples given specification params.

        A private utility function to generate a list of distribution tuples.
        Each tuple is a distribution, identified by catalog, dataset id,
        datasetseries member id, and the file format.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            list: a list of tuples, one for each distribution

        Raises:
            AssertionError: If the dataset has no series members at all.
            APIResponseError: If no series member matches the requested date/date range.
        """
        catalog = self._use_catalog(catalog)

        datasetseries_list = self.list_datasetmembers(dataset, catalog)
        # An empty dataframe also has len() == 0, so this single guard covers
        # the previously separate (and unreachable) `.empty` check as well.
        if len(datasetseries_list) == 0:
            raise AssertionError(f"There are no dataset members for dataset {dataset} in catalog {catalog}")

        if dt_str == "latest":
            # Pick the most recently created series member; ties on createdDate
            # are broken by taking the alphabetically last identifier.
            dt_str = (
                datasetseries_list[
                    datasetseries_list["createdDate"] == datasetseries_list["createdDate"].to_numpy().max()
                ]
                .sort_values(by="identifier")
                .iloc[-1]["identifier"]
            )
            datasetseries_list = datasetseries_list[datasetseries_list["identifier"] == dt_str]
        else:
            parsed_dates = normalise_dt_param_str(dt_str)
            if len(parsed_dates) == 1:
                # A single date acts as both the start and the end of the range.
                parsed_dates = (parsed_dates[0], parsed_dates[0])

            # Keep members whose identifier parses to a date inside the range;
            # identifiers that are not dates coerce to NaT and are dropped by
            # the comparison.
            if parsed_dates[0]:
                datasetseries_list = datasetseries_list[
                    pd.Series([pd.to_datetime(i, errors="coerce") for i in datasetseries_list["identifier"]])
                    >= pd.to_datetime(parsed_dates[0])
                ].reset_index()

            if parsed_dates[1]:
                datasetseries_list = datasetseries_list[
                    pd.Series([pd.to_datetime(i, errors="coerce") for i in datasetseries_list["identifier"]])
                    <= pd.to_datetime(parsed_dates[1])
                ].reset_index()

        if len(datasetseries_list) == 0:
            raise APIResponseError(  # pragma: no cover
                f"No data available for dataset {dataset} in catalog {catalog}.\n"
                f"Check that a valid dataset identifier and date/date range has been set."
            )

        required_series = list(datasetseries_list["@id"])
        return [(catalog, dataset, series, dataset_format) for series in required_series]

    def download(  # noqa: PLR0912, PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        force_download: bool = False,
        download_folder: str | None = None,
        return_paths: bool = False,
        partitioning: str | None = None,
        preserve_original_name: bool = False,
    ) -> list[tuple[bool, str, str | None]] | None:
        """Downloads the requested distributions of a dataset to disk.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset. If more than one series member exists on the latest date, the
                series member identifiers will be sorted alphabetically and the last one will be downloaded.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            partitioning (str, optional): Partitioning specification.
            preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]]: If `return_paths` is True, a list of
                (success, file path, error message) tuples, one per distribution; otherwise None.
        """
        catalog = self._use_catalog(catalog)

        # Matches YYYYMMDD, YYYYMMDD:YYYYMMDD, or an open-ended range (":YYYYMMDD" / "YYYYMMDD:").
        valid_date_range = re.compile(r"^(\d{4}\d{2}\d{2})$|^((\d{4}\d{2}\d{2})?([:])(\d{4}\d{2}\d{2})?)$")

        if valid_date_range.match(dt_str) or dt_str == "latest":
            required_series = self._resolve_distro_tuples(dataset, dt_str, dataset_format, catalog)
        else:
            # sample data is limited to csv
            if dt_str == "sample":
                dataset_format = self.list_distributions(dataset, dt_str, catalog)["identifier"].iloc[0]
            # A non-date dt_str is treated as a literal series member identifier.
            required_series = [(catalog, dataset, dt_str, dataset_format)]

        if dataset_format not in RECOGNIZED_FORMATS + ["raw"]:
            raise ValueError(f"Dataset format {dataset_format} is not supported")

        if not download_folder:
            download_folder = self.download_folder

        download_folders = [download_folder] * len(required_series)

        if partitioning == "hive":
            # Hive-style layout: <folder>/<catalog>/<dataset>/<series member>/
            members = [series[2].strip("/") for series in required_series]
            download_folders = [
                f"{download_folders[i]}/{series[0]}/{series[1]}/{members[i]}"
                for i, series in enumerate(required_series)
            ]

        for d in download_folders:
            if not self.fs.exists(d):
                self.fs.mkdir(d, create_parents=True)

        n_par = cpu_count(n_par)
        # One keyword-argument spec per distribution tuple (catalog, dataset, member, format),
        # consumed by the filesystem's download() below.
        download_spec = [
            {
                "lfs": self.fs,
                "rpath": distribution_to_url(
                    self.root_url,
                    series[1],
                    series[2],
                    series[3],
                    series[0],
                    is_download=True,
                ),
                "lpath": distribution_to_filename(
                    download_folders[i],
                    series[1],
                    series[2],
                    series[3],
                    series[0],
                    partitioning=partitioning,
                ),
                "overwrite": force_download,
                "preserve_original_name": preserve_original_name,
            }
            for i, series in enumerate(required_series)
        ]

        logger.log(
            VERBOSE_LVL,
            f"Beginning {len(download_spec)} downloads in batches of {n_par}",
        )
        if show_progress:
            with joblib_progress("Downloading", total=len(download_spec)):
                res = Parallel(n_jobs=n_par)(
                    delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
                )
        else:
            res = Parallel(n_jobs=n_par)(
                delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
            )

        # Surface each individual failure as a warning; the result list is still returned.
        if (len(res) > 0) and (not all(r[0] for r in res)):
            for r in res:
                if not r[0]:
                    warnings.warn(f"The download of {r[1]} was not successful", stacklevel=2)
        return res if return_paths else None

    def to_df(  # noqa: PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        columns: list[str] | None = None,
        filters: PyArrowFilterT | None = None,
        force_download: bool = False,
        download_folder: str | None = None,
        dataframe_type: str = "pandas",
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Gets distributions for a specified date or date range and returns the data as a dataframe.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            columns (List, optional): A list of columns to return from a parquet file. Defaults to None
            filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
                Rows which do not match the filter predicate will be removed from scanned data.
                Partition keys embedded in a nested directory structure will be exploited to avoid
                loading files at all if they contain no matching rows. If use_legacy_dataset is True,
                filters can only reference partition keys and only a hive-style directory structure
                is supported. When setting use_legacy_dataset to False, also within-file level filtering
                and different partitioning schemes are supported.
                More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
            dataframe_type (str, optional): Type of dataframe to return: "pandas" (default) or "polars".

        Returns:
            class:`pandas.DataFrame`: a dataframe containing the requested data.
                If multiple dataset instances are retrieved then these are concatenated first.

        Raises:
            ValueError: if the download returned no paths, the format has no reader,
                or `dataframe_type` is not supported.
            RuntimeError: if any of the required downloads failed.
        """
        catalog = self._use_catalog(catalog)

        # sample data is limited to csv
        if dt_str == "sample":
            dataset_format = "csv"

        if not download_folder:
            download_folder = self.download_folder
        download_res = self.download(
            dataset,
            dt_str,
            dataset_format,
            catalog,
            n_par,
            show_progress,
            force_download,
            download_folder,
            return_paths=True,
        )

        if not download_res:
            raise ValueError("Must specify 'return_paths=True' in download call to use this function")

        if not all(res[0] for res in download_res):
            failed_res = [res for res in download_res if not res[0]]
            # RuntimeError for consistency with to_table(); still an Exception
            # subclass, so existing broad handlers keep working.
            raise RuntimeError(
                f"Not all downloads were successfully completed. "
                f"Re-run to collect missing files. The following failed:\n{failed_res}"
            )

        files = [res[1] for res in download_res]

        pd_read_fn_map = {
            "csv": read_csv,
            "parquet": read_parquet,
            "parq": read_parquet,
            "json": read_json,
            "raw": read_csv,
        }

        # Every supported format currently takes the same reader kwargs.
        common_read_kwargs: dict[str, object] = {
            "columns": columns,
            "filters": filters,
            "fs": self.fs,
            "dataframe_type": dataframe_type,
        }
        pd_read_default_kwargs: dict[str, dict[str, object]] = {
            fmt: dict(common_read_kwargs) for fmt in ("csv", "parquet", "parq", "json", "raw")
        }

        pd_reader = pd_read_fn_map.get(dataset_format)
        pd_read_kwargs = pd_read_default_kwargs.get(dataset_format, {})
        if not pd_reader:
            # ValueError (an Exception subclass) instead of bare Exception.
            raise ValueError(f"No pandas function to read file in format {dataset_format}")

        # Caller-supplied kwargs override the defaults.
        pd_read_kwargs.update(kwargs)

        if len(files) == 0:
            raise APIResponseError(
                f"No series members for dataset: {dataset} "
                f"in date or date range: {dt_str} and format: {dataset_format}"
            )
        if dataset_format in ["parquet", "parq"]:
            # Parquet readers accept the whole file list at once.
            data_df = pd_reader(files, **pd_read_kwargs)  # type: ignore
        elif dataset_format == "raw":
            # "raw" distributions are zip archives: read every member of each
            # archive, then concatenate across archives. Each zip is opened
            # once and closed deterministically.
            def _read_zip(path: str) -> pd.DataFrame:
                with ZipFile(path) as zf:
                    return pd.concat(
                        [pd_reader(zf.open(name), **pd_read_kwargs) for name in zf.namelist()],  # type: ignore
                        ignore_index=True,
                    )

            data_df = pd.concat((_read_zip(f) for f in files), ignore_index=True)
        else:
            dataframes = (pd_reader(f, **pd_read_kwargs) for f in files)  # type: ignore
            if dataframe_type == "pandas":
                data_df = pd.concat(dataframes, ignore_index=True)
            elif dataframe_type == "polars":
                import polars as pl

                data_df = pl.concat(dataframes, how="diagonal")  # type: ignore
            else:
                # Previously an unknown type fell through to UnboundLocalError.
                raise ValueError(f"Unsupported dataframe_type: {dataframe_type}")

        return data_df

    def to_bytes(
        self,
        dataset: str,
        series_member: str,
        dataset_format: str = "parquet",
        catalog: str | None = None,
    ) -> BytesIO:
        """Returns an instance of dataset (the distribution) as a bytes object.

        Args:
            dataset (str): A dataset identifier
            series_member (str,): A dataset series member identifier
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            class:`io.BytesIO`: the requested distribution as an in-memory bytes object.
        """
        catalog = self._use_catalog(catalog)

        # Resolve the distribution's URL and fetch its raw content directly.
        distribution_url = distribution_to_url(self.root_url, dataset, series_member, dataset_format, catalog)
        return Fusion._call_for_bytes_object(distribution_url, self.session)

    def to_table(  # noqa: PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        columns: list[str] | None = None,
        filters: PyArrowFilterT | None = None,
        force_download: bool = False,
        download_folder: str | None = None,
        **kwargs: Any,
    ) -> pa.Table:
        """Gets distributions for a specified date or date range and returns the data as an arrow table.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            columns (List, optional): A list of columns to return from a parquet file. Defaults to None
            filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
                Rows which do not match the filter predicate will be removed from scanned data.
                Partition keys embedded in a nested directory structure will be exploited to avoid
                loading files at all if they contain no matching rows. If use_legacy_dataset is True,
                filters can only reference partition keys and only a hive-style directory structure
                is supported. When setting use_legacy_dataset to False, also within-file level filtering
                and different partitioning schemes are supported.
                More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
        Returns:
            class:`pyarrow.Table`: a table containing the requested data.
                If multiple dataset instances are retrieved then these are concatenated first.
        Raises:
            ValueError: if the download call returned no paths.
            RuntimeError: if any of the required downloads failed.
        """
        catalog = self._use_catalog(catalog)
        n_par = cpu_count(n_par)
        if not download_folder:
            download_folder = self.download_folder
        # Download with paths returned so the files can then be read locally.
        download_res = self.download(
            dataset,
            dt_str,
            dataset_format,
            catalog,
            n_par,
            show_progress,
            force_download,
            download_folder,
            return_paths=True,
        )

        if not download_res:
            raise ValueError("Must specify 'return_paths=True' in download call to use this function")

        if not all(res[0] for res in download_res):
            failed_res = [res for res in download_res if not res[0]]
            raise RuntimeError(
                f"Not all downloads were successfully completed. "
                f"Re-run to collect missing files. The following failed:\n{failed_res}"
            )

        files = [res[1] for res in download_res]

        # Format -> pyarrow table reader. "raw" distributions are read as CSV here.
        read_fn_map = {
            "csv": csv_to_table,
            "parquet": parquet_to_table,
            "parq": parquet_to_table,
            "json": json_to_table,
            "raw": csv_to_table,
        }

        read_default_kwargs: dict[str, dict[str, object]] = {
            "csv": {"columns": columns, "filters": filters, "fs": self.fs},
            "parquet": {"columns": columns, "filters": filters, "fs": self.fs},
            "json": {"columns": columns, "filters": filters, "fs": self.fs},
            "raw": {"columns": columns, "filters": filters, "fs": self.fs},
        }

        # "parq" is an alias for "parquet"; they share the same kwargs dict.
        read_default_kwargs["parq"] = read_default_kwargs["parquet"]

        reader = read_fn_map.get(dataset_format)
        read_kwargs = read_default_kwargs.get(dataset_format, {})
        if not reader:
            raise AssertionError(f"No function to read file in format {dataset_format}")

        # Caller-supplied kwargs override the defaults.
        read_kwargs.update(kwargs)

        if len(files) == 0:
            raise APIResponseError(
                f"No series members for dataset: {dataset} "
                f"in date or date range: {dt_str} and format: {dataset_format}"
            )
        if dataset_format in ["parquet", "parq"]:
            # The parquet reader accepts the whole file list at once.
            tbl = reader(files, **read_kwargs)  # type: ignore
        else:
            # Read each file into its own table, then concatenate.
            tbl = (reader(f, **read_kwargs) for f in files)  # type: ignore
            tbl = pa.concat_tables(tbl)

        return tbl

    def upload(  # noqa: PLR0913
        self,
        path: str,
        dataset: str | None = None,
        dt_str: str = "latest",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        return_paths: bool = False,
        multipart: bool = True,
        chunk_size: int = 5 * 2**20,
        from_date: str | None = None,
        to_date: str | None = None,
        preserve_original_name: bool | None = False,
        additional_headers: dict[str, str] | None = None,
    ) -> list[tuple[bool, str, str | None]] | None:
        """Uploads the requested files/files to Fusion.

        Args:
            path (str): path to a file or a folder with files
            dataset (str, optional): Dataset identifier to which the file will be uploaded (for single file only).
                                    If not provided the dataset will be implied from file's name.
            dt_str (str, optional): A file name. Can be any string but is usually a date.
                                    Defaults to 'latest' which will return the most recent.
                                    Relevant for a single file upload only. If not provided the dataset will
                                    be implied from file's name.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            multipart (bool, optional): Is multipart upload.
            chunk_size (int, optional): Maximum chunk size.
            from_date (str, optional): start of the data date range contained in the distribution,
                defaults to upload date
            to_date (str, optional): end of the data date range contained in the distribution,
                defaults to upload date.
            preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.
            additional_headers (dict, optional): Additional HTTP headers passed through to the upload call.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]]: If `return_paths` is True, a list of
                (success, file path, error message) tuples, one per file; otherwise None.

        Raises:
            RuntimeError: if `path` does not exist.
            ValueError: if `preserve_original_name` is used without both catalog and dataset.
        """
        catalog = self._use_catalog(catalog)

        if not self.fs.exists(path):
            raise RuntimeError("The provided path does not exist")

        fs_fusion = self.get_fusion_filesystem()
        if self.fs.info(path)["type"] == "directory":
            # Bulk upload: keep only files whose names validate against Fusion
            # naming, then derive each file's target URL from its name.
            file_path_lst = self.fs.find(path)
            local_file_validation = validate_file_names(file_path_lst, fs_fusion)
            file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
            file_name = [f.split("/")[-1] for f in file_path_lst]
            is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
            local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
        else:
            file_path_lst = [path]
            if not catalog or not dataset:
                # Without an explicit catalog/dataset both are implied from the file name.
                local_file_validation = validate_file_names(file_path_lst, fs_fusion)
                file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
                is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
                local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
                if preserve_original_name:
                    raise ValueError("preserve_original_name can only be used when catalog and dataset are provided.")
            else:
                date_identifier = re.compile(r"^(\d{4})(\d{2})(\d{2})$")
                if date_identifier.match(dt_str):
                    # Normalise a YYYYMMDD label through pandas to validate it.
                    # (dt_str cannot be "latest" here: it just matched the date regex,
                    # so the previous "latest" fallback on this path was dead code.)
                    dt_str = pd.Timestamp(dt_str).date().strftime("%Y%m%d")

                if catalog not in fs_fusion.ls("") or dataset not in [
                    i.split("/")[-1] for i in fs_fusion.ls(f"{catalog}/datasets")
                ]:
                    msg = (
                        f"File has not been uploaded, one of the catalog: {catalog} "
                        f"or dataset: {dataset} does not exist."
                    )
                    warnings.warn(msg, stacklevel=2)
                    return [(False, path, msg)]
                file_format = path.split(".")[-1]
                file_name = [path.split("/")[-1]]
                # Unrecognized extensions are uploaded as "raw".
                file_format = "raw" if file_format not in RECOGNIZED_FORMATS else file_format

                local_url_eqiv = [
                    "/".join(distribution_to_url("", dataset, dt_str, file_format, catalog, False).split("/")[1:])
                ]

        # Map each local file path to its Fusion target URL (and optionally its
        # original file name when that should be preserved server-side).
        if not preserve_original_name:
            data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv]).T
            data_map_df.columns = pd.Index(["path", "url"])
        else:
            data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv, file_name]).T
            data_map_df.columns = pd.Index(["path", "url", "file_name"])

        n_par = cpu_count(n_par)
        parallel = len(data_map_df) > 1
        res = upload_files(
            fs_fusion,
            self.fs,
            data_map_df,
            parallel=parallel,
            n_par=n_par,
            multipart=multipart,
            chunk_size=chunk_size,
            show_progress=show_progress,
            from_date=from_date,
            to_date=to_date,
            additional_headers=additional_headers,
        )

        if not all(r[0] for r in res):
            failed_res = [r for r in res if not r[0]]
            msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
            logger.warning(msg)
            warnings.warn(msg, stacklevel=2)

        return res if return_paths else None

    def from_bytes(  # noqa: PLR0913
        self,
        data: BytesIO,
        dataset: str,
        series_member: str = "latest",
        catalog: str | None = None,
        distribution: str = "parquet",
        show_progress: bool = True,
        return_paths: bool = False,
        chunk_size: int = 5 * 2**20,
        from_date: str | None = None,
        to_date: str | None = None,
        file_name: str | None = None,
        **kwargs: Any,  # noqa: ARG002
    ) -> list[tuple[bool, str, str | None]] | None:
        """Uploads data from an object in memory.

        Args:
            data (BytesIO): an object in memory to upload
            dataset (str): Dataset name to which the bytes will be uploaded.
            series_member (str, optional): A single date or label. Defaults to 'latest' which will return
                the most recent.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            distribution (str, optional): A distribution type, e.g. a file format or raw
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            chunk_size (int, optional): Maximum chunk size.
            from_date (str, optional): start of the data date range contained in the distribution,
                defaults to upload date
            to_date (str, optional): end of the data date range contained in the distribution, defaults to upload date.
            file_name (str, optional): file name to be used for the uploaded file. Defaults to Fusion standard naming.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]: a list of tuples, one for each distribution

        Raises:
            ValueError: if `distribution` is not a recognized format.
        """
        catalog = self._use_catalog(catalog)

        fs_fusion = self.get_fusion_filesystem()
        if distribution not in RECOGNIZED_FORMATS + ["raw"]:
            raise ValueError(f"Dataset format {distribution} is not supported")

        # Whether the target dataset holds raw (unstructured) data drives URL construction.
        is_raw = js.loads(fs_fusion.cat(f"{catalog}/datasets/{dataset}"))["isRawData"]
        local_url_eqiv = path_to_url(f"{dataset}__{catalog}__{series_member}.{distribution}", is_raw)

        # Single in-memory object: a one-row mapping, uploaded without
        # parallelism or multipart. pd.Index for the columns keeps this
        # consistent with upload() and avoids the type: ignore.
        data_map_df = pd.DataFrame(["", local_url_eqiv, file_name]).T
        data_map_df.columns = pd.Index(["path", "url", "file_name"])

        res = upload_files(
            fs_fusion,
            data,
            data_map_df,
            parallel=False,
            n_par=1,
            multipart=False,
            chunk_size=chunk_size,
            show_progress=show_progress,
            from_date=from_date,
            to_date=to_date,
        )

        if not all(r[0] for r in res):
            failed_res = [r for r in res if not r[0]]
            msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
            logger.warning(msg)
            warnings.warn(msg, stacklevel=2)

        return res if return_paths else None

    def listen_to_events(
        self,
        last_event_id: str | None = None,
        catalog: str | None = None,
        url: str = "https://fusion.jpmorgan.com/api/v1/",
    ) -> None | pd.DataFrame:
        """Run server sent event listener in the background. Retrieve results by running get_events.

        Args:
            last_event_id (str): Last event ID (exclusive).
            catalog (str): catalog.
            url (str): subscription url.
        Returns:
            None: the listener runs in a daemon thread; collected events accumulate
                on `self.events` and can be retrieved via `get_events`.
        """

        catalog = self._use_catalog(catalog)
        import asyncio
        import json
        import threading

        from aiohttp_sse_client import client as sse_client

        from .utils import get_client

        kwargs: dict[str, Any] = {}
        if last_event_id:
            kwargs = {"headers": {"Last-Event-ID": last_event_id}}

        async def async_events() -> None:
            """Consume the SSE stream, appending each event to self.events.

            Returns:
                None
            """
            timeout = 1e100
            session = await get_client(self.credentials, timeout=timeout)
            async with sse_client.EventSource(
                f"{url}catalogs/{catalog}/notifications/subscribe",
                session=session,
                **kwargs,
            ) as messages:
                try:
                    async for msg in messages:
                        event = json.loads(msg.data)
                        # Append only the newly received event. The previous
                        # implementation re-concatenated the entire running list
                        # on every message (duplicating earlier events) and
                        # silently dropped the first event.
                        event_df = pd.DataFrame([event])
                        if self.events is None:
                            self.events = event_df
                        else:
                            self.events = pd.concat([self.events, event_df], ignore_index=True)
                except TimeoutError as ex:
                    # Suppress exception chaining for timeouts.
                    raise ex from None

        _ = self.list_catalogs()  # refresh token
        if "headers" in kwargs:
            kwargs["headers"].update({"authorization": f"bearer {self.credentials.bearer_token}"})
        else:
            kwargs["headers"] = {
                "authorization": f"bearer {self.credentials.bearer_token}",
            }
        # Route through a proxy when one is configured on the credentials.
        if "http" in self.credentials.proxies:
            kwargs["proxy"] = self.credentials.proxies["http"]
        elif "https" in self.credentials.proxies:
            kwargs["proxy"] = self.credentials.proxies["https"]
        th = threading.Thread(target=asyncio.run, args=(async_events(),), daemon=True)
        th.start()
        return None

    def get_events(
        self,
        last_event_id: str | None = None,
        catalog: str | None = None,
        in_background: bool = True,
        url: str = "https://fusion.jpmorgan.com/api/v1/",
    ) -> None | pd.DataFrame:
        """Run server sent event listener and print out the new events. Keyboard terminate to stop.

        Args:
            last_event_id (str): id of the last event.
            catalog (str): catalog.
            in_background (bool): execute event monitoring in the background (default = True).
            url (str): subscription url.
        Returns:
            Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
                If in_background is set to False then pandas DataFrame is output upon keyboard termination.
        """

        catalog = self._use_catalog(catalog)
        # Background mode simply exposes whatever the background listener has collected.
        if in_background:
            return self.events

        from sseclient import SSEClient

        _ = self.list_catalogs()  # refresh token
        messages = SSEClient(
            session=self.session,
            url=f"{url}catalogs/{catalog}/notifications/subscribe",
            last_id=last_event_id,
            headers={
                "authorization": f"bearer {self.credentials.bearer_token}",
            },
        )
        collected = []
        interrupted = False
        try:
            # Block on the stream, skipping heartbeat notifications, until Ctrl-C.
            for msg in messages:
                event = js.loads(msg.data)
                if event["type"] != "HeartBeatNotification":
                    collected.append(event)
        except KeyboardInterrupt:
            interrupted = True
        return pd.DataFrame(collected) if interrupted or collected else None

    def list_dataset_lineage(
        self,
        dataset_id: str,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
    ) -> pd.DataFrame:
        """List the upstream and downstream lineage of the dataset.

        Args:
            dataset_id (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each resource

        Raises:
            HTTPError: If the dataset is not found in the catalog.

        """
        catalog = self._use_catalog(catalog)

        # Verify the dataset exists; raises HTTPError (e.g. 404) otherwise.
        url_dataset = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}"
        resp_dataset = self.session.get(url_dataset)
        resp_dataset.raise_for_status()

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}/lineage"
        resp = self.session.get(url)
        # Fail fast on an error response instead of raising a cryptic
        # JSON-decode error below.
        resp.raise_for_status()
        data = resp.json()
        relations_data = data["relations"]

        restricted_datasets = [
            dataset_metadata["identifier"]
            for dataset_metadata in data["datasets"]
            if dataset_metadata.get("status", None) == "Restricted"
        ]

        # identifier -> (relation type, catalog, title)
        data_dict = {}

        for entry in relations_data:
            source_dataset_id = entry["source"]["dataset"]
            source_catalog = entry["source"]["catalog"]
            destination_dataset_id = entry["destination"]["dataset"]
            destination_catalog = entry["destination"]["catalog"]

            if destination_dataset_id == dataset_id:
                # Fall back to the identifier when the related dataset has no
                # metadata entry (previously this raised NameError or reused a
                # stale title from an earlier iteration).
                source_dataset_title = source_dataset_id
                for dataset in data["datasets"]:
                    if dataset["identifier"] == source_dataset_id:
                        source_dataset_title = (
                            "Access Restricted"
                            if dataset.get("status", None) == "Restricted"
                            else dataset["title"]
                        )
                data_dict[source_dataset_id] = (
                    "source",
                    source_catalog,
                    source_dataset_title,
                )

            if source_dataset_id == dataset_id:
                destination_dataset_title = destination_dataset_id
                for dataset in data["datasets"]:
                    if dataset["identifier"] == destination_dataset_id:
                        destination_dataset_title = (
                            "Access Restricted"
                            if dataset.get("status", None) == "Restricted"
                            else dataset["title"]
                        )
                data_dict[destination_dataset_id] = (
                    "produced",
                    destination_catalog,
                    destination_dataset_title,
                )

        output_data = {
            "type": [v[0] for v in data_dict.values()],
            "dataset_identifier": list(data_dict.keys()),
            "title": [v[2] for v in data_dict.values()],
            "catalog": [v[1] for v in data_dict.values()],
        }

        lineage_df = pd.DataFrame(output_data)
        # Mask every column for datasets the caller is not entitled to see.
        lineage_df.loc[
            lineage_df["dataset_identifier"].isin(restricted_datasets),
            ["dataset_identifier", "catalog", "title"],
        ] = "Access Restricted"

        if max_results > -1:
            lineage_df = lineage_df[0:max_results]

        if output:
            # Kept for interface compatibility; display was intentionally a no-op.
            pass

        return lineage_df

    def create_dataset_lineage(
        self,
        base_dataset: str,
        source_dataset_catalog_mapping: pd.DataFrame | list[dict[str, str]],
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Register lineage between a base dataset and its source datasets.

        Args:
            base_dataset (str): Identifier of the dataset receiving the lineage.
            source_dataset_catalog_mapping (pd.DataFrame | list[dict[str, str]]): Either a DataFrame
                with 'dataset' and 'catalog' columns, or a list of dicts with those keys, naming
                the source datasets and their catalogs.
            catalog (str | None, optional): Catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): Return the response object when True. Defaults to False.

        Returns:
            requests.Response | None: The response object if requested, otherwise None.

        Raises:
            ValueError: If source_dataset_catalog_mapping is neither a DataFrame nor a list.
            HTTPError: If the request is unsuccessful.

        Examples:
            >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
            >>> fusion = Fusion()
            >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")

        """
        catalog = self._use_catalog(catalog)

        # Normalise the mapping into a list of {"dataset": ..., "catalog": ...} dicts.
        if isinstance(source_dataset_catalog_mapping, pd.DataFrame):
            mapping = [
                {"dataset": row["dataset"], "catalog": row["catalog"]}
                for _, row in source_dataset_catalog_mapping.iterrows()
            ]
        elif isinstance(source_dataset_catalog_mapping, list):
            mapping = source_dataset_catalog_mapping
        else:
            raise ValueError("source_dataset_catalog_mapping must be a pandas DataFrame or a list of dictionaries.")

        payload = {"source": mapping}
        lineage_url = f"{self.root_url}catalogs/{catalog}/datasets/{base_dataset}/lineage"

        response = self.session.post(lineage_url, json=payload)
        response.raise_for_status()

        return response if return_resp_obj else None

    def list_product_dataset_mapping(
        self,
        dataset: str | list[str] | None = None,
        product: str | list[str] | None = None,
        catalog: str | None = None,
    ) -> pd.DataFrame:
        """Return the product-to-dataset links in a catalog. A product is a grouping of datasets.

        Args:
            dataset (str | list[str] | None, optional): Dataset identifier(s) to filter by;
                a list matches any of the given strings. Defaults to None (no filter).
            product (str | list[str] | None, optional): Product identifier(s) to filter by;
                a list matches any of the given strings. Defaults to None (no filter).
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            pd.DataFrame: one row per dataset-to-product mapping.
        """
        catalog = self._use_catalog(catalog)
        url = f"{self.root_url}catalogs/{catalog}/productDatasets"
        mapping_df = pd.DataFrame(self._call_for_dataframe(url, self.session))

        def _narrow(frame: pd.DataFrame, column: str, needle: str | list[str]) -> pd.DataFrame:
            # Case-insensitive substring match; a list becomes an OR pattern.
            if isinstance(needle, list):
                pattern = "|".join(str(item) for item in needle)
                frame = frame[frame[column].str.contains(pattern, case=False)]
            if isinstance(needle, str):
                frame = frame[frame[column].str.contains(needle, case=False)]
            return frame

        if dataset:
            mapping_df = _narrow(mapping_df, "dataset", dataset)
        if product:
            mapping_df = _narrow(mapping_df, "product", product)
        return mapping_df

    def product(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        short_abstract: str = "",
        description: str = "",
        is_active: bool = True,
        is_restricted: bool | None = None,
        maintainer: str | list[str] | None = None,
        region: str | list[str] = "Global",
        publisher: str = "J.P. Morgan",
        sub_category: str | list[str] | None = None,
        tag: str | list[str] | None = None,
        delivery_channel: str | list[str] = "API",
        theme: str | None = None,
        release_date: str | None = None,
        language: str = "English",
        status: str = "Available",
        image: str = "",
        logo: str = "",
        dataset: str | list[str] | None = None,
        **kwargs: Any,
    ) -> Product:
        """Build a Product metadata object bound to this client.

        Args:
            identifier (str): Product identifier.
            title (str, optional): Product title. Defaults to identifier when empty.
            category (str | list[str] | None, optional): Category. Defaults to None.
            short_abstract (str, optional): Short description. Defaults to "".
            description (str, optional): Description. Defaults to identifier when empty.
            is_active (bool, optional): Active status flag. Defaults to True.
            is_restricted (bool | None, optional): Restricted-product flag. Defaults to None.
            maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
            region (str | list[str], optional): Product region. Defaults to "Global".
            publisher (str, optional): Publishing vendor. Defaults to "J.P. Morgan".
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tag (str | list[str] | None, optional): Search tags. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            theme (str | None, optional): Product theme. Defaults to None.
            release_date (str | None, optional): Release date. Defaults to None.
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            image (str, optional): Product image. Defaults to "".
            logo (str, optional): Product logo. Defaults to "".
            dataset (str | list[str] | None, optional): Associated datasets. Defaults to None.

        Returns:
            Product: Fusion Product class instance with `client` set to this Fusion object.

        Examples:
            >>> fusion = Fusion()
            >>> fusion.product(identifier="PRODUCT_1", title="Product")

        Note:
            See the product module for more information on functionalities of product objects.

        """
        # Collect the explicit fields; extra keyword arguments are forwarded
        # separately so a duplicate key still raises TypeError as before.
        fields = {
            "identifier": identifier,
            "title": title,
            "category": category,
            "short_abstract": short_abstract,
            "description": description,
            "is_active": is_active,
            "is_restricted": is_restricted,
            "maintainer": maintainer,
            "region": region,
            "publisher": publisher,
            "sub_category": sub_category,
            "tag": tag,
            "delivery_channel": delivery_channel,
            "theme": theme,
            "release_date": release_date,
            "language": language,
            "status": status,
            "image": image,
            "logo": logo,
            "dataset": dataset,
        }
        product_obj = Product(**fields, **kwargs)
        product_obj.client = self
        return product_obj

    def dataset(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Source",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Dataset:
        """Build a Dataset metadata object bound to this client.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. Defaults to identifier when empty.
            category (str | list[str] | None, optional): Category or list of categories. Defaults to None.
            description (str, optional): Dataset description. Defaults to identifier when empty.
            frequency (str, optional): Dataset frequency. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Internal-only flag. Defaults to False.
            is_third_party_data (bool, optional): Third-party-data flag. Defaults to True.
            is_restricted (bool | None, optional): Restricted-dataset flag. Defaults to None.
            is_raw_data (bool, optional): Raw-data flag. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Data vendor. Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Publishing vendor. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Associated product(s). Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Search tags. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Source".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Immutable-dataset flag. Defaults to None.
            is_mnpi (bool | None, optional): MNPI flag. Defaults to None.
            is_pci (bool | None, optional): PCI flag. Defaults to None.
            is_pii (bool | None, optional): PII flag. Defaults to None.
            is_client (bool | None, optional): Client-data flag. Defaults to None.
            is_public (bool | None, optional): Public-data flag. Defaults to None.
            is_internal (bool | None, optional): Internal-data flag. Defaults to None.
            is_confidential (bool | None, optional): Confidential-data flag. Defaults to None.
            is_highly_confidential (bool | None, optional): Highly-confidential flag. Defaults to None.
            is_active (bool | None, optional): Active flag. Defaults to None.
            owners (list[str] | None, optional): Dataset owners. Defaults to None.
            application_id (str | dict[str, str] | None, optional): Application ID. Defaults to None.

        Returns:
            Dataset: Fusion Dataset class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset(identifier="DATASET_1")

        Note:
            See the dataset module for more information on functionalities of dataset objects.

        """
        # Collect the explicit fields; extra keyword arguments are forwarded
        # separately so a duplicate key still raises TypeError as before.
        fields = {
            "identifier": identifier,
            "title": title,
            "category": category,
            "description": description,
            "frequency": frequency,
            "is_internal_only_dataset": is_internal_only_dataset,
            "is_third_party_data": is_third_party_data,
            "is_restricted": is_restricted,
            "is_raw_data": is_raw_data,
            "maintainer": maintainer,
            "source": source,
            "region": region,
            "publisher": publisher,
            "product": product,
            "sub_category": sub_category,
            "tags": tags,
            "created_date": created_date,
            "modified_date": modified_date,
            "delivery_channel": delivery_channel,
            "language": language,
            "status": status,
            "type_": type_,
            "container_type": container_type,
            "snowflake": snowflake,
            "complexity": complexity,
            "is_immutable": is_immutable,
            "is_mnpi": is_mnpi,
            "is_pci": is_pci,
            "is_pii": is_pii,
            "is_client": is_client,
            "is_public": is_public,
            "is_internal": is_internal,
            "is_confidential": is_confidential,
            "is_highly_confidential": is_highly_confidential,
            "is_active": is_active,
            "owners": owners,
            "application_id": application_id,
        }
        dataset_obj = Dataset(**fields, **kwargs)
        dataset_obj.client = self
        return dataset_obj

    def attribute(  # noqa: PLR0913
        self,
        identifier: str,
        index: int,
        data_type: str | Types = "String",
        title: str = "",
        description: str = "",
        is_dataset_key: bool = False,
        source: str | None = None,
        source_field_id: str | None = None,
        is_internal_dataset_key: bool | None = None,
        is_externally_visible: bool | None = True,
        unit: Any | None = None,
        multiplier: float = 1.0,
        is_propagation_eligible: bool | None = None,
        is_metric: bool | None = None,
        available_from: str | None = None,
        deprecated_from: str | None = None,
        term: str = "bizterm1",
        dataset: int | None = None,
        attribute_type: str | None = None,
        application_id: str | dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Attribute:
        """Build an Attribute metadata object bound to this client.

        Args:
            identifier (str): The unique identifier for the attribute.
            index (int): Attribute index.
            data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
            title (str, optional): Attribute title. Defaults to identifier when empty.
            description (str, optional): Attribute description. Defaults to identifier when empty.
            is_dataset_key (bool, optional): Primary-key flag. Defaults to False.
            source (str | None, optional): Data vendor. Defaults to None.
            source_field_id (str | None, optional): Original identifier of attribute, if renamed.
                Defaults to identifier when not provided.
            is_internal_dataset_key (bool | None, optional): Internal primary-key flag. Defaults to None.
            is_externally_visible (bool | None, optional): Externally-visible flag. Defaults to True.
            unit (Any | None, optional): Unit of attribute. Defaults to None.
            multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
            is_propagation_eligible (bool | None, optional): Propagation-eligibility flag. Defaults to None.
            is_metric (bool | None, optional): Metric flag. Defaults to None.
            available_from (str | None, optional): Date the attribute is available from. Defaults to None.
            deprecated_from (str | None, optional): Date the attribute is deprecated from. Defaults to None.
            term (str, optional): Term. Defaults to "bizterm1".
            dataset (int | None, optional): Dataset. Defaults to None.
            attribute_type (str | None, optional): Attribute type. Defaults to None.
            application_id (str | dict[str, str] | None, optional): Application ID. Defaults to None.

        Returns:
            Attribute: Fusion Attribute class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attr = fusion.attribute(identifier="attr1", index=0)

        Note:
            See the attributes module for more information on functionalities of attribute objects.

        """
        # Accept "String", "Types.String", or a Types member: keep only the
        # final dotted component and normalise case before the enum lookup.
        type_key = str(data_type).strip().rsplit(".", maxsplit=1)[-1].title()
        resolved_type = Types[type_key]
        fields = {
            "identifier": identifier,
            "index": index,
            "data_type": resolved_type,
            "title": title,
            "description": description,
            "is_dataset_key": is_dataset_key,
            "source": source,
            "source_field_id": source_field_id,
            "is_internal_dataset_key": is_internal_dataset_key,
            "is_externally_visible": is_externally_visible,
            "unit": unit,
            "multiplier": multiplier,
            "is_propagation_eligible": is_propagation_eligible,
            "is_metric": is_metric,
            "available_from": available_from,
            "deprecated_from": deprecated_from,
            "term": term,
            "dataset": dataset,
            "attribute_type": attribute_type,
            "application_id": application_id,
        }
        attribute_obj = Attribute(**fields, **kwargs)
        attribute_obj.client = self
        return attribute_obj

    def attributes(
        self,
        attributes: list[Attribute] | None = None,
    ) -> Attributes:
        """Build an Attributes collection bound to this client.

        Args:
            attributes (list[Attribute] | None, optional): Attribute objects to include.
                Defaults to None, which yields an empty collection.

        Returns:
            Attributes: Fusion Attributes class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attr1 = fusion.attribute("attr1", 0)
            >>> attr2 = fusion.attribute("attr2", 1)
            >>> attrs = fusion.attributes([attr1, attr2])

        Note:
            See the attributes module for more information on functionalities of attributes object.

        """
        # A falsy argument (None or an empty list) becomes a fresh empty list.
        collection = attributes if attributes else []
        attributes_obj = Attributes(attributes=collection)
        attributes_obj.client = self
        return attributes_obj

    def delete_datasetmembers(
        self,
        dataset: str,
        series_members: str | list[str],
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> list[requests.Response] | None:
        """Delete one or more series members from a dataset.

        Args:
            dataset (str): A dataset identifier
            series_members (str | list[str]): Series member identifier(s) to delete.
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
            return_resp_obj (bool, optional): If True then return the response objects. Defaults to False.

        Returns:
            list[requests.Response] | None: the response objects when requested, otherwise None.

        Examples:
            Delete one dataset member.

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

            Delete multiple dataset members.

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])

        """
        catalog = self._use_catalog(catalog)
        # Normalise a single identifier into a one-element list.
        members = [series_members] if isinstance(series_members, str) else series_members
        responses = []
        for member in members:
            member_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{member}"
            response = self.session.delete(member_url)
            requests_raise_for_status(response)
            responses.append(response)
        return responses if return_resp_obj else None

    def delete_all_datasetmembers(
        self,
        dataset: str,
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete every series member belonging to a dataset.

        Args:
            dataset (str): A dataset identifier
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: the response object when requested, otherwise None.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_all_datasetmembers(dataset="dataset1")

        """
        catalog = self._use_catalog(catalog)
        series_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
        response = self.session.delete(series_url)
        requests_raise_for_status(response)
        return response if return_resp_obj else None

    def list_registered_attributes(
        self,
        catalog: str | None = None,
        output: bool = False,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Return the attributes registered in a catalog.

        Args:
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            display_all_columns (bool, optional): If True keep every column returned by the API;
                otherwise only the key columns are kept.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each attribute
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/attributes"
        ds_attr_df = Fusion._call_for_dataframe(url, self.session).reset_index(drop=True)

        if not display_all_columns:
            # Keep only the key columns, ignoring any the API did not return.
            key_columns = [
                "identifier",
                "title",
                "dataType",
                "description",
                "publisher",
                "applicationId",
            ]
            ds_attr_df = ds_attr_df[ds_attr_df.columns.intersection(key_columns)]

        if output:
            # Kept for interface compatibility; display was intentionally a no-op.
            pass

        return ds_attr_df

    def report(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Report",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        report: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Report:
        """Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Source".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | None, optional): The application ID of the dataset. Defaults to None.
            report (dict[str, str] | None, optional): The report metadata. Specifies the tier of the report.
                Required for registered reports to the catalog.

        Returns:
            Dataset: Fusion Dataset class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.report(identifier="DATASET_1")

        Note:
            See the dataset module for more information on functionalities of report objects.

        """
        report_obj = Report(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            report=report,
            **kwargs,
        )
        report_obj.client = self
        return report_obj

    def input_dataflow(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Flow",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        producer_application_id: dict[str, str] | None = None,
        consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
        flow_details: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> InputDataFlow:
        """Build an InputDataFlow object bound to this client for metadata creation.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
                Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data.
                Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Flow".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
                Defaults to None.
            producer_application_id (dict[str, str] | None, optional): The producer application ID
                (upstream application producing the flow).
            consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer
                application ID (downstream application, consuming the flow).
            flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output
                flow. Defaults to {"flowDirection": "Input"}.

        Returns:
            InputDataFlow: Fusion InputDataFlow class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")

        Note:
            See the dataset module for more information on functionalities of input dataflow objects.

        """
        if flow_details is None:
            # Mark the direction so the catalog can tell input flows from output flows.
            flow_details = {"flowDirection": "Input"}
        flow = InputDataFlow(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            producer_application_id=producer_application_id,
            consumer_application_id=consumer_application_id,
            flow_details=flow_details,
            **kwargs,
        )
        # Attach this client so the returned object can issue API calls.
        flow.client = self
        return flow

    def output_dataflow(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Flow",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        producer_application_id: dict[str, str] | None = None,
        consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
        flow_details: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> OutputDataFlow:
        """Build an OutputDataFlow object bound to this client for metadata creation.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
                Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data.
                Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Flow".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
                Defaults to None.
            producer_application_id (dict[str, str] | None, optional): The producer application ID
                (upstream application producing the flow).
            consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer
                application ID (downstream application, consuming the flow).
            flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output
                flow. Defaults to {"flowDirection": "Output"}.

        Returns:
            OutputDataFlow: Fusion OutputDataFlow class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")

        Note:
            See the dataset module for more information on functionalities of output dataflow objects.

        """
        if flow_details is None:
            # Mark the direction so the catalog can tell output flows from input flows.
            flow_details = {"flowDirection": "Output"}
        flow = OutputDataFlow(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            producer_application_id=producer_application_id,
            consumer_application_id=consumer_application_id,
            flow_details=flow_details,
            **kwargs,
        )
        # Attach this client so the returned object can issue API calls.
        flow.client = self
        return flow

default_catalog: str property writable

Returns the default catalog.

Returns:

Type Description
str

None

__init__(credentials='config/client_credentials.json', root_url='https://fusion.jpmorgan.com/api/v1/', download_folder='downloads', log_level=logging.ERROR, fs=None, log_path='.')

Constructor to instantiate a new Fusion object.

Parameters:

Name Type Description Default
credentials Union[str, FusionCredentials]

A path to a credentials file or a fully populated

'config/client_credentials.json'
root_url str

The API root URL. Defaults to "https://fusion.jpmorgan.com/api/v1/".

'https://fusion.jpmorgan.com/api/v1/'
download_folder str

The folder path where downloaded data files are saved. Defaults to "downloads".

'downloads'
log_level int

Set the logging level. Defaults to logging.ERROR.

ERROR
fs filesystem

filesystem.

None
log_path str

The folder path where the log is stored.

'.'
Source code in py_src/fusion/fusion.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def __init__(
    self,
    credentials: str | FusionCredentials = "config/client_credentials.json",
    root_url: str = "https://fusion.jpmorgan.com/api/v1/",
    download_folder: str = "downloads",
    log_level: int = logging.ERROR,
    fs: fsspec.filesystem = None,
    log_path: str = ".",
) -> None:
    """Constructor to instantiate a new Fusion object.

    Args:
        credentials (Union[str, FusionCredentials]): A path to a credentials file or a fully populated
            FusionCredentials object. Defaults to 'config/client_credentials.json'.
        root_url (str, optional): The API root URL.
            Defaults to "https://fusion.jpmorgan.com/api/v1/".
        download_folder (str, optional): The folder path where downloaded data files
            are saved. Defaults to "downloads".
        log_level (int, optional): Set the logging level. Defaults to logging.ERROR.
        fs (fsspec.filesystem): filesystem. Defaults to the default filesystem when not provided.
        log_path (str, optional): The folder path where the log is stored. Defaults to ".".

    Raises:
        ValueError: If credentials is neither a path string nor a FusionCredentials object.
    """
    self._default_catalog = "common"

    self.root_url = root_url
    self.download_folder = download_folder
    Path(download_folder).mkdir(parents=True, exist_ok=True)

    # Clear handlers left by a previous Fusion instance so records are not duplicated.
    if logger.hasHandlers():
        logger.handlers.clear()
    file_handler = logging.FileHandler(filename=f"{log_path}/fusion_sdk.log")
    logging.addLevelName(VERBOSE_LVL, "VERBOSE")
    stdout_handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    stdout_handler.setFormatter(formatter)
    # Fix: previously only the stdout handler was formatted, so the log file
    # received bare messages without timestamps or levels.
    file_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    logger.addHandler(file_handler)
    logger.setLevel(log_level)

    if isinstance(credentials, FusionCredentials):
        self.credentials = credentials
    elif isinstance(credentials, str):
        self.credentials = FusionCredentials.from_file(Path(credentials))
    else:
        raise ValueError("credentials must be a path to a credentials file or FusionCredentials object")

    self.session = get_session(self.credentials, self.root_url)
    # Fall back to the default filesystem when no fs is supplied.
    self.fs = fs if fs else get_default_fs()
    self.events: pd.DataFrame | None = None

__repr__()

Object representation to list all available methods.

Source code in py_src/fusion/fusion.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def __repr__(self) -> str:
    """Object representation to list all available methods."""

    def _summary(obj):  # noqa: ANN001, ANN202
        # First docstring line; guard against a missing docstring so that
        # repr() never raises AttributeError on an undocumented member.
        return (obj.__doc__ or "").split("\n")[0]

    # Scan the class once instead of repeating the dir() comprehensions.
    method_names = [
        name
        for name in dir(Fusion)
        if callable(getattr(Fusion, name)) and not name.startswith("_")
    ]
    property_names = [p for p in dir(Fusion) if isinstance(getattr(Fusion, p), property)]
    names = method_names + property_names
    summaries = [_summary(getattr(Fusion, name)) for name in names]
    return "Fusion object \nAvailable methods:\n" + tabulate(
        pd.DataFrame([names, summaries]).T.set_index(0),  # type: ignore
        tablefmt="psql",
    )

attribute(identifier, index, data_type='String', title='', description='', is_dataset_key=False, source=None, source_field_id=None, is_internal_dataset_key=None, is_externally_visible=True, unit=None, multiplier=1.0, is_propagation_eligible=None, is_metric=None, available_from=None, deprecated_from=None, term='bizterm1', dataset=None, attribute_type=None, application_id=None, **kwargs)

Instantiate an Attribute object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

The unique identifier for the attribute.

required
index int

Attribute index.

required
data_type str | Types

Datatype of attribute. Defaults to "String".

'String'
title str

Attribute title. If not provided, defaults to identifier.

''
description str

Attribute description. If not provided, defaults to identifier.

''
is_dataset_key bool

Flag for primary keys. Defaults to False.

False
source str | None

Name of data vendor which provided the data. Defaults to None.

None
source_field_id str | None

Original identifier of attribute, if attribute has been renamed. If not provided, defaults to identifier.

None
is_internal_dataset_key bool | None

Flag for internal primary keys. Defaults to None.

None
is_externally_visible bool | None

Flag for externally visible attributes. Defaults to True.

True
unit Any | None

Unit of attribute. Defaults to None.

None
multiplier float

Multiplier for unit. Defaults to 1.0.

1.0
is_propagation_eligible bool | None

Flag for propagation eligibility. Defaults to None.

None
is_metric bool | None

Flag for attributes that are metrics. Defaults to None.

None
available_from str | None

Date from which the attribute is available. Defaults to None.

None
deprecated_from str | None

Date from which the attribute is deprecated. Defaults to None.

None
term str

Term. Defaults to "bizterm1".

'bizterm1'
dataset int | None

Dataset. Defaults to None.

None
attribute_type str | None

Attribute type. Defaults to None.

None

Returns:

Name Type Description
Attribute Attribute

Fusion Attribute class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attr = fusion.attribute(identifier="attr1", index=0)
Note

See the attributes module for more information on functionalities of attribute objects.

Source code in py_src/fusion/fusion.py
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
def attribute(  # noqa: PLR0913
    self,
    identifier: str,
    index: int,
    data_type: str | Types = "String",
    title: str = "",
    description: str = "",
    is_dataset_key: bool = False,
    source: str | None = None,
    source_field_id: str | None = None,
    is_internal_dataset_key: bool | None = None,
    is_externally_visible: bool | None = True,
    unit: Any | None = None,
    multiplier: float = 1.0,
    is_propagation_eligible: bool | None = None,
    is_metric: bool | None = None,
    available_from: str | None = None,
    deprecated_from: str | None = None,
    term: str = "bizterm1",
    dataset: int | None = None,
    attribute_type: str | None = None,
    application_id: str | dict[str, str] | None = None,
    **kwargs: Any,
) -> Attribute:
    """Build an Attribute object bound to this client for metadata creation.

    Args:
        identifier (str): The unique identifier for the attribute.
        index (int): Attribute index.
        data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
        title (str, optional): Attribute title. If not provided, defaults to identifier.
        description (str, optional): Attribute description. If not provided, defaults to identifier.
        is_dataset_key (bool, optional): Flag for primary keys. Defaults to False.
        source (str | None, optional): Name of data vendor which provided the data. Defaults to None.
        source_field_id (str | None, optional): Original identifier of attribute, if attribute has been
            renamed. If not provided, defaults to identifier.
        is_internal_dataset_key (bool | None, optional): Flag for internal primary keys. Defaults to None.
        is_externally_visible (bool | None, optional): Flag for externally visible attributes. Defaults to True.
        unit (Any | None, optional): Unit of attribute. Defaults to None.
        multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
        is_propagation_eligible (bool | None, optional): Flag for propagation eligibility. Defaults to None.
        is_metric (bool | None, optional): Flag for attributes that are metrics. Defaults to None.
        available_from (str | None, optional): Date from which the attribute is available. Defaults to None.
        deprecated_from (str | None, optional): Date from which the attribute is deprecated. Defaults to None.
        term (str, optional): Term. Defaults to "bizterm1".
        dataset (int | None, optional): Dataset. Defaults to None.
        attribute_type (str | None, optional): Attribute type. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the attribute.
            Defaults to None.

    Returns:
        Attribute: Fusion Attribute class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attr = fusion.attribute(identifier="attr1", index=0)

    Note:
        See the attributes module for more information on functionalities of attribute objects.

    """
    # Normalise the data type: accept either a Types member or a string such
    # as "string" / "Types.String" and resolve it to the Types enum.
    type_name = str(data_type).strip().rsplit(".", maxsplit=1)[-1]
    resolved_type = Types[type_name.title()]
    attr = Attribute(
        identifier=identifier,
        index=index,
        data_type=resolved_type,
        title=title,
        description=description,
        is_dataset_key=is_dataset_key,
        source=source,
        source_field_id=source_field_id,
        is_internal_dataset_key=is_internal_dataset_key,
        is_externally_visible=is_externally_visible,
        unit=unit,
        multiplier=multiplier,
        is_propagation_eligible=is_propagation_eligible,
        is_metric=is_metric,
        available_from=available_from,
        deprecated_from=deprecated_from,
        term=term,
        dataset=dataset,
        attribute_type=attribute_type,
        application_id=application_id,
        **kwargs,
    )
    # Attach this client so the returned object can issue API calls.
    attr.client = self
    return attr

attributes(attributes=None)

Instantiate an Attributes object with this client for metadata creation.

Parameters:

Name Type Description Default
attributes list[Attribute] | None

List of Attribute objects. Defaults to None.

None

Returns:

Name Type Description
Attributes Attributes

Fusion Attributes class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attr1 = fusion.attribute("attr1", 0)
>>> attr2 = fusion.attribute("attr2", 1)
>>> attrs = fusion.attributes([attr1, attr2])
Note

See the attributes module for more information on functionalities of attributes object.

Source code in py_src/fusion/fusion.py
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
def attributes(
    self,
    attributes: list[Attribute] | None = None,
) -> Attributes:
    """Build an Attributes collection bound to this client for metadata creation.

    Args:
        attributes (list[Attribute] | None, optional): List of Attribute objects. Defaults to None.

    Returns:
        Attributes: Fusion Attributes class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attr1 = fusion.attribute("attr1", 0)
        >>> attr2 = fusion.attribute("attr2", 1)
        >>> attrs = fusion.attributes([attr1, attr2])

    Note:
        See the attributes module for more information on functionalities of attributes object.

    """
    # Treat a missing/empty argument as an empty collection before wrapping.
    collection = Attributes(attributes=[] if not attributes else attributes)
    collection.client = self
    return collection

catalog_resources(catalog=None, output=False)

List the resources contained within the catalog, for example products and datasets.

Parameters:

Name Type Description Default
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource within the catalog

Source code in py_src/fusion/fusion.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def catalog_resources(self, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
    """List the resources contained within the catalog, for example products and datasets.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
       class:`pandas.DataFrame`: A dataframe with a row for each resource within the catalog
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}"
    cat_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(cat_df)  # noqa: T201

    return cat_df

create_dataset_lineage(base_dataset, source_dataset_catalog_mapping, catalog=None, return_resp_obj=False)

Upload lineage to a dataset.

Parameters:

Name Type Description Default
base_dataset str

A dataset identifier to which you want to add lineage.

required
source_dataset_catalog_mapping Union[DataFrame, list[dict[str]]]

Mapping for the dataset identifier(s) and catalog(s) from which to add lineage.

required
catalog Optional[str]

Catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Raises:

Type Description
ValueError

If source_dataset_catalog_mapping is not a pandas DataFrame or a list of dictionaries

HTTPError

If the request is unsuccessful.

Examples:

Creating lineage from a pandas DataFrame.

>>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
>>> df = pd.DataFrame(data)
>>> fusion = Fusion()
>>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=df, catalog="c")

Creating lineage from a list of dictionaries.

>>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
>>> fusion = Fusion()
>>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")
Source code in py_src/fusion/fusion.py
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
def create_dataset_lineage(
    self,
    base_dataset: str,
    source_dataset_catalog_mapping: pd.DataFrame | list[dict[str, str]],
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload lineage to a dataset.

    Args:
        base_dataset (str): A dataset identifier to which you want to add lineage.
        source_dataset_catalog_mapping (Union[pd.DataFrame, list[dict[str]]]): Mapping for the dataset
            identifier(s) and catalog(s) from which to add lineage.
        catalog (Optional[str], optional): Catalog identifier. Defaults to None.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Raises:
        ValueError: If source_dataset_catalog_mapping is not a pandas DataFrame or a list of dictionaries
        HTTPError: If the request is unsuccessful.

    Examples:
        Creating lineage from a pandas DataFrame.
        >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
        >>> df = pd.DataFrame(data)
        >>> fusion = Fusion()
        >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=df, catalog="c")

        Creating lineage from a list of dictionaries.
        >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
        >>> fusion = Fusion()
        >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")

    """
    catalog = self._use_catalog(catalog)

    # Normalise the mapping into a list of {"dataset": ..., "catalog": ...} records.
    if isinstance(source_dataset_catalog_mapping, pd.DataFrame):
        mapping_records = [
            {"dataset": record["dataset"], "catalog": record["catalog"]}
            for record in source_dataset_catalog_mapping.to_dict(orient="records")
        ]
    elif isinstance(source_dataset_catalog_mapping, list):
        mapping_records = source_dataset_catalog_mapping
    else:
        raise ValueError("source_dataset_catalog_mapping must be a pandas DataFrame or a list of dictionaries.")

    payload = {"source": mapping_records}
    url = f"{self.root_url}catalogs/{catalog}/datasets/{base_dataset}/lineage"

    resp = self.session.post(url, json=payload)
    resp.raise_for_status()

    return resp if return_resp_obj else None

dataset(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Source', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, **kwargs)

Instantiate a Dataset object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Source".

'Source'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None

Returns:

Name Type Description
Dataset Dataset

Fusion Dataset class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset(identifier="DATASET_1")
Note

See the dataset module for more information on functionalities of dataset objects.

Source code in py_src/fusion/fusion.py
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
def dataset(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Source",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    **kwargs: Any,
) -> Dataset:
    """Instantiate a Dataset object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Source".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Dataset constructor.

    Returns:
        Dataset: Fusion Dataset class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset(identifier="DATASET_1")

    Note:
        See the dataset module for more information on functionalities of dataset objects.

    """
    # Pure pass-through: every parameter is forwarded by name to the Dataset
    # constructor, then the new object is bound to this client.
    dataset_obj = Dataset(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        **kwargs,
    )
    dataset_obj.client = self
    return dataset_obj

dataset_resources(dataset, catalog=None, output=False)

List the resources available for a dataset, currently this will always be a datasetseries.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource

Source code in py_src/fusion/fusion.py
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
def dataset_resources(self, dataset: str, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
    """List the resources available for a dataset, currently this will always be a datasetseries.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each resource
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}"
    ds_res_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(ds_res_df)  # noqa: T201

    return ds_res_df

datasetmember_resources(dataset, series, catalog=None, output=False)

List the available resources for a datasetseries member.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series str

The datasetseries identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each datasetseries member resource. Currently, this will always be distributions.

Source code in py_src/fusion/fusion.py
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
def datasetmember_resources(
    self,
    dataset: str,
    series: str,
    catalog: str | None = None,
    output: bool = False,
) -> pd.DataFrame:
    """List the available resources for a datasetseries member.

    Args:
        dataset (str): A dataset identifier
        series (str): The datasetseries identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each datasetseries member resource.
            Currently, this will always be distributions.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}"
    ds_mem_res_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(ds_mem_res_df)  # noqa: T201

    return ds_mem_res_df

delete_all_datasetmembers(dataset, catalog=None, return_resp_obj=False)

Delete all dataset members within a dataset.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str | None

A catalog identifier. Defaults to 'common'.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: the response object if return_resp_obj is True, otherwise None.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_all_datasetmembers(dataset="dataset1")
Source code in py_src/fusion/fusion.py
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
def delete_all_datasetmembers(
    self,
    dataset: str,
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete every dataset series member belonging to a dataset.

    Args:
        dataset (str): A dataset identifier
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object when ``return_resp_obj`` is True, otherwise None.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_all_datasetmembers(dataset="dataset1")

    """
    catalog = self._use_catalog(catalog)
    series_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    response = self.session.delete(series_url)
    # Raise for any non-success HTTP status before deciding what to return.
    requests_raise_for_status(response)
    if return_resp_obj:
        return response
    return None

delete_datasetmembers(dataset, series_members, catalog=None, return_resp_obj=False)

Delete dataset members.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series_members str | list[str]

A string or list of strings that are dataset series member

required
catalog str | None

A catalog identifier. Defaults to 'common'.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
list[Response] | None

list[requests.Response]: a list of response objects.

Examples:

Delete one dataset member.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

Delete multiple dataset members.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])
Source code in py_src/fusion/fusion.py
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
def delete_datasetmembers(
    self,
    dataset: str,
    series_members: str | list[str],
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> list[requests.Response] | None:
    """Delete one or more dataset series members.

    Args:
        dataset (str): A dataset identifier
        series_members (str | list[str]): A string or list of strings that are dataset series member
            identifiers to delete.
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        list[requests.Response] | None: a list of response objects when ``return_resp_obj`` is True.

    Examples:
        Delete one dataset member.

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

        Delete multiple dataset members.

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])

    """
    catalog = self._use_catalog(catalog)
    # Accept a single identifier as shorthand for a one-element list.
    members = [series_members] if isinstance(series_members, str) else series_members
    base_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    collected: list[requests.Response] = []
    for member in members:
        response = self.session.delete(f"{base_url}/{member}")
        # Fail fast on the first unsuccessful deletion.
        requests_raise_for_status(response)
        collected.append(response)
    return collected if return_resp_obj else None

download(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, force_download=False, download_folder=None, return_paths=False, partitioning=None, preserve_original_name=False)

Downloads the requested distributions of a dataset to disk.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset. If more than one series member exists on the latest date, the series member identifiers will be sorted alphabetically and the last one will be downloaded.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
return_paths bool

Return paths and success statuses of the downloaded files.

False
partitioning str

Partitioning specification.

None
preserve_original_name bool

Preserve the original name of the file. Defaults to False.

False
Source code in py_src/fusion/fusion.py
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
def download(  # noqa: PLR0912, PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    force_download: bool = False,
    download_folder: str | None = None,
    return_paths: bool = False,
    partitioning: str | None = None,
    preserve_original_name: bool = False,
) -> list[tuple[bool, str, str | None]] | None:
    """Downloads the requested distributions of a dataset to disk.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): Either a single date or a range identified by a start or end date,
            or both separated with a ":". Defaults to 'latest' which will return the most recent
            instance of the dataset. If more than one series member exists on the latest date, the
            series member identifiers will be sorted alphabetically and the last one will be downloaded.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to download in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data download Defaults to True.
        force_download (bool, optional): If True then will always download a file even
            if it is already on disk. Defaults to False.
        download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
            Defaults to download_folder as set in __init__
        return_paths (bool, optional): Return paths and success statuses of the downloaded files.
        partitioning (str, optional): Partitioning specification.
        preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.

    Returns:
        list[tuple[bool, str, str | None]] | None: When ``return_paths`` is True, one tuple per
            distribution with a success flag, the downloaded item, and an optional third element
            (presumably an error message when the download failed -- confirm against the
            filesystem ``download`` implementation); otherwise None.

    """
    catalog = self._use_catalog(catalog)

    # Accepts YYYYMMDD, or a range "start:end" where either side may be omitted.
    valid_date_range = re.compile(r"^(\d{4}\d{2}\d{2})$|^((\d{4}\d{2}\d{2})?([:])(\d{4}\d{2}\d{2})?)$")

    if valid_date_range.match(dt_str) or dt_str == "latest":
        required_series = self._resolve_distro_tuples(dataset, dt_str, dataset_format, catalog)
    else:
        # sample data is limited to csv
        if dt_str == "sample":
            dataset_format = self.list_distributions(dataset, dt_str, catalog)["identifier"].iloc[0]
        # Each required-series tuple is (catalog, dataset, series member, format).
        required_series = [(catalog, dataset, dt_str, dataset_format)]

    if dataset_format not in RECOGNIZED_FORMATS + ["raw"]:
        raise ValueError(f"Dataset format {dataset_format} is not supported")

    if not download_folder:
        download_folder = self.download_folder

    # One target folder per distribution; all identical unless hive partitioning applies.
    download_folders = [download_folder] * len(required_series)

    if partitioning == "hive":
        # Nest the output as <folder>/<catalog>/<dataset>/<member>.
        members = [series[2].strip("/") for series in required_series]
        download_folders = [
            f"{download_folders[i]}/{series[0]}/{series[1]}/{members[i]}"
            for i, series in enumerate(required_series)
        ]

    for d in download_folders:
        if not self.fs.exists(d):
            self.fs.mkdir(d, create_parents=True)

    n_par = cpu_count(n_par)
    # Build one keyword-argument spec per distribution for the filesystem download call.
    download_spec = [
        {
            "lfs": self.fs,
            "rpath": distribution_to_url(
                self.root_url,
                series[1],
                series[2],
                series[3],
                series[0],
                is_download=True,
            ),
            "lpath": distribution_to_filename(
                download_folders[i],
                series[1],
                series[2],
                series[3],
                series[0],
                partitioning=partitioning,
            ),
            "overwrite": force_download,
            "preserve_original_name": preserve_original_name,
        }
        for i, series in enumerate(required_series)
    ]

    logger.log(
        VERBOSE_LVL,
        f"Beginning {len(download_spec)} downloads in batches of {n_par}",
    )
    if show_progress:
        with joblib_progress("Downloading", total=len(download_spec)):
            res = Parallel(n_jobs=n_par)(
                delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
            )
    else:
        res = Parallel(n_jobs=n_par)(
            delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
        )

    # Surface each failed download as a warning rather than raising.
    if (len(res) > 0) and (not all(r[0] for r in res)):
        for r in res:
            if not r[0]:
                warnings.warn(f"The download of {r[1]} was not successful", stacklevel=2)
    return res if return_paths else None

from_bytes(data, dataset, series_member='latest', catalog=None, distribution='parquet', show_progress=True, return_paths=False, chunk_size=5 * 2 ** 20, from_date=None, to_date=None, file_name=None, **kwargs)

Uploads data from an object in memory.

Parameters:

Name Type Description Default
data str

an object in memory to upload

required
dataset str

Dataset name to which the bytes will be uploaded.

required
series_member str

A single date or label. Defaults to 'latest' which will return the most recent.

'latest'
catalog str

A catalog identifier. Defaults to 'common'.

None
distribution str

A distribution type, e.g. a file format or raw

'parquet'
show_progress bool

Display a progress bar during data download Defaults to True.

True
return_paths bool

Return paths and success statuses of the downloaded files.

False
chunk_size int

Maximum chunk size.

5 * 2 ** 20
from_date str

start of the data date range contained in the distribution, defaults to upload date

None
to_date str

end of the data date range contained in the distribution, defaults to upload date.

None
file_name str

file name to be used for the uploaded file. Defaults to Fusion standard naming.

None

Returns:

Type Description
list[tuple[bool, str, str | None]] | None

Optional[list[tuple[bool, str, Optional[str]]]: a list of tuples, one for each distribution

Source code in py_src/fusion/fusion.py
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
def from_bytes(  # noqa: PLR0913
    self,
    data: BytesIO,
    dataset: str,
    series_member: str = "latest",
    catalog: str | None = None,
    distribution: str = "parquet",
    show_progress: bool = True,
    return_paths: bool = False,
    chunk_size: int = 5 * 2**20,
    from_date: str | None = None,
    to_date: str | None = None,
    file_name: str | None = None,
    **kwargs: Any,  # noqa: ARG002
) -> list[tuple[bool, str, str | None]] | None:
    """Uploads data from an object in memory.

    Args:
        data (BytesIO): an in-memory bytes buffer to upload
        dataset (str): Dataset name to which the bytes will be uploaded.
        series_member (str, optional): A single date or label. Defaults to 'latest' which will return
            the most recent.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        distribution (str, optional): A distribution type, e.g. a file format or raw
        show_progress (bool, optional): Display a progress bar during data download. Defaults to True.
        return_paths (bool, optional): Return paths and success statuses of the downloaded files.
        chunk_size (int, optional): Maximum chunk size.
        from_date (str, optional): start of the data date range contained in the distribution,
            defaults to upload date
        to_date (str, optional): end of the data date range contained in the distribution, defaults to upload date.
        file_name (str, optional): file name to be used for the uploaded file. Defaults to Fusion standard naming.

    Returns:
        Optional[list[tuple[bool, str, Optional[str]]]]: a list of tuples, one for each distribution

    """
    catalog = self._use_catalog(catalog)

    fs_fusion = self.get_fusion_filesystem()
    if distribution not in RECOGNIZED_FORMATS + ["raw"]:
        raise ValueError(f"Dataset format {distribution} is not supported")

    # The dataset's catalog metadata flags whether it stores raw (non-native) files,
    # which changes how the upload URL is constructed.
    is_raw = js.loads(fs_fusion.cat(f"{catalog}/datasets/{dataset}"))["isRawData"]
    local_url_eqiv = path_to_url(f"{dataset}__{catalog}__{series_member}.{distribution}", is_raw)

    # Single-row mapping of (local path, target URL, file name) consumed by upload_files;
    # path is empty because the payload comes from memory, not disk.
    data_map_df = pd.DataFrame(["", local_url_eqiv, file_name]).T
    data_map_df.columns = ["path", "url", "file_name"]  # type: ignore

    res = upload_files(
        fs_fusion,
        data,
        data_map_df,
        parallel=False,
        n_par=1,
        multipart=False,
        chunk_size=chunk_size,
        show_progress=show_progress,
        from_date=from_date,
        to_date=to_date,
    )

    # Surface any per-distribution failures as warnings rather than raising.
    if not all(r[0] for r in res):
        failed_res = [r for r in res if not r[0]]
        msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)

    return res if return_paths else None

get_events(last_event_id=None, catalog=None, in_background=True, url='https://fusion.jpmorgan.com/api/v1/')

Run server sent event listener and print out the new events. Keyboard terminate to stop.

Parameters:

Name Type Description Default
last_event_id str

id of the last event.

None
catalog str

catalog.

None
in_background bool

execute event monitoring in the background (default = True).

True
url str

subscription url.

'https://fusion.jpmorgan.com/api/v1/'
Source code in py_src/fusion/fusion.py
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
def get_events(
    self,
    last_event_id: str | None = None,
    catalog: str | None = None,
    in_background: bool = True,
    url: str = "https://fusion.jpmorgan.com/api/v1/",
) -> None | pd.DataFrame:
    """Run server sent event listener and print out the new events. Keyboard terminate to stop.

    Args:
        last_event_id (str): id of the last event.
        catalog (str): catalog.
        in_background (bool): execute event monitoring in the background (default = True).
        url (str): subscription url.

    Returns:
        Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
            If in_background is set to False then pandas DataFrame is output upon keyboard termination.
    """
    catalog = self._use_catalog(catalog)

    # Background mode: events are collected elsewhere; just expose the accumulator.
    if in_background:
        return self.events

    # Foreground mode: block on the SSE stream until interrupted.
    from sseclient import SSEClient

    _ = self.list_catalogs()  # refresh token
    messages = SSEClient(
        session=self.session,
        url=f"{url}catalogs/{catalog}/notifications/subscribe",
        last_id=last_event_id,
        headers={
            "authorization": f"bearer {self.credentials.bearer_token}",
        },
    )
    collected = []
    interrupted = False
    try:
        for msg in messages:
            event = js.loads(msg.data)
            # Heartbeats are keep-alives, not user events.
            if event["type"] != "HeartBeatNotification":
                collected.append(event)
    except KeyboardInterrupt:
        # Expected stop signal: fall through and return what was gathered.
        interrupted = True
    # Any other exception propagates naturally (previously re-raised redundantly).
    return pd.DataFrame(collected) if interrupted or collected else None

get_fusion_filesystem()

Creates Fusion Filesystem.

Returns: Fusion Filesystem

Source code in py_src/fusion/fusion.py
215
216
217
218
219
220
221
def get_fusion_filesystem(self) -> FusionHTTPFileSystem:
    """Build a Fusion filesystem bound to this client's root URL and credentials.

    Returns: Fusion Filesystem

    """
    client_kwargs = {
        "root_url": self.root_url,
        "credentials": self.credentials,
    }
    return FusionHTTPFileSystem(client_kwargs=client_kwargs)

input_dataflow(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Flow', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, producer_application_id=None, consumer_application_id=None, flow_details=None, **kwargs)

Instantiate an Input Dataflow object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Flow".

'Flow'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
producer_application_id dict[str, str] | None

The producer application ID (upstream application producing the flow).

None
consumer_application_id list[dict[str, str]] | dict[str, str] | None

The consumer application ID (downstream application, consuming the flow).

None
flow_details dict[str, str] | None

The flow details. Specifies input versus output flow. Defaults to {"flowDirection": "Input"}.

None

Returns:

Name Type Description
Dataset InputDataFlow

Fusion InputDataFlow class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")
Note

See the dataset module for more information on functionalities of input dataflow objects.

Source code in py_src/fusion/fusion.py
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
def input_dataflow(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Flow",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    producer_application_id: dict[str, str] | None = None,
    consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
    flow_details: dict[str, str] | None = None,
    **kwargs: Any,
) -> InputDataFlow:
    """Instantiate an Input Dataflow object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Flow".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        producer_application_id (dict[str, str] | None, optional): The producer application ID (upstream application
            producing the flow). Defaults to None.
        consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer application
            ID (downstream application, consuming the flow). Defaults to None.
        flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output flow.
            Defaults to {"flowDirection": "Input"}.
        **kwargs (Any): Additional keyword arguments passed through to the `InputDataFlow` constructor.

    Returns:
        Dataset: Fusion InputDataFlow class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")

    Note:
        See the dataset module for more information on functionalities of input dataflow objects.

    """
    # Default to an input-direction flow when the caller does not specify one.
    flow_details = {"flowDirection": "Input"} if flow_details is None else flow_details
    dataflow_obj = InputDataFlow(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        producer_application_id=producer_application_id,
        consumer_application_id=consumer_application_id,
        flow_details=flow_details,
        **kwargs,
    )
    # Bind this client so the dataflow object can make API calls.
    dataflow_obj.client = self
    return dataflow_obj

list_catalogs(output=False)

Lists the catalogs available to the API account.

Parameters:

Name Type Description Default
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each catalog

Source code in py_src/fusion/fusion.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def list_catalogs(self, output: bool = False) -> pd.DataFrame:
    """Lists the catalogs available to the API account.

    Args:
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each catalog
    """
    url = f"{self.root_url}catalogs/"
    cat_df = Fusion._call_for_dataframe(url, self.session)

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(cat_df)

    return cat_df

list_dataset_attributes(dataset, catalog=None, output=False, display_all_columns=False)

Returns the list of attributes that are in the dataset.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each attribute

Source code in py_src/fusion/fusion.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
def list_dataset_attributes(
    self,
    dataset: str,
    catalog: str | None = None,
    output: bool = False,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Returns the list of attributes that are in the dataset.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each attribute
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
    ds_attr_df = Fusion._call_for_dataframe(url, self.session)

    # Present attributes in their declared dataset order when the API provides one.
    if "index" in ds_attr_df.columns:
        ds_attr_df = ds_attr_df.sort_values(by="index").reset_index(drop=True)

    if not display_all_columns:
        # Keep only the well-known summary columns; intersection tolerates missing ones.
        ds_attr_df = ds_attr_df[
            ds_attr_df.columns.intersection(
                [
                    "identifier",
                    "title",
                    "dataType",
                    "isDatasetKey",
                    "description",
                    "source",
                ]
            )
        ]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(ds_attr_df)

    return ds_attr_df

list_dataset_lineage(dataset_id, catalog=None, output=False, max_results=-1)

List the upstream and downstream lineage of the dataset.

Parameters:

Name Type Description Default
dataset_id str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource

Raises:

Type Description
HTTPError

If the dataset is not found in the catalog.

Source code in py_src/fusion/fusion.py
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
def list_dataset_lineage(
    self,
    dataset_id: str,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
) -> pd.DataFrame:
    """List the upstream and downstream lineage of the dataset.

    Args:
        dataset_id (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each resource

    Raises:
        HTTPError: If the dataset is not found in the catalog.

    """
    catalog = self._use_catalog(catalog)

    # Verify the dataset exists before asking for its lineage (raises HTTPError if not).
    url_dataset = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}"
    resp_dataset = self.session.get(url_dataset)
    resp_dataset.raise_for_status()

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}/lineage"
    resp = self.session.get(url)
    resp.raise_for_status()  # fail loudly rather than parsing an error payload as JSON
    data = resp.json()
    relations_data = data["relations"]

    restricted_datasets = [
        dataset_metadata["identifier"]
        for dataset_metadata in data["datasets"]
        if dataset_metadata.get("status", None) == "Restricted"
    ]

    def _title_for(identifier: str) -> str | None:
        """Resolve a dataset identifier to its title, masking restricted datasets.

        Returns None when the identifier is absent from the lineage metadata; the
        previous implementation could reuse a stale title from an earlier loop
        iteration or raise NameError in that case.
        """
        for ds_meta in data["datasets"]:
            if ds_meta["identifier"] == identifier:
                if ds_meta.get("status", None) == "Restricted":
                    return "Access Restricted"
                return ds_meta["title"]
        return None

    # Map each related dataset id to (relationship type, catalog, title).
    data_dict: dict[str, tuple[str, str, str | None]] = {}
    for entry in relations_data:
        source_dataset_id = entry["source"]["dataset"]
        source_catalog = entry["source"]["catalog"]
        destination_dataset_id = entry["destination"]["dataset"]
        destination_catalog = entry["destination"]["catalog"]

        if destination_dataset_id == dataset_id:
            # The relation flows INTO the requested dataset: record its source.
            data_dict[source_dataset_id] = (
                "source",
                source_catalog,
                _title_for(source_dataset_id),
            )

        if source_dataset_id == dataset_id:
            # The relation flows OUT of the requested dataset: record its product.
            data_dict[destination_dataset_id] = (
                "produced",
                destination_catalog,
                _title_for(destination_dataset_id),
            )

    output_data = {
        "type": [v[0] for v in data_dict.values()],
        "dataset_identifier": list(data_dict.keys()),
        "title": [v[2] for v in data_dict.values()],
        "catalog": [v[1] for v in data_dict.values()],
    }

    lineage_df = pd.DataFrame(output_data)
    # Fully mask rows that reference restricted datasets.
    lineage_df.loc[
        lineage_df["dataset_identifier"].isin(restricted_datasets),
        ["dataset_identifier", "catalog", "title"],
    ] = "Access Restricted"

    if max_results > -1:
        lineage_df = lineage_df[0:max_results]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(lineage_df)

    return lineage_df

list_datasetmembers(dataset, catalog=None, output=False, max_results=-1)

List the available members in the dataset series.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each dataset member.

Source code in py_src/fusion/fusion.py
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
def list_datasetmembers(
    self,
    dataset: str,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
) -> pd.DataFrame:
    """List the available members in the dataset series.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.

    Returns:
        class:`pandas.DataFrame`: a dataframe with a row for each dataset member.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    ds_members_df = Fusion._call_for_dataframe(url, self.session)

    if max_results > -1:
        ds_members_df = ds_members_df[0:max_results]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(ds_members_df)

    return ds_members_df

list_datasets(contains=None, id_contains=False, product=None, catalog=None, output=False, max_results=-1, display_all_columns=False, status=None, dataset_type=None)

Get the datasets contained in a catalog.

Parameters:

Name Type Description Default
contains Union[str, list]

A string or a list of strings that are dataset identifiers to filter the datasets list. If a list is provided then it will return datasets whose identifier matches any of the strings. Defaults to None.

None
id_contains bool

Filter datasets only where the string(s) are contained in the identifier, ignoring description.

False
product Union[str, list]

A string or a list of strings that are product identifiers to filter the datasets list. Defaults to None.

None
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False
status str

filter the datasets by status, default is to show all results.

None
dataset_type str

filter the datasets by type, default is to show all results.

None

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each dataset.

Source code in py_src/fusion/fusion.py
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
def list_datasets(  # noqa: PLR0913
    self,
    contains: str | list[str] | None = None,
    id_contains: bool = False,
    product: str | list[str] | None = None,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
    display_all_columns: bool = False,
    status: str | None = None,
    dataset_type: str | None = None,
) -> pd.DataFrame:
    """Get the datasets contained in a catalog.

    Args:
        contains (Union[str, list], optional): A string or a list of strings that are dataset
            identifiers to filter the datasets list. If a list is provided then it will return
            datasets whose identifier matches any of the strings. Defaults to None.
        id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
            ignoring description.
        product (Union[str, list], optional): A string or a list of strings that are product
            identifiers to filter the datasets list. Defaults to None.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed
        status (str, optional): filter the datasets by status, default is to show all results.
        dataset_type (str, optional): filter the datasets by type, default is to show all results.

    Returns:
        :class:`pandas.DataFrame`: a dataframe with a row for each dataset.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets"
    ds_df = Fusion._call_for_dataframe(url, self.session)

    if contains:
        if isinstance(contains, list):
            # Multiple terms are joined into one regex alternation; each term is
            # treated as a regular expression by str.contains below.
            contains = "|".join(f"{s}" for s in contains)
        if id_contains:
            ds_df = ds_df[ds_df["identifier"].str.contains(contains, case=False)]
        else:
            ds_df = ds_df[
                ds_df["identifier"].str.contains(contains, case=False)
                | ds_df["description"].str.contains(contains, case=False)
            ]

    if product:
        url = f"{self.root_url}catalogs/{catalog}/productDatasets"
        prd_df = Fusion._call_for_dataframe(url, self.session)
        prd_df = (
            prd_df[prd_df["product"] == product]
            if isinstance(product, str)
            else prd_df[prd_df["product"].isin(product)]
        )
        # Case-insensitive match of dataset identifiers against the product mapping.
        ds_df = ds_df[ds_df["identifier"].str.lower().isin(prd_df["dataset"].str.lower())].reset_index(drop=True)

    # Copy before mutating: after the boolean filters above, ds_df may be a view of
    # the API result and in-place column assignment would raise SettingWithCopyWarning.
    ds_df = ds_df.copy()
    ds_df["category"] = ds_df.category.str.join(", ")
    ds_df["region"] = ds_df.region.str.join(", ")
    if not display_all_columns:
        cols = [
            "identifier",
            "title",
            "containerType",
            "region",
            "category",
            "coverageStartDate",
            "coverageEndDate",
            "description",
            "status",
            "type",
        ]
        # Keep only the key columns that are actually present in the API response.
        cols = [c for c in cols if c in ds_df.columns]
        ds_df = ds_df[cols]

    if status is not None:
        ds_df = ds_df[ds_df["status"] == status]

    if dataset_type is not None:
        ds_df = ds_df[ds_df["type"] == dataset_type]

    # BUG FIX: truncate AFTER the status/type filters so max_results limits the
    # rows actually returned; previously the limit was applied to the pre-filter
    # frame and the subsequent filters could shrink the result below the limit
    # or drop matching rows entirely.
    if max_results > -1:
        ds_df = ds_df[0:max_results]

    if output:
        pass

    return ds_df

list_distributions(dataset, series, catalog=None, output=False)

List the available distributions (downloadable instances of the dataset with a format type).

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series str

The datasetseries identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each distribution.

Source code in py_src/fusion/fusion.py
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
def list_distributions(
    self,
    dataset: str,
    series: str,
    catalog: str | None = None,
    output: bool = False,
) -> pd.DataFrame:
    """List the available distributions (downloadable instances of the dataset with a format type).

    Args:
        dataset (str): A dataset identifier
        series (str): The datasetseries identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        :class:`pandas.DataFrame`: A dataframe with a row for each distribution.
    """
    # Resolve the default catalog if none was supplied.
    catalog = self._use_catalog(catalog)

    distributions_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}/distributions"
    result_df = Fusion._call_for_dataframe(distributions_url, self.session)

    if output:
        pass

    return result_df

list_product_dataset_mapping(dataset=None, product=None, catalog=None)

Get the product-to-dataset linking contained in a catalog. A product is a grouping of datasets.

Parameters:

Name Type Description Default
dataset str | list[str] | None

A string or list of strings that are dataset

None
product str | list[str] | None

A string or list of strings that are product

None
catalog str | None

A catalog identifier. Defaults to 'common'.

None

Returns:

Type Description
DataFrame

pd.DataFrame: a dataframe with a row for each dataset to product mapping.

Source code in py_src/fusion/fusion.py
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
def list_product_dataset_mapping(
    self,
    dataset: str | list[str] | None = None,
    product: str | list[str] | None = None,
    catalog: str | None = None,
) -> pd.DataFrame:
    """Get the product to dataset linking contained in a catalog. A product is a grouping of datasets.

    Args:
        dataset (str | list[str] | None, optional): A string or list of strings that are dataset
            identifiers to filter the output. If a list is provided then it will return
            datasets whose identifier matches any of the strings. Defaults to None.
        product (str | list[str] | None, optional): A string or list of strings that are product
            identifiers to filter the output. If a list is provided then it will return
            products whose identifier matches any of the strings. Defaults to None.
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.

    Returns:
        pd.DataFrame: a dataframe with a row for each dataset to product mapping.
    """
    catalog = self._use_catalog(catalog)
    url = f"{self.root_url}catalogs/{catalog}/productDatasets"
    # _call_for_dataframe already returns a DataFrame (as in the sibling list_* methods);
    # the previous pd.DataFrame(...) re-wrap was redundant.
    mapping_df = Fusion._call_for_dataframe(url, self.session)

    if dataset:
        # A list of identifiers becomes one regex alternation; a plain string is used as-is.
        pattern = "|".join(f"{s}" for s in dataset) if isinstance(dataset, list) else dataset
        mapping_df = mapping_df[mapping_df["dataset"].str.contains(pattern, case=False)]
    if product:
        pattern = "|".join(f"{s}" for s in product) if isinstance(product, list) else product
        mapping_df = mapping_df[mapping_df["product"].str.contains(pattern, case=False)]
    return mapping_df

list_products(contains=None, id_contains=False, catalog=None, output=False, max_results=-1, display_all_columns=False)

Get the products contained in a catalog. A product is a grouping of datasets.

Parameters:

Name Type Description Default
contains Union[str, list]

A string or a list of strings that are product identifiers to filter the products list. If a list is provided then it will return products whose identifier matches any of the strings. Defaults to None.

None
id_contains bool

Filter datasets only where the string(s) are contained in the identifier, ignoring description.

False
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each product

Source code in py_src/fusion/fusion.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def list_products(
    self,
    contains: str | list[str] | None = None,
    id_contains: bool = False,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Get the products contained in a catalog. A product is a grouping of datasets.

    Args:
        contains (Union[str, list], optional): A string or a list of strings that are product
            identifiers to filter the products list. If a list is provided then it will return
            products whose identifier matches any of the strings. Defaults to None.
        id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
            ignoring description.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        :class:`pandas.DataFrame`: a dataframe with a row for each product
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/products"
    full_prod_df: pd.DataFrame = Fusion._call_for_dataframe(url, self.session)

    if contains:
        if isinstance(contains, list):
            # Join search terms into a single regex alternation for str.contains.
            contains = "|".join(f"{s}" for s in contains)
        if id_contains:
            filtered_df = full_prod_df[full_prod_df["identifier"].str.contains(contains, case=False)]
        else:
            filtered_df = full_prod_df[
                full_prod_df["identifier"].str.contains(contains, case=False)
                | full_prod_df["description"].str.contains(contains, case=False)
            ]
    else:
        filtered_df = full_prod_df

    # BUG FIX: copy before mutating. When the contains filter applied, filtered_df
    # was a slice of full_prod_df and the column assignments below triggered
    # SettingWithCopyWarning (and were not guaranteed to stick).
    filtered_df = filtered_df.copy()
    filtered_df["category"] = filtered_df.category.str.join(", ")
    filtered_df["region"] = filtered_df.region.str.join(", ")
    if not display_all_columns:
        filtered_df = filtered_df[
            filtered_df.columns.intersection(
                [
                    "identifier",
                    "title",
                    "region",
                    "category",
                    "status",
                    "description",
                ]
            )
        ]

    if max_results > -1:
        filtered_df = filtered_df[0:max_results]

    if output:
        pass

    return filtered_df

list_registered_attributes(catalog=None, output=False, display_all_columns=False)

Returns the list of attributes in a catalog.

Parameters:

Name Type Description Default
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each attribute

Source code in py_src/fusion/fusion.py
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
def list_registered_attributes(
    self,
    catalog: str | None = None,
    output: bool = False,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Returns the list of attributes in a catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        :class:`pandas.DataFrame`: A dataframe with a row for each attribute
    """
    catalog = self._use_catalog(catalog)

    attributes_url = f"{self.root_url}catalogs/{catalog}/attributes"
    attributes_df = Fusion._call_for_dataframe(attributes_url, self.session).reset_index(drop=True)

    if not display_all_columns:
        # Restrict the output to the key columns (only those present in the response).
        key_columns = [
            "identifier",
            "title",
            "dataType",
            "description",
            "publisher",
            "applicationId",
        ]
        attributes_df = attributes_df[attributes_df.columns.intersection(key_columns)]

    if output:
        pass

    return attributes_df

listen_to_events(last_event_id=None, catalog=None, url='https://fusion.jpmorgan.com/api/v1/')

Run a server-sent event listener in the background. Retrieve results by running get_events.

Parameters:

Name Type Description Default
last_event_id str

Last event ID (exclusive).

None
catalog str

catalog.

None
url str

subscription url.

'https://fusion.jpmorgan.com/api/v1/'
Source code in py_src/fusion/fusion.py
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
def listen_to_events(
    self,
    last_event_id: str | None = None,
    catalog: str | None = None,
    url: str = "https://fusion.jpmorgan.com/api/v1/",
) -> None | pd.DataFrame:
    """Run server sent event listener in the background. Retrieve results by running get_events.

    Args:
        last_event_id (str): Last event ID (exclusive).
        catalog (str): catalog.
        url (str): subscription url.
    Returns:
        Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
            If in_background is set to False then pandas DataFrame is output upon keyboard termination.
    """

    catalog = self._use_catalog(catalog)
    import asyncio
    import json
    import threading

    from aiohttp_sse_client import client as sse_client

    from .utils import get_client

    kwargs: dict[str, Any] = {}
    if last_event_id:
        kwargs = {"headers": {"Last-Event-ID": last_event_id}}

    async def async_events() -> None:
        """Consume the SSE stream and accumulate events into self.events.

        Returns:
            None
        """
        timeout = 1e100  # effectively no timeout; the listener runs for the life of the daemon thread
        session = await get_client(self.credentials, timeout=timeout)
        async with sse_client.EventSource(
            f"{url}catalogs/{catalog}/notifications/subscribe",
            session=session,
            **kwargs,
        ) as messages:
            try:
                async for msg in messages:
                    event = json.loads(msg.data)
                    if self.events is None:
                        self.events = pd.DataFrame()
                    # BUG FIX: append only the newly received event. Previously every
                    # message re-concatenated the entire accumulated list onto
                    # self.events, duplicating earlier events on each iteration, and
                    # the very first event was dropped (it only initialised the frame).
                    self.events = pd.concat([self.events, pd.DataFrame([event])], ignore_index=True)
            except TimeoutError as ex:
                raise ex from None

    _ = self.list_catalogs()  # refresh token
    if "headers" in kwargs:
        kwargs["headers"].update({"authorization": f"bearer {self.credentials.bearer_token}"})
    else:
        kwargs["headers"] = {
            "authorization": f"bearer {self.credentials.bearer_token}",
        }
    # Route the subscription through the configured proxy, if any.
    if "http" in self.credentials.proxies:
        kwargs["proxy"] = self.credentials.proxies["http"]
    elif "https" in self.credentials.proxies:
        kwargs["proxy"] = self.credentials.proxies["https"]
    th = threading.Thread(target=asyncio.run, args=(async_events(),), daemon=True)
    th.start()
    return None

output_dataflow(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Flow', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, producer_application_id=None, consumer_application_id=None, flow_details=None, **kwargs)

Instantiate an Output Dataflow object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Flow".

'Flow'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
producer_application_id dict[str, str] | None

The producer application ID (upstream application producing the flow).

None
consumer_application_id list[dict[str, str]] | dict[str, str] | None

The consumer application ID (downstream application, consuming the flow).

None
flow_details dict[str, str] | None

The flow details. Specifies input versus output flow. Defaults to {"flowDirection": "Output"}.

None

Returns:

Name Type Description
Dataset OutputDataFlow

Fusion OutputDataFlow class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")
Note

See the dataset module for more information on functionalities of output dataflow objects.

Source code in py_src/fusion/fusion.py
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
def output_dataflow(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Flow",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    producer_application_id: dict[str, str] | None = None,
    consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
    flow_details: dict[str, str] | None = None,
    **kwargs: Any,
) -> OutputDataFlow:
    """Instantiate an Output Dataflow object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Flow".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        producer_application_id (dict[str, str] | None, optional): The producer application ID (upstream application
            producing the flow).
        consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer application
            ID (downstream application, consuming the flow).
        flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output flow.
            Defaults to {"flowDirection": "Output"}.

    Returns:
        OutputDataFlow: Fusion OutputDataFlow class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")

    Note:
        See the dataset module for more information on functionalities of output dataflow objects.

    """
    # Default to an output-direction flow when the caller supplies no flow details.
    flow_details = {"flowDirection": "Output"} if flow_details is None else flow_details
    # Forward every argument unchanged to the OutputDataFlow metadata object.
    dataflow_obj = OutputDataFlow(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        producer_application_id=producer_application_id,
        consumer_application_id=consumer_application_id,
        flow_details=flow_details,
        **kwargs,
    )
    # Bind this Fusion client to the new object so it carries the API session/credentials.
    dataflow_obj.client = self
    return dataflow_obj

product(identifier, title='', category=None, short_abstract='', description='', is_active=True, is_restricted=None, maintainer=None, region='Global', publisher='J.P. Morgan', sub_category=None, tag=None, delivery_channel='API', theme=None, release_date=None, language='English', status='Available', image='', logo='', dataset=None, **kwargs)

Instantiate a Product object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Product identifier.

required
title str

Product title. If not provided, defaults to identifier.

''
category str | list[str] | None

Category. Defaults to None.

None
short_abstract str

Short description. Defaults to "".

''
description str

Description. If not provided, defaults to identifier.

''
is_active bool

Boolean for Active status. Defaults to True.

True
is_restricted bool | None

Flag for restricted products. Defaults to None.

None
maintainer str | list[str] | None

Product maintainer. Defaults to None.

None
region str | list[str] | None

Product region. Defaults to None.

'Global'
publisher str | None

Name of vendor that publishes the data. Defaults to None.

'J.P. Morgan'
sub_category str | list[str] | None

Product sub-category. Defaults to None.

None
tag str | list[str] | None

Tags used for search purposes. Defaults to None.

None
delivery_channel str | list[str]

Product delivery channel. Defaults to "API".

'API'
theme str | None

Product theme. Defaults to None.

None
release_date str | None

Product release date. Defaults to None.

None
language str

Product language. Defaults to "English".

'English'
status str

Product status. Defaults to "Available".

'Available'
image str

Product image. Defaults to "".

''
logo str

Product logo. Defaults to "".

''
dataset str | list[str] | None

Product datasets. Defaults to None.

None

Returns:

Name Type Description
Product Product

Fusion Product class instance.

Examples:

>>> fusion = Fusion()
>>> fusion.product(identifier="PRODUCT_1", title="Product")
Note

See the product module for more information on functionalities of product objects.

Source code in py_src/fusion/fusion.py
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
def product(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    short_abstract: str = "",
    description: str = "",
    is_active: bool = True,
    is_restricted: bool | None = None,
    maintainer: str | list[str] | None = None,
    region: str | list[str] = "Global",
    publisher: str = "J.P. Morgan",
    sub_category: str | list[str] | None = None,
    tag: str | list[str] | None = None,
    delivery_channel: str | list[str] = "API",
    theme: str | None = None,
    release_date: str | None = None,
    language: str = "English",
    status: str = "Available",
    image: str = "",
    logo: str = "",
    dataset: str | list[str] | None = None,
    **kwargs: Any,
) -> Product:
    """Instantiate a Product object with this client for metadata creation.

    Args:
        identifier (str): Product identifier.
        title (str, optional): Product title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): Category. Defaults to None.
        short_abstract (str, optional): Short description. Defaults to "".
        description (str, optional): Description. If not provided, defaults to identifier.
        is_active (bool, optional): Boolean for Active status. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted products. Defaults to None.
        maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
        region (str | list[str], optional): Product region. Defaults to "Global".
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        sub_category (str | list[str] | None, optional): Product sub-category. Defaults to None.
        tag (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        delivery_channel (str | list[str], optional): Product delivery channel. Defaults to "API".
        theme (str | None, optional): Product theme. Defaults to None.
        release_date (str | None, optional): Product release date. Defaults to None.
        language (str, optional): Product language. Defaults to "English".
        status (str, optional): Product status. Defaults to "Available".
        image (str, optional): Product image. Defaults to "".
        logo (str, optional): Product logo. Defaults to "".
        dataset (str | list[str] | None, optional): Product datasets. Defaults to None.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Product constructor.

    Returns:
        Product: Fusion Product class instance.

    Examples:
        >>> fusion = Fusion()
        >>> fusion.product(identifier="PRODUCT_1", title="Product")

    Note:
        See the product module for more information on functionalities of product objects.

    """
    product_obj = Product(
        identifier=identifier,
        title=title,
        category=category,
        short_abstract=short_abstract,
        description=description,
        is_active=is_active,
        is_restricted=is_restricted,
        maintainer=maintainer,
        region=region,
        publisher=publisher,
        sub_category=sub_category,
        tag=tag,
        delivery_channel=delivery_channel,
        theme=theme,
        release_date=release_date,
        language=language,
        status=status,
        image=image,
        logo=logo,
        dataset=dataset,
        **kwargs,
    )
    # Attach this client so the returned Product can make API calls through it.
    product_obj.client = self
    return product_obj

report(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Report', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, report=None, **kwargs)

Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Report".

'Report'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
report dict[str, str] | None

The report metadata. Specifies the tier of the report. Required for registered reports to the catalog.

None

Returns:

Name Type Description
Dataset Report

Fusion Report class instance.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.report(identifier="DATASET_1")
Note

See the dataset module for more information on functionalities of report objects.

Source code in py_src/fusion/fusion.py
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
def report(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Report",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    report: dict[str, str] | None = None,
    **kwargs: Any,
) -> Report:
    """Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Report".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        report (dict[str, str] | None, optional): The report metadata. Specifies the tier of the report.
            Required for registered reports to the catalog.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Report constructor.

    Returns:
        Report: Fusion Report class instance.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.report(identifier="DATASET_1")

    Note:
        See the dataset module for more information on functionalities of report objects.

    """
    report_obj = Report(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        report=report,
        **kwargs,
    )
    # Attach this client so the returned Report can make API calls through it.
    report_obj.client = self
    return report_obj

to_bytes(dataset, series_member, dataset_format='parquet', catalog=None)

Returns an instance of dataset (the distribution) as a bytes object.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series_member (str)

A dataset series member identifier

required
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
Source code in py_src/fusion/fusion.py
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
def to_bytes(
    self,
    dataset: str,
    series_member: str,
    dataset_format: str = "parquet",
    catalog: str | None = None,
) -> BytesIO:
    """Return a single distribution of a dataset as an in-memory bytes object.

    Args:
        dataset (str): A dataset identifier.
        series_member (str): A dataset series member identifier.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.

    Returns:
        BytesIO: The raw distribution content.
    """
    resolved_catalog = self._use_catalog(catalog)

    # Build the distribution URL, then fetch it over the client session.
    distribution_url = distribution_to_url(
        self.root_url,
        dataset,
        series_member,
        dataset_format,
        resolved_catalog,
    )
    return Fusion._call_for_bytes_object(distribution_url, self.session)

to_df(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, columns=None, filters=None, force_download=False, download_folder=None, dataframe_type='pandas', **kwargs)

Gets distributions for a specified date or date range and returns the data as a dataframe.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
columns List

A list of columns to return from a parquet file. Defaults to None

None
filters List

List[Tuple] or List[List[Tuple]] or None (default) Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. If use_legacy_dataset is True, filters can only reference partition keys and only a hive-style directory structure is supported. When setting use_legacy_dataset to False, also within-file level filtering and different partitioning schemes are supported. More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

None
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
dataframe_type str

Type

'pandas'
Source code in py_src/fusion/fusion.py
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
def to_df(  # noqa: PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    columns: list[str] | None = None,
    filters: PyArrowFilterT | None = None,
    force_download: bool = False,
    download_folder: str | None = None,
    dataframe_type: str = "pandas",
    **kwargs: Any,
) -> pd.DataFrame:
    """Gets distributions for a specified date or date range and returns the data as a dataframe.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): Either a single date or a range identified by a start or end date,
            or both separated with a ":". Defaults to 'latest' which will return the most recent
            instance of the dataset.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to download in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data download Defaults to True.
        columns (List, optional): A list of columns to return from a parquet file. Defaults to None
        filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
            Rows which do not match the filter predicate will be removed from scanned data.
            Partition keys embedded in a nested directory structure will be exploited to avoid
            loading files at all if they contain no matching rows. If use_legacy_dataset is True,
            filters can only reference partition keys and only a hive-style directory structure
            is supported. When setting use_legacy_dataset to False, also within-file level filtering
            and different partitioning schemes are supported.
            More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        force_download (bool, optional): If True then will always download a file even
            if it is already on disk. Defaults to False.
        download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
            Defaults to download_folder as set in __init__
        dataframe_type (str, optional): Which dataframe library to return the data in:
            "pandas" or "polars". Defaults to "pandas".
        **kwargs (Any): Extra keyword arguments passed through to the underlying file reader.

    Returns:
        class:`pandas.DataFrame`: a dataframe containing the requested data.
            If multiple dataset instances are retrieved then these are concatenated first.

    Raises:
        ValueError: If the download call returned no path information, or if `dataframe_type`
            is not one of "pandas" or "polars".
        Exception: If any distribution failed to download, or no reader exists for `dataset_format`.
        APIResponseError: If no series members match the requested date (range) and format.
    """
    catalog = self._use_catalog(catalog)

    # sample data is limited to csv
    if dt_str == "sample":
        dataset_format = "csv"

    if not download_folder:
        download_folder = self.download_folder
    download_res = self.download(
        dataset,
        dt_str,
        dataset_format,
        catalog,
        n_par,
        show_progress,
        force_download,
        download_folder,
        return_paths=True,
    )

    if not download_res:
        raise ValueError("Must specify 'return_paths=True' in download call to use this function")

    if not all(res[0] for res in download_res):
        failed_res = [res for res in download_res if not res[0]]
        raise Exception(
            f"Not all downloads were successfully completed. "
            f"Re-run to collect missing files. The following failed:\n{failed_res}"
        )

    files = [res[1] for res in download_res]

    pd_read_fn_map = {
        "csv": read_csv,
        "parquet": read_parquet,
        "parq": read_parquet,
        "json": read_json,
        "raw": read_csv,
    }

    # Every format shares the same default reader kwargs; "parq" aliases "parquet".
    pd_read_default_kwargs: dict[str, dict[str, object]] = {
        fmt: {
            "columns": columns,
            "filters": filters,
            "fs": self.fs,
            "dataframe_type": dataframe_type,
        }
        for fmt in ("csv", "parquet", "json", "raw")
    }
    pd_read_default_kwargs["parq"] = pd_read_default_kwargs["parquet"]

    pd_reader = pd_read_fn_map.get(dataset_format)
    pd_read_kwargs = pd_read_default_kwargs.get(dataset_format, {})
    if not pd_reader:
        raise Exception(f"No pandas function to read file in format {dataset_format}")

    # Caller-supplied reader kwargs override the defaults.
    pd_read_kwargs.update(kwargs)

    if len(files) == 0:
        raise APIResponseError(
            f"No series members for dataset: {dataset} "
            f"in date or date range: {dt_str} and format: {dataset_format}"
        )
    if dataset_format in ["parquet", "parq"]:
        # Parquet readers accept the full list of files in one call.
        data_df = pd_reader(files, **pd_read_kwargs)  # type: ignore
    elif dataset_format == "raw":

        def _read_zip(path: str) -> pd.DataFrame:
            # Read every member of one zip archive and concatenate them.
            # Open the archive once (previously it was re-opened for each member).
            zf = ZipFile(path)
            return pd.concat(
                [pd_reader(zf.open(name), **pd_read_kwargs) for name in zf.namelist()],  # type: ignore
                ignore_index=True,
            )

        data_df = pd.concat((_read_zip(f) for f in files), ignore_index=True)
    else:
        dataframes = (pd_reader(f, **pd_read_kwargs) for f in files)  # type: ignore
        if dataframe_type == "pandas":
            data_df = pd.concat(dataframes, ignore_index=True)
        elif dataframe_type == "polars":
            import polars as pl

            data_df = pl.concat(dataframes, how="diagonal")  # type: ignore
        else:
            # Previously an unrecognised dataframe_type fell through and raised
            # NameError on the return; fail explicitly instead.
            raise ValueError(f"Unsupported dataframe_type: {dataframe_type!r}; use 'pandas' or 'polars'.")

    return data_df

to_table(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, columns=None, filters=None, force_download=False, download_folder=None, **kwargs)

Gets distributions for a specified date or date range and returns the data as an arrow table.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
columns List

A list of columns to return from a parquet file. Defaults to None

None
filters List

List[Tuple] or List[List[Tuple]] or None (default) Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. If use_legacy_dataset is True, filters can only reference partition keys and only a hive-style directory structure is supported. When setting use_legacy_dataset to False, also within-file level filtering and different partitioning schemes are supported. More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

None
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
Source code in py_src/fusion/fusion.py
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
def to_table(  # noqa: PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    columns: list[str] | None = None,
    filters: PyArrowFilterT | None = None,
    force_download: bool = False,
    download_folder: str | None = None,
    **kwargs: Any,
) -> pa.Table:
    """Download distributions for a date or date range and load them into an arrow table.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): A single date, or a start/end range separated by ":".
            Defaults to 'latest', which resolves to the most recent instance of the dataset.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Number of distributions to download in parallel.
            Defaults to all available cpus.
        show_progress (bool, optional): Display a progress bar during download. Defaults to True.
        columns (List, optional): Columns to return from a parquet file. Defaults to None.
        filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default).
            Rows failing the filter predicate are dropped from scanned data; partition keys
            encoded in the directory layout are used to skip non-matching files entirely.
            See https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        force_download (bool, optional): Always re-download even if the file is already
            on disk. Defaults to False.
        download_folder (str, optional): Absolute or relative folder for downloaded files.
            Defaults to the download_folder set in __init__.

    Returns:
        class:`pyarrow.Table`: a table containing the requested data. Multiple dataset
            instances are concatenated into a single table.
    """
    catalog = self._use_catalog(catalog)
    n_par = cpu_count(n_par)
    # Fall back to the instance-level default location when none is supplied.
    download_folder = download_folder or self.download_folder
    results = self.download(
        dataset,
        dt_str,
        dataset_format,
        catalog,
        n_par,
        show_progress,
        force_download,
        download_folder,
        return_paths=True,
    )

    if not results:
        raise ValueError("Must specify 'return_paths=True' in download call to use this function")

    # Every entry is a (success, path, ...) tuple; abort if any download failed.
    failures = [entry for entry in results if not entry[0]]
    if failures:
        raise RuntimeError(
            f"Not all downloads were successfully completed. "
            f"Re-run to collect missing files. The following failed:\n{failures}"
        )

    paths = [entry[1] for entry in results]

    format_readers = {
        "csv": csv_to_table,
        "parquet": parquet_to_table,
        "parq": parquet_to_table,
        "json": json_to_table,
        "raw": csv_to_table,
    }

    common_args: dict[str, object] = {"columns": columns, "filters": filters, "fs": self.fs}
    base_kwargs: dict[str, dict[str, object]] = {
        "csv": dict(common_args),
        "parquet": dict(common_args),
        "json": dict(common_args),
        "raw": dict(common_args),
    }
    # "parq" is an alias for the parquet reader configuration.
    base_kwargs["parq"] = base_kwargs["parquet"]

    read_fn = format_readers.get(dataset_format)
    reader_args = base_kwargs.get(dataset_format, {})
    if not read_fn:
        raise AssertionError(f"No function to read file in format {dataset_format}")

    # Caller-supplied kwargs win over the defaults.
    reader_args.update(kwargs)

    if len(paths) == 0:
        raise APIResponseError(
            f"No series members for dataset: {dataset} "
            f"in date or date range: {dt_str} and format: {dataset_format}"
        )
    if dataset_format in ["parquet", "parq"]:
        # The parquet reader accepts the whole file list at once.
        table = read_fn(paths, **reader_args)  # type: ignore
    else:
        # Other formats are read file-by-file and concatenated.
        table = pa.concat_tables(read_fn(p, **reader_args) for p in paths)  # type: ignore

    return table

upload(path, dataset=None, dt_str='latest', catalog=None, n_par=None, show_progress=True, return_paths=False, multipart=True, chunk_size=5 * 2 ** 20, from_date=None, to_date=None, preserve_original_name=False, additional_headers=None)

Uploads the requested files/files to Fusion.

Parameters:

Name Type Description Default
path str

path to a file or a folder with files

required
dataset str

Dataset identifier to which the file will be uploaded (for single file only). If not provided the dataset will be implied from file's name.

None
dt_str str

A file name. Can be any string but is usually a date. Defaults to 'latest' which will return the most recent. Relevant for a single file upload only. If not provided the dataset will be implied from file's name.

'latest'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data upload. Defaults to True.

True
return_paths bool

Return paths and success statuses of the downloaded files.

False
multipart bool

Is multipart upload.

True
chunk_size int

Maximum chunk size.

5 * 2 ** 20
from_date str

start of the data date range contained in the distribution, defaults to upload date.

None
to_date str

end of the data date range contained in the distribution, defaults to upload date.

None
preserve_original_name bool

Preserve the original name of the file. Defaults to False.

False
Source code in py_src/fusion/fusion.py
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
def upload(  # noqa: PLR0913
    self,
    path: str,
    dataset: str | None = None,
    dt_str: str = "latest",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    return_paths: bool = False,
    multipart: bool = True,
    chunk_size: int = 5 * 2**20,
    from_date: str | None = None,
    to_date: str | None = None,
    preserve_original_name: bool | None = False,
    additional_headers: dict[str, str] | None = None,
) -> list[tuple[bool, str, str | None]] | None:
    """Uploads the requested file/files to Fusion.

    Args:
        path (str): path to a file or a folder with files
        dataset (str, optional): Dataset identifier to which the file will be uploaded (for single file only).
                                If not provided the dataset will be implied from file's name.
        dt_str (str, optional): A file name. Can be any string but is usually a date.
                                Defaults to 'latest' which will return the most recent.
                                Relevant for a single file upload only. If not provided the dataset will
                                be implied from file's name.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to upload in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data upload. Defaults to True.
        return_paths (bool, optional): Return paths and success statuses of the uploaded files.
        multipart (bool, optional): Is multipart upload.
        chunk_size (int, optional): Maximum chunk size.
        from_date (str, optional): start of the data date range contained in the distribution,
            defaults to upload date
        to_date (str, optional): end of the data date range contained in the distribution,
            defaults to upload date.
        preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.
        additional_headers (dict, optional): Additional HTTP headers passed through to the upload
            requests. Defaults to None.

    Returns:
        list[tuple[bool, str, str | None]] | None: a list of (success, path, error message)
            tuples when `return_paths` is True, otherwise None.
    """
    catalog = self._use_catalog(catalog)

    if not self.fs.exists(path):
        raise RuntimeError("The provided path does not exist")

    fs_fusion = self.get_fusion_filesystem()
    if self.fs.info(path)["type"] == "directory":
        # Folder upload: dataset/date are implied from each file's name, so only
        # files whose names validate against the catalog are kept.
        file_path_lst = self.fs.find(path)
        local_file_validation = validate_file_names(file_path_lst, fs_fusion)
        file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
        file_name = [f.split("/")[-1] for f in file_path_lst]
        is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
        local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
    else:
        file_path_lst = [path]
        if not catalog or not dataset:
            # Without an explicit catalog/dataset, both are inferred from the file name.
            local_file_validation = validate_file_names(file_path_lst, fs_fusion)
            file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
            is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
            local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
            if preserve_original_name:
                raise ValueError("preserve_original_name can only be used when catalog and dataset are provided.")
        else:
            # Normalize an 8-digit dt_str (YYYYMMDD) through pandas to validate it.
            # NOTE: inside this branch dt_str has matched ^\d{8}$, so it can never
            # be "latest"; the old "latest" special-case here was dead code.
            date_identifier = re.compile(r"^(\d{4})(\d{2})(\d{2})$")
            if date_identifier.match(dt_str):
                dt_str = pd.Timestamp(dt_str).date().strftime("%Y%m%d")

            if catalog not in fs_fusion.ls("") or dataset not in [
                i.split("/")[-1] for i in fs_fusion.ls(f"{catalog}/datasets")
            ]:
                msg = (
                    f"The file has not been uploaded, one of the catalog: {catalog} "
                    f"or dataset: {dataset} does not exist."
                )
                warnings.warn(msg, stacklevel=2)
                return [(False, path, msg)]
            file_format = path.split(".")[-1]
            file_name = [path.split("/")[-1]]
            # Unrecognized extensions are uploaded as raw distributions.
            file_format = "raw" if file_format not in RECOGNIZED_FORMATS else file_format

            local_url_eqiv = [
                "/".join(distribution_to_url("", dataset, dt_str, file_format, catalog, False).split("/")[1:])
            ]

    # Map each local path to its Fusion distribution URL (and original name if kept).
    if not preserve_original_name:
        data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv]).T
        data_map_df.columns = pd.Index(["path", "url"])
    else:
        data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv, file_name]).T
        data_map_df.columns = pd.Index(["path", "url", "file_name"])

    n_par = cpu_count(n_par)
    parallel = len(data_map_df) > 1
    res = upload_files(
        fs_fusion,
        self.fs,
        data_map_df,
        parallel=parallel,
        n_par=n_par,
        multipart=multipart,
        chunk_size=chunk_size,
        show_progress=show_progress,
        from_date=from_date,
        to_date=to_date,
        additional_headers=additional_headers,
    )

    # Surface partial failures as a warning rather than an exception, so the
    # successful uploads are not discarded.
    if not all(r[0] for r in res):
        failed_res = [r for r in res if not r[0]]
        msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)

    return res if return_paths else None

Synchronisation between the local filesystem and Fusion.

Parameters:

Name Type Description Default
fs_fusion filesystem

Fusion filesystem.

required
fs_local filesystem

Local filesystem.

required
products list

List of products.

None
datasets list

List of datasets.

None
catalog str

Fusion catalog.

None
direction str

Direction of synchronisation: upload/download.

'upload'
flatten bool

Flatten the folder structure.

False
dataset_format str

Dataset format for upload/download.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all.

None
show_progress bool

Display a progress bar during data download. Defaults to True.

True
local_path str

path to files in the local filesystem, e.g., "s3a://my_bucket/"

''
log_level int

Logging level. Error level by default.

ERROR
log_path str

The folder path where the log is stored. Defaults to ".".

'.'
Source code in py_src/fusion/fs_sync.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
def fsync(  # noqa: PLR0913
    fs_fusion: fsspec.filesystem,
    fs_local: fsspec.filesystem,
    products: Optional[list[str]] = None,
    datasets: Optional[list[str]] = None,
    catalog: Optional[str] = None,
    direction: str = "upload",
    flatten: bool = False,
    dataset_format: Optional[str] = None,
    n_par: Optional[int] = None,
    show_progress: bool = True,
    local_path: str = "",
    log_level: int = logging.ERROR,
    log_path: str = ".",
) -> None:
    """Synchronisation between the local filesystem and Fusion.

    Runs an endless polling loop: it diffs local and Fusion state, transfers any
    differences in the requested direction, and sleeps 10 seconds when nothing
    changed. The loop only exits when the user confirms via KeyboardInterrupt.

    Args:
        fs_fusion (fsspec.filesystem): Fusion filesystem.
        fs_local (fsspec.filesystem): Local filesystem.
        products (list): List of products.
        datasets (list): List of datasets.
        catalog (str): Fusion catalog.
        direction (str): Direction of synchronisation: upload/download.
        flatten (bool): Flatten the folder structure.
        dataset_format (str): Dataset format for upload/download.
        n_par (int, optional): Specify how many distributions to download in parallel. Defaults to all.
        show_progress (bool): Display a progress bar during data download Defaults to True.
        local_path (str): path to files in the local filesystem, e.g., "s3a://my_bucket/"
        log_level (int): Logging level. Error level by default.
        log_path (str): The folder path where the log is stored. Defaults to ".".

    Returns:
        None: loops until interrupted by the user (KeyboardInterrupt + typing "exit").
    """

    # Reset the module logger so repeated calls don't stack duplicate handlers,
    # then log to both stdout and a file under log_path.
    if logger.hasHandlers():
        logger.handlers.clear()
    file_handler = logging.FileHandler(filename="{}/{}".format(log_path, "fusion_fsync.log"))
    logging.addLevelName(VERBOSE_LVL, "VERBOSE")
    stdout_handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    stdout_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    logger.addHandler(file_handler)
    logger.setLevel(log_level)

    catalog = catalog if catalog else "common"
    datasets = datasets if datasets else []
    products = products if products else []

    # NOTE: validation via assert is stripped when Python runs with -O.
    assert len(products) > 0 or len(datasets) > 0, "At least one list products or datasets should be non-empty."
    assert direction in [
        "upload",
        "download",
    ], "The direction must be either upload or download."

    # Normalise local_path to end with "/" so it can be prefixed verbatim.
    if len(local_path) > 0 and local_path[-1] != "/":
        local_path += "/"

    # Expand each product into its member datasets.
    # NOTE(review): `datasets +=` mutates the caller's list in place when one was
    # passed in — confirm this side effect is intended.
    for product in products:
        res = json.loads(fs_fusion.cat(f"{catalog}/products/{product}").decode())
        datasets += [r["identifier"] for r in res["resources"]]

    assert len(datasets) > 0, "The supplied products did not contain any datasets."

    # Poll forever: re-snapshot both sides and transfer only when state changed.
    local_state = pd.DataFrame()
    fusion_state = pd.DataFrame()
    while True:
        try:
            local_state_temp = _get_local_state(
                fs_local,
                fs_fusion,
                datasets,
                catalog,
                dataset_format,
                local_state,
                local_path,
            )
            fusion_state_temp = _get_fusion_df(fs_fusion, datasets, catalog, flatten, dataset_format)
            if not local_state_temp.equals(local_state) or not fusion_state_temp.equals(fusion_state):
                res = _synchronize(
                    fs_fusion,
                    fs_local,
                    local_state_temp,
                    fusion_state_temp,
                    direction,
                    n_par,
                    show_progress,
                    local_path,
                )
                # Only commit the new snapshots when every transfer succeeded, so
                # failed items are retried on the next pass.
                if len(res) == 0 or all(i[0] for i in res):
                    local_state = local_state_temp
                    fusion_state = fusion_state_temp

                if not all(r[0] for r in res):
                    failed_res = [r for r in res if not r[0]]
                    msg = f"Not all {direction}s were successfully completed. The following failed:\n{failed_res}"
                    errs = [r for r in res if not r[2]]
                    logger.warning(msg)
                    logger.warning(errs)
                    warnings.warn(msg, stacklevel=2)

            else:
                logger.info("All synced, sleeping")
                time.sleep(10)

        except KeyboardInterrupt:  # noqa: PERF203
            # Ctrl-C prompts for confirmation instead of exiting immediately.
            if input("Type exit to exit: ") != "exit":
                continue
            break

        except Exception as _:
            # Best-effort loop: log any failure and keep syncing.
            logger.error("Exception thrown", exc_info=True)
            continue

Fusion Product class and functions.

Product dataclass

Fusion Product class for managing product metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

A unique identifier for the product.

title str

Product title. Defaults to "".

category str | list[str] | None

Product category. Defaults to None.

short_abstract str

Short abstract of the product. Defaults to "".

description str

Product description. If not provided, defaults to identifier.

is_active bool

Boolean for Active status. Defaults to True.

is_restricted bool | None

Flag for restricted products. Defaults to None.

maintainer str | list[str] | None

Product maintainer. Defaults to None.

region str | list[str] | None

Product region. Defaults to None.

publisher str | None

Name of vendor that publishes the data. Defaults to None.

sub_category str | list[str] | None

Product sub-category. Defaults to None.

tag str | list[str] | None

Tags used for search purposes. Defaults to None.

delivery_channel str | list[str]

Product delivery channel. Defaults to ["API"].

theme str | None

Product theme. Defaults to None.

release_date str | None

Product release date. Defaults to None.

language str

Product language. Defaults to "English".

status str

Product status. Defaults to "Available".

image str

Product image. Defaults to "".

logo str

Product logo. Defaults to "".

dataset str | list[str] | None

Product datasets. Defaults to None.

_client Any

Fusion client object. Defaults to None.

Source code in py_src/fusion/product.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
@dataclass
class Product(metaclass=CamelCaseMeta):
    """Fusion Product class for managing product metadata in a Fusion catalog.

    Attributes:
        identifier (str): A unique identifier for the product.
        title (str, optional): Product title. Defaults to "".
        category (str | list[str] | None, optional): Product category. Defaults to None.
        short_abstract (str, optional): Short abstract of the product. Defaults to "".
        description (str, optional): Product description. If not provided, defaults to the title.
        is_active (bool, optional): Boolean for Active status. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted products. Defaults to None.
        maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
        region (str | list[str], optional): Product region. Defaults to ["Global"].
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        sub_category (str | list[str] | None, optional): Product sub-category. Defaults to None.
        tag (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        delivery_channel (str | list[str], optional): Product delivery channel. Defaults to ["API"].
        theme (str | None, optional): Product theme. Defaults to None.
        release_date (str | None, optional): Product release date. Defaults to None.
        language (str, optional): Product language. Defaults to "English".
        status (str, optional): Product status. Defaults to "Available".
        image (str, optional): Product image. Defaults to "".
        logo (str, optional): Product logo. Defaults to "".
        dataset (str | list[str] | None, optional): Product datasets. Defaults to None.
        _client (Any, optional): Fusion client object. Defaults to None.

    """

    identifier: str
    title: str = ""
    category: str | list[str] | None = None
    short_abstract: str = ""
    description: str = ""
    is_active: bool = True
    is_restricted: bool | None = None
    maintainer: str | list[str] | None = None
    region: str | list[str] = field(default_factory=lambda: ["Global"])
    publisher: str = "J.P. Morgan"
    sub_category: str | list[str] | None = None
    tag: str | list[str] | None = None
    delivery_channel: str | list[str] = field(default_factory=lambda: ["API"])
    theme: str | None = None
    release_date: str | None = None
    language: str = "English"
    status: str = "Available"
    image: str = ""
    logo: str = ""
    dataset: str | list[str] | None = None

    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __repr__(self: Product) -> str:
        """Return an object representation of the Product object.

        Returns:
            str: Object representation of the product.

        """
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        # Plain string (not an f-string): there are no placeholders in the prefix.
        return "Product(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __post_init__(self: Product) -> None:
        """Format Product metadata fields after object instantiation."""
        self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
        # Derive fallbacks: title from identifier, description/short_abstract from title.
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description != "" else self.title
        self.short_abstract = tidy_string(self.short_abstract) if self.short_abstract != "" else self.title
        self.category = (
            self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
        )
        self.tag = self.tag if isinstance(self.tag, list) or self.tag is None else make_list(self.tag)
        self.dataset = (
            self.dataset if isinstance(self.dataset, list) or self.dataset is None else make_list(self.dataset)
        )
        self.sub_category = (
            self.sub_category
            if isinstance(self.sub_category, list) or self.sub_category is None
            else make_list(self.sub_category)
        )
        self.is_active = self.is_active if isinstance(self.is_active, bool) else make_bool(self.is_active)
        self.is_restricted = (
            self.is_restricted
            if isinstance(self.is_restricted, bool) or self.is_restricted is None
            else make_bool(self.is_restricted)
        )
        self.maintainer = (
            self.maintainer
            if isinstance(self.maintainer, list) or self.maintainer is None
            else make_list(self.maintainer)
        )
        self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
        self.delivery_channel = (
            self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
        )
        self.release_date = convert_date_format(self.release_date) if self.release_date else None

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version so camelCase
        # attribute names (as used by the API) resolve transparently.
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # Normalize camelCase assignments onto the snake_case field.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Product. Set automatically, if the Product is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product")
            >>> product.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client.

        Prefers an explicitly passed client over the instance's stored client.

        Raises:
            ValueError: If neither an explicit nor a stored client is available.

        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(cls: type[Product], series: pd.Series[Any]) -> Product:
        """Instantiate a Product object from a pandas Series.

        Args:
            series (pd.Series[Any]): Product metadata as a pandas Series.

        Returns:
            Product: Product object.

        """
        # Normalize labels (strip spaces/underscores, lowercase) so variant
        # column headings all map onto the expected keys.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower())
        series = series.rename({"tag": "tags", "dataset": "datasets"})
        # Prefer "abstract"; fall back to "shortabstract" when absent.
        # (Default to None so the fallback can actually trigger.)
        short_abstract = series.get("abstract", None)
        short_abstract = series.get("shortabstract", "") if short_abstract is None else short_abstract

        return cls(
            title=series.get("title", ""),
            identifier=series.get("identifier", ""),
            category=series.get("category", None),
            short_abstract=short_abstract,
            description=series.get("description", ""),
            theme=series.get("theme", None),
            release_date=series.get("releasedate", None),
            is_active=series.get("isactive", True),
            is_restricted=series.get("isrestricted", None),
            maintainer=series.get("maintainer", None),
            region=series.get("region", "Global"),
            publisher=series.get("publisher", "J.P. Morgan"),
            sub_category=series.get("subcategory", None),
            tag=series.get("tags", None),
            delivery_channel=series.get("deliverychannel", "API"),
            language=series.get("language", "English"),
            status=series.get("status", "Available"),
            dataset=series.get("datasets", None),
        )

    @classmethod
    def _from_dict(cls: type[Product], data: dict[str, Any]) -> Product:
        """Instantiate a Product object from a dictionary.

        Unknown keys are silently dropped; camelCase keys are accepted.

        Args:
            data (dict[str, Any]): Product metadata as a dictionary.

        Returns:
            Product: Product object.

        """
        keys = [f.name for f in fields(cls)]
        data = {camel_to_snake(k): v for k, v in data.items()}
        data = {k: v for k, v in data.items() if k in keys}
        return cls(**data)

    @classmethod
    def _from_csv(cls: type[Product], file_path: str, identifier: str | None = None) -> Product:
        """Instantiate a Product object from a CSV file.

        Args:
            file_path (str): Path to the CSV file.
            identifier (str | None, optional): Product identifier for filtering if multiple products are defined
                in the csv. Defaults to None, in which case the first row is used.

        Returns:
            Product: Product object.

        """
        data = pd.read_csv(file_path)

        return (
            Product._from_series(data[data["identifier"] == identifier].reset_index(drop=True).iloc[0])
            if identifier
            else Product._from_series(data.reset_index(drop=True).iloc[0])
        )

    def from_object(
        self,
        product_source: Product | dict[str, Any] | str | pd.Series[Any],
    ) -> Product:
        """Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

        Args:
            product_source (Product | dict[str, Any] | str | pd.Series[Any]): Product metadata source.

        Raises:
            TypeError: If the object provided is not a Product, dictionary, path to CSV file, JSON string,
            or pandas Series.

        Returns:
            Product: Product object.

        Examples:
            Instantiating a Product object from a dictionary:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_dict = {
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available"
            ... }
            >>> product = fusion.product("my_product").from_object(product_dict)

            Instantiating a Product object from a JSON string:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_json = '{
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available",
            ... }'
            >>> product = fusion.product("my_product").from_object(product_json)

            Instantiating a Product object from a CSV file:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_object("path/to/product.csv")

            Instantiating a Product object from a pandas Series:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_series = pd.Series({
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available",
            ... })
            >>> product = fusion.product("my_product").from_object(product_series)

        """
        if isinstance(product_source, Product):
            product = product_source
        elif isinstance(product_source, dict):
            product = Product._from_dict(product_source)
        elif isinstance(product_source, str):
            # A string is either inline JSON or a path to a CSV file.
            if _is_json(product_source):
                product = Product._from_dict(js.loads(product_source))
            else:
                product = Product._from_csv(product_source)
        elif isinstance(product_source, pd.Series):
            product = Product._from_series(product_source)
        else:
            raise TypeError(f"Could not resolve the object provided: {product_source}")
        product.client = self._client
        return product

    def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Product:
        """Instantiate a Product object from a Fusion catalog.

        Args:
            catalog (str | None, optional): Catalog identifer. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Product: Product object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        resp = client.session.get(f"{client.root_url}catalogs/{catalog}/products")
        requests_raise_for_status(resp)
        list_products = resp.json()["resources"]
        # Pick the catalog entry matching this product's identifier.
        dict_ = [dict_ for dict_ in list_products if dict_["identifier"] == self.identifier][0]
        product_obj = Product._from_dict(dict_)
        product_obj.client = client

        return product_obj

    def to_dict(self: Product) -> dict[str, Any]:
        """Convert the Product instance to a dictionary.

        Private attributes (leading underscore, e.g. the client) are excluded;
        keys are emitted in camelCase as expected by the API.

        Returns:
            dict[str, Any]: Product metadata as a dictionary.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product")
            >>> product_dict = product.to_dict()

        """
        product_dict = {
            snake_to_camel(k): v
            for k, v in self.__dict__.items()
            if not k.startswith("_")
        }
        return product_dict

    def create(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new product to a Fusion catalog.

        Args:
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product(
            ...     identifier="my_product",
            ...     title="My Product",
            ...     category="Data",
            ...     short_abstract="My product is awesome",
            ...     description="My product is very awesome",
            ...     )
            >>> product.create(catalog="my_catalog")

            From a dictionary:

            >>> product_dict = {
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     }
            >>> product = fusion.product("my_product").from_object(product_dict)
            >>> product.create(catalog="my_catalog")

            From a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product_json = '{
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     }'
            >>> product = fusion.product("my_product").from_object(product_json)
            >>> product.create(catalog="my_catalog")

            From a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_object("path/to/product.csv")
            >>> product.create(catalog="my_catalog")

            From a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product_series = pd.Series({
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     })
            >>> product = fusion.product("my_product").from_object(product_series)
            >>> product.create(catalog="my_catalog")

            From existing product in a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog()
            >>> product.identifier = "my_new_product"
            >>> product.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # API requires both fields: default release date to today, channel to API.
        release_date = self.release_date if self.release_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        delivery_channel = self.delivery_channel if self.delivery_channel else ["API"]

        self.release_date = release_date
        self.delivery_channel = delivery_channel

        data = self.to_dict()

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.post(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def update(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Update an existing product in a Fusion catalog.

        Args:
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
            >>> product.title = "My Updated Product Title"
            >>> product.update(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # API requires both fields: default release date to today, channel to API.
        release_date = self.release_date if self.release_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        delivery_channel = self.delivery_channel if self.delivery_channel else ["API"]

        self.release_date = release_date
        self.delivery_channel = delivery_channel

        data = self.to_dict()

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete a product from a Fusion catalog.

        Args:
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

         Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.product("my_product").delete(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def copy(
        self,
        catalog_to: str,
        catalog_from: str | None = None,
        client: Fusion | None = None,
        client_to: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Copy product from one Fusion catalog and/or environment to another.

        Args:
            catalog_to (str): Catalog identifier to which to copy product.
            catalog_from (str, optional): A catalog identifier from which to copy product. Defaults to "common".
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

        """
        client = self._use_client(client)
        catalog_from = client._use_catalog(catalog_from)
        if client_to is None:
            client_to = client
        # Fetch from the source catalog/environment, then re-create in the target.
        product_obj = self.from_catalog(catalog=catalog_from, client=client)
        product_obj.client = client_to
        resp = product_obj.create(catalog=catalog_to, return_resp_obj=True)
        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Product metadata fields after object instantiation.

Source code in py_src/fusion/product.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def __post_init__(self: Product) -> None:
    """Format Product metadata fields after object instantiation."""
    self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description != "" else self.title
    self.short_abstract = tidy_string(self.short_abstract) if self.short_abstract != "" else self.title
    self.description = tidy_string(self.description)
    self.category = (
        self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
    )
    self.tag = self.tag if isinstance(self.tag, list) or self.tag is None else make_list(self.tag)
    self.dataset = (
        self.dataset if isinstance(self.dataset, list) or self.dataset is None else make_list(self.dataset)
    )
    self.sub_category = (
        self.sub_category
        if isinstance(self.sub_category, list) or self.sub_category is None
        else make_list(self.sub_category)
    )
    self.is_active = self.is_active if isinstance(self.is_active, bool) else make_bool(self.is_active)
    self.is_restricted = (
        self.is_restricted
        if isinstance(self.is_restricted, bool) or self.is_restricted is None
        else make_bool(self.is_restricted)
    )
    self.maintainer = (
        self.maintainer
        if isinstance(self.maintainer, list) or self.maintainer is None
        else make_list(self.maintainer)
    )
    self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
    self.delivery_channel = (
        self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
    )
    self.release_date = convert_date_format(self.release_date) if self.release_date else None

__repr__()

Return an object representation of the Product object.

Returns:

Name Type Description
str str

Object representation of the product.

Source code in py_src/fusion/product.py
81
82
83
84
85
86
87
88
89
def __repr__(self: Product) -> str:
    """Return an object representation of the Product object.

    Returns:
        str: Object representaiton of the product.

    """
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    return f"Product(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

copy(catalog_to, catalog_from=None, client=None, client_to=None, return_resp_obj=False)

Copy product from one Fusion catalog and/or environment to another.

Parameters:

Name Type Description Default
catalog_to str

Catalog identifier to which to copy product.

required
catalog_from str

A catalog identifier from which to copy product. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
client_to Fusion | None

Fusion client object. Defaults to current instance.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")
Source code in py_src/fusion/product.py
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
def copy(
    self,
    catalog_to: str,
    catalog_from: str | None = None,
    client: Fusion | None = None,
    client_to: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Copy product from one Fusion catalog and/or environment to another by copy.

    Args:
        catalog_to (str): Catalog identifier to which to copy product.
        catalog_from (str, optional): A catalog identifier from which to copy product. Defaults to "common".
        client (Fusion): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

    """
    client = self._use_client(client)
    catalog_from = client._use_catalog(catalog_from)
    if client_to is None:
        client_to = client
    product_obj = self.from_catalog(catalog=catalog_from, client=client)
    product_obj.client = client_to
    resp = product_obj.create(catalog=catalog_to, return_resp_obj=True)
    return resp if return_resp_obj else None

create(catalog=None, client=None, return_resp_obj=False)

Upload a new product to a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product(
...     identifier="my_product",
...     title="My Product",
...     category="Data",
...     short_abstract="My product is awesome",
...     description="My product is very awesome",
...     )
>>> product.create(catalog="my_catalog")

From a dictionary:

>>> product_dict = {
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     }
>>> product = fusion.product("my_product").from_object(product_dict)
>>> product.create(catalog="my_catalog")

From a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product_json = '{
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     }'
>>> product = fusion.product("my_product").from_object(product_json)
>>> product.create(catalog="my_catalog")

From a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_object("path/to/product.csv")
>>> product.create(catalog="my_catalog")

From a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product_series = pd.Series({
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     })
>>> product = fusion.product("my_product").from_object(product_series)
>>> product.create(catalog="my_catalog")

From existing product in a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog()
>>> product.identifier = "my_new_product"
>>> product.create(catalog="my_catalog")
Source code in py_src/fusion/product.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
def create(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new product to a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product(
        ...     identifier="my_product",
        ...     title="My Product",
        ...     category="Data",
        ...     short_abstract="My product is awesome",
        ...     description="My product is very awesome",
        ...     )
        >>> product.create(catalog="my_catalog")

        From a dictionary:

        >>> product_dict = {"identifier": "my_product", "title": "My Product", "category": "Data"}
        >>> product = fusion.product("my_product").from_object(product_dict)
        >>> product.create(catalog="my_catalog")

        From a JSON string:

        >>> product_json = '{"identifier": "my_product", "title": "My Product", "category": "Data"}'
        >>> product = fusion.product("my_product").from_object(product_json)
        >>> product.create(catalog="my_catalog")

        From a CSV file:

        >>> product = fusion.product("my_product").from_object("path/to/product.csv")
        >>> product.create(catalog="my_catalog")

        From a pandas Series:

        >>> product_series = pd.Series({"identifier": "my_product", "title": "My Product", "category": "Data"})
        >>> product = fusion.product("my_product").from_object(product_series)
        >>> product.create(catalog="my_catalog")

        From an existing product in a catalog:

        >>> product = fusion.product("my_product").from_catalog()
        >>> product.identifier = "my_new_product"
        >>> product.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Backfill fields the API requires: default the release date to today
    # and the delivery channel to the API channel when not already set.
    self.release_date = self.release_date or pd.Timestamp("today").strftime("%Y-%m-%d")
    self.delivery_channel = self.delivery_channel or ["API"]

    payload = self.to_dict()

    resp: requests.Response = client.session.post(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}",
        json=payload,
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(catalog=None, client=None, return_resp_obj=False)

Delete a product from a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.product("my_product").delete(catalog="my_catalog")
Source code in py_src/fusion/product.py
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
def delete(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a product from a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.product("my_product").delete(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    resp: requests.Response = client.session.delete(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

from_catalog(catalog=None, client=None)

Instantiate a Product object from a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

Catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Product Product

Product object.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
Source code in py_src/fusion/product.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Product:
    """Instantiate a Product object from a Fusion catalog.

    Args:
        catalog (str | None, optional): Catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Product: Product object.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    resp = client.session.get(f"{client.root_url}catalogs/{catalog}/products")
    requests_raise_for_status(resp)
    # Pick the catalog entry matching this product's identifier; indexing the
    # first match mirrors the original lookup (IndexError if no match exists).
    matching = [entry for entry in resp.json()["resources"] if entry["identifier"] == self.identifier]
    product_obj = Product._from_dict(matching[0])
    product_obj.client = client

    return product_obj

from_object(product_source)

Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

Parameters:

Name Type Description Default
product_source Product | dict[str, Any] | str | Series[Any]

Product metadata source.

required

Raises:

Type Description
TypeError

If the object provided is not a Product, dictionary, path to CSV file, JSON string, or pandas Series.

Returns:

Name Type Description
Product Product

Product object.

Examples:

Instantiating a Product object from a dictionary:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_dict = {
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available"
... }
>>> product = fusion.product("my_product").from_object(product_dict)

Instantiating a Product object from a JSON string:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_json = '{
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available",
... }'
>>> product = fusion.product("my_product").from_object(product_json)

Instantiating a Product object from a CSV file:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_object("path/to/product.csv")

Instantiating a Product object from a pandas Series:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_series = pd.Series({
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available",
... })
>>> product = fusion.product("my_product").from_object(product_series)
Source code in py_src/fusion/product.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def from_object(
    self,
    product_source: Product | dict[str, Any] | str | pd.Series[Any],
) -> Product:
    """Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

    Args:
        product_source (Product | dict[str, Any] | str | pd.Series[Any]): Product metadata source.

    Raises:
        TypeError: If the object provided is not a Product, dictionary, path to CSV file,
            JSON string, or pandas Series.

    Returns:
        Product: Product object.

    Examples:
        Instantiating a Product object from a dictionary:

        >>> from fusion import Fusion
        >>> from fusion.product import Product
        >>> fusion = Fusion()
        >>> product_dict = {
        ...     "identifier": "my_product",
        ...     "title": "My Product",
        ...     "category": "Data",
        ...     "short_abstract": "My product is awesome",
        ...     "description": "My product is very awesome",
        ...     "release_date": "2021-01-01",
        ...     "language": "English",
        ...     "status": "Available",
        ... }
        >>> product = fusion.product("my_product").from_object(product_dict)

        Instantiating a Product object from a JSON string:

        >>> product_json = '{"identifier": "my_product", "title": "My Product", "category": "Data"}'
        >>> product = fusion.product("my_product").from_object(product_json)

        Instantiating a Product object from a CSV file:

        >>> product = fusion.product("my_product").from_object("path/to/product.csv")

        Instantiating a Product object from a pandas Series:

        >>> product_series = pd.Series({
        ...     "identifier": "my_product",
        ...     "title": "My Product",
        ...     "category": "Data",
        ... })
        >>> product = fusion.product("my_product").from_object(product_series)

    """
    if isinstance(product_source, Product):
        resolved = product_source
    elif isinstance(product_source, dict):
        resolved = Product._from_dict(product_source)
    elif isinstance(product_source, str):
        # A string is either inline JSON or a filesystem path to a CSV file.
        resolved = (
            Product._from_dict(js.loads(product_source))
            if _is_json(product_source)
            else Product._from_csv(product_source)
        )
    elif isinstance(product_source, pd.Series):
        resolved = Product._from_series(product_source)
    else:
        raise TypeError(f"Could not resolve the object provided: {product_source}")
    resolved.client = self._client
    return resolved

to_dict()

Convert the Product instance to a dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Product metadata as a dictionary.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product")
>>> product_dict = product.to_dict()
Source code in py_src/fusion/product.py
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def to_dict(self: Product) -> dict[str, Any]:
    """Convert the Product instance to a dictionary.

    Returns:
        dict[str, Any]: Product metadata as a dictionary.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product")
        >>> product_dict = product.to_dict()

    """
    # Serialize only public attributes (no leading underscore), converting
    # each snake_case field name to the camelCase form the API expects.
    result: dict[str, Any] = {}
    for attr_name, attr_value in self.__dict__.items():
        if attr_name.startswith("_"):
            continue
        result[snake_to_camel(attr_name)] = attr_value
    return result

update(catalog=None, client=None, return_resp_obj=False)

Update an existing product in a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
>>> product.title = "My Updated Product Title"
>>> product.update(catalog="my_catalog")
Source code in py_src/fusion/product.py
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
def update(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Update an existing product in a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
        >>> product.title = "My Updated Product Title"
        >>> product.update(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Backfill fields the API requires: default the release date to today
    # and the delivery channel to the API channel when not already set.
    self.release_date = self.release_date or pd.Timestamp("today").strftime("%Y-%m-%d")
    self.delivery_channel = self.delivery_channel or ["API"]

    payload = self.to_dict()

    resp: requests.Response = client.session.put(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}",
        json=payload,
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

Fusion Dataset class and functions.

Dataset dataclass

Fusion Dataset class for managing dataset metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

A unique identifier for the dataset.

title str

A title for the dataset. If not provided, defaults to identifier.

category str | list[str] | None

A category or list of categories for the dataset. Defaults to None.

description str

A description of the dataset. If not provided, defaults to identifier.

frequency str

The frequency of the dataset. Defaults to "Once".

is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

is_third_party_data bool

Flag for third party data. Defaults to True.

is_restricted bool | None

Flag for restricted datasets. Defaults to None.

is_raw_data bool

Flag for raw datasets. Defaults to True.

maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

region str | list[str] | None

Region. Defaults to None.

publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

product str | list[str] | None

Product to associate dataset with. Defaults to None.

sub_category str | list[str] | None

Sub-category. Defaults to None.

tags str | list[str] | None

Tags used for search purposes. Defaults to None.

created_date str | None

Created date. Defaults to None.

modified_date str | None

Modified date. Defaults to None.

delivery_channel str | list[str]

Delivery channel. Defaults to "API".

language str

Language. Defaults to "English".

status str

Status. Defaults to "Available".

type_ str | None

Dataset type. Defaults to "Source".

container_type str | None

Container type. Defaults to "Snapshot-Full".

snowflake str | None

Snowflake account connection. Defaults to None.

complexity str | None

Complexity. Defaults to None.

is_immutable bool | None

Flag for immutable datasets. Defaults to None.

is_mnpi bool | None

is_mnpi. Defaults to None.

is_pci bool | None

is_pci. Defaults to None.

is_pii bool | None

is_pii. Defaults to None.

is_client bool | None

is_client. Defaults to None.

is_public bool | None

is_public. Defaults to None.

is_internal bool | None

is_internal. Defaults to None.

is_confidential bool | None

is_confidential. Defaults to None.

is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

is_active bool | None

is_active. Defaults to None.

owners list[str] | None

The owners of the dataset. Defaults to None.

application_id str | dict[str, str] | None

The application (most commonly seal ID) that the dataset/report/flow is owned by. Accepts string format for seal IDs, or a dictionary containing 'id' and 'type' as keys. Defaults to None.

_client Any

A Fusion client object. Defaults to None.

Source code in py_src/fusion/dataset.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
@dataclass
class Dataset(metaclass=CamelCaseMeta):
    """Fusion Dataset class for managing dataset metadata in a Fusion catalog.

    Attributes:
        identifier (str): A unique identifier for the dataset.
        title (str, optional): A title for the dataset. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset. Defaults to None.
        description (str, optional): A description of the dataset. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Source".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application (most commonly seal ID) that the 
            dataset/report/flow is owned by. Accepts string format for seal IDs, or a dictionary containing 'id' and
            'type' as keys. Defaults to None.
        _client (Any, optional): A Fusion client object. Defaults to None.

    """

    identifier: str
    title: str = ""
    category: str | list[str] | None = None
    description: str = ""
    frequency: str = "Once"
    is_internal_only_dataset: bool = False
    is_third_party_data: bool = True
    is_restricted: bool | None = None
    is_raw_data: bool = True
    maintainer: str | None = "J.P. Morgan Fusion"
    source: str | list[str] | None = None
    region: str | list[str] | None = None
    publisher: str = "J.P. Morgan"
    product: str | list[str] | None = None
    sub_category: str | list[str] | None = None
    tags: str | list[str] | None = None
    created_date: str | None = None
    modified_date: str | None = None
    delivery_channel: str | list[str] = field(default_factory=lambda: ["API"])
    language: str = "English"
    status: str = "Available"
    type_: str | None = "Source"
    container_type: str | None = "Snapshot-Full"
    snowflake: str | None = None
    complexity: str | None = None
    is_immutable: bool | None = None
    is_mnpi: bool | None = None
    is_pci: bool | None = None
    is_pii: bool | None = None
    is_client: bool | None = None
    is_public: bool | None = None
    is_internal: bool | None = None
    is_confidential: bool | None = None
    is_highly_confidential: bool | None = None
    is_active: bool | None = None
    owners: list[str] | None = None
    application_id: str | dict[str, str] | None = None

    # Fusion session handle; set via the ``client`` property, never by __init__,
    # and excluded from repr/equality so two metadata-equal datasets compare equal.
    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __repr__(self: Dataset) -> str:
        """Return an object representation of the Dataset object.

        Returns:
            str: Object representation of the dataset.

        """
        # Only public metadata fields are shown; private (underscore) state is omitted.
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        return f"Dataset(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __post_init__(self: Dataset) -> None:
        """Format Dataset metadata fields after object initialization."""
        # Normalize the identifier to the catalog convention (UPPER_SNAKE_CASE)
        # and derive title/description from it when they were not supplied.
        self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description != "" else self.title
        # Coerce scalar values of list-like fields into one-element lists; None is preserved.
        self.category = (
            self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
        )
        self.delivery_channel = (
            self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
        )
        self.source = self.source if isinstance(self.source, list) or self.source is None else make_list(self.source)
        self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
        self.product = (
            self.product if isinstance(self.product, list) or self.product is None else make_list(self.product)
        )
        self.sub_category = (
            self.sub_category
            if isinstance(self.sub_category, list) or self.sub_category is None
            else make_list(self.sub_category)
        )
        self.tags = self.tags if isinstance(self.tags, list) or self.tags is None else make_list(self.tags)
        self.is_internal_only_dataset = (
            self.is_internal_only_dataset
            if isinstance(self.is_internal_only_dataset, bool)
            else make_bool(self.is_internal_only_dataset)
        )
        self.created_date = convert_date_format(self.created_date) if self.created_date else None
        self.modified_date = convert_date_format(self.modified_date) if self.modified_date else None
        self.owners = self.owners if isinstance(self.owners, list) or self.owners is None else make_list(self.owners)
        # A plain string application_id is treated as a SEAL ID and wrapped in the dict form.
        self.application_id = (
            {"id": str(self.application_id), "type": "Application (SEAL)"}
            if isinstance(self.application_id, str)
            else self.application_id
        )

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version
        # (lets callers read camelCase names set via the API payloads).
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        # "client" must go through object.__setattr__ so the property setter
        # (a data descriptor on the class) is invoked rather than writing
        # a plain instance attribute.
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # All other attributes are stored under their snake_case name,
            # so camelCase and snake_case writes land on the same slot.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Dataset. Set automatically, if the Dataset is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset")
            >>> dataset.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client.

        Prefers an explicitly passed client over the instance's stored one;
        raises if neither is available.

        Raises:
            ValueError: If no Fusion client is available.

        """

        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(cls: type[Dataset], series: pd.Series[Any]) -> Dataset:
        """Instantiate a Dataset object from a pandas Series.

        Args:
            series (pd.Series[Any]): Dataset metadata as a pandas Series.

        Returns:
            Dataset: Dataset object.

        """
        # Normalize index labels: strip spaces/underscores and lowercase,
        # so e.g. "isRawData", "is_raw_data" and "Is Raw Data" all match.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower())
        series = series.rename({"tag": "tags"})
        # NOTE(review): after the normalization above, labels contain no underscores or
        # uppercase letters, so "type_" and "productId" can never match — these two
        # renames look like no-ops; confirm whether "type" / "productid" were intended.
        series = series.rename({"type_": "type"})
        series = series.rename({"productId": "product"})

        # Boolean-ish fields may arrive as strings ("true"/"false"); coerce
        # only when present so missing values stay None.
        is_internal_only_dataset = series.get("isinternalonlydataset", None)
        is_internal_only_dataset = (
            make_bool(is_internal_only_dataset) if is_internal_only_dataset is not None else is_internal_only_dataset
        )
        is_restricted = series.get("isrestricted", None)
        is_restricted = make_bool(is_restricted) if is_restricted is not None else is_restricted
        is_immutable = series.get("isimmutable", None)
        is_immutable = make_bool(is_immutable) if is_immutable is not None else is_immutable
        is_mnpi = series.get("ismnpi", None)
        is_mnpi = make_bool(is_mnpi) if is_mnpi is not None else is_mnpi
        is_pci = series.get("ispci", None)
        is_pci = make_bool(is_pci) if is_pci is not None else is_pci
        is_pii = series.get("ispii", None)
        is_pii = make_bool(is_pii) if is_pii is not None else is_pii
        is_client = series.get("isclient", None)
        is_client = make_bool(is_client) if is_client is not None else is_client
        is_public = series.get("ispublic", None)
        is_public = make_bool(is_public) if is_public is not None else is_public
        is_internal = series.get("isinternal", None)
        is_internal = make_bool(is_internal) if is_internal is not None else is_internal
        is_confidential = series.get("isconfidential", None)
        is_confidential = make_bool(is_confidential) if is_confidential is not None else is_confidential
        is_highly_confidential = series.get("ishighlyconfidential", None)
        is_highly_confidential = (
            make_bool(is_highly_confidential) if is_highly_confidential is not None else is_highly_confidential
        )
        is_active = series.get("isactive", None)
        is_active = make_bool(is_active) if is_active is not None else is_active

        dataset = cls(
            identifier=series.get("identifier", ""),
            category=series.get("category", None),
            delivery_channel=series.get("deliverychannel", ["API"]),
            title=series.get("title", ""),
            description=series.get("description", ""),
            frequency=series.get("frequency", "Once"),
            is_internal_only_dataset=is_internal_only_dataset,  # type: ignore
            is_third_party_data=series.get("isthirdpartydata", True),
            is_restricted=is_restricted,
            is_raw_data=series.get("israwdata", True),
            maintainer=series.get("maintainer", "J.P. Morgan Fusion"),
            source=series.get("source", None),
            region=series.get("region", None),
            publisher=series.get("publisher", "J.P. Morgan"),
            product=series.get("product", None),
            sub_category=series.get("subcategory", None),
            tags=series.get("tags", None),
            container_type=series.get("containertype", "Snapshot-Full"),
            language=series.get("language", "English"),
            status=series.get("status", "Available"),
            type_=series.get("type", "Source"),
            created_date=series.get("createddate", None),
            modified_date=series.get("modifieddate", None),
            snowflake=series.get("snowflake", None),
            complexity=series.get("complexity", None),
            owners=series.get("owners", None),
            application_id=series.get("applicationid", None),
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
        )
        return dataset

    @classmethod
    def _from_dict(cls: type[Dataset], data: dict[str, Any]) -> Dataset:
        """Instantiate a Dataset object from a dictionary.

        Args:
            data (dict[str, Any]): Dataset metadata as a dictionary.

        Returns:
            Dataset: Dataset object.

        """
        # Accept the API's "type" key for the "type_" dataclass field; drop
        # any keys that are not dataclass fields so cls(**data) cannot fail
        # on unexpected payload entries.
        keys = [f.name for f in fields(cls)]
        keys = ["type" if key == "type_" else key for key in keys]
        data = {camel_to_snake(k): v for k, v in data.items()}
        data = {k: v for k, v in data.items() if k in keys}
        if "type" in data:
            data["type_"] = data.pop("type")
        return cls(**data)

    @classmethod
    def _from_csv(cls: type[Dataset], file_path: str, identifier: str | None = None) -> Dataset:
        """Instantiate a Dataset object from a CSV file.

        Args:
            file_path (str): Path to the CSV file.
            identifier (str | None, optional): Dataset identifier for filtering if multiple datasets are defined in csv.
                Defaults to None.

        Returns:
            Dataset: Dataset object.

        """
        data = pd.read_csv(file_path)

        # When no identifier is given, the first row of the CSV is used.
        return (
            cls._from_series(data[data["identifier"] == identifier].reset_index(drop=True).iloc[0])
            if identifier
            else cls._from_series(data.reset_index(drop=True).iloc[0])
        )

    def from_object(
        self,
        dataset_source: Dataset | dict[str, Any] | str | pd.Series[Any],
    ) -> Dataset:
        """Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

        Args:
            dataset_source (Dataset | dict[str, Any] | str | pd.Series[Any]): Dataset metadata source.

        Raises:
            TypeError: If the object provided is not a Dataset, dictionary, JSON string, path to CSV file,
                or pandas Series.

        Returns:
            Dataset: Dataset object.

        Examples:
            Instantiate a Dataset object from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_dict = {
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan",
            ...     }
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

            Instantiate a Dataset object from a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_json = '{
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan"
            ...     }'
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

            Instantiate a Dataset object from a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

            Instantiate a Dataset object from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_series = pd.Series({
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan"
            ...     })
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

        """
        # Dispatch on the source type; a str is treated as JSON when it parses
        # as JSON, otherwise as a CSV file path.
        if isinstance(dataset_source, Dataset):
            dataset = dataset_source
        elif isinstance(dataset_source, dict):
            dataset = self._from_dict(dataset_source)
        elif isinstance(dataset_source, str):
            if _is_json(dataset_source):
                dataset = self._from_dict(js.loads(dataset_source))
            else:
                dataset = self._from_csv(dataset_source)
        elif isinstance(dataset_source, pd.Series):
            dataset = self._from_series(dataset_source)
        else:
            raise TypeError(f"Could not resolve the object provided: {dataset_source}")

        # Propagate this instance's client so the new object can make API calls.
        dataset.client = self._client

        return dataset

    def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Dataset:
        """Instantiate a Dataset object from a Fusion catalog.

        Args:
            catalog (str | None, optional): Catalog identifier. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Dataset: Dataset object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        dataset = self.identifier
        # Fetch the full dataset listing and pick the entry matching this identifier.
        # NOTE(review): raises IndexError if the identifier is not in the catalog — confirm intended.
        resp = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets")
        requests_raise_for_status(resp)
        list_datasets = resp.json()["resources"]
        dict_ = [dict_ for dict_ in list_datasets if dict_["identifier"] == dataset][0]
        dataset_obj = self._from_dict(dict_)
        dataset_obj.client = client

        # Enrich with the product mapping, if one exists for this dataset
        # (matched case-insensitively; only the first mapped product is kept).
        prod_df = client.list_product_dataset_mapping(catalog=catalog)

        if dataset.lower() in list(prod_df.dataset.str.lower()):
            product = [prod_df[prod_df["dataset"].str.lower() == dataset.lower()]["product"].iloc[0]]
            dataset_obj.product = product

        return dataset_obj

    def to_dict(self) -> dict[str, Any]:
        """Convert the Dataset instance to a dictionary.

        Keys are emitted in camelCase (the API payload convention); private
        underscore-prefixed state is excluded.

        Returns:
            dict[str, Any]: Dataset metadata as a dictionary.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset")
            >>> dataset_dict = dataset.to_dict()

        """
        dataset_dict = {snake_to_camel(k): v for k, v in self.__dict__.items() if not k.startswith("_")}

        return dataset_dict

    def create(
        self,
        catalog: str | None = None,
        product: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new dataset to a Fusion catalog.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            product (str | None, optional): A product identifier to upload dataset to. If dataset object already has
                product attribute populated, the attribute will be overwritten by this value. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Raises:
            ValueError: If the payload carries a report entry with a blank tier.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset(
            ...     identifier= "my_dataset",
            ...     title= "My Dataset",
            ...     description= "My dataset description",
            ...     category= "Finance",
            ...     frequency= "Daily",
            ...     is_restricted= False
            ...     )
            >>> dataset.create(catalog="my_catalog")

            From a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_dict = {
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     }
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
            >>> dataset.create(catalog="my_catalog")

            From a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_json = '{
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     }'
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
            >>> dataset.create(catalog="my_catalog")

            From a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
            >>> dataset.create(catalog="my_catalog")

            From a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_series = pd.Series({
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     })
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

            From existing dataset in a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
            >>> dataset.identifier = "my_new_dataset"
            >>> dataset.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # Stamp today's date on missing audit fields before upload.
        self.created_date = self.created_date if self.created_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        self.modified_date = self.modified_date if self.modified_date else pd.Timestamp("today").strftime("%Y-%m-%d")

        self.product = [product] if product else self.product

        data = self.to_dict()

        # "report" is not a Dataset field here — presumably populated by a
        # subclass/report-style dataset; a blank tier is rejected before upload.
        if data.get("report", None) and data["report"]["tier"] == "":
            raise ValueError("Tier cannot be blank for reports.")


        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.post(url, json=data)
        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

    def update(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Updates a dataset via API from dataset object.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
            >>> dataset.title = "My Updated Dataset"
            >>> dataset.update(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # Stamp today's date on missing audit fields before upload.
        self.created_date = self.created_date if self.created_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        self.modified_date = self.modified_date if self.modified_date else pd.Timestamp("today").strftime("%Y-%m-%d")

        data = self.to_dict()

        # PUT replaces the existing dataset metadata under this identifier.
        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete a dataset via API from its dataset identifier.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").delete(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def copy(
        self,
        catalog_to: str,
        catalog_from: str | None = None,
        client: Fusion | None = None,
        client_to: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Copy dataset from one catalog and/or environment to another by copy.

        Args:
            catalog_to (str): A catalog identifier to which to copy dataset.
            catalog_from (str, optional): A catalog identifier from which to copy dataset. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

        """
        client = self._use_client(client)
        catalog_from = client._use_catalog(catalog_from)

        # Same-environment copy when no destination client is given.
        if client_to is None:
            client_to = client
        # Read the current metadata from the source catalog, then re-create it
        # in the destination catalog (possibly against a different environment).
        dataset_obj = self.from_catalog(catalog=catalog_from, client=client)
        dataset_obj.client = client_to
        resp = dataset_obj.create(client=client_to, catalog=catalog_to, return_resp_obj=True)
        return resp if return_resp_obj else None

    def activate(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Activate a dataset by setting the isActive flag to True.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").activate(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        # Read-modify-write: fetch the catalog's current metadata, flip the
        # flag, and push the update back.
        dataset_obj = self.from_catalog(catalog=catalog, client=client)
        dataset_obj.is_active = True
        resp = dataset_obj.update(catalog=catalog, client=client, return_resp_obj=return_resp_obj)

        return resp if return_resp_obj else None

    def add_to_product(
        self,
        product: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Map dataset to a product.

        Args:
            product (str): A product identifier.
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/productDatasets"
        data = {"product": product, "datasets": [self.identifier]}
        resp = client.session.put(url=url, json=data)

        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

    def remove_from_product(
        self,
        product: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete dataset to product mapping.

        Args:
            product (str): A product identifier.
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        dataset = self.identifier
        url = f"{client.root_url}catalogs/{catalog}/productDatasets/{product}/{dataset}"
        resp = client.session.delete(url=url)

        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Dataset metadata fields after object initialization.

Source code in py_src/fusion/dataset.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def __post_init__(self: Dataset) -> None:
    """Format Dataset metadata fields after object initialization."""

    def _listify(value):  # noqa: ANN001, ANN202
        # Lists and None pass through untouched; scalars are wrapped via make_list.
        return value if isinstance(value, list) or value is None else make_list(value)

    self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description != "" else self.title

    # delivery_channel has no None pass-through: None is also coerced through make_list.
    self.delivery_channel = (
        self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
    )

    # These fields share the same normalization contract: list or None kept, scalar listified.
    for field_name in ("category", "source", "region", "product", "sub_category", "tags", "owners"):
        setattr(self, field_name, _listify(getattr(self, field_name)))

    self.is_internal_only_dataset = (
        self.is_internal_only_dataset
        if isinstance(self.is_internal_only_dataset, bool)
        else make_bool(self.is_internal_only_dataset)
    )
    self.created_date = convert_date_format(self.created_date) if self.created_date else None
    self.modified_date = convert_date_format(self.modified_date) if self.modified_date else None
    # A bare string application id is promoted to the structured SEAL reference form.
    self.application_id = (
        {"id": str(self.application_id), "type": "Application (SEAL)"}
        if isinstance(self.application_id, str)
        else self.application_id
    )

__repr__()

Return an object representation of the Dataset object.

Returns:

Name Type Description
str str

Object representation of the dataset.

Source code in py_src/fusion/dataset.py
117
118
119
120
121
122
123
124
125
def __repr__(self: Dataset) -> str:
    """Return an object representation of the Dataset object.

    Returns:
        str: Object representation of the dataset.

    """
    # Only public (non-underscore-prefixed) attributes participate in the repr.
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    body = ",\n ".join(f"{k}={v!r}" for k, v in attrs.items())
    # Use the runtime class name (not a hard-coded "Dataset") so subclasses
    # report themselves correctly; the original also had an f-string with no
    # placeholders on the literal prefix.
    return f"{type(self).__name__}(\n" + body + "\n)"

activate(catalog=None, client=None, return_resp_obj=False)

Activate a dataset by setting the isActive flag to True.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").activate(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
def activate(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Activate a dataset by setting the isActive flag to True.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").activate(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # Fetch the current catalog copy, flip the flag, then push the update.
    fetched = self.from_catalog(catalog=catalog, client=client)
    fetched.is_active = True
    resp = fetched.update(catalog=catalog, client=client, return_resp_obj=True)
    return resp if return_resp_obj else None

add_to_product(product, catalog=None, client=None, return_resp_obj=False)

Map dataset to a product.

Parameters:

Name Type Description Default
product str

A product identifier.

required
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")
Source code in py_src/fusion/dataset.py
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
def add_to_product(
    self,
    product: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Map dataset to a product.

    Args:
        product (str): A product identifier.
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # PUT the mapping payload against the catalog's productDatasets resource.
    payload = {"product": product, "datasets": [self.identifier]}
    resp = client.session.put(
        url=f"{client.root_url}catalogs/{catalog}/productDatasets",
        json=payload,
    )
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

copy(catalog_to, catalog_from=None, client=None, client_to=None, return_resp_obj=False)

Copy a dataset from one catalog and/or environment to another.

Parameters:

Name Type Description Default
catalog_to str

A catalog identifier to which to copy dataset.

required
catalog_from str

A catalog identifier from which to copy dataset. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
client_to Fusion | None

Fusion client object. Defaults to current instance.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")
Source code in py_src/fusion/dataset.py
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
def copy(
    self,
    catalog_to: str,
    catalog_from: str | None = None,
    client: Fusion | None = None,
    client_to: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Copy a dataset from one catalog and/or environment to another.

    Args:
        catalog_to (str): A catalog identifier to which to copy dataset.
        catalog_from (str, optional): A catalog identifier from which to copy dataset. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

    """
    client = self._use_client(client)
    catalog_from = client._use_catalog(catalog_from)
    # Default the destination client to the source client.
    target_client = client if client_to is None else client_to
    # Read the dataset from the source catalog, then re-create it at the target.
    source_dataset = self.from_catalog(catalog=catalog_from, client=client)
    source_dataset.client = target_client
    resp = source_dataset.create(client=target_client, catalog=catalog_to, return_resp_obj=True)
    return resp if return_resp_obj else None

create(catalog=None, product=None, client=None, return_resp_obj=False)

Upload a new dataset to a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
product str | None

A product identifier to upload dataset to. If dataset object already has product attribute populated, the attribute will be overwritten by this value. Defaults to None.

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset(
...     identifier= "my_dataset",
...     title= "My Dataset",
...     description= "My dataset description",
...     category= "Finance",
...     frequency= "Daily",
...     is_restricted= False
...     )
>>> dataset.create(catalog="my_catalog")

From a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_dict = {
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     }
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
>>> dataset.create(catalog="my_catalog")

From a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_json = '{
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     }'
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
>>> dataset.create(catalog="my_catalog")

From a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
>>> dataset.create(catalog="my_catalog")

From a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_series = pd.Series({
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     })
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

From existing dataset in a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
>>> dataset.identifier = "my_new_dataset"
>>> dataset.create(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def create(
    self,
    catalog: str | None = None,
    product: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new dataset to a Fusion catalog.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        product (str | None, optional): A product identifier to upload dataset to. If dataset object already has
            product attribute populated, the attribute will be overwritten by this value. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset(
        ...     identifier= "my_dataset",
        ...     title= "My Dataset",
        ...     description= "My dataset description",
        ...     category= "Finance",
        ...     frequency= "Daily",
        ...     is_restricted= False
        ...     )
        >>> dataset.create(catalog="my_catalog")

        From a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_dict = {
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     }
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
        >>> dataset.create(catalog="my_catalog")

        From a JSON string:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_json = '{
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     }'
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
        >>> dataset.create(catalog="my_catalog")

        From a CSV file:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
        >>> dataset.create(catalog="my_catalog")

        From a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_series = pd.Series({
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     })
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

        From existing dataset in a catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
        >>> dataset.identifier = "my_new_dataset"
        >>> dataset.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Compute today's date once so created/modified can never disagree
    # (the original computed it twice, which could straddle midnight).
    today = pd.Timestamp("today").strftime("%Y-%m-%d")
    self.created_date = self.created_date if self.created_date else today
    self.modified_date = self.modified_date if self.modified_date else today

    # An explicit product argument overrides any product already on the object.
    self.product = [product] if product else self.product

    data = self.to_dict()

    # Reports must declare a tier before they can be registered.
    if data.get("report", None) and data["report"]["tier"] == "":
        raise ValueError("Tier cannot be blank for reports.")

    url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = client.session.post(url, json=data)
    requests_raise_for_status(resp)

    return resp if return_resp_obj else None

delete(catalog=None, client=None, return_resp_obj=False)

Delete a dataset via API from its dataset identifier.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").delete(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
def delete(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a dataset via API from its dataset identifier.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").delete(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Issue the DELETE against the dataset resource; raises on HTTP error.
    endpoint = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = client.session.delete(endpoint)
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

from_catalog(catalog=None, client=None)

Instantiate a Dataset object from a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

Catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Dataset Dataset

Dataset object.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Dataset:
    """Instantiate a Dataset object from a Fusion catalog.

    Args:
        catalog (str | None, optional): Catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Dataset: Dataset object.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    identifier = self.identifier

    # List all datasets in the catalog and pick the matching entry.
    # NOTE: raises IndexError if the identifier is not present in the listing.
    resp = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets")
    requests_raise_for_status(resp)
    matching = [entry for entry in resp.json()["resources"] if entry["identifier"] == identifier]
    dataset_obj = self._from_dict(matching[0])
    dataset_obj.client = client

    # Attach the mapped product, if the catalog records one for this dataset.
    prod_df = client.list_product_dataset_mapping(catalog=catalog)
    lowered = identifier.lower()
    if lowered in list(prod_df.dataset.str.lower()):
        dataset_obj.product = [prod_df[prod_df["dataset"].str.lower() == lowered]["product"].iloc[0]]

    return dataset_obj

from_object(dataset_source)

Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

Parameters:

Name Type Description Default
dataset_source Dataset | dict[str, Any] | str | Series[Any]

Dataset metadata source.

required

Raises:

Type Description
TypeError

If the object provided is not a Dataset, dictionary, JSON string, path to CSV file, or pandas Series.

Returns:

Name Type Description
Dataset Dataset

Dataset object.

Examples:

Instantiate a Dataset object from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_dict = {
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan",
...     }
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

Instantiate a Dataset object from a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_json = '{
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan"
...     }'
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

Instantiate a Dataset object from a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

Instantiate a Dataset object from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_series = pd.Series({
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan"
...     })
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)
Source code in py_src/fusion/dataset.py
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
def from_object(
    self,
    dataset_source: Dataset | dict[str, Any] | str | pd.Series[Any],
) -> Dataset:
    """Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

    Args:
        dataset_source (Dataset | dict[str, Any] | str | pd.Series[Any]): Dataset metadata source.

    Raises:
        TypeError: If the object provided is not a Dataset, dictionary, JSON string, path to CSV file,
            or pandas Series.

    Returns:
        Dataset: Dataset object.

    Examples:
        Instantiate a Dataset object from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_dict = {
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan",
        ...     }
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

        Instantiate a Dataset object from a JSON string:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_json = '{
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan"
        ...     }'
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

        Instantiate a Dataset object from a CSV file:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

        Instantiate a Dataset object from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_series = pd.Series({
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan"
        ...     })
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

    """
    if isinstance(dataset_source, Dataset):
        dataset = dataset_source
    elif isinstance(dataset_source, dict):
        dataset = self._from_dict(dataset_source)
    elif isinstance(dataset_source, str):
        if _is_json(dataset_source):
            dataset = self._from_dict(js.loads(dataset_source))
        else:
            dataset = self._from_csv(dataset_source)
    elif isinstance(dataset_source, pd.Series):
        dataset = self._from_series(dataset_source)
    else:
        raise TypeError(f"Could not resolve the object provided: {dataset_source}")

    dataset.client = self._client

    return dataset

remove_from_product(product, catalog=None, client=None, return_resp_obj=False)

Delete a dataset-to-product mapping.

Parameters:

Name Type Description Default
product str

A product identifier.

required
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")
Source code in py_src/fusion/dataset.py
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
def remove_from_product(
    self,
    product: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a dataset-to-product mapping.

    Args:
        product (str): A product identifier.
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # DELETE the mapping resource identified by product and dataset identifiers.
    endpoint = f"{client.root_url}catalogs/{catalog}/productDatasets/{product}/{self.identifier}"
    resp = client.session.delete(url=endpoint)
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

to_dict()

Convert the Dataset instance to a dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Dataset metadata as a dictionary.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset")
>>> dataset_dict = dataset.to_dict()
Source code in py_src/fusion/dataset.py
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
def to_dict(self) -> dict[str, Any]:
    """Convert the Dataset instance to a dictionary.

    Returns:
        dict[str, Any]: Dataset metadata as a dictionary, with keys converted
            from snake_case attribute names to camelCase.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset")
        >>> dataset_dict = dataset.to_dict()

    """
    # Private (underscore-prefixed) attributes are excluded from the payload.
    return {
        snake_to_camel(field): value
        for field, value in vars(self).items()
        if not field.startswith("_")
    }

update(catalog=None, client=None, return_resp_obj=False)

Updates a dataset via API from dataset object.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
>>> dataset.title = "My Updated Dataset"
>>> dataset.update(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
def update(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Updates a dataset via API from dataset object.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
        >>> dataset.title = "My Updated Dataset"
        >>> dataset.update(catalog="my_catalog")

    """
    fusion_client = self._use_client(client)
    catalog = fusion_client._use_catalog(catalog)

    # Backfill audit dates with today's date when they were never set.
    today = pd.Timestamp("today").strftime("%Y-%m-%d")
    if not self.created_date:
        self.created_date = today
    if not self.modified_date:
        self.modified_date = today

    payload = self.to_dict()

    url = f"{fusion_client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = fusion_client.session.put(url, json=payload)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

Fusion Product class and functions.

Attribute dataclass

Fusion Attribute class for managing attributes metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

The unique identifier for the attribute.

index int

Attribute index.

data_type str | Types

Datatype of attribute. Defaults to "String".

title str

Attribute title. If not provided, defaults to identifier.

description str

Attribute description. If not provided, defaults to identifier.

is_dataset_key bool

Flag for primary keys. Defaults to False.

source str | None

Name of data vendor which provided the data. Defaults to None.

source_field_id str | None

Original identifier of attribute, if attribute has been renamed. If not provided, defaults to identifier.

is_internal_dataset_key bool | None

Flag for internal primary keys. Defaults to None.

is_externally_visible bool | None

Flag for externally visible attributes. Defaults to True.

unit Any | None

Unit of attribute. Defaults to None.

multiplier float

Multiplier for unit. Defaults to 1.0.

is_propagation_eligible bool | None

Flag for propagation eligibility. Defaults to None.

is_metric bool | None

Flag for attributes that are metrics. Defaults to None.

available_from str | None

Date from which the attribute is available. Defaults to None.

deprecated_from str | None

Date from which the attribute is deprecated. Defaults to None.

term str

Term. Defaults to "bizterm1".

dataset int | None

Dataset. Defaults to None.

attribute_type str | None

Attribute type. Defaults to None.

application_id str | dict[str, str] | None

The seal ID of the dataset in string format, or a dictionary containing 'id' and 'type'. Used for catalog attributes. Defaults to None.

publisher str | None

Publisher of the attribute. Used for catalog attributes. Defaults to None.

is_key_data_element bool | None

Flag for key data elements. Used for attributes registered to Reports. Defaults to None.

_client Fusion | None

Fusion client object. Defaults to None.

Source code in py_src/fusion/attributes.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
@dataclass
class Attribute(metaclass=CamelCaseMeta):
    """Fusion Attribute class for managing attributes metadata in a Fusion catalog.

    Attributes:
        identifier (str): The unique identifier for the attribute.
        index (int): Attribute index.
        data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
        title (str, optional): Attribute title. If not provided, defaults to identifier.
        description (str, optional): Attribute description. If not provided, defaults to identifier.
        is_dataset_key (bool, optional): Flag for primary keys. Defaults to False.
        source (str | None, optional): Name of data vendor which provided the data. Defaults to None.
        source_field_id (str | None, optional): Original identifier of attribute, if attribute has been renamed.
            If not provided, defaults to identifier.
        is_internal_dataset_key (bool | None, optional): Flag for internal primary keys. Defaults to None.
        is_externally_visible (bool | None, optional): Flag for externally visible attributes. Defaults to True.
        unit (Any | None, optional): Unit of attribute. Defaults to None.
        multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
        is_propagation_eligible (bool | None, optional): Flag for propagation eligibility. Defaults to None.
        is_metric (bool | None, optional): Flag for attributes that are metrics. Defaults to None.
        available_from (str | None, optional): Date from which the attribute is available. Defaults to None.
        deprecated_from (str | None, optional): Date from which the attribute is deprecated. Defaults to None.
        term (str, optional): Term. Defaults to "bizterm1".
        dataset (int | None, optional): Dataset. Defaults to None.
        attribute_type (str | None, optional): Attribute type. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The seal ID of the dataset in string format,
            or a dictionary containing 'id' and 'type'. Used for catalog attributes. Defaults to None.
        publisher (str | None, optional): Publisher of the attribute. Used for catalog attributes. Defaults to None.
        is_key_data_element (bool | None, optional): Flag for key data elements. Used for attributes registered to
            Reports. Defaults to None.
        _client (Fusion | None, optional): Fusion client object. Defaults to None.

    """

    identifier: str
    index: int
    data_type: Types = cast(Types, Types.String)
    title: str = ""
    description: str = ""
    is_dataset_key: bool = False
    source: str | None = None
    source_field_id: str | None = None
    is_internal_dataset_key: bool | None = None
    is_externally_visible: bool | None = True
    unit: Any | None = None
    multiplier: float = 1.0
    is_propagation_eligible: bool | None = None
    is_metric: bool | None = None
    available_from: str | None = None
    deprecated_from: str | None = None
    term: str = "bizterm1"
    dataset: int | None = None
    attribute_type: str | None = None
    application_id: str | dict[str, str] | None = None
    publisher: str | None = None
    is_key_data_element: bool | None = None

    # Excluded from __init__/repr/compare; set through the `client` property instead.
    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __str__(self: Attribute) -> str:
        """Format string representation."""
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
        return "Attribute(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __repr__(self: Attribute) -> str:
        """Format object representation."""
        s = ", ".join(f"{getattr(self, f.name)!r}" for f in fields(self) if not f.name.startswith("_"))
        return "(" + s + ")"

    def __post_init__(self: Attribute) -> None:
        """Format Attribute metadata fields after object initialization.

        NOTE: order matters — `title` must be normalized before `description`,
        which falls back to `title` when empty.
        """
        self.is_dataset_key = make_bool(self.is_dataset_key)
        self.identifier = tidy_string(self.identifier).lower().replace(" ", "_")
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description and self.description != "" else self.title
        self.source_field_id = (
            tidy_string(self.source_field_id).lower().replace(" ", "_") if self.source_field_id else self.identifier
        )
        self.available_from = convert_date_format(self.available_from) if self.available_from else None
        self.deprecated_from = convert_date_format(self.deprecated_from) if self.deprecated_from else None
        # Accept "String", "Types.String", or a Types member; keep only the member name.
        self.data_type = Types[str(self.data_type).strip().rsplit(".", maxsplit=1)[-1].title()]
        # A bare string application_id is promoted to the SEAL dict shape the API expects.
        self.application_id = (
            {"id": str(self.application_id), "type": "Application (SEAL)"}
            if isinstance(self.application_id, str)
            else self.application_id
        )

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # Normalize camelCase writes to the canonical snake_case storage key.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Attribute. Set automatically, if the Attribute is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attribute.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client: an explicit argument wins over the stored one.

        Raises:
            ValueError: If neither an explicit nor a stored client is available.
        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(
        cls: type[Attribute],
        series: pd.Series[Any],
    ) -> Attribute:
        """Instantiate an Attribute object from a pandas Series.

        Args:
            series (pd.Series[Any]): Attribute metadata as a pandas Series.

        Returns:
            Attribute: Attribute object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)._from_series(series)

        """
        # Normalize index labels ("Data Type", "data_type", ...) and map NaN to None.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower()).replace(
            to_replace=np.nan, value=None
        )
        data_type = series.get("datatype", cast(Types, Types.String))
        data_type = series.get("type", cast(Types, Types.String)) if data_type is None else data_type
        source = series.get("source", None)
        source = source.strip() if isinstance(source, str) else source

        is_propagation_eligible = series.get("ispropagationeligible", None)
        is_propagation_eligible = (
            make_bool(is_propagation_eligible) if is_propagation_eligible is not None else is_propagation_eligible
        )
        is_metric = series.get("ismetric", None)
        is_metric = make_bool(is_metric) if is_metric is not None else is_metric
        is_internal_dataset_key = series.get("isinternaldatasetkey", None)
        is_internal_dataset_key = (
            make_bool(is_internal_dataset_key) if is_internal_dataset_key is not None else is_internal_dataset_key
        )
        is_externally_visible = series.get("isexternallyvisible", True)
        is_externally_visible = (
            make_bool(is_externally_visible) if is_externally_visible is not None else is_externally_visible
        )

        return cls(
            identifier=series.get("identifier", "").strip(),
            index=series.get("index", -1),
            data_type=Types[data_type.strip().split(".")[-1].title()],
            title=series.get("title", ""),
            description=series.get("description", ""),
            is_dataset_key=series.get("isdatasetkey", False),
            source=source,
            source_field_id=series.get("sourcefieldid", None),
            is_internal_dataset_key=is_internal_dataset_key,
            is_externally_visible=is_externally_visible,
            unit=series.get("unit", None),
            multiplier=series.get("multiplier", 1.0),
            is_propagation_eligible=is_propagation_eligible,
            is_metric=is_metric,
            available_from=series.get("availablefrom", None),
            deprecated_from=series.get("deprecatedfrom", None),
            term=series.get("term", "bizterm1"),
            dataset=series.get("dataset", None),
            attribute_type=series.get("attributetype", None),
        )

    @classmethod
    def _from_dict(cls: type[Attribute], data: dict[str, Any]) -> Attribute:
        """Instantiate an Attribute object from a dictionary.

        Args:
            data (dict[str, Any]): Attribute metadata as a dictionary.

        Returns:
            Attribute: Attribute object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)._from_dict(data)

        """
        keys = [f.name for f in fields(cls)]
        data = {camel_to_snake(k): v for k, v in data.items()}
        # Drop unknown keys; map NaN to None.
        # NOTE(review): pd.isna raises on list-like values — presumably inputs here are scalars; verify.
        data = {k: (None if pd.isna(v) else v) for k, v in data.items() if k in keys}
        if "data_type" in data:
            data["data_type"] = Types[data["data_type"].strip().rsplit(".", maxsplit=1)[-1].title()]
        return cls(**data)

    def from_object(
        self,
        attribute_source: Attribute | dict[str, Any] | pd.Series[Any],
    ) -> Attribute:
        """Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

        Args:
            attribute_source (Attribute | dict[str, Any] | pd.Series[Any]): Attribute metadata source.

        Raises:
            ValueError: If the object provided is not an Attribute object, dictionary or pandas Series.

        Returns:
            Attribute: Attribute object.

        Examples:

            Instantiating an Attribute from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

            Instantiating an Attribute from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)

        """
        if isinstance(attribute_source, Attribute):
            attribute = attribute_source
        elif isinstance(attribute_source, dict):
            attribute = self._from_dict(attribute_source)
        elif isinstance(attribute_source, pd.Series):
            attribute = self._from_series(attribute_source)
        else:
            raise ValueError(f"Could not resolve the object provided: {attribute_source}")
        attribute.client = self._client
        return attribute

    def to_dict(self: Attribute) -> dict[str, Any]:
        """Convert object to dictionary.

        Returns:
            dict[str, Any]: Attribute metadata as a dictionary.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attribute_dict = attribute.to_dict()

        """
        result = {snake_to_camel(k): v for k, v in self.__dict__.items() if not k.startswith("_")}
        result["unit"] = str(self.unit) if self.unit is not None else None
        result["dataType"] = self.data_type.name
        # The API field is named isCriticalDataElement, not isKeyDataElement.
        if "isKeyDataElement" in result:
            result["isCriticalDataElement"] = result.pop("isKeyDataElement")
        return result

    def create(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new attribute to a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            Individually, from scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")
            >>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

            Individually, from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ...    }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

            Individually, from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        data = self.to_dict()
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
        resp = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete an Attribute from a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
        resp = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def set_lineage(
        self,
        attributes: list[Attribute],
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow
            can be mapped to existing registered input data flow attributes. This supports the case in which the
            generating application and receiving application store their attributes with different names.

        Args:
            attributes (str): List of Attribute objects to establish upstream lineage from.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Raises:
            ValueError: If this attribute or any target attribute lacks an application_id.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
            >>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
            >>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
            >>> attrs = [my_attr1, my_attr2]
            >>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        if self.application_id is None:
            raise ValueError("The 'application_id' attribute is required for setting lineage.")
        target_attributes = []
        for attribute in attributes:
            if attribute.application_id is None:
                # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
                raise ValueError("The 'application_id' attribute is required for setting lineage.")
            attr_dict = {
                "catalog": catalog,
                "attribute": attribute.identifier,
                "applicationId": attribute.application_id,
            }
            target_attributes.append(attr_dict)

        url = f"{client.root_url}catalogs/{catalog}/attributes/lineage"
        data = [
            {
                "source": {
                    "catalog": catalog,
                    "attribute": self.identifier,
                    "applicationId": self.application_id,
                },
                "targets": target_attributes,
            }
        ]
        resp = client.session.post(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Attribute metadata fields after object initialization.

Source code in py_src/fusion/attributes.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def __post_init__(self: Attribute) -> None:
    """Format Attribute metadata fields after object initialization.

    NOTE: statement order is load-bearing — `title` must be normalized before
    `description`, which falls back to `title` when empty.
    """
    # Normalize the primary-key flag to a real bool.
    self.is_dataset_key = make_bool(self.is_dataset_key)
    # Canonical identifier form: trimmed, lowercased, underscores for spaces.
    self.identifier = tidy_string(self.identifier).lower().replace(" ", "_")
    # Empty title falls back to a title-cased version of the identifier.
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description and self.description != "" else self.title
    # source_field_id defaults to the (already normalized) identifier.
    self.source_field_id = (
        tidy_string(self.source_field_id).lower().replace(" ", "_") if self.source_field_id else self.identifier
    )
    self.available_from = convert_date_format(self.available_from) if self.available_from else None
    self.deprecated_from = convert_date_format(self.deprecated_from) if self.deprecated_from else None
    # Accepts "String", "Types.String", or a Types member; keeps only the member name.
    self.data_type = Types[str(self.data_type).strip().rsplit(".", maxsplit=1)[-1].title()]
    # A bare string application_id is promoted to the SEAL dict shape the API expects.
    self.application_id = (
        {"id": str(self.application_id), "type": "Application (SEAL)"}
        if isinstance(self.application_id, str)
        else self.application_id
    )

__repr__()

Format object representation.

Source code in py_src/fusion/attributes.py
92
93
94
95
def __repr__(self: Attribute) -> str:
    """Format object representation: a parenthesized, comma-separated tuple of public field reprs."""
    public_names = (f.name for f in fields(self) if not f.name.startswith("_"))
    body = ", ".join(repr(getattr(self, name)) for name in public_names)
    return "(" + body + ")"

__str__()

Format string representation.

Source code in py_src/fusion/attributes.py
87
88
89
90
def __str__(self: Attribute) -> str:
    """Format string representation: one public field per line inside Attribute(...)."""
    # Only public (non-underscore) fields are shown.
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
    return "Attribute(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

create(dataset, catalog=None, client=None, return_resp_obj=False)

Upload a new attribute to a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

Individually, from scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")
>>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")

Individually, from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = {
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
...    }
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")

Individually, from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> series = pd.Series({
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... })
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
def create(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new attribute to a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        Individually, from scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
        >>> attribute0.create(dataset="my_dataset", catalog="my_catalog")
        >>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
        >>> attribute1.create(dataset="my_dataset", catalog="my_catalog")

        Individually, from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = {
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ...    }
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
        >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

        Individually, from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> series = pd.Series({
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... })
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
        >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    data = self.to_dict()
    # PUT against the attribute identifier upserts the attribute metadata.
    url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
    resp = client.session.put(url, json=data)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(dataset, catalog=None, client=None, return_resp_obj=False)

Delete an Attribute from a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
def delete(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete an Attribute from a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")

    """
    fusion_client = self._use_client(client)
    catalog = fusion_client._use_catalog(catalog)
    attribute_url = (
        f"{fusion_client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
    )
    response = fusion_client.session.delete(attribute_url)
    requests_raise_for_status(response)
    if return_resp_obj:
        return response
    return None

from_object(attribute_source)

Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

Parameters:

Name Type Description Default
attribute_source Attribute | dict[str, Any] | Series[Any]

Attribute metadata source.

required

Raises:

Type Description
ValueError

If the object provided is not an Attribute object, dictionary or pandas Series.

Returns:

Name Type Description
Attribute Attribute

Attribute object.

Instantiating an Attribute from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = {
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... }
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

Instantiating an Attribute from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> series = pd.Series({
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... })
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
Source code in py_src/fusion/attributes.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
def from_object(
    self,
    attribute_source: Attribute | dict[str, Any] | pd.Series[Any],
) -> Attribute:
    """Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

    Args:
        attribute_source (Attribute | dict[str, Any] | pd.Series[Any]): Attribute metadata source.

    Raises:
        ValueError: If the object provided is not an Attribute object, dictionary or pandas Series.

    Returns:
        Attribute: Attribute object.

    Examples:

        Instantiating an Attribute from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = {
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... }
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

        Instantiating an Attribute from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> series = pd.Series({
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... })
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)

    """
    if isinstance(attribute_source, Attribute):
        attribute = attribute_source
    elif isinstance(attribute_source, dict):
        attribute = self._from_dict(attribute_source)
    elif isinstance(attribute_source, pd.Series):
        attribute = self._from_series(attribute_source)
    else:
        raise ValueError(f"Could not resolve the object provided: {attribute_source}")
    # Propagate this instance's client so the resolved attribute can make API calls.
    attribute.client = self._client
    return attribute

set_lineage(attributes, catalog=None, client=None, return_resp_obj=False)

Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow can be mapped to existing registered input data flow attributes. This supports the case in which the generating application and receiving application store their attributes with different names.

Parameters:

Name Type Description Default
attributes list[Attribute]

List of Attribute objects to establish upstream lineage from.

required
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
>>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
>>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
>>> attrs = [my_attr1, my_attr2]
>>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")
Source code in py_src/fusion/attributes.py
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
def set_lineage(
    self,
    attributes: list[Attribute],
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow
        can be mapped to existing registered input data flow attributes. This supports the case in which the
        generating application and receiving application store their attributes with different names.

    Args:
        attributes (list[Attribute]): List of Attribute objects to establish upstream lineage from.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Raises:
        ValueError: If this attribute, or any target attribute, has no application_id set.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
        >>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
        >>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
        >>> attrs = [my_attr1, my_attr2]
        >>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Both the source (self) and every target attribute must be registered
    # against an application for lineage to be established.
    if self.application_id is None:
        raise ValueError("The 'application_id' attribute is required for setting lineage.")
    target_attributes = []
    for attribute in attributes:
        if attribute.application_id is None:
            raise ValueError("The 'application_id' attribute is required for setting lineage.")
        target_attributes.append(
            {
                "catalog": catalog,
                "attribute": attribute.identifier,
                "applicationId": attribute.application_id,
            }
        )

    url = f"{client.root_url}catalogs/{catalog}/attributes/lineage"
    data = [
        {
            "source": {
                "catalog": catalog,
                "attribute": self.identifier,
                "applicationId": self.application_id,
            },
            "targets": target_attributes,
        }
    ]
    resp = client.session.post(url, json=data)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

to_dict()

Convert object to dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Attribute metadata as a dictionary.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attribute_dict = attribute.to_dict()
Source code in py_src/fusion/attributes.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def to_dict(self: Attribute) -> dict[str, Any]:
    """Convert object to dictionary.

    Returns:
        dict[str, Any]: Attribute metadata as a dictionary.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attribute_dict = attribute.to_dict()

    """
    # Public (non-underscore) fields are exported with camelCase keys.
    result: dict[str, Any] = {}
    for key, value in self.__dict__.items():
        if key.startswith("_"):
            continue
        result[snake_to_camel(key)] = value
    result["unit"] = None if self.unit is None else str(self.unit)
    result["dataType"] = self.data_type.name
    # The API expects 'isCriticalDataElement' rather than 'isKeyDataElement'.
    if "isKeyDataElement" in result:
        result["isCriticalDataElement"] = result.pop("isKeyDataElement")
    return result

Attributes dataclass

Class representing a collection of Attribute instances for managing attribute metadata in a Fusion catalog.

Attributes:

Name Type Description
attributes list[Attribute]

List of Attribute instances.

_client Fusion | None

Fusion client object.

Source code in py_src/fusion/attributes.py
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
@dataclass
class Attributes:
    """Class representing a collection of Attribute instances for managing attribute metadata in a Fusion catalog.

    Attributes:
        attributes (list[Attribute]): List of Attribute instances.
        _client (Fusion | None): Fusion client object.

    """

    attributes: list[Attribute] = field(default_factory=list)

    _client: Fusion | None = None

    def __str__(self) -> str:
        """String representation of the Attributes collection."""
        return (
            "[\n" + ",\n ".join(f"{attr!r}" for attr in self.attributes) + "\n]" if self.attributes else "[]"
        )

    def __repr__(self) -> str:
        """Object representation of the Attributes collection."""
        return self.__str__()

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Attributes collection. Set automatically, if the collection is
            instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes()
            >>> attributes.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client. Prefer the explicit argument, fall back to the instance's client.

        Raises:
            ValueError: If neither an explicit client nor an instance client is available.
        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    def add_attribute(self, attribute: Attribute) -> None:
        """Add an Attribute instance to the collection.

        Args:
            attribute (Attribute): Attribute instance to add.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes()
            >>> attributes.add_attribute(attribute)

        """
        self.attributes.append(attribute)

    def remove_attribute(self, identifier: str) -> bool:
        """Remove an Attribute instance from the collection by identifier.

        Args:
            identifier (str): Identifier of the Attribute to remove.

        Returns:
            bool: True if the Attribute was removed, False otherwise.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.remove_attribute("my_attribute")

        """
        for attr in self.attributes:
            if attr.identifier == identifier:
                self.attributes.remove(attr)
                return True
        return False

    def get_attribute(self, identifier: str) -> Attribute | None:
        """Get an Attribute instance from the collection by identifier.

        Args:
            identifier (str): Identifier of the Attribute to retrieve.

        Returns:
            Attribute | None: The Attribute instance if found, None otherwise.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> retrieved_attribute = attributes.get_attribute("my_attribute")

        """
        for attr in self.attributes:
            if attr.identifier == identifier:
                return attr
        return None

    def to_dict(self) -> dict[str, list[dict[str, Any]]]:
        """Convert the collection of Attribute instances to a list of dictionaries.

        Returns:
            dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes_dict = attributes.to_dict()

        """
        dict_out = {"attributes": [attr.to_dict() for attr in self.attributes]}
        return dict_out

    @classmethod
    def _from_dict_list(cls: type[Attributes], data: list[dict[str, Any]]) -> Attributes:
        """Create an Attributes instance from a list of dictionaries.

        Args:
            data (list[dict[str, Any]]): List of dictionaries representing Attribute instances.

        Returns:
            Attributes: Attributes instance.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes()._from_dict_list(data)

        """
        attributes = [Attribute._from_dict(attr_data) for attr_data in data]
        return Attributes(attributes=attributes)

    @classmethod
    def _from_dataframe(cls: type[Attributes], data: pd.DataFrame) -> Attributes:
        """Create an Attributes instance from a pandas DataFrame.

        Args:
            data (pd.DataFrame): DataFrame representing Attribute instances.

        Returns:
            Attributes: Attributes instance.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes()._from_dataframe(data)

        """
        # NaN cells would otherwise propagate into Attribute fields; normalise to None.
        data = data.replace(to_replace=np.nan, value=None)
        data = data.reset_index() if "index" not in data.columns else data
        attributes = [Attribute._from_series(series) for _, series in data.iterrows()]
        return Attributes(attributes=attributes)

    def from_object(
        self,
        attributes_source: list[Attribute] | list[dict[str, Any]] | pd.DataFrame,
    ) -> Attributes:
        """Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

        Args:
            attributes_source (list[Attribute] | list[dict[str, Any]] | pd.DataFrame): Attributes metadata source.

        Raises:
            ValueError: If the object provided is not a list of Attribute objects, a list of dictionaries
                or a pandas DataFrame.

        Returns:
            Attributes: Attributes object.

        Examples:

            Instantiating Attributes from a list of dictionaries:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes().from_object(data)

            Instantiating Attributes from a pandas DataFrame:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes().from_object(data)

        """
        if isinstance(attributes_source, list):
            if all(isinstance(attr, Attribute) for attr in attributes_source):
                attributes = Attributes(cast(list[Attribute], attributes_source))
            elif all(isinstance(attr, dict) for attr in attributes_source):
                attributes = Attributes._from_dict_list(cast(list[dict[str, Any]], attributes_source))
            else:
                # A list of mixed element types previously fell through with
                # 'attributes' unbound, raising UnboundLocalError further down;
                # raise the intended error explicitly instead.
                raise ValueError(f"Could not resolve the object provided: {attributes_source}")
        elif isinstance(attributes_source, pd.DataFrame):
            attributes = Attributes._from_dataframe(attributes_source)
        else:
            raise ValueError(f"Could not resolve the object provided: {attributes_source}")
        attributes.client = self._client
        return attributes

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the collection of Attribute instances to a pandas DataFrame.

        Returns:
            pd.DataFrame: DataFrame representing the collection of Attribute instances. If the
                collection is empty, a single example attribute row is returned as a template.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes_df = attributes.to_dataframe()

        """
        # Use a local placeholder for an empty collection rather than mutating
        # self.attributes (the previous behaviour), so converting an empty
        # collection does not alter its state.
        attrs = self.attributes if self.attributes else [Attribute(identifier="example_attribute", index=0)]
        data = [attr.to_dict() for attr in attrs]
        return pd.DataFrame(data)

    def from_catalog(self, dataset: str, catalog: str | None = None, client: Fusion | None = None) -> Attributes:
        """Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

        Args:
            dataset (str): The dataset identifier.
            catalog (str | None, optional): The catalog identifier. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Attributes: An instance of the Attributes class with the attributes from the catalog.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
        response = client.session.get(url)
        requests_raise_for_status(response)
        list_attributes = response.json()["resources"]
        # Preserve the declared column ordering of the dataset.
        list_attributes = sorted(list_attributes, key=lambda x: x["index"])

        self.attributes = [Attribute._from_dict(attr_data) for attr_data in list_attributes]
        return self

    def create(
        self,
        dataset: str | None = None,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided,
            attributes are registered to the catalog.

        Args:
            dataset (str, optional): Dataset identifier. If None, attributes are registered
                to the catalog instead. Defaults to None.
            catalog (str, optional): A catalog identifier. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Raises:
            ValueError: If registering to a catalog and any attribute lacks a publisher or application_id.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From a list of dictionaries:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes().from_object(data)
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From a pandas DataFrame:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes().from_object(data)
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From existing dataset's attributes in a Fusion catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
            >>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

            Register attributes to a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        data = self.to_dict()
        if dataset:
            # Dataset-scoped upload: PUT replaces the dataset's attribute set.
            url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
            resp = client.session.put(url, json=data)
            requests_raise_for_status(resp)
            return resp if return_resp_obj else None
        else:
            # Catalog-level registration requires publisher and application_id on every attribute.
            for attr in self.attributes:
                if attr.publisher is None:
                    raise ValueError("The 'publisher' attribute is required for catalog attributes.")
                if attr.application_id is None:
                    raise ValueError("The 'application_id' attribute is required for catalog attributes.")
            url = f"{client.root_url}catalogs/{catalog}/attributes"
            data_ = data.get("attributes", None)
            resp = client.session.post(url, json=data_)
            requests_raise_for_status(resp)
            return resp if return_resp_obj else None

    def delete(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> list[requests.Response] | None:
        """Delete the Attributes from a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            catalog (str, optional): A catalog identifier. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True,
                otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
            >>> attributes.delete(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        responses = []
        # One DELETE per attribute; the first failure raises and aborts the rest.
        for attr in self.attributes:
            resp = client.session.delete(
                f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{attr.identifier}"
            )
            requests_raise_for_status(resp)
            responses.append(resp)

        return responses if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__repr__()

Object representation of the Attributes collection.

Source code in py_src/fusion/attributes.py
524
525
526
def __repr__(self) -> str:
    """Object representation of the Attributes collection."""
    # Delegate to __str__ so both representations stay in sync.
    return str(self)

__str__()

String representation of the Attributes collection.

Source code in py_src/fusion/attributes.py
518
519
520
521
522
def __str__(self) -> str:
    """String representation of the Attributes collection."""
    if not self.attributes:
        return "[]"
    body = ",\n ".join(repr(attr) for attr in self.attributes)
    return "[\n" + body + "\n]"

add_attribute(attribute)

Add an Attribute instance to the collection.

Parameters:

Name Type Description Default
attribute Attribute

Attribute instance to add.

required
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes()
>>> attributes.add_attribute(attribute)
Source code in py_src/fusion/attributes.py
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
def add_attribute(self, attribute: Attribute) -> None:
    """Add an Attribute instance to the collection.

    Args:
        attribute (Attribute): Attribute instance to add.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes()
        >>> attributes.add_attribute(attribute)

    """
    # Insert at the end in place (equivalent to append; keeps list identity).
    self.attributes.insert(len(self.attributes), attribute)

create(dataset=None, catalog=None, client=None, return_resp_obj=False)

Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided, attributes are registered to the catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From a list of dictionaries:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = [
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ]
>>> attributes = fusion.attributes().from_dict_list(data)
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From a pandas DataFrame:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> data = pd.DataFrame([
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ])
>>> attributes = fusion.attributes().from_dataframe(data)
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From existing dataset's attributes in a Fusion catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
>>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

Register attributes to a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.create(catalog="my_catalog")
Source code in py_src/fusion/attributes.py
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
def create(
    self,
    dataset: str | None = None,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided,
        attributes are registered to the catalog.

    Args:
        dataset (str, optional): Dataset identifier. When omitted, the attributes are
            registered directly to the catalog instead of a dataset.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From a list of dictionaries:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = [
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ]
        >>> attributes = fusion.attributes().from_dict_list(data)
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From a pandas DataFrame:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> data = pd.DataFrame([
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ])
        >>> attributes = fusion.attributes().from_dataframe(data)
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From existing dataset's attributes in a Fusion catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
        >>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

        Register attributes to a catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    payload = self.to_dict()

    if dataset:
        # Dataset-level upload: PUT the full {"attributes": [...]} payload.
        resp = client.session.put(
            f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes", json=payload
        )
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    # Catalog-level registration requires publisher and application_id on every attribute.
    for attr in self.attributes:
        if attr.publisher is None:
            raise ValueError("The 'publisher' attribute is required for catalog attributes.")
        if attr.application_id is None:
            raise ValueError("The 'application_id' attribute is required for catalog attributes.")
    resp = client.session.post(
        f"{client.root_url}catalogs/{catalog}/attributes", json=payload.get("attributes", None)
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(dataset, catalog=None, client=None, return_resp_obj=False)

Delete the Attributes from a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
list[Response] | None

list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
>>> attributes.delete(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
def delete(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> list[requests.Response] | None:
    """Delete the Attributes from a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
        return_resp_obj (bool, optional): If True then return the response objects. Defaults to False.

    Returns:
        list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
        >>> attributes.delete(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    results: list[requests.Response] = []
    for attribute in self.attributes:
        # Issue one DELETE per attribute; stop immediately on an HTTP error.
        response = client.session.delete(
            f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{attribute.identifier}"
        )
        requests_raise_for_status(response)
        results.append(response)
    return results if return_resp_obj else None

from_catalog(dataset, catalog=None, client=None)

Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

The dataset identifier.

required
catalog str | None

The catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Attributes Attributes

An instance of the Attributes class with the attributes from the catalog.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
def from_catalog(self, dataset: str, catalog: str | None = None, client: Fusion | None = None) -> Attributes:
    """Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

    Args:
        dataset (str): The dataset identifier.
        catalog (str | None, optional): The catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Attributes: An instance of the Attributes class with the attributes from the catalog.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    response = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes")
    requests_raise_for_status(response)
    # Keep attributes ordered by their declared index before rebuilding the collection.
    resources = sorted(response.json()["resources"], key=lambda entry: entry["index"])
    self.attributes = [Attribute._from_dict(entry) for entry in resources]
    return self

from_object(attributes_source)

Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

Parameters:

Name Type Description Default
attributes_source list[Attribute] | list[dict[str, Any]] | DataFrame

Attributes metadata source.

required

Raises:

Type Description
ValueError

If the object provided is not a list of Attribute objects, dictionaries or pandas DataFrame.

Returns:

Name Type Description
Attributes Attributes

Attributes object.

Instantiating Attributes from a list of dictionaries:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = [
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ]
>>> attributes = fusion.attributes().from_object(data)

Instantiating Attributes from a pandas DataFrame:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> data = pd.DataFrame([
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ])
>>> attributes = fusion.attributes().from_object(data)
Source code in py_src/fusion/attributes.py
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
def from_object(
    self,
    attributes_source: list[Attribute] | list[dict[str, Any]] | pd.DataFrame,
) -> Attributes:
    """Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

    Args:
        attributes_source (list[Attribute] | list[dict[str, Any]] | pd.DataFrame): Attributes metadata source.

    Raises:
        ValueError: If the object provided is not a list of Attribute objects, a list of
            dictionaries, or a pandas DataFrame.

    Returns:
        Attributes: Attributes object.

    Examples:

        Instantiating Attributes from a list of dictionaries:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = [
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ]
        >>> attributes = fusion.attributes().from_object(data)

        Instantiating Attributes from a pandas DataFrame:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> data = pd.DataFrame([
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ])
        >>> attributes = fusion.attributes().from_object(data)

    """
    if isinstance(attributes_source, list):
        if all(isinstance(attr, Attribute) for attr in attributes_source):
            attributes = Attributes(cast(list[Attribute], attributes_source))
        elif all(isinstance(attr, dict) for attr in attributes_source):
            attributes = Attributes._from_dict_list(cast(list[dict[str, Any]], attributes_source))
        else:
            # A mixed list (or a list of unsupported element types) previously matched
            # neither branch and crashed with UnboundLocalError; fail explicitly instead.
            raise ValueError(f"Could not resolve the object provided: {attributes_source}")
    elif isinstance(attributes_source, pd.DataFrame):
        attributes = Attributes._from_dataframe(attributes_source)
    else:
        raise ValueError(f"Could not resolve the object provided: {attributes_source}")
    attributes.client = self._client
    return attributes

get_attribute(identifier)

Get an Attribute instance from the collection by identifier.

Parameters:

Name Type Description Default
identifier str

Identifier of the Attribute to retrieve.

required

Returns:

Type Description
Attribute | None

Attribute | None: The Attribute instance if found, None otherwise.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> retrieved_attribute = attributes.get_attribute("my_attribute")
Source code in py_src/fusion/attributes.py
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
def get_attribute(self, identifier: str) -> Attribute | None:
    """Get an Attribute instance from the collection by identifier.

    Args:
        identifier (str): Identifier of the Attribute to retrieve.

    Returns:
        Attribute | None: The Attribute instance if found, None otherwise.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> retrieved_attribute = attributes.get_attribute("my_attribute")

    """
    # First match wins; None when no attribute carries the identifier.
    return next((attr for attr in self.attributes if attr.identifier == identifier), None)

remove_attribute(identifier)

Remove an Attribute instance from the collection by identifier.

Parameters:

Name Type Description Default
identifier str

Identifier of the Attribute to remove.

required

Returns:

Name Type Description
bool bool

True if the Attribute was removed, False otherwise.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.remove_attribute("my_attribute")
Source code in py_src/fusion/attributes.py
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
def remove_attribute(self, identifier: str) -> bool:
    """Remove an Attribute instance from the collection by identifier.

    Args:
        identifier (str): Identifier of the Attribute to remove.

    Returns:
        bool: True if the Attribute was removed, False otherwise.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.remove_attribute("my_attribute")

    """
    # Delete the first attribute whose identifier matches, if any.
    for position, attr in enumerate(self.attributes):
        if attr.identifier == identifier:
            del self.attributes[position]
            return True
    return False

to_dataframe()

Convert the collection of Attribute instances to a pandas DataFrame.

Returns:

Type Description
DataFrame

pd.DataFrame: DataFrame representing the collection of Attribute instances.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes_df = attributes.to_dataframe()
Source code in py_src/fusion/attributes.py
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
def to_dataframe(self) -> pd.DataFrame:
    """Convert the collection of Attribute instances to a pandas DataFrame.

    Returns:
        pd.DataFrame: DataFrame representing the collection of Attribute instances.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes_df = attributes.to_dataframe()

    """
    # An empty collection is seeded with a single placeholder attribute so the
    # resulting frame always carries the expected columns.
    if not self.attributes:
        self.attributes = [Attribute(identifier="example_attribute", index=0)]
    rows = [attr.to_dict() for attr in self.attributes]
    return pd.DataFrame(rows)

to_dict()

Convert the collection of Attribute instances to a list of dictionaries.

Returns:

Type Description
dict[str, list[dict[str, Any]]]

dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes_dict = attributes.to_dict()
Source code in py_src/fusion/attributes.py
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
def to_dict(self) -> dict[str, list[dict[str, Any]]]:
    """Convert the collection of Attribute instances to a list of dictionaries.

    Returns:
        dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes_dict = attributes.to_dict()

    """
    return {"attributes": [attr.to_dict() for attr in self.attributes]}