Skip to content

Modules

Main Fusion module.

Fusion

Core Fusion class for API access.

Source code in py_src/fusion/fusion.py
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
class Fusion:
    """Core Fusion class for API access."""

    @staticmethod
    def _call_for_dataframe(url: str, session: requests.Session) -> pd.DataFrame:
        """Private function that calls an API endpoint and returns the data as a pandas dataframe.

        Args:
            url (Union[FusionCredentials, Union[str, dict]): URL for an API endpoint with valid parameters.
            session (requests.Session): Specify a proxy if required to access the authentication server. Defaults to {}.

        Returns:
            pandas.DataFrame: a dataframe containing the requested data.
        """
        response = session.get(url)
        response.raise_for_status()
        table = response.json()["resources"]
        ret_df = pd.DataFrame(table).reset_index(drop=True)
        return ret_df

    @staticmethod
    def _call_for_bytes_object(url: str, session: requests.Session) -> BytesIO:
        """Private function that calls an API endpoint and returns the data as a bytes object in memory.

        Args:
            url (Union[FusionCredentials, Union[str, dict]): URL for an API endpoint with valid parameters.
            session (requests.Session): Specify a proxy if required to access the authentication server. Defaults to {}.

        Returns:
            io.BytesIO: in memory file content
        """

        response = session.get(url)
        response.raise_for_status()

        return BytesIO(response.content)

    def __init__(
        self,
        credentials: str | FusionCredentials = "config/client_credentials.json",
        root_url: str = "https://fusion.jpmorgan.com/api/v1/",
        download_folder: str = "downloads",
        log_level: int = logging.ERROR,
        fs: fsspec.filesystem = None,
        log_path: str = ".",
    ) -> None:
        """Constructor to instantiate a new Fusion object.

        Args:
            credentials (Union[str, FusionCredentials]): A path to a credentials file or a fully populated
            FusionCredentials object. Defaults to 'config/client_credentials.json'.
            root_url (_type_, optional): The API root URL.
                Defaults to "https://fusion.jpmorgan.com/api/v1/".
            download_folder (str, optional): The folder path where downloaded data files
                are saved. Defaults to "downloads".
            log_level (int, optional): Set the logging level. Defaults to logging.ERROR.
            fs (fsspec.filesystem): filesystem.
            log_path (str, optional): The folder path where the log is stored.

        Raises:
            ValueError: If `credentials` is neither a str path nor a FusionCredentials object.
        """
        self._default_catalog = "common"

        self.root_url = root_url
        self.download_folder = download_folder
        # Create the download destination up front so later downloads cannot fail on a missing folder.
        Path(download_folder).mkdir(parents=True, exist_ok=True)

        # Clear any handlers from a previous instantiation so log lines are not duplicated.
        if logger.hasHandlers():
            logger.handlers.clear()
        file_handler = logging.FileHandler(filename=f"{log_path}/fusion_sdk.log")
        logging.addLevelName(VERBOSE_LVL, "VERBOSE")
        stdout_handler = logging.StreamHandler(sys.stdout)
        formatter = logging.Formatter(
            "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        # NOTE(review): the formatter is attached only to the stdout handler, so the
        # log file receives unformatted messages — confirm this is intentional.
        stdout_handler.setFormatter(formatter)
        logger.addHandler(stdout_handler)
        logger.addHandler(file_handler)
        logger.setLevel(log_level)

        # Accept either a ready-made credentials object or a path to a credentials JSON file.
        if isinstance(credentials, FusionCredentials):
            self.credentials = credentials
        elif isinstance(credentials, str):
            self.credentials = FusionCredentials.from_file(Path(credentials))
        else:
            raise ValueError("credentials must be a path to a credentials file or FusionCredentials object")

        # Authenticated HTTP session shared by all API wrapper methods.
        self.session = get_session(self.credentials, self.root_url)
        self.fs = fs if fs else get_default_fs()
        # Event data placeholder; None until populated.
        self.events: pd.DataFrame | None = None

    def __repr__(self) -> str:
        """Object representation to list all available methods.

        Returns:
            str: A table (psql format) of the public methods and properties with the
                first line of each docstring.
        """
        # Collect public callables and properties once; the original duplicated this
        # introspection verbatim for the name column and the docstring column.
        method_names = [
            name
            for name in dir(Fusion)
            if callable(getattr(Fusion, name)) and not name.startswith("_")
        ]
        property_names = [p for p in dir(Fusion) if isinstance(getattr(Fusion, p), property)]
        names = method_names + property_names
        # `or ""` guards against members without a docstring, which previously
        # raised AttributeError inside repr().
        summaries = [(getattr(Fusion, name).__doc__ or "").split("\n")[0] for name in names]
        return "Fusion object \nAvailable methods:\n" + tabulate(
            pd.DataFrame([names, summaries]).T.set_index(0),  # type: ignore
            tablefmt="psql",
        )

    @property
    def default_catalog(self) -> str:
        """Returns the default catalog.

        Returns:
            str: The catalog identifier used when an API call does not specify one.
        """
        return self._default_catalog

    @default_catalog.setter
    def default_catalog(self, catalog: str) -> None:
        """Allow the default catalog, which is "common" to be overridden.

        Args:
            catalog (str): The catalog to use as the default

        Returns:
            None
        """
        self._default_catalog = catalog

    def _use_catalog(self, catalog: str | None) -> str:
        """Determine which catalog to use in an API call.

        Args:
            catalog (str): The catalog value passed as an argument to an API function wrapper.

        Returns:
            str: The catalog to use
        """
        if catalog is None:
            return self.default_catalog

        return catalog

    def get_fusion_filesystem(self) -> FusionHTTPFileSystem:
        """Creates Fusion Filesystem.

        Returns: Fusion Filesystem

        """
        client_kwargs = {"root_url": self.root_url, "credentials": self.credentials}
        return FusionHTTPFileSystem(client_kwargs=client_kwargs)

    def list_catalogs(self, output: bool = False) -> pd.DataFrame:
        """Lists the catalogs available to the API account.

        Args:
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each catalog
        """
        url = f"{self.root_url}catalogs/"
        cat_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return cat_df

    def catalog_resources(self, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
        """List the resources contained within the catalog, for example products and datasets.

        Args:
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
           class:`pandas.DataFrame`: A dataframe with a row for each resource within the catalog
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}"
        cat_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return cat_df

    def list_products(
        self,
        contains: str | list[str] | None = None,
        id_contains: bool = False,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Get the products contained in a catalog. A product is a grouping of datasets.

        Args:
            contains (Union[str, list], optional): A string or a list of strings that are product
                identifiers to filter the products list. If a list is provided then it will return
                products whose identifier matches any of the strings. Defaults to None.
            id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
                ignoring description.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each product
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/products"
        full_prod_df: pd.DataFrame = Fusion._call_for_dataframe(url, self.session)

        if contains:
            if isinstance(contains, list):
                contains = "|".join(f"{s}" for s in contains)
            if id_contains:
                filtered_df = full_prod_df[full_prod_df["identifier"].str.contains(contains, case=False)]
            else:
                filtered_df = full_prod_df[
                    full_prod_df["identifier"].str.contains(contains, case=False)
                    | full_prod_df["description"].str.contains(contains, case=False)
                ]
        else:
            filtered_df = full_prod_df

        filtered_df["category"] = filtered_df.category.str.join(", ")
        filtered_df["region"] = filtered_df.region.str.join(", ")
        if not display_all_columns:
            filtered_df = filtered_df[
                filtered_df.columns.intersection(
                    [
                        "identifier",
                        "title",
                        "region",
                        "category",
                        "status",
                        "description",
                    ]
                )
            ]

        if max_results > -1:
            filtered_df = filtered_df[0:max_results]

        if output:
            pass

        return filtered_df

    def list_datasets(  # noqa: PLR0913
        self,
        contains: str | list[str] | None = None,
        id_contains: bool = False,
        product: str | list[str] | None = None,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
        display_all_columns: bool = False,
        status: str | None = None,
        dataset_type: str | None = None,
    ) -> pd.DataFrame:
        """Get the datasets contained in a catalog.

        Args:
            contains (Union[str, list], optional): A string or a list of strings that are dataset
                identifiers to filter the datasets list. If a list is provided then it will return
                datasets whose identifier matches any of the strings. Defaults to None.
            id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
                ignoring description.
            product (Union[str, list], optional): A string or a list of strings that are product
                identifiers to filter the datasets list. Defaults to None.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed
            status (str, optional): filter the datasets by status, default is to show all results.
            dataset_type (str, optional): filter the datasets by type, default is to show all results.

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each dataset.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets"
        ds_df = Fusion._call_for_dataframe(url, self.session)

        if contains:
            if isinstance(contains, list):
                contains = "|".join(f"{s}" for s in contains)
            if id_contains:
                ds_df = ds_df[ds_df["identifier"].str.contains(contains, case=False)]
            else:
                ds_df = ds_df[
                    ds_df["identifier"].str.contains(contains, case=False)
                    | ds_df["description"].str.contains(contains, case=False)
                ]

        if product:
            url = f"{self.root_url}catalogs/{catalog}/productDatasets"
            prd_df = Fusion._call_for_dataframe(url, self.session)
            prd_df = (
                prd_df[prd_df["product"] == product]
                if isinstance(product, str)
                else prd_df[prd_df["product"].isin(product)]
            )
            ds_df = ds_df[ds_df["identifier"].str.lower().isin(prd_df["dataset"].str.lower())].reset_index(drop=True)

        if max_results > -1:
            ds_df = ds_df[0:max_results]

        ds_df["category"] = ds_df.category.str.join(", ")
        ds_df["region"] = ds_df.region.str.join(", ")
        if not display_all_columns:
            cols = [
                "identifier",
                "title",
                "containerType",
                "region",
                "category",
                "coverageStartDate",
                "coverageEndDate",
                "description",
                "status",
                "type",
            ]
            cols = [c for c in cols if c in ds_df.columns]
            ds_df = ds_df[cols]

        if status is not None:
            ds_df = ds_df[ds_df["status"] == status]

        if dataset_type is not None:
            ds_df = ds_df[ds_df["type"] == dataset_type]

        if output:
            pass

        return ds_df

    def dataset_resources(self, dataset: str, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
        """List the resources available for a dataset, currently this will always be a datasetseries.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each resource
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}"
        ds_res_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return ds_res_df

    def list_dataset_attributes(
        self,
        dataset: str,
        catalog: str | None = None,
        output: bool = False,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Returns the list of attributes that are in the dataset.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            display_all_columns (bool, optional): If True displays all columns returned by the API,
                otherwise only the key columns are displayed

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each attribute
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
        ds_attr_df = Fusion._call_for_dataframe(url, self.session)

        if "index" in ds_attr_df.columns: 
            ds_attr_df = ds_attr_df.sort_values(by="index").reset_index(drop=True)

        if not display_all_columns:
            ds_attr_df = ds_attr_df[
                ds_attr_df.columns.intersection(
                    [
                        "identifier",
                        "title",
                        "dataType",
                        "isDatasetKey",
                        "description",
                        "source",
                    ]
                )
            ]

        if output:
            pass

        return ds_attr_df

    def list_datasetmembers(
        self,
        dataset: str,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
    ) -> pd.DataFrame:
        """List the available members in the dataset series.

        Args:
            dataset (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.

        Returns:
            class:`pandas.DataFrame`: a dataframe with a row for each dataset member.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
        ds_members_df = Fusion._call_for_dataframe(url, self.session)

        if max_results > -1:
            ds_members_df = ds_members_df[0:max_results]

        if output:
            pass

        return ds_members_df

    def datasetmember_resources(
        self,
        dataset: str,
        series: str,
        catalog: str | None = None,
        output: bool = False,
    ) -> pd.DataFrame:
        """List the available resources for a datasetseries member.

        Args:
            dataset (str): A dataset identifier
            series (str): The datasetseries identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each datasetseries member resource.
                Currently, this will always be distributions.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}"
        ds_mem_res_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return ds_mem_res_df

    def list_distributions(
        self,
        dataset: str,
        series: str,
        catalog: str | None = None,
        output: bool = False,
    ) -> pd.DataFrame:
        """List the available distributions (downloadable instances of the dataset with a format type).

        Args:
            dataset (str): A dataset identifier
            series (str): The datasetseries identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each distribution.
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}/distributions"
        distros_df = Fusion._call_for_dataframe(url, self.session)

        if output:
            pass

        return distros_df

    def _resolve_distro_tuples(
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
    ) -> list[tuple[str, str, str, str]]:
        """Resolve distribution tuples given specification params.

        A private utility function to generate a list of distribution tuples.
        Each tuple is a distribution, identified by catalog, dataset id,
        datasetseries member id, and the file format.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            list: a list of tuples, one for each distribution

        Raises:
            AssertionError: If the dataset has no series members at all.
            APIResponseError: If no series member matches the requested date/date range.
        """
        catalog = self._use_catalog(catalog)

        datasetseries_list = self.list_datasetmembers(dataset, catalog)
        # An empty dataframe also has len() == 0, so this single guard covers
        # the previously separate (and unreachable) `.empty` check as well.
        if len(datasetseries_list) == 0:
            raise AssertionError(f"There are no dataset members for dataset {dataset} in catalog {catalog}")

        if dt_str == "latest":
            # Pick the most recently created series member; ties on createdDate
            # are broken by taking the alphabetically last identifier.
            dt_str = (
                datasetseries_list[
                    datasetseries_list["createdDate"] == datasetseries_list["createdDate"].to_numpy().max()
                ]
                .sort_values(by="identifier")
                .iloc[-1]["identifier"]
            )
            datasetseries_list = datasetseries_list[datasetseries_list["identifier"] == dt_str]
        else:
            parsed_dates = normalise_dt_param_str(dt_str)
            if len(parsed_dates) == 1:
                # A single date acts as both the start and the end of the range.
                parsed_dates = (parsed_dates[0], parsed_dates[0])

            # Keep members whose identifier parses to a date inside the range;
            # identifiers that are not dates coerce to NaT and are dropped by
            # the comparison.
            if parsed_dates[0]:
                datasetseries_list = datasetseries_list[
                    pd.Series([pd.to_datetime(i, errors="coerce") for i in datasetseries_list["identifier"]])
                    >= pd.to_datetime(parsed_dates[0])
                ].reset_index()

            if parsed_dates[1]:
                datasetseries_list = datasetseries_list[
                    pd.Series([pd.to_datetime(i, errors="coerce") for i in datasetseries_list["identifier"]])
                    <= pd.to_datetime(parsed_dates[1])
                ].reset_index()

        if len(datasetseries_list) == 0:
            raise APIResponseError(  # pragma: no cover
                f"No data available for dataset {dataset} in catalog {catalog}.\n"
                f"Check that a valid dataset identifier and date/date range has been set."
            )

        required_series = list(datasetseries_list["@id"])
        return [(catalog, dataset, series, dataset_format) for series in required_series]

    def download(  # noqa: PLR0912, PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        force_download: bool = False,
        download_folder: str | None = None,
        return_paths: bool = False,
        partitioning: str | None = None,
        preserve_original_name: bool = False,
    ) -> list[tuple[bool, str, str | None]] | None:
        """Downloads the requested distributions of a dataset to disk.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset. If more than one series member exists on the latest date, the
                series member identifiers will be sorted alphabetically and the last one will be downloaded.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            partitioning (str, optional): Partitioning specification.
            preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]]: If `return_paths` is True, a list of
                (success, file path, error message) tuples, one per distribution; otherwise None.
        """
        catalog = self._use_catalog(catalog)

        # Matches YYYYMMDD, YYYYMMDD:YYYYMMDD, or an open-ended range (":YYYYMMDD" / "YYYYMMDD:").
        valid_date_range = re.compile(r"^(\d{4}\d{2}\d{2})$|^((\d{4}\d{2}\d{2})?([:])(\d{4}\d{2}\d{2})?)$")

        if valid_date_range.match(dt_str) or dt_str == "latest":
            required_series = self._resolve_distro_tuples(dataset, dt_str, dataset_format, catalog)
        else:
            # sample data is limited to csv
            if dt_str == "sample":
                dataset_format = self.list_distributions(dataset, dt_str, catalog)["identifier"].iloc[0]
            # A non-date dt_str is treated as a literal series member identifier.
            required_series = [(catalog, dataset, dt_str, dataset_format)]

        if dataset_format not in RECOGNIZED_FORMATS + ["raw"]:
            raise ValueError(f"Dataset format {dataset_format} is not supported")

        if not download_folder:
            download_folder = self.download_folder

        download_folders = [download_folder] * len(required_series)

        if partitioning == "hive":
            # Hive-style layout: <folder>/<catalog>/<dataset>/<series member>/
            members = [series[2].strip("/") for series in required_series]
            download_folders = [
                f"{download_folders[i]}/{series[0]}/{series[1]}/{members[i]}"
                for i, series in enumerate(required_series)
            ]

        for d in download_folders:
            if not self.fs.exists(d):
                self.fs.mkdir(d, create_parents=True)

        n_par = cpu_count(n_par)
        # One keyword-argument spec per distribution tuple (catalog, dataset, member, format),
        # consumed by the filesystem's download() below.
        download_spec = [
            {
                "lfs": self.fs,
                "rpath": distribution_to_url(
                    self.root_url,
                    series[1],
                    series[2],
                    series[3],
                    series[0],
                    is_download=True,
                ),
                "lpath": distribution_to_filename(
                    download_folders[i],
                    series[1],
                    series[2],
                    series[3],
                    series[0],
                    partitioning=partitioning,
                ),
                "overwrite": force_download,
                "preserve_original_name": preserve_original_name,
            }
            for i, series in enumerate(required_series)
        ]

        logger.log(
            VERBOSE_LVL,
            f"Beginning {len(download_spec)} downloads in batches of {n_par}",
        )
        if show_progress:
            with joblib_progress("Downloading", total=len(download_spec)):
                res = Parallel(n_jobs=n_par)(
                    delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
                )
        else:
            res = Parallel(n_jobs=n_par)(
                delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
            )

        # Surface each individual failure as a warning; the result list is still returned.
        if (len(res) > 0) and (not all(r[0] for r in res)):
            for r in res:
                if not r[0]:
                    warnings.warn(f"The download of {r[1]} was not successful", stacklevel=2)
        return res if return_paths else None

    def to_df(  # noqa: PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        columns: list[str] | None = None,
        filters: PyArrowFilterT | None = None,
        force_download: bool = False,
        download_folder: str | None = None,
        dataframe_type: str = "pandas",
        **kwargs: Any,
    ) -> pd.DataFrame:
        """Gets distributions for a specified date or date range and returns the data as a dataframe.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            columns (List, optional): A list of columns to return from a parquet file. Defaults to None
            filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
                Rows which do not match the filter predicate will be removed from scanned data.
                Partition keys embedded in a nested directory structure will be exploited to avoid
                loading files at all if they contain no matching rows. If use_legacy_dataset is True,
                filters can only reference partition keys and only a hive-style directory structure
                is supported. When setting use_legacy_dataset to False, also within-file level filtering
                and different partitioning schemes are supported.
                More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
            dataframe_type (str, optional): Type of dataframe to return: "pandas" (default) or "polars".

        Returns:
            class:`pandas.DataFrame`: a dataframe containing the requested data.
                If multiple dataset instances are retrieved then these are concatenated first.

        Raises:
            ValueError: if the download returned no paths, the format has no reader,
                or `dataframe_type` is not supported.
            RuntimeError: if any of the required downloads failed.
        """
        catalog = self._use_catalog(catalog)

        # sample data is limited to csv
        if dt_str == "sample":
            dataset_format = "csv"

        if not download_folder:
            download_folder = self.download_folder
        download_res = self.download(
            dataset,
            dt_str,
            dataset_format,
            catalog,
            n_par,
            show_progress,
            force_download,
            download_folder,
            return_paths=True,
        )

        if not download_res:
            raise ValueError("Must specify 'return_paths=True' in download call to use this function")

        if not all(res[0] for res in download_res):
            failed_res = [res for res in download_res if not res[0]]
            # RuntimeError for consistency with to_table(); still an Exception
            # subclass, so existing broad handlers keep working.
            raise RuntimeError(
                f"Not all downloads were successfully completed. "
                f"Re-run to collect missing files. The following failed:\n{failed_res}"
            )

        files = [res[1] for res in download_res]

        pd_read_fn_map = {
            "csv": read_csv,
            "parquet": read_parquet,
            "parq": read_parquet,
            "json": read_json,
            "raw": read_csv,
        }

        # Every supported format currently takes the same reader kwargs.
        common_read_kwargs: dict[str, object] = {
            "columns": columns,
            "filters": filters,
            "fs": self.fs,
            "dataframe_type": dataframe_type,
        }
        pd_read_default_kwargs: dict[str, dict[str, object]] = {
            fmt: dict(common_read_kwargs) for fmt in ("csv", "parquet", "parq", "json", "raw")
        }

        pd_reader = pd_read_fn_map.get(dataset_format)
        pd_read_kwargs = pd_read_default_kwargs.get(dataset_format, {})
        if not pd_reader:
            # ValueError (an Exception subclass) instead of bare Exception.
            raise ValueError(f"No pandas function to read file in format {dataset_format}")

        # Caller-supplied kwargs override the defaults.
        pd_read_kwargs.update(kwargs)

        if len(files) == 0:
            raise APIResponseError(
                f"No series members for dataset: {dataset} "
                f"in date or date range: {dt_str} and format: {dataset_format}"
            )
        if dataset_format in ["parquet", "parq"]:
            # Parquet readers accept the whole file list at once.
            data_df = pd_reader(files, **pd_read_kwargs)  # type: ignore
        elif dataset_format == "raw":
            # "raw" distributions are zip archives: read every member of each
            # archive, then concatenate across archives. Each zip is opened
            # once and closed deterministically.
            def _read_zip(path: str) -> pd.DataFrame:
                with ZipFile(path) as zf:
                    return pd.concat(
                        [pd_reader(zf.open(name), **pd_read_kwargs) for name in zf.namelist()],  # type: ignore
                        ignore_index=True,
                    )

            data_df = pd.concat((_read_zip(f) for f in files), ignore_index=True)
        else:
            dataframes = (pd_reader(f, **pd_read_kwargs) for f in files)  # type: ignore
            if dataframe_type == "pandas":
                data_df = pd.concat(dataframes, ignore_index=True)
            elif dataframe_type == "polars":
                import polars as pl

                data_df = pl.concat(dataframes, how="diagonal")  # type: ignore
            else:
                # Previously an unknown type fell through to UnboundLocalError.
                raise ValueError(f"Unsupported dataframe_type: {dataframe_type}")

        return data_df

    def to_bytes(
        self,
        dataset: str,
        series_member: str,
        dataset_format: str = "parquet",
        catalog: str | None = None,
    ) -> BytesIO:
        """Returns an instance of dataset (the distribution) as a bytes object.

        Args:
            dataset (str): A dataset identifier
            series_member (str,): A dataset series member identifier
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            class:`io.BytesIO`: the requested distribution as an in-memory bytes object.
        """
        catalog = self._use_catalog(catalog)

        # Resolve the distribution's URL and fetch its raw content directly.
        distribution_url = distribution_to_url(self.root_url, dataset, series_member, dataset_format, catalog)
        return Fusion._call_for_bytes_object(distribution_url, self.session)

    def to_table(  # noqa: PLR0913
        self,
        dataset: str,
        dt_str: str = "latest",
        dataset_format: str = "parquet",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        columns: list[str] | None = None,
        filters: PyArrowFilterT | None = None,
        force_download: bool = False,
        download_folder: str | None = None,
        **kwargs: Any,
    ) -> pa.Table:
        """Gets distributions for a specified date or date range and returns the data as an arrow table.

        Args:
            dataset (str): A dataset identifier
            dt_str (str, optional): Either a single date or a range identified by a start or end date,
                or both separated with a ":". Defaults to 'latest' which will return the most recent
                instance of the dataset.
            dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            columns (List, optional): A list of columns to return from a parquet file. Defaults to None
            filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
                Rows which do not match the filter predicate will be removed from scanned data.
                Partition keys embedded in a nested directory structure will be exploited to avoid
                loading files at all if they contain no matching rows. If use_legacy_dataset is True,
                filters can only reference partition keys and only a hive-style directory structure
                is supported. When setting use_legacy_dataset to False, also within-file level filtering
                and different partitioning schemes are supported.
                More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
            force_download (bool, optional): If True then will always download a file even
                if it is already on disk. Defaults to False.
            download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
                Defaults to download_folder as set in __init__
        Returns:
            class:`pyarrow.Table`: a table containing the requested data.
                If multiple dataset instances are retrieved then these are concatenated first.
        Raises:
            ValueError: if the download call returned no paths.
            RuntimeError: if any of the required downloads failed.
        """
        catalog = self._use_catalog(catalog)
        n_par = cpu_count(n_par)
        if not download_folder:
            download_folder = self.download_folder
        # Download with paths returned so the files can then be read locally.
        download_res = self.download(
            dataset,
            dt_str,
            dataset_format,
            catalog,
            n_par,
            show_progress,
            force_download,
            download_folder,
            return_paths=True,
        )

        if not download_res:
            raise ValueError("Must specify 'return_paths=True' in download call to use this function")

        if not all(res[0] for res in download_res):
            failed_res = [res for res in download_res if not res[0]]
            raise RuntimeError(
                f"Not all downloads were successfully completed. "
                f"Re-run to collect missing files. The following failed:\n{failed_res}"
            )

        files = [res[1] for res in download_res]

        # Format -> pyarrow table reader. "raw" distributions are read as CSV here.
        read_fn_map = {
            "csv": csv_to_table,
            "parquet": parquet_to_table,
            "parq": parquet_to_table,
            "json": json_to_table,
            "raw": csv_to_table,
        }

        read_default_kwargs: dict[str, dict[str, object]] = {
            "csv": {"columns": columns, "filters": filters, "fs": self.fs},
            "parquet": {"columns": columns, "filters": filters, "fs": self.fs},
            "json": {"columns": columns, "filters": filters, "fs": self.fs},
            "raw": {"columns": columns, "filters": filters, "fs": self.fs},
        }

        # "parq" is an alias for "parquet"; they share the same kwargs dict.
        read_default_kwargs["parq"] = read_default_kwargs["parquet"]

        reader = read_fn_map.get(dataset_format)
        read_kwargs = read_default_kwargs.get(dataset_format, {})
        if not reader:
            raise AssertionError(f"No function to read file in format {dataset_format}")

        # Caller-supplied kwargs override the defaults.
        read_kwargs.update(kwargs)

        if len(files) == 0:
            raise APIResponseError(
                f"No series members for dataset: {dataset} "
                f"in date or date range: {dt_str} and format: {dataset_format}"
            )
        if dataset_format in ["parquet", "parq"]:
            # The parquet reader accepts the whole file list at once.
            tbl = reader(files, **read_kwargs)  # type: ignore
        else:
            # Read each file into its own table, then concatenate.
            tbl = (reader(f, **read_kwargs) for f in files)  # type: ignore
            tbl = pa.concat_tables(tbl)

        return tbl

    def upload(  # noqa: PLR0913
        self,
        path: str,
        dataset: str | None = None,
        dt_str: str = "latest",
        catalog: str | None = None,
        n_par: int | None = None,
        show_progress: bool = True,
        return_paths: bool = False,
        multipart: bool = True,
        chunk_size: int = 5 * 2**20,
        from_date: str | None = None,
        to_date: str | None = None,
        preserve_original_name: bool | None = False,
        additional_headers: dict[str, str] | None = None,
    ) -> list[tuple[bool, str, str | None]] | None:
        """Uploads the requested files/files to Fusion.

        Args:
            path (str): path to a file or a folder with files
            dataset (str, optional): Dataset identifier to which the file will be uploaded (for single file only).
                                    If not provided the dataset will be implied from file's name.
            dt_str (str, optional): A file name. Can be any string but is usually a date.
                                    Defaults to 'latest' which will return the most recent.
                                    Relevant for a single file upload only. If not provided the dataset will
                                    be implied from file's name.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            n_par (int, optional): Specify how many distributions to download in parallel.
                Defaults to all cpus available.
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            multipart (bool, optional): Is multipart upload.
            chunk_size (int, optional): Maximum chunk size.
            from_date (str, optional): start of the data date range contained in the distribution,
                defaults to upload date
            to_date (str, optional): end of the data date range contained in the distribution,
                defaults to upload date.
            preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.
            additional_headers (dict, optional): Additional HTTP headers passed through to the upload call.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]]: If `return_paths` is True, a list of
                (success, file path, error message) tuples, one per file; otherwise None.

        Raises:
            RuntimeError: if `path` does not exist.
            ValueError: if `preserve_original_name` is used without both catalog and dataset.
        """
        catalog = self._use_catalog(catalog)

        if not self.fs.exists(path):
            raise RuntimeError("The provided path does not exist")

        fs_fusion = self.get_fusion_filesystem()
        if self.fs.info(path)["type"] == "directory":
            # Bulk upload: keep only files whose names validate against Fusion
            # naming, then derive each file's target URL from its name.
            file_path_lst = self.fs.find(path)
            local_file_validation = validate_file_names(file_path_lst, fs_fusion)
            file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
            file_name = [f.split("/")[-1] for f in file_path_lst]
            is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
            local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
        else:
            file_path_lst = [path]
            if not catalog or not dataset:
                # Without an explicit catalog/dataset both are implied from the file name.
                local_file_validation = validate_file_names(file_path_lst, fs_fusion)
                file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
                is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
                local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
                if preserve_original_name:
                    raise ValueError("preserve_original_name can only be used when catalog and dataset are provided.")
            else:
                date_identifier = re.compile(r"^(\d{4})(\d{2})(\d{2})$")
                if date_identifier.match(dt_str):
                    # Normalise a YYYYMMDD label through pandas to validate it.
                    # (dt_str cannot be "latest" here: it just matched the date regex,
                    # so the previous "latest" fallback on this path was dead code.)
                    dt_str = pd.Timestamp(dt_str).date().strftime("%Y%m%d")

                if catalog not in fs_fusion.ls("") or dataset not in [
                    i.split("/")[-1] for i in fs_fusion.ls(f"{catalog}/datasets")
                ]:
                    msg = (
                        f"File has not been uploaded, one of the catalog: {catalog} "
                        f"or dataset: {dataset} does not exist."
                    )
                    warnings.warn(msg, stacklevel=2)
                    return [(False, path, msg)]
                file_format = path.split(".")[-1]
                file_name = [path.split("/")[-1]]
                # Unrecognized extensions are uploaded as "raw".
                file_format = "raw" if file_format not in RECOGNIZED_FORMATS else file_format

                local_url_eqiv = [
                    "/".join(distribution_to_url("", dataset, dt_str, file_format, catalog, False).split("/")[1:])
                ]

        # Map each local file path to its Fusion target URL (and optionally its
        # original file name when that should be preserved server-side).
        if not preserve_original_name:
            data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv]).T
            data_map_df.columns = pd.Index(["path", "url"])
        else:
            data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv, file_name]).T
            data_map_df.columns = pd.Index(["path", "url", "file_name"])

        n_par = cpu_count(n_par)
        parallel = len(data_map_df) > 1
        res = upload_files(
            fs_fusion,
            self.fs,
            data_map_df,
            parallel=parallel,
            n_par=n_par,
            multipart=multipart,
            chunk_size=chunk_size,
            show_progress=show_progress,
            from_date=from_date,
            to_date=to_date,
            additional_headers=additional_headers,
        )

        if not all(r[0] for r in res):
            failed_res = [r for r in res if not r[0]]
            msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
            logger.warning(msg)
            warnings.warn(msg, stacklevel=2)

        return res if return_paths else None

    def from_bytes(  # noqa: PLR0913
        self,
        data: BytesIO,
        dataset: str,
        series_member: str = "latest",
        catalog: str | None = None,
        distribution: str = "parquet",
        show_progress: bool = True,
        return_paths: bool = False,
        chunk_size: int = 5 * 2**20,
        from_date: str | None = None,
        to_date: str | None = None,
        file_name: str | None = None,
        **kwargs: Any,  # noqa: ARG002
    ) -> list[tuple[bool, str, str | None]] | None:
        """Uploads data from an object in memory.

        Args:
            data (BytesIO): an object in memory to upload
            dataset (str): Dataset name to which the bytes will be uploaded.
            series_member (str, optional): A single date or label. Defaults to 'latest' which will return
                the most recent.
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            distribution (str, optional): A distribution type, e.g. a file format or raw
            show_progress (bool, optional): Display a progress bar during data download Defaults to True.
            return_paths (bool, optional): Return paths and success statuses of the downloaded files.
            chunk_size (int, optional): Maximum chunk size.
            from_date (str, optional): start of the data date range contained in the distribution,
                defaults to upload date
            to_date (str, optional): end of the data date range contained in the distribution, defaults to upload date.
            file_name (str, optional): file name to be used for the uploaded file. Defaults to Fusion standard naming.

        Returns:
            Optional[list[tuple[bool, str, Optional[str]]]: a list of tuples, one for each distribution

        Raises:
            ValueError: if `distribution` is not a recognized format.
        """
        catalog = self._use_catalog(catalog)

        fs_fusion = self.get_fusion_filesystem()
        if distribution not in RECOGNIZED_FORMATS + ["raw"]:
            raise ValueError(f"Dataset format {distribution} is not supported")

        # Whether the target dataset holds raw (unstructured) data drives URL construction.
        is_raw = js.loads(fs_fusion.cat(f"{catalog}/datasets/{dataset}"))["isRawData"]
        local_url_eqiv = path_to_url(f"{dataset}__{catalog}__{series_member}.{distribution}", is_raw)

        # Single in-memory object: a one-row mapping, uploaded without
        # parallelism or multipart. pd.Index for the columns keeps this
        # consistent with upload() and avoids the type: ignore.
        data_map_df = pd.DataFrame(["", local_url_eqiv, file_name]).T
        data_map_df.columns = pd.Index(["path", "url", "file_name"])

        res = upload_files(
            fs_fusion,
            data,
            data_map_df,
            parallel=False,
            n_par=1,
            multipart=False,
            chunk_size=chunk_size,
            show_progress=show_progress,
            from_date=from_date,
            to_date=to_date,
        )

        if not all(r[0] for r in res):
            failed_res = [r for r in res if not r[0]]
            msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
            logger.warning(msg)
            warnings.warn(msg, stacklevel=2)

        return res if return_paths else None

    def listen_to_events(
        self,
        last_event_id: str | None = None,
        catalog: str | None = None,
        url: str = "https://fusion.jpmorgan.com/api/v1/",
    ) -> None | pd.DataFrame:
        """Run server sent event listener in the background. Retrieve results by running get_events.

        Args:
            last_event_id (str): Last event ID (exclusive).
            catalog (str): catalog.
            url (str): subscription url.
        Returns:
            None: the listener runs in a daemon thread; collected events accumulate
                on `self.events` and can be retrieved via `get_events`.
        """

        catalog = self._use_catalog(catalog)
        import asyncio
        import json
        import threading

        from aiohttp_sse_client import client as sse_client

        from .utils import get_client

        kwargs: dict[str, Any] = {}
        if last_event_id:
            kwargs = {"headers": {"Last-Event-ID": last_event_id}}

        async def async_events() -> None:
            """Consume the SSE stream, appending each event to self.events.

            Returns:
                None
            """
            timeout = 1e100
            session = await get_client(self.credentials, timeout=timeout)
            async with sse_client.EventSource(
                f"{url}catalogs/{catalog}/notifications/subscribe",
                session=session,
                **kwargs,
            ) as messages:
                try:
                    async for msg in messages:
                        event = json.loads(msg.data)
                        # Append only the newly received event. The previous
                        # implementation re-concatenated the entire running list
                        # on every message (duplicating earlier events) and
                        # silently dropped the first event.
                        event_df = pd.DataFrame([event])
                        if self.events is None:
                            self.events = event_df
                        else:
                            self.events = pd.concat([self.events, event_df], ignore_index=True)
                except TimeoutError as ex:
                    # Suppress exception chaining for timeouts.
                    raise ex from None

        _ = self.list_catalogs()  # refresh token
        if "headers" in kwargs:
            kwargs["headers"].update({"authorization": f"bearer {self.credentials.bearer_token}"})
        else:
            kwargs["headers"] = {
                "authorization": f"bearer {self.credentials.bearer_token}",
            }
        # Route through a proxy when one is configured on the credentials.
        if "http" in self.credentials.proxies:
            kwargs["proxy"] = self.credentials.proxies["http"]
        elif "https" in self.credentials.proxies:
            kwargs["proxy"] = self.credentials.proxies["https"]
        th = threading.Thread(target=asyncio.run, args=(async_events(),), daemon=True)
        th.start()
        return None

    def get_events(
        self,
        last_event_id: str | None = None,
        catalog: str | None = None,
        in_background: bool = True,
        url: str = "https://fusion.jpmorgan.com/api/v1/",
    ) -> None | pd.DataFrame:
        """Run server sent event listener and print out the new events. Keyboard terminate to stop.

        Args:
            last_event_id (str): id of the last event.
            catalog (str): catalog.
            in_background (bool): execute event monitoring in the background (default = True).
            url (str): subscription url.
        Returns:
            Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
                If in_background is set to False then pandas DataFrame is output upon keyboard termination.
        """

        catalog = self._use_catalog(catalog)
        # Background mode simply exposes whatever the background listener has collected.
        if in_background:
            return self.events

        from sseclient import SSEClient

        _ = self.list_catalogs()  # refresh token
        messages = SSEClient(
            session=self.session,
            url=f"{url}catalogs/{catalog}/notifications/subscribe",
            last_id=last_event_id,
            headers={
                "authorization": f"bearer {self.credentials.bearer_token}",
            },
        )
        collected = []
        interrupted = False
        try:
            # Block on the stream, skipping heartbeat notifications, until Ctrl-C.
            for msg in messages:
                event = js.loads(msg.data)
                if event["type"] != "HeartBeatNotification":
                    collected.append(event)
        except KeyboardInterrupt:
            interrupted = True
        return pd.DataFrame(collected) if interrupted or collected else None

    def list_dataset_lineage(
        self,
        dataset_id: str,
        catalog: str | None = None,
        output: bool = False,
        max_results: int = -1,
    ) -> pd.DataFrame:
        """List the upstream and downstream lineage of the dataset.

        Args:
            dataset_id (str): A dataset identifier
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            max_results (int, optional): Limit the number of rows returned in the dataframe.
                Defaults to -1 which returns all results.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each resource

        Raises:
            HTTPError: If the dataset is not found in the catalog.

        """
        catalog = self._use_catalog(catalog)

        # Verify the dataset exists; raises HTTPError (e.g. 404) otherwise.
        url_dataset = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}"
        resp_dataset = self.session.get(url_dataset)
        resp_dataset.raise_for_status()

        url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}/lineage"
        resp = self.session.get(url)
        # Fail fast on an error response instead of raising a cryptic
        # JSON-decode error below.
        resp.raise_for_status()
        data = resp.json()
        relations_data = data["relations"]

        restricted_datasets = [
            dataset_metadata["identifier"]
            for dataset_metadata in data["datasets"]
            if dataset_metadata.get("status", None) == "Restricted"
        ]

        # identifier -> (relation type, catalog, title)
        data_dict = {}

        for entry in relations_data:
            source_dataset_id = entry["source"]["dataset"]
            source_catalog = entry["source"]["catalog"]
            destination_dataset_id = entry["destination"]["dataset"]
            destination_catalog = entry["destination"]["catalog"]

            if destination_dataset_id == dataset_id:
                # Fall back to the identifier when the related dataset has no
                # metadata entry (previously this raised NameError or reused a
                # stale title from an earlier iteration).
                source_dataset_title = source_dataset_id
                for dataset in data["datasets"]:
                    if dataset["identifier"] == source_dataset_id:
                        source_dataset_title = (
                            "Access Restricted"
                            if dataset.get("status", None) == "Restricted"
                            else dataset["title"]
                        )
                data_dict[source_dataset_id] = (
                    "source",
                    source_catalog,
                    source_dataset_title,
                )

            if source_dataset_id == dataset_id:
                destination_dataset_title = destination_dataset_id
                for dataset in data["datasets"]:
                    if dataset["identifier"] == destination_dataset_id:
                        destination_dataset_title = (
                            "Access Restricted"
                            if dataset.get("status", None) == "Restricted"
                            else dataset["title"]
                        )
                data_dict[destination_dataset_id] = (
                    "produced",
                    destination_catalog,
                    destination_dataset_title,
                )

        output_data = {
            "type": [v[0] for v in data_dict.values()],
            "dataset_identifier": list(data_dict.keys()),
            "title": [v[2] for v in data_dict.values()],
            "catalog": [v[1] for v in data_dict.values()],
        }

        lineage_df = pd.DataFrame(output_data)
        # Mask every column for datasets the caller is not entitled to see.
        lineage_df.loc[
            lineage_df["dataset_identifier"].isin(restricted_datasets),
            ["dataset_identifier", "catalog", "title"],
        ] = "Access Restricted"

        if max_results > -1:
            lineage_df = lineage_df[0:max_results]

        if output:
            # Kept for interface compatibility; display was intentionally a no-op.
            pass

        return lineage_df

    def create_dataset_lineage(
        self,
        base_dataset: str,
        source_dataset_catalog_mapping: pd.DataFrame | list[dict[str, str]],
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Register lineage between a base dataset and its source datasets.

        Args:
            base_dataset (str): Identifier of the dataset receiving the lineage.
            source_dataset_catalog_mapping (pd.DataFrame | list[dict[str, str]]): Either a DataFrame
                with 'dataset' and 'catalog' columns, or a list of dicts with those keys, naming
                the source datasets and their catalogs.
            catalog (str | None, optional): Catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): Return the response object when True. Defaults to False.

        Returns:
            requests.Response | None: The response object if requested, otherwise None.

        Raises:
            ValueError: If source_dataset_catalog_mapping is neither a DataFrame nor a list.
            HTTPError: If the request is unsuccessful.

        Examples:
            >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
            >>> fusion = Fusion()
            >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")

        """
        catalog = self._use_catalog(catalog)

        # Normalise the mapping into a list of {"dataset": ..., "catalog": ...} dicts.
        if isinstance(source_dataset_catalog_mapping, pd.DataFrame):
            mapping = [
                {"dataset": row["dataset"], "catalog": row["catalog"]}
                for _, row in source_dataset_catalog_mapping.iterrows()
            ]
        elif isinstance(source_dataset_catalog_mapping, list):
            mapping = source_dataset_catalog_mapping
        else:
            raise ValueError("source_dataset_catalog_mapping must be a pandas DataFrame or a list of dictionaries.")

        payload = {"source": mapping}
        lineage_url = f"{self.root_url}catalogs/{catalog}/datasets/{base_dataset}/lineage"

        response = self.session.post(lineage_url, json=payload)
        response.raise_for_status()

        return response if return_resp_obj else None

    def list_product_dataset_mapping(
        self,
        dataset: str | list[str] | None = None,
        product: str | list[str] | None = None,
        catalog: str | None = None,
    ) -> pd.DataFrame:
        """Return the product-to-dataset links in a catalog. A product is a grouping of datasets.

        Args:
            dataset (str | list[str] | None, optional): Dataset identifier(s) to filter by;
                a list matches any of the given strings. Defaults to None (no filter).
            product (str | list[str] | None, optional): Product identifier(s) to filter by;
                a list matches any of the given strings. Defaults to None (no filter).
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.

        Returns:
            pd.DataFrame: one row per dataset-to-product mapping.
        """
        catalog = self._use_catalog(catalog)
        url = f"{self.root_url}catalogs/{catalog}/productDatasets"
        mapping_df = pd.DataFrame(self._call_for_dataframe(url, self.session))

        def _narrow(frame: pd.DataFrame, column: str, needle: str | list[str]) -> pd.DataFrame:
            # Case-insensitive substring match; a list becomes an OR pattern.
            if isinstance(needle, list):
                pattern = "|".join(str(item) for item in needle)
                frame = frame[frame[column].str.contains(pattern, case=False)]
            if isinstance(needle, str):
                frame = frame[frame[column].str.contains(needle, case=False)]
            return frame

        if dataset:
            mapping_df = _narrow(mapping_df, "dataset", dataset)
        if product:
            mapping_df = _narrow(mapping_df, "product", product)
        return mapping_df

    def product(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        short_abstract: str = "",
        description: str = "",
        is_active: bool = True,
        is_restricted: bool | None = None,
        maintainer: str | list[str] | None = None,
        region: str | list[str] = "Global",
        publisher: str = "J.P. Morgan",
        sub_category: str | list[str] | None = None,
        tag: str | list[str] | None = None,
        delivery_channel: str | list[str] = "API",
        theme: str | None = None,
        release_date: str | None = None,
        language: str = "English",
        status: str = "Available",
        image: str = "",
        logo: str = "",
        dataset: str | list[str] | None = None,
        **kwargs: Any,
    ) -> Product:
        """Build a Product metadata object bound to this client.

        Args:
            identifier (str): Product identifier.
            title (str, optional): Product title. Defaults to identifier when empty.
            category (str | list[str] | None, optional): Category. Defaults to None.
            short_abstract (str, optional): Short description. Defaults to "".
            description (str, optional): Description. Defaults to identifier when empty.
            is_active (bool, optional): Active status flag. Defaults to True.
            is_restricted (bool | None, optional): Restricted-product flag. Defaults to None.
            maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
            region (str | list[str], optional): Product region. Defaults to "Global".
            publisher (str, optional): Publishing vendor. Defaults to "J.P. Morgan".
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tag (str | list[str] | None, optional): Search tags. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            theme (str | None, optional): Product theme. Defaults to None.
            release_date (str | None, optional): Release date. Defaults to None.
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            image (str, optional): Product image. Defaults to "".
            logo (str, optional): Product logo. Defaults to "".
            dataset (str | list[str] | None, optional): Associated datasets. Defaults to None.

        Returns:
            Product: Fusion Product class instance with `client` set to this Fusion object.

        Examples:
            >>> fusion = Fusion()
            >>> fusion.product(identifier="PRODUCT_1", title="Product")

        Note:
            See the product module for more information on functionalities of product objects.

        """
        # Collect the explicit fields; extra keyword arguments are forwarded
        # separately so a duplicate key still raises TypeError as before.
        fields = {
            "identifier": identifier,
            "title": title,
            "category": category,
            "short_abstract": short_abstract,
            "description": description,
            "is_active": is_active,
            "is_restricted": is_restricted,
            "maintainer": maintainer,
            "region": region,
            "publisher": publisher,
            "sub_category": sub_category,
            "tag": tag,
            "delivery_channel": delivery_channel,
            "theme": theme,
            "release_date": release_date,
            "language": language,
            "status": status,
            "image": image,
            "logo": logo,
            "dataset": dataset,
        }
        product_obj = Product(**fields, **kwargs)
        product_obj.client = self
        return product_obj

    def dataset(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Source",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Dataset:
        """Build a Dataset metadata object bound to this client.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. Defaults to identifier when empty.
            category (str | list[str] | None, optional): Category or list of categories. Defaults to None.
            description (str, optional): Dataset description. Defaults to identifier when empty.
            frequency (str, optional): Dataset frequency. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Internal-only flag. Defaults to False.
            is_third_party_data (bool, optional): Third-party-data flag. Defaults to True.
            is_restricted (bool | None, optional): Restricted-dataset flag. Defaults to None.
            is_raw_data (bool, optional): Raw-data flag. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Data vendor. Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Publishing vendor. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Associated product(s). Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Search tags. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Source".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Immutable-dataset flag. Defaults to None.
            is_mnpi (bool | None, optional): MNPI flag. Defaults to None.
            is_pci (bool | None, optional): PCI flag. Defaults to None.
            is_pii (bool | None, optional): PII flag. Defaults to None.
            is_client (bool | None, optional): Client-data flag. Defaults to None.
            is_public (bool | None, optional): Public-data flag. Defaults to None.
            is_internal (bool | None, optional): Internal-data flag. Defaults to None.
            is_confidential (bool | None, optional): Confidential-data flag. Defaults to None.
            is_highly_confidential (bool | None, optional): Highly-confidential flag. Defaults to None.
            is_active (bool | None, optional): Active flag. Defaults to None.
            owners (list[str] | None, optional): Dataset owners. Defaults to None.
            application_id (str | dict[str, str] | None, optional): Application ID. Defaults to None.

        Returns:
            Dataset: Fusion Dataset class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset(identifier="DATASET_1")

        Note:
            See the dataset module for more information on functionalities of dataset objects.

        """
        # Collect the explicit fields; extra keyword arguments are forwarded
        # separately so a duplicate key still raises TypeError as before.
        fields = {
            "identifier": identifier,
            "title": title,
            "category": category,
            "description": description,
            "frequency": frequency,
            "is_internal_only_dataset": is_internal_only_dataset,
            "is_third_party_data": is_third_party_data,
            "is_restricted": is_restricted,
            "is_raw_data": is_raw_data,
            "maintainer": maintainer,
            "source": source,
            "region": region,
            "publisher": publisher,
            "product": product,
            "sub_category": sub_category,
            "tags": tags,
            "created_date": created_date,
            "modified_date": modified_date,
            "delivery_channel": delivery_channel,
            "language": language,
            "status": status,
            "type_": type_,
            "container_type": container_type,
            "snowflake": snowflake,
            "complexity": complexity,
            "is_immutable": is_immutable,
            "is_mnpi": is_mnpi,
            "is_pci": is_pci,
            "is_pii": is_pii,
            "is_client": is_client,
            "is_public": is_public,
            "is_internal": is_internal,
            "is_confidential": is_confidential,
            "is_highly_confidential": is_highly_confidential,
            "is_active": is_active,
            "owners": owners,
            "application_id": application_id,
        }
        dataset_obj = Dataset(**fields, **kwargs)
        dataset_obj.client = self
        return dataset_obj

    def attribute(  # noqa: PLR0913
        self,
        identifier: str,
        index: int,
        data_type: str | Types = "String",
        title: str = "",
        description: str = "",
        is_dataset_key: bool = False,
        source: str | None = None,
        source_field_id: str | None = None,
        is_internal_dataset_key: bool | None = None,
        is_externally_visible: bool | None = True,
        unit: Any | None = None,
        multiplier: float = 1.0,
        is_propagation_eligible: bool | None = None,
        is_metric: bool | None = None,
        available_from: str | None = None,
        deprecated_from: str | None = None,
        term: str = "bizterm1",
        dataset: int | None = None,
        attribute_type: str | None = None,
        application_id: str | dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Attribute:
        """Build an Attribute metadata object bound to this client.

        Args:
            identifier (str): The unique identifier for the attribute.
            index (int): Attribute index.
            data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
            title (str, optional): Attribute title. Defaults to identifier when empty.
            description (str, optional): Attribute description. Defaults to identifier when empty.
            is_dataset_key (bool, optional): Primary-key flag. Defaults to False.
            source (str | None, optional): Data vendor. Defaults to None.
            source_field_id (str | None, optional): Original identifier of attribute, if renamed.
                Defaults to identifier when not provided.
            is_internal_dataset_key (bool | None, optional): Internal primary-key flag. Defaults to None.
            is_externally_visible (bool | None, optional): Externally-visible flag. Defaults to True.
            unit (Any | None, optional): Unit of attribute. Defaults to None.
            multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
            is_propagation_eligible (bool | None, optional): Propagation-eligibility flag. Defaults to None.
            is_metric (bool | None, optional): Metric flag. Defaults to None.
            available_from (str | None, optional): Date the attribute is available from. Defaults to None.
            deprecated_from (str | None, optional): Date the attribute is deprecated from. Defaults to None.
            term (str, optional): Term. Defaults to "bizterm1".
            dataset (int | None, optional): Dataset. Defaults to None.
            attribute_type (str | None, optional): Attribute type. Defaults to None.
            application_id (str | dict[str, str] | None, optional): Application ID. Defaults to None.

        Returns:
            Attribute: Fusion Attribute class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attr = fusion.attribute(identifier="attr1", index=0)

        Note:
            See the attributes module for more information on functionalities of attribute objects.

        """
        # Accept "String", "Types.String", or a Types member: keep only the
        # final dotted component and normalise case before the enum lookup.
        type_key = str(data_type).strip().rsplit(".", maxsplit=1)[-1].title()
        resolved_type = Types[type_key]
        fields = {
            "identifier": identifier,
            "index": index,
            "data_type": resolved_type,
            "title": title,
            "description": description,
            "is_dataset_key": is_dataset_key,
            "source": source,
            "source_field_id": source_field_id,
            "is_internal_dataset_key": is_internal_dataset_key,
            "is_externally_visible": is_externally_visible,
            "unit": unit,
            "multiplier": multiplier,
            "is_propagation_eligible": is_propagation_eligible,
            "is_metric": is_metric,
            "available_from": available_from,
            "deprecated_from": deprecated_from,
            "term": term,
            "dataset": dataset,
            "attribute_type": attribute_type,
            "application_id": application_id,
        }
        attribute_obj = Attribute(**fields, **kwargs)
        attribute_obj.client = self
        return attribute_obj

    def attributes(
        self,
        attributes: list[Attribute] | None = None,
    ) -> Attributes:
        """Build an Attributes collection bound to this client.

        Args:
            attributes (list[Attribute] | None, optional): Attribute objects to include.
                Defaults to None, which yields an empty collection.

        Returns:
            Attributes: Fusion Attributes class instance with `client` set to this Fusion object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attr1 = fusion.attribute("attr1", 0)
            >>> attr2 = fusion.attribute("attr2", 1)
            >>> attrs = fusion.attributes([attr1, attr2])

        Note:
            See the attributes module for more information on functionalities of attributes object.

        """
        # A falsy argument (None or an empty list) becomes a fresh empty list.
        collection = attributes if attributes else []
        attributes_obj = Attributes(attributes=collection)
        attributes_obj.client = self
        return attributes_obj

    def delete_datasetmembers(
        self,
        dataset: str,
        series_members: str | list[str],
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> list[requests.Response] | None:
        """Delete one or more series members from a dataset.

        Args:
            dataset (str): A dataset identifier
            series_members (str | list[str]): Series member identifier(s) to delete.
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
            return_resp_obj (bool, optional): If True then return the response objects. Defaults to False.

        Returns:
            list[requests.Response] | None: the response objects when requested, otherwise None.

        Examples:
            Delete one dataset member.

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

            Delete multiple dataset members.

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])

        """
        catalog = self._use_catalog(catalog)
        # Normalise a single identifier into a one-element list.
        members = [series_members] if isinstance(series_members, str) else series_members
        responses = []
        for member in members:
            member_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{member}"
            response = self.session.delete(member_url)
            requests_raise_for_status(response)
            responses.append(response)
        return responses if return_resp_obj else None

    def delete_all_datasetmembers(
        self,
        dataset: str,
        catalog: str | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete every series member belonging to a dataset.

        Args:
            dataset (str): A dataset identifier
            catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: the response object when requested, otherwise None.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.delete_all_datasetmembers(dataset="dataset1")

        """
        catalog = self._use_catalog(catalog)
        series_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
        response = self.session.delete(series_url)
        requests_raise_for_status(response)
        return response if return_resp_obj else None

    def list_registered_attributes(
        self,
        catalog: str | None = None,
        output: bool = False,
        display_all_columns: bool = False,
    ) -> pd.DataFrame:
        """Return the attributes registered in a catalog.

        Args:
            catalog (str, optional): A catalog identifier. Defaults to 'common'.
            output (bool, optional): If True then print the dataframe. Defaults to False.
            display_all_columns (bool, optional): If True keep every column returned by the API;
                otherwise only the key columns are kept.

        Returns:
            class:`pandas.DataFrame`: A dataframe with a row for each attribute
        """
        catalog = self._use_catalog(catalog)

        url = f"{self.root_url}catalogs/{catalog}/attributes"
        ds_attr_df = Fusion._call_for_dataframe(url, self.session).reset_index(drop=True)

        if not display_all_columns:
            # Keep only the key columns, ignoring any the API did not return.
            key_columns = [
                "identifier",
                "title",
                "dataType",
                "description",
                "publisher",
                "applicationId",
            ]
            ds_attr_df = ds_attr_df[ds_attr_df.columns.intersection(key_columns)]

        if output:
            # Kept for interface compatibility; display was intentionally a no-op.
            pass

        return ds_attr_df

    def report(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Report",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        report: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> Report:
        """Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Source".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | None, optional): The application ID of the dataset. Defaults to None.
            report (dict[str, str] | None, optional): The report metadata. Specifies the tier of the report.
                Required for registered reports to the catalog.

        Returns:
            Dataset: Fusion Dataset class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.report(identifier="DATASET_1")

        Note:
            See the dataset module for more information on functionalities of report objects.

        """
        report_obj = Report(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            report=report,
            **kwargs,
        )
        report_obj.client = self
        return report_obj

    def input_dataflow(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Flow",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        producer_application_id: dict[str, str] | None = None,
        consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
        flow_details: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> InputDataFlow:
        """Build an InputDataFlow object bound to this client for metadata creation.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
                Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data.
                Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Flow".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
                Defaults to None.
            producer_application_id (dict[str, str] | None, optional): The producer application ID
                (upstream application producing the flow).
            consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer
                application ID (downstream application, consuming the flow).
            flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output
                flow. Defaults to {"flowDirection": "Input"}.

        Returns:
            InputDataFlow: Fusion InputDataFlow class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")

        Note:
            See the dataset module for more information on functionalities of input dataflow objects.

        """
        if flow_details is None:
            # Mark the direction so the catalog can tell input flows from output flows.
            flow_details = {"flowDirection": "Input"}
        flow = InputDataFlow(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            producer_application_id=producer_application_id,
            consumer_application_id=consumer_application_id,
            flow_details=flow_details,
            **kwargs,
        )
        # Attach this client so the returned object can issue API calls.
        flow.client = self
        return flow

    def output_dataflow(  # noqa: PLR0913
        self,
        identifier: str,
        title: str = "",
        category: str | list[str] | None = None,
        description: str = "",
        frequency: str = "Once",
        is_internal_only_dataset: bool = False,
        is_third_party_data: bool = True,
        is_restricted: bool | None = None,
        is_raw_data: bool = True,
        maintainer: str | None = "J.P. Morgan Fusion",
        source: str | list[str] | None = None,
        region: str | list[str] | None = None,
        publisher: str = "J.P. Morgan",
        product: str | list[str] | None = None,
        sub_category: str | list[str] | None = None,
        tags: str | list[str] | None = None,
        created_date: str | None = None,
        modified_date: str | None = None,
        delivery_channel: str | list[str] = "API",
        language: str = "English",
        status: str = "Available",
        type_: str | None = "Flow",
        container_type: str | None = "Snapshot-Full",
        snowflake: str | None = None,
        complexity: str | None = None,
        is_immutable: bool | None = None,
        is_mnpi: bool | None = None,
        is_pci: bool | None = None,
        is_pii: bool | None = None,
        is_client: bool | None = None,
        is_public: bool | None = None,
        is_internal: bool | None = None,
        is_confidential: bool | None = None,
        is_highly_confidential: bool | None = None,
        is_active: bool | None = None,
        owners: list[str] | None = None,
        application_id: str | dict[str, str] | None = None,
        producer_application_id: dict[str, str] | None = None,
        consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
        flow_details: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> OutputDataFlow:
        """Build an OutputDataFlow object bound to this client for metadata creation.

        Args:
            identifier (str): Dataset identifier.
            title (str, optional): Dataset title. If not provided, defaults to identifier.
            category (str | list[str] | None, optional): A category or list of categories for the dataset.
                Defaults to None.
            description (str, optional): Dataset description. If not provided, defaults to identifier.
            frequency (str, optional): The frequency of the dataset. Defaults to "Once".
            is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
            is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
            is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
            is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
            maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
            source (str | list[str] | None, optional): Name of data vendor which provided the data.
                Defaults to None.
            region (str | list[str] | None, optional): Region. Defaults to None.
            publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
            product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
            sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
            tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
            created_date (str | None, optional): Created date. Defaults to None.
            modified_date (str | None, optional): Modified date. Defaults to None.
            delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
            language (str, optional): Language. Defaults to "English".
            status (str, optional): Status. Defaults to "Available".
            type_ (str | None, optional): Dataset type. Defaults to "Flow".
            container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
            snowflake (str | None, optional): Snowflake account connection. Defaults to None.
            complexity (str | None, optional): Complexity. Defaults to None.
            is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
            is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
            is_pci (bool | None, optional): is_pci. Defaults to None.
            is_pii (bool | None, optional): is_pii. Defaults to None.
            is_client (bool | None, optional): is_client. Defaults to None.
            is_public (bool | None, optional): is_public. Defaults to None.
            is_internal (bool | None, optional): is_internal. Defaults to None.
            is_confidential (bool | None, optional): is_confidential. Defaults to None.
            is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
            is_active (bool | None, optional): is_active. Defaults to None.
            owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
            application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
                Defaults to None.
            producer_application_id (dict[str, str] | None, optional): The producer application ID
                (upstream application producing the flow).
            consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer
                application ID (downstream application, consuming the flow).
            flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output
                flow. Defaults to {"flowDirection": "Output"}.

        Returns:
            OutputDataFlow: Fusion OutputDataFlow class.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")

        Note:
            See the dataset module for more information on functionalities of output dataflow objects.

        """
        if flow_details is None:
            # Mark the direction so the catalog can tell output flows from input flows.
            flow_details = {"flowDirection": "Output"}
        flow = OutputDataFlow(
            identifier=identifier,
            title=title,
            category=category,
            description=description,
            frequency=frequency,
            is_internal_only_dataset=is_internal_only_dataset,
            is_third_party_data=is_third_party_data,
            is_restricted=is_restricted,
            is_raw_data=is_raw_data,
            maintainer=maintainer,
            source=source,
            region=region,
            publisher=publisher,
            product=product,
            sub_category=sub_category,
            tags=tags,
            created_date=created_date,
            modified_date=modified_date,
            delivery_channel=delivery_channel,
            language=language,
            status=status,
            type_=type_,
            container_type=container_type,
            snowflake=snowflake,
            complexity=complexity,
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
            owners=owners,
            application_id=application_id,
            producer_application_id=producer_application_id,
            consumer_application_id=consumer_application_id,
            flow_details=flow_details,
            **kwargs,
        )
        # Attach this client so the returned object can issue API calls.
        flow.client = self
        return flow

default_catalog: str property writable

Returns the default catalog.

Returns:

Type Description
str

None

__init__(credentials='config/client_credentials.json', root_url='https://fusion.jpmorgan.com/api/v1/', download_folder='downloads', log_level=logging.ERROR, fs=None, log_path='.')

Constructor to instantiate a new Fusion object.

Parameters:

Name Type Description Default
credentials Union[str, FusionCredentials]

A path to a credentials file or a fully populated

'config/client_credentials.json'
root_url str

The API root URL. Defaults to "https://fusion.jpmorgan.com/api/v1/".

'https://fusion.jpmorgan.com/api/v1/'
download_folder str

The folder path where downloaded data files are saved. Defaults to "downloads".

'downloads'
log_level int

Set the logging level. Defaults to logging.ERROR.

ERROR
fs filesystem

filesystem.

None
log_path str

The folder path where the log is stored.

'.'
Source code in py_src/fusion/fusion.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
def __init__(
    self,
    credentials: str | FusionCredentials = "config/client_credentials.json",
    root_url: str = "https://fusion.jpmorgan.com/api/v1/",
    download_folder: str = "downloads",
    log_level: int = logging.ERROR,
    fs: fsspec.filesystem = None,
    log_path: str = ".",
) -> None:
    """Constructor to instantiate a new Fusion object.

    Args:
        credentials (Union[str, FusionCredentials]): A path to a credentials file or a fully populated
            FusionCredentials object. Defaults to 'config/client_credentials.json'.
        root_url (str, optional): The API root URL.
            Defaults to "https://fusion.jpmorgan.com/api/v1/".
        download_folder (str, optional): The folder path where downloaded data files
            are saved. Defaults to "downloads".
        log_level (int, optional): Set the logging level. Defaults to logging.ERROR.
        fs (fsspec.filesystem): filesystem. Defaults to the default filesystem when not provided.
        log_path (str, optional): The folder path where the log is stored. Defaults to ".".

    Raises:
        ValueError: If credentials is neither a path string nor a FusionCredentials object.
    """
    self._default_catalog = "common"

    self.root_url = root_url
    self.download_folder = download_folder
    Path(download_folder).mkdir(parents=True, exist_ok=True)

    # Clear handlers left by a previous Fusion instance so records are not duplicated.
    if logger.hasHandlers():
        logger.handlers.clear()
    file_handler = logging.FileHandler(filename=f"{log_path}/fusion_sdk.log")
    logging.addLevelName(VERBOSE_LVL, "VERBOSE")
    stdout_handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    stdout_handler.setFormatter(formatter)
    # Fix: previously only the stdout handler was formatted, so the log file
    # received bare messages without timestamps or levels.
    file_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    logger.addHandler(file_handler)
    logger.setLevel(log_level)

    if isinstance(credentials, FusionCredentials):
        self.credentials = credentials
    elif isinstance(credentials, str):
        self.credentials = FusionCredentials.from_file(Path(credentials))
    else:
        raise ValueError("credentials must be a path to a credentials file or FusionCredentials object")

    self.session = get_session(self.credentials, self.root_url)
    # Fall back to the default filesystem when no fs is supplied.
    self.fs = fs if fs else get_default_fs()
    self.events: pd.DataFrame | None = None

__repr__()

Object representation to list all available methods.

Source code in py_src/fusion/fusion.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
def __repr__(self) -> str:
    """Object representation to list all available methods."""

    def _summary(obj):  # noqa: ANN001, ANN202
        # First docstring line; guard against a missing docstring so that
        # repr() never raises AttributeError on an undocumented member.
        return (obj.__doc__ or "").split("\n")[0]

    # Scan the class once instead of repeating the dir() comprehensions.
    method_names = [
        name
        for name in dir(Fusion)
        if callable(getattr(Fusion, name)) and not name.startswith("_")
    ]
    property_names = [p for p in dir(Fusion) if isinstance(getattr(Fusion, p), property)]
    names = method_names + property_names
    summaries = [_summary(getattr(Fusion, name)) for name in names]
    return "Fusion object \nAvailable methods:\n" + tabulate(
        pd.DataFrame([names, summaries]).T.set_index(0),  # type: ignore
        tablefmt="psql",
    )

attribute(identifier, index, data_type='String', title='', description='', is_dataset_key=False, source=None, source_field_id=None, is_internal_dataset_key=None, is_externally_visible=True, unit=None, multiplier=1.0, is_propagation_eligible=None, is_metric=None, available_from=None, deprecated_from=None, term='bizterm1', dataset=None, attribute_type=None, application_id=None, **kwargs)

Instantiate an Attribute object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

The unique identifier for the attribute.

required
index int

Attribute index.

required
data_type str | Types

Datatype of attribute. Defaults to "String".

'String'
title str

Attribute title. If not provided, defaults to identifier.

''
description str

Attribute description. If not provided, defaults to identifier.

''
is_dataset_key bool

Flag for primary keys. Defaults to False.

False
source str | None

Name of data vendor which provided the data. Defaults to None.

None
source_field_id str | None

Original identifier of attribute, if attribute has been renamed. If not provided, defaults to identifier.

None
is_internal_dataset_key bool | None

Flag for internal primary keys. Defaults to None.

None
is_externally_visible bool | None

Flag for externally visible attributes. Defaults to True.

True
unit Any | None

Unit of attribute. Defaults to None.

None
multiplier float

Multiplier for unit. Defaults to 1.0.

1.0
is_propagation_eligible bool | None

Flag for propagation eligibility. Defaults to None.

None
is_metric bool | None

Flag for attributes that are metrics. Defaults to None.

None
available_from str | None

Date from which the attribute is available. Defaults to None.

None
deprecated_from str | None

Date from which the attribute is deprecated. Defaults to None.

None
term str

Term. Defaults to "bizterm1".

'bizterm1'
dataset int | None

Dataset. Defaults to None.

None
attribute_type str | None

Attribute type. Defaults to None.

None

Returns:

Name Type Description
Attribute Attribute

Fusion Attribute class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attr = fusion.attribute(identifier="attr1", index=0)
Note

See the attributes module for more information on functionalities of attribute objects.

Source code in py_src/fusion/fusion.py
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
def attribute(  # noqa: PLR0913
    self,
    identifier: str,
    index: int,
    data_type: str | Types = "String",
    title: str = "",
    description: str = "",
    is_dataset_key: bool = False,
    source: str | None = None,
    source_field_id: str | None = None,
    is_internal_dataset_key: bool | None = None,
    is_externally_visible: bool | None = True,
    unit: Any | None = None,
    multiplier: float = 1.0,
    is_propagation_eligible: bool | None = None,
    is_metric: bool | None = None,
    available_from: str | None = None,
    deprecated_from: str | None = None,
    term: str = "bizterm1",
    dataset: int | None = None,
    attribute_type: str | None = None,
    application_id: str | dict[str, str] | None = None,
    **kwargs: Any,
) -> Attribute:
    """Build an Attribute object bound to this client for metadata creation.

    Args:
        identifier (str): The unique identifier for the attribute.
        index (int): Attribute index.
        data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
        title (str, optional): Attribute title. If not provided, defaults to identifier.
        description (str, optional): Attribute description. If not provided, defaults to identifier.
        is_dataset_key (bool, optional): Flag for primary keys. Defaults to False.
        source (str | None, optional): Name of data vendor which provided the data. Defaults to None.
        source_field_id (str | None, optional): Original identifier of attribute, if attribute has been
            renamed. If not provided, defaults to identifier.
        is_internal_dataset_key (bool | None, optional): Flag for internal primary keys. Defaults to None.
        is_externally_visible (bool | None, optional): Flag for externally visible attributes. Defaults to True.
        unit (Any | None, optional): Unit of attribute. Defaults to None.
        multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
        is_propagation_eligible (bool | None, optional): Flag for propagation eligibility. Defaults to None.
        is_metric (bool | None, optional): Flag for attributes that are metrics. Defaults to None.
        available_from (str | None, optional): Date from which the attribute is available. Defaults to None.
        deprecated_from (str | None, optional): Date from which the attribute is deprecated. Defaults to None.
        term (str, optional): Term. Defaults to "bizterm1".
        dataset (int | None, optional): Dataset. Defaults to None.
        attribute_type (str | None, optional): Attribute type. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the attribute.
            Defaults to None.

    Returns:
        Attribute: Fusion Attribute class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attr = fusion.attribute(identifier="attr1", index=0)

    Note:
        See the attributes module for more information on functionalities of attribute objects.

    """
    # Normalise the data type: accept either a Types member or a string such
    # as "string" / "Types.String" and resolve it to the Types enum.
    type_name = str(data_type).strip().rsplit(".", maxsplit=1)[-1]
    resolved_type = Types[type_name.title()]
    attr = Attribute(
        identifier=identifier,
        index=index,
        data_type=resolved_type,
        title=title,
        description=description,
        is_dataset_key=is_dataset_key,
        source=source,
        source_field_id=source_field_id,
        is_internal_dataset_key=is_internal_dataset_key,
        is_externally_visible=is_externally_visible,
        unit=unit,
        multiplier=multiplier,
        is_propagation_eligible=is_propagation_eligible,
        is_metric=is_metric,
        available_from=available_from,
        deprecated_from=deprecated_from,
        term=term,
        dataset=dataset,
        attribute_type=attribute_type,
        application_id=application_id,
        **kwargs,
    )
    # Attach this client so the returned object can issue API calls.
    attr.client = self
    return attr

attributes(attributes=None)

Instantiate an Attributes object with this client for metadata creation.

Parameters:

Name Type Description Default
attributes list[Attribute] | None

List of Attribute objects. Defaults to None.

None

Returns:

Name Type Description
Attributes Attributes

Fusion Attributes class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attr1 = fusion.attribute("attr1", 0)
>>> attr2 = fusion.attribute("attr2", 1)
>>> attrs = fusion.attributes([attr1, attr2])
Note

See the attributes module for more information on functionalities of attributes object.

Source code in py_src/fusion/fusion.py
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
def attributes(
    self,
    attributes: list[Attribute] | None = None,
) -> Attributes:
    """Build an Attributes collection bound to this client for metadata creation.

    Args:
        attributes (list[Attribute] | None, optional): List of Attribute objects. Defaults to None.

    Returns:
        Attributes: Fusion Attributes class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attr1 = fusion.attribute("attr1", 0)
        >>> attr2 = fusion.attribute("attr2", 1)
        >>> attrs = fusion.attributes([attr1, attr2])

    Note:
        See the attributes module for more information on functionalities of attributes object.

    """
    # Treat a missing/empty argument as an empty collection before wrapping.
    collection = Attributes(attributes=[] if not attributes else attributes)
    collection.client = self
    return collection

catalog_resources(catalog=None, output=False)

List the resources contained within the catalog, for example products and datasets.

Parameters:

Name Type Description Default
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource within the catalog

Source code in py_src/fusion/fusion.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def catalog_resources(self, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
    """List the resources contained within the catalog, for example products and datasets.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
       class:`pandas.DataFrame`: A dataframe with a row for each resource within the catalog
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}"
    cat_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(cat_df)  # noqa: T201

    return cat_df

create_dataset_lineage(base_dataset, source_dataset_catalog_mapping, catalog=None, return_resp_obj=False)

Upload lineage to a dataset.

Parameters:

Name Type Description Default
base_dataset str

A dataset identifier to which you want to add lineage.

required
source_dataset_catalog_mapping Union[DataFrame, list[dict[str]]]

Mapping for the dataset identifier(s) and catalog(s) from which to add lineage.

required
catalog Optional[str]

Catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Raises:

Type Description
ValueError

If source_dataset_catalog_mapping is not a pandas DataFrame or a list of dictionaries

HTTPError

If the request is unsuccessful.

Examples:

Creating lineage from a pandas DataFrame.

>>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
>>> df = pd.DataFrame(data)
>>> fusion = Fusion()
>>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=df, catalog="c")

Creating lineage from a list of dictionaries.

>>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
>>> fusion = Fusion()
>>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")
Source code in py_src/fusion/fusion.py
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
def create_dataset_lineage(
    self,
    base_dataset: str,
    source_dataset_catalog_mapping: pd.DataFrame | list[dict[str, str]],
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload lineage to a dataset.

    Args:
        base_dataset (str): A dataset identifier to which you want to add lineage.
        source_dataset_catalog_mapping (Union[pd.DataFrame, list[dict[str]]]): Mapping for the dataset
            identifier(s) and catalog(s) from which to add lineage.
        catalog (Optional[str], optional): Catalog identifier. Defaults to None.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Raises:
        ValueError: If source_dataset_catalog_mapping is not a pandas DataFrame or a list of dictionaries
        HTTPError: If the request is unsuccessful.

    Examples:
        Creating lineage from a pandas DataFrame.
        >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
        >>> df = pd.DataFrame(data)
        >>> fusion = Fusion()
        >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=df, catalog="c")

        Creating lineage from a list of dictionaries.
        >>> data = [{"dataset": "a", "catalog": "a"}, {"dataset": "b", "catalog": "b"}]
        >>> fusion = Fusion()
        >>> fusion.create_dataset_lineage(base_dataset="c", source_dataset_catalog_mapping=data, catalog="c")

    """
    catalog = self._use_catalog(catalog)

    # Normalise the mapping into a list of {"dataset": ..., "catalog": ...} records.
    if isinstance(source_dataset_catalog_mapping, pd.DataFrame):
        mapping_records = [
            {"dataset": record["dataset"], "catalog": record["catalog"]}
            for record in source_dataset_catalog_mapping.to_dict(orient="records")
        ]
    elif isinstance(source_dataset_catalog_mapping, list):
        mapping_records = source_dataset_catalog_mapping
    else:
        raise ValueError("source_dataset_catalog_mapping must be a pandas DataFrame or a list of dictionaries.")

    payload = {"source": mapping_records}
    url = f"{self.root_url}catalogs/{catalog}/datasets/{base_dataset}/lineage"

    resp = self.session.post(url, json=payload)
    resp.raise_for_status()

    return resp if return_resp_obj else None

dataset(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Source', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, **kwargs)

Instantiate a Dataset object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Source".

'Source'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None

Returns:

Name Type Description
Dataset Dataset

Fusion Dataset class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset(identifier="DATASET_1")
Note

See the dataset module for more information on functionalities of dataset objects.

Source code in py_src/fusion/fusion.py
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
def dataset(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Source",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    **kwargs: Any,
) -> Dataset:
    """Instantiate a Dataset object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Source".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Dataset constructor.

    Returns:
        Dataset: Fusion Dataset class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset(identifier="DATASET_1")

    Note:
        See the dataset module for more information on functionalities of dataset objects.

    """
    # Pure pass-through: every parameter is forwarded by name to the Dataset
    # constructor, then the new object is bound to this client.
    dataset_obj = Dataset(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        **kwargs,
    )
    dataset_obj.client = self
    return dataset_obj

dataset_resources(dataset, catalog=None, output=False)

List the resources available for a dataset, currently this will always be a datasetseries.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource

Source code in py_src/fusion/fusion.py
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
def dataset_resources(self, dataset: str, catalog: str | None = None, output: bool = False) -> pd.DataFrame:
    """List the resources available for a dataset, currently this will always be a datasetseries.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each resource
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}"
    ds_res_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(ds_res_df)  # noqa: T201

    return ds_res_df

datasetmember_resources(dataset, series, catalog=None, output=False)

List the available resources for a datasetseries member.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series str

The datasetseries identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each datasetseries member resource. Currently, this will always be distributions.

Source code in py_src/fusion/fusion.py
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
def datasetmember_resources(
    self,
    dataset: str,
    series: str,
    catalog: str | None = None,
    output: bool = False,
) -> pd.DataFrame:
    """List the available resources for a datasetseries member.

    Args:
        dataset (str): A dataset identifier
        series (str): The datasetseries identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each datasetseries member resource.
            Currently, this will always be distributions.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}"
    ds_mem_res_df = Fusion._call_for_dataframe(url, self.session)

    # Fix: the branch previously fell through to `pass`, so the print
    # behaviour documented for `output=True` never happened.
    if output:
        print(ds_mem_res_df)  # noqa: T201

    return ds_mem_res_df

delete_all_datasetmembers(dataset, catalog=None, return_resp_obj=False)

Delete all dataset members within a dataset.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str | None

A catalog identifier. Defaults to 'common'.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: the response object if return_resp_obj is True, otherwise None.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_all_datasetmembers(dataset="dataset1")
Source code in py_src/fusion/fusion.py
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
def delete_all_datasetmembers(
    self,
    dataset: str,
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete every dataset series member belonging to a dataset.

    Args:
        dataset (str): A dataset identifier
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object when ``return_resp_obj`` is True, otherwise None.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_all_datasetmembers(dataset="dataset1")

    """
    catalog = self._use_catalog(catalog)
    series_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    response = self.session.delete(series_url)
    # Raise for any non-success HTTP status before deciding what to return.
    requests_raise_for_status(response)
    if return_resp_obj:
        return response
    return None

delete_datasetmembers(dataset, series_members, catalog=None, return_resp_obj=False)

Delete dataset members.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series_members str | list[str]

A string or list of strings that are dataset series member

required
catalog str | None

A catalog identifier. Defaults to 'common'.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
list[Response] | None

list[requests.Response]: a list of response objects.

Examples:

Delete one dataset member.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

Delete multiple dataset members.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])
Source code in py_src/fusion/fusion.py
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
def delete_datasetmembers(
    self,
    dataset: str,
    series_members: str | list[str],
    catalog: str | None = None,
    return_resp_obj: bool = False,
) -> list[requests.Response] | None:
    """Delete one or more dataset series members.

    Args:
        dataset (str): A dataset identifier
        series_members (str | list[str]): A string or list of strings that are dataset series member
            identifiers to delete.
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        list[requests.Response] | None: a list of response objects when ``return_resp_obj`` is True.

    Examples:
        Delete one dataset member.

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_datasetmembers(dataset="dataset1", series_members="series1")

        Delete multiple dataset members.

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.delete_datasetmembers(dataset="dataset1", series_members=["series1", "series2"])

    """
    catalog = self._use_catalog(catalog)
    # Accept a single identifier as shorthand for a one-element list.
    members = [series_members] if isinstance(series_members, str) else series_members
    base_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    collected: list[requests.Response] = []
    for member in members:
        response = self.session.delete(f"{base_url}/{member}")
        # Fail fast on the first unsuccessful deletion.
        requests_raise_for_status(response)
        collected.append(response)
    return collected if return_resp_obj else None

download(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, force_download=False, download_folder=None, return_paths=False, partitioning=None, preserve_original_name=False)

Downloads the requested distributions of a dataset to disk.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset. If more than one series member exists on the latest date, the series member identifiers will be sorted alphabetically and the last one will be downloaded.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
return_paths bool

Return paths and success statuses of the downloaded files.

False
partitioning str

Partitioning specification.

None
preserve_original_name bool

Preserve the original name of the file. Defaults to False.

False
Source code in py_src/fusion/fusion.py
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
def download(  # noqa: PLR0912, PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    force_download: bool = False,
    download_folder: str | None = None,
    return_paths: bool = False,
    partitioning: str | None = None,
    preserve_original_name: bool = False,
) -> list[tuple[bool, str, str | None]] | None:
    """Downloads the requested distributions of a dataset to disk.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): Either a single date or a range identified by a start or end date,
            or both separated with a ":". Defaults to 'latest' which will return the most recent
            instance of the dataset. If more than one series member exists on the latest date, the
            series member identifiers will be sorted alphabetically and the last one will be downloaded.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to download in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data download Defaults to True.
        force_download (bool, optional): If True then will always download a file even
            if it is already on disk. Defaults to False.
        download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
            Defaults to download_folder as set in __init__
        return_paths (bool, optional): Return paths and success statuses of the downloaded files.
        partitioning (str, optional): Partitioning specification.
        preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.

    Returns:
        list[tuple[bool, str, str | None]] | None: When ``return_paths`` is True, one tuple per
            distribution with a success flag, the downloaded item, and an optional third element
            (presumably an error message when the download failed -- confirm against the
            filesystem ``download`` implementation); otherwise None.

    """
    catalog = self._use_catalog(catalog)

    # Accepts YYYYMMDD, or a range "start:end" where either side may be omitted.
    valid_date_range = re.compile(r"^(\d{4}\d{2}\d{2})$|^((\d{4}\d{2}\d{2})?([:])(\d{4}\d{2}\d{2})?)$")

    if valid_date_range.match(dt_str) or dt_str == "latest":
        required_series = self._resolve_distro_tuples(dataset, dt_str, dataset_format, catalog)
    else:
        # sample data is limited to csv
        if dt_str == "sample":
            dataset_format = self.list_distributions(dataset, dt_str, catalog)["identifier"].iloc[0]
        # Each required-series tuple is (catalog, dataset, series member, format).
        required_series = [(catalog, dataset, dt_str, dataset_format)]

    if dataset_format not in RECOGNIZED_FORMATS + ["raw"]:
        raise ValueError(f"Dataset format {dataset_format} is not supported")

    if not download_folder:
        download_folder = self.download_folder

    # One target folder per distribution; all identical unless hive partitioning applies.
    download_folders = [download_folder] * len(required_series)

    if partitioning == "hive":
        # Nest the output as <folder>/<catalog>/<dataset>/<member>.
        members = [series[2].strip("/") for series in required_series]
        download_folders = [
            f"{download_folders[i]}/{series[0]}/{series[1]}/{members[i]}"
            for i, series in enumerate(required_series)
        ]

    for d in download_folders:
        if not self.fs.exists(d):
            self.fs.mkdir(d, create_parents=True)

    n_par = cpu_count(n_par)
    # Build one keyword-argument spec per distribution for the filesystem download call.
    download_spec = [
        {
            "lfs": self.fs,
            "rpath": distribution_to_url(
                self.root_url,
                series[1],
                series[2],
                series[3],
                series[0],
                is_download=True,
            ),
            "lpath": distribution_to_filename(
                download_folders[i],
                series[1],
                series[2],
                series[3],
                series[0],
                partitioning=partitioning,
            ),
            "overwrite": force_download,
            "preserve_original_name": preserve_original_name,
        }
        for i, series in enumerate(required_series)
    ]

    logger.log(
        VERBOSE_LVL,
        f"Beginning {len(download_spec)} downloads in batches of {n_par}",
    )
    if show_progress:
        with joblib_progress("Downloading", total=len(download_spec)):
            res = Parallel(n_jobs=n_par)(
                delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
            )
    else:
        res = Parallel(n_jobs=n_par)(
            delayed(self.get_fusion_filesystem().download)(**spec) for spec in download_spec
        )

    # Surface each failed download as a warning rather than raising.
    if (len(res) > 0) and (not all(r[0] for r in res)):
        for r in res:
            if not r[0]:
                warnings.warn(f"The download of {r[1]} was not successful", stacklevel=2)
    return res if return_paths else None

from_bytes(data, dataset, series_member='latest', catalog=None, distribution='parquet', show_progress=True, return_paths=False, chunk_size=5 * 2 ** 20, from_date=None, to_date=None, file_name=None, **kwargs)

Uploads data from an object in memory.

Parameters:

Name Type Description Default
data str

an object in memory to upload

required
dataset str

Dataset name to which the bytes will be uploaded.

required
series_member str

A single date or label. Defaults to 'latest' which will return the most recent.

'latest'
catalog str

A catalog identifier. Defaults to 'common'.

None
distribution str

A distribution type, e.g. a file format or raw

'parquet'
show_progress bool

Display a progress bar during data download Defaults to True.

True
return_paths bool

Return paths and success statuses of the downloaded files.

False
chunk_size int

Maximum chunk size.

5 * 2 ** 20
from_date str

start of the data date range contained in the distribution, defaults to upload date

None
to_date str

end of the data date range contained in the distribution, defaults to upload date.

None
file_name str

file name to be used for the uploaded file. Defaults to Fusion standard naming.

None

Returns:

Type Description
list[tuple[bool, str, str | None]] | None

Optional[list[tuple[bool, str, Optional[str]]]: a list of tuples, one for each distribution

Source code in py_src/fusion/fusion.py
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
def from_bytes(  # noqa: PLR0913
    self,
    data: BytesIO,
    dataset: str,
    series_member: str = "latest",
    catalog: str | None = None,
    distribution: str = "parquet",
    show_progress: bool = True,
    return_paths: bool = False,
    chunk_size: int = 5 * 2**20,
    from_date: str | None = None,
    to_date: str | None = None,
    file_name: str | None = None,
    **kwargs: Any,  # noqa: ARG002
) -> list[tuple[bool, str, str | None]] | None:
    """Uploads data from an object in memory.

    Args:
        data (BytesIO): an in-memory bytes buffer to upload
        dataset (str): Dataset name to which the bytes will be uploaded.
        series_member (str, optional): A single date or label. Defaults to 'latest' which will return
            the most recent.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        distribution (str, optional): A distribution type, e.g. a file format or raw
        show_progress (bool, optional): Display a progress bar during data download. Defaults to True.
        return_paths (bool, optional): Return paths and success statuses of the downloaded files.
        chunk_size (int, optional): Maximum chunk size.
        from_date (str, optional): start of the data date range contained in the distribution,
            defaults to upload date
        to_date (str, optional): end of the data date range contained in the distribution, defaults to upload date.
        file_name (str, optional): file name to be used for the uploaded file. Defaults to Fusion standard naming.

    Returns:
        Optional[list[tuple[bool, str, Optional[str]]]]: a list of tuples, one for each distribution

    """
    catalog = self._use_catalog(catalog)

    fs_fusion = self.get_fusion_filesystem()
    if distribution not in RECOGNIZED_FORMATS + ["raw"]:
        raise ValueError(f"Dataset format {distribution} is not supported")

    # The dataset's catalog metadata flags whether it stores raw (non-native) files,
    # which changes how the upload URL is constructed.
    is_raw = js.loads(fs_fusion.cat(f"{catalog}/datasets/{dataset}"))["isRawData"]
    local_url_eqiv = path_to_url(f"{dataset}__{catalog}__{series_member}.{distribution}", is_raw)

    # Single-row mapping of (local path, target URL, file name) consumed by upload_files;
    # path is empty because the payload comes from memory, not disk.
    data_map_df = pd.DataFrame(["", local_url_eqiv, file_name]).T
    data_map_df.columns = ["path", "url", "file_name"]  # type: ignore

    res = upload_files(
        fs_fusion,
        data,
        data_map_df,
        parallel=False,
        n_par=1,
        multipart=False,
        chunk_size=chunk_size,
        show_progress=show_progress,
        from_date=from_date,
        to_date=to_date,
    )

    # Surface any per-distribution failures as warnings rather than raising.
    if not all(r[0] for r in res):
        failed_res = [r for r in res if not r[0]]
        msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)

    return res if return_paths else None

get_events(last_event_id=None, catalog=None, in_background=True, url='https://fusion.jpmorgan.com/api/v1/')

Run server sent event listener and print out the new events. Keyboard terminate to stop.

Parameters:

Name Type Description Default
last_event_id str

id of the last event.

None
catalog str

catalog.

None
in_background bool

execute event monitoring in the background (default = True).

True
url str

subscription url.

'https://fusion.jpmorgan.com/api/v1/'
Source code in py_src/fusion/fusion.py
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
def get_events(
    self,
    last_event_id: str | None = None,
    catalog: str | None = None,
    in_background: bool = True,
    url: str = "https://fusion.jpmorgan.com/api/v1/",
) -> None | pd.DataFrame:
    """Run server sent event listener and print out the new events. Keyboard terminate to stop.

    Args:
        last_event_id (str): id of the last event.
        catalog (str): catalog.
        in_background (bool): execute event monitoring in the background (default = True).
        url (str): subscription url.

    Returns:
        Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
            If in_background is set to False then pandas DataFrame is output upon keyboard termination.
    """
    catalog = self._use_catalog(catalog)

    # Background mode: events are collected elsewhere; just expose the accumulator.
    if in_background:
        return self.events

    # Foreground mode: block on the SSE stream until interrupted.
    from sseclient import SSEClient

    _ = self.list_catalogs()  # refresh token
    messages = SSEClient(
        session=self.session,
        url=f"{url}catalogs/{catalog}/notifications/subscribe",
        last_id=last_event_id,
        headers={
            "authorization": f"bearer {self.credentials.bearer_token}",
        },
    )
    collected = []
    interrupted = False
    try:
        for msg in messages:
            event = js.loads(msg.data)
            # Heartbeats are keep-alives, not user events.
            if event["type"] != "HeartBeatNotification":
                collected.append(event)
    except KeyboardInterrupt:
        # Expected stop signal: fall through and return what was gathered.
        interrupted = True
    # Any other exception propagates naturally (previously re-raised redundantly).
    return pd.DataFrame(collected) if interrupted or collected else None

get_fusion_filesystem()

Creates Fusion Filesystem.

Returns: Fusion Filesystem

Source code in py_src/fusion/fusion.py
215
216
217
218
219
220
221
def get_fusion_filesystem(self) -> FusionHTTPFileSystem:
    """Build a Fusion filesystem bound to this client's root URL and credentials.

    Returns: Fusion Filesystem

    """
    client_kwargs = {
        "root_url": self.root_url,
        "credentials": self.credentials,
    }
    return FusionHTTPFileSystem(client_kwargs=client_kwargs)

input_dataflow(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Flow', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, producer_application_id=None, consumer_application_id=None, flow_details=None, **kwargs)

Instantiate an Input Dataflow object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Flow".

'Flow'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
producer_application_id dict[str, str] | None

The producer application ID (upstream application producing the flow).

None
consumer_application_id list[dict[str, str]] | dict[str, str] | None

The consumer application ID (downstream application, consuming the flow).

None
flow_details dict[str, str] | None

The flow details. Specifies input versus output flow. Defaults to {"flowDirection": "Input"}.

None

Returns:

Name Type Description
Dataset InputDataFlow

Fusion InputDataFlow class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")
Note

See the dataset module for more information on functionalities of input dataflow objects.

Source code in py_src/fusion/fusion.py
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
def input_dataflow(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Flow",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    producer_application_id: dict[str, str] | None = None,
    consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
    flow_details: dict[str, str] | None = None,
    **kwargs: Any,
) -> InputDataFlow:
    """Instantiate an Input Dataflow object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Flow".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        producer_application_id (dict[str, str] | None, optional): The producer application ID (upstream application
            producing the flow). Defaults to None.
        consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer application
            ID (downstream application, consuming the flow). Defaults to None.
        flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output flow.
            Defaults to {"flowDirection": "Input"}.
        **kwargs (Any): Additional keyword arguments passed through to the `InputDataFlow` constructor.

    Returns:
        Dataset: Fusion InputDataFlow class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.input_dataflow(identifier="MY_DATAFLOW")

    Note:
        See the dataset module for more information on functionalities of input dataflow objects.

    """
    # Default to an input-direction flow when the caller does not specify one.
    flow_details = {"flowDirection": "Input"} if flow_details is None else flow_details
    dataflow_obj = InputDataFlow(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        producer_application_id=producer_application_id,
        consumer_application_id=consumer_application_id,
        flow_details=flow_details,
        **kwargs,
    )
    # Bind this client so the dataflow object can make API calls.
    dataflow_obj.client = self
    return dataflow_obj

list_catalogs(output=False)

Lists the catalogs available to the API account.

Parameters:

Name Type Description Default
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each catalog

Source code in py_src/fusion/fusion.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def list_catalogs(self, output: bool = False) -> pd.DataFrame:
    """Lists the catalogs available to the API account.

    Args:
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each catalog
    """
    url = f"{self.root_url}catalogs/"
    cat_df = Fusion._call_for_dataframe(url, self.session)

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(cat_df)

    return cat_df

list_dataset_attributes(dataset, catalog=None, output=False, display_all_columns=False)

Returns the list of attributes that are in the dataset.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each attribute

Source code in py_src/fusion/fusion.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
def list_dataset_attributes(
    self,
    dataset: str,
    catalog: str | None = None,
    output: bool = False,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Returns the list of attributes that are in the dataset.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each attribute
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
    ds_attr_df = Fusion._call_for_dataframe(url, self.session)

    # Present attributes in their declared dataset order when the API provides one.
    if "index" in ds_attr_df.columns:
        ds_attr_df = ds_attr_df.sort_values(by="index").reset_index(drop=True)

    if not display_all_columns:
        # Keep only the well-known summary columns; intersection tolerates missing ones.
        ds_attr_df = ds_attr_df[
            ds_attr_df.columns.intersection(
                [
                    "identifier",
                    "title",
                    "dataType",
                    "isDatasetKey",
                    "description",
                    "source",
                ]
            )
        ]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(ds_attr_df)

    return ds_attr_df

list_dataset_lineage(dataset_id, catalog=None, output=False, max_results=-1)

List the upstream and downstream lineage of the dataset.

Parameters:

Name Type Description Default
dataset_id str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each resource

Raises:

Type Description
HTTPError

If the dataset is not found in the catalog.

Source code in py_src/fusion/fusion.py
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
def list_dataset_lineage(
    self,
    dataset_id: str,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
) -> pd.DataFrame:
    """List the upstream and downstream lineage of the dataset.

    Args:
        dataset_id (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.

    Returns:
        class:`pandas.DataFrame`: A dataframe with a row for each resource

    Raises:
        HTTPError: If the dataset is not found in the catalog.

    """
    catalog = self._use_catalog(catalog)

    # Verify the dataset exists before asking for its lineage (raises HTTPError if not).
    url_dataset = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}"
    resp_dataset = self.session.get(url_dataset)
    resp_dataset.raise_for_status()

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset_id}/lineage"
    resp = self.session.get(url)
    resp.raise_for_status()  # fail loudly rather than parsing an error payload as JSON
    data = resp.json()
    relations_data = data["relations"]

    restricted_datasets = [
        dataset_metadata["identifier"]
        for dataset_metadata in data["datasets"]
        if dataset_metadata.get("status", None) == "Restricted"
    ]

    def _title_for(identifier: str) -> str | None:
        """Resolve a dataset identifier to its title, masking restricted datasets.

        Returns None when the identifier is absent from the lineage metadata; the
        previous implementation could reuse a stale title from an earlier loop
        iteration or raise NameError in that case.
        """
        for ds_meta in data["datasets"]:
            if ds_meta["identifier"] == identifier:
                if ds_meta.get("status", None) == "Restricted":
                    return "Access Restricted"
                return ds_meta["title"]
        return None

    # Map each related dataset id to (relationship type, catalog, title).
    data_dict: dict[str, tuple[str, str, str | None]] = {}
    for entry in relations_data:
        source_dataset_id = entry["source"]["dataset"]
        source_catalog = entry["source"]["catalog"]
        destination_dataset_id = entry["destination"]["dataset"]
        destination_catalog = entry["destination"]["catalog"]

        if destination_dataset_id == dataset_id:
            # The relation flows INTO the requested dataset: record its source.
            data_dict[source_dataset_id] = (
                "source",
                source_catalog,
                _title_for(source_dataset_id),
            )

        if source_dataset_id == dataset_id:
            # The relation flows OUT of the requested dataset: record its product.
            data_dict[destination_dataset_id] = (
                "produced",
                destination_catalog,
                _title_for(destination_dataset_id),
            )

    output_data = {
        "type": [v[0] for v in data_dict.values()],
        "dataset_identifier": list(data_dict.keys()),
        "title": [v[2] for v in data_dict.values()],
        "catalog": [v[1] for v in data_dict.values()],
    }

    lineage_df = pd.DataFrame(output_data)
    # Fully mask rows that reference restricted datasets.
    lineage_df.loc[
        lineage_df["dataset_identifier"].isin(restricted_datasets),
        ["dataset_identifier", "catalog", "title"],
    ] = "Access Restricted"

    if max_results > -1:
        lineage_df = lineage_df[0:max_results]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(lineage_df)

    return lineage_df

list_datasetmembers(dataset, catalog=None, output=False, max_results=-1)

List the available members in the dataset series.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each dataset member.

Source code in py_src/fusion/fusion.py
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
def list_datasetmembers(
    self,
    dataset: str,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
) -> pd.DataFrame:
    """List the available members in the dataset series.

    Args:
        dataset (str): A dataset identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.

    Returns:
        class:`pandas.DataFrame`: a dataframe with a row for each dataset member.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries"
    ds_members_df = Fusion._call_for_dataframe(url, self.session)

    if max_results > -1:
        ds_members_df = ds_members_df[0:max_results]

    if output:
        # Honor the documented behavior; previously `if output: pass` was a no-op.
        print(ds_members_df)

    return ds_members_df

list_datasets(contains=None, id_contains=False, product=None, catalog=None, output=False, max_results=-1, display_all_columns=False, status=None, dataset_type=None)

Get the datasets contained in a catalog.

Parameters:

Name Type Description Default
contains Union[str, list]

A string or a list of strings that are dataset identifiers to filter the datasets list. If a list is provided then it will return datasets whose identifier matches any of the strings. Defaults to None.

None
id_contains bool

Filter datasets only where the string(s) are contained in the identifier, ignoring description.

False
product Union[str, list]

A string or a list of strings that are product identifiers to filter the datasets list. Defaults to None.

None
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False
status str

filter the datasets by status, default is to show all results.

None
dataset_type str

filter the datasets by type, default is to show all results.

None

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each dataset.

Source code in py_src/fusion/fusion.py
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
def list_datasets(  # noqa: PLR0913
    self,
    contains: str | list[str] | None = None,
    id_contains: bool = False,
    product: str | list[str] | None = None,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
    display_all_columns: bool = False,
    status: str | None = None,
    dataset_type: str | None = None,
) -> pd.DataFrame:
    """Get the datasets contained in a catalog.

    Args:
        contains (Union[str, list], optional): A string or a list of strings that are dataset
            identifiers to filter the datasets list. If a list is provided then it will return
            datasets whose identifier matches any of the strings. Defaults to None.
        id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
            ignoring description.
        product (Union[str, list], optional): A string or a list of strings that are product
            identifiers to filter the datasets list. Defaults to None.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed
        status (str, optional): filter the datasets by status, default is to show all results.
        dataset_type (str, optional): filter the datasets by type, default is to show all results.

    Returns:
        :class:`pandas.DataFrame`: a dataframe with a row for each dataset.
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/datasets"
    ds_df = Fusion._call_for_dataframe(url, self.session)

    if contains:
        if isinstance(contains, list):
            # Multiple terms are joined into one regex alternation; each term is
            # treated as a regular expression by str.contains below.
            contains = "|".join(f"{s}" for s in contains)
        if id_contains:
            ds_df = ds_df[ds_df["identifier"].str.contains(contains, case=False)]
        else:
            ds_df = ds_df[
                ds_df["identifier"].str.contains(contains, case=False)
                | ds_df["description"].str.contains(contains, case=False)
            ]

    if product:
        url = f"{self.root_url}catalogs/{catalog}/productDatasets"
        prd_df = Fusion._call_for_dataframe(url, self.session)
        prd_df = (
            prd_df[prd_df["product"] == product]
            if isinstance(product, str)
            else prd_df[prd_df["product"].isin(product)]
        )
        # Case-insensitive match of dataset identifiers against the product mapping.
        ds_df = ds_df[ds_df["identifier"].str.lower().isin(prd_df["dataset"].str.lower())].reset_index(drop=True)

    # Copy before mutating: after the boolean filters above, ds_df may be a view of
    # the API result and in-place column assignment would raise SettingWithCopyWarning.
    ds_df = ds_df.copy()
    ds_df["category"] = ds_df.category.str.join(", ")
    ds_df["region"] = ds_df.region.str.join(", ")
    if not display_all_columns:
        cols = [
            "identifier",
            "title",
            "containerType",
            "region",
            "category",
            "coverageStartDate",
            "coverageEndDate",
            "description",
            "status",
            "type",
        ]
        # Keep only the key columns that are actually present in the API response.
        cols = [c for c in cols if c in ds_df.columns]
        ds_df = ds_df[cols]

    if status is not None:
        ds_df = ds_df[ds_df["status"] == status]

    if dataset_type is not None:
        ds_df = ds_df[ds_df["type"] == dataset_type]

    # BUG FIX: truncate AFTER the status/type filters so max_results limits the
    # rows actually returned; previously the limit was applied to the pre-filter
    # frame and the subsequent filters could shrink the result below the limit
    # or drop matching rows entirely.
    if max_results > -1:
        ds_df = ds_df[0:max_results]

    if output:
        pass

    return ds_df

list_distributions(dataset, series, catalog=None, output=False)

List the available distributions (downloadable instances of the dataset with a format type).

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series str

The datasetseries identifier

required
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each distribution.

Source code in py_src/fusion/fusion.py
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
def list_distributions(
    self,
    dataset: str,
    series: str,
    catalog: str | None = None,
    output: bool = False,
) -> pd.DataFrame:
    """List the available distributions (downloadable instances of the dataset with a format type).

    Args:
        dataset (str): A dataset identifier
        series (str): The datasetseries identifier
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.

    Returns:
        :class:`pandas.DataFrame`: A dataframe with a row for each distribution.
    """
    # Resolve the default catalog if none was supplied.
    catalog = self._use_catalog(catalog)

    distributions_url = f"{self.root_url}catalogs/{catalog}/datasets/{dataset}/datasetseries/{series}/distributions"
    result_df = Fusion._call_for_dataframe(distributions_url, self.session)

    if output:
        pass

    return result_df

list_product_dataset_mapping(dataset=None, product=None, catalog=None)

Get the product-to-dataset linking contained in a catalog. A product is a grouping of datasets.

Parameters:

Name Type Description Default
dataset str | list[str] | None

A string or list of strings that are dataset

None
product str | list[str] | None

A string or list of strings that are product

None
catalog str | None

A catalog identifier. Defaults to 'common'.

None

Returns:

Type Description
DataFrame

pd.DataFrame: a dataframe with a row for each dataset to product mapping.

Source code in py_src/fusion/fusion.py
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
def list_product_dataset_mapping(
    self,
    dataset: str | list[str] | None = None,
    product: str | list[str] | None = None,
    catalog: str | None = None,
) -> pd.DataFrame:
    """Get the product to dataset linking contained in a catalog. A product is a grouping of datasets.

    Args:
        dataset (str | list[str] | None, optional): A string or list of strings that are dataset
            identifiers to filter the output. If a list is provided then it will return
            datasets whose identifier matches any of the strings. Defaults to None.
        product (str | list[str] | None, optional): A string or list of strings that are product
            identifiers to filter the output. If a list is provided then it will return
            products whose identifier matches any of the strings. Defaults to None.
        catalog (str | None, optional): A catalog identifier. Defaults to 'common'.

    Returns:
        pd.DataFrame: a dataframe with a row for each dataset to product mapping.
    """
    catalog = self._use_catalog(catalog)
    url = f"{self.root_url}catalogs/{catalog}/productDatasets"
    # _call_for_dataframe already returns a DataFrame (as in the sibling list_* methods);
    # the previous pd.DataFrame(...) re-wrap was redundant.
    mapping_df = Fusion._call_for_dataframe(url, self.session)

    if dataset:
        # A list of identifiers becomes one regex alternation; a plain string is used as-is.
        pattern = "|".join(f"{s}" for s in dataset) if isinstance(dataset, list) else dataset
        mapping_df = mapping_df[mapping_df["dataset"].str.contains(pattern, case=False)]
    if product:
        pattern = "|".join(f"{s}" for s in product) if isinstance(product, list) else product
        mapping_df = mapping_df[mapping_df["product"].str.contains(pattern, case=False)]
    return mapping_df

list_products(contains=None, id_contains=False, catalog=None, output=False, max_results=-1, display_all_columns=False)

Get the products contained in a catalog. A product is a grouping of datasets.

Parameters:

Name Type Description Default
contains Union[str, list]

A string or a list of strings that are product identifiers to filter the products list. If a list is provided then it will return products whose identifier matches any of the strings. Defaults to None.

None
id_contains bool

Filter datasets only where the string(s) are contained in the identifier, ignoring description.

False
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
max_results int

Limit the number of rows returned in the dataframe. Defaults to -1 which returns all results.

-1
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: a dataframe with a row for each product

Source code in py_src/fusion/fusion.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def list_products(
    self,
    contains: str | list[str] | None = None,
    id_contains: bool = False,
    catalog: str | None = None,
    output: bool = False,
    max_results: int = -1,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Get the products contained in a catalog. A product is a grouping of datasets.

    Args:
        contains (Union[str, list], optional): A string or a list of strings that are product
            identifiers to filter the products list. If a list is provided then it will return
            products whose identifier matches any of the strings. Defaults to None.
        id_contains (bool): Filter datasets only where the string(s) are contained in the identifier,
            ignoring description.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        max_results (int, optional): Limit the number of rows returned in the dataframe.
            Defaults to -1 which returns all results.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        :class:`pandas.DataFrame`: a dataframe with a row for each product
    """
    catalog = self._use_catalog(catalog)

    url = f"{self.root_url}catalogs/{catalog}/products"
    full_prod_df: pd.DataFrame = Fusion._call_for_dataframe(url, self.session)

    if contains:
        if isinstance(contains, list):
            # Join search terms into a single regex alternation for str.contains.
            contains = "|".join(f"{s}" for s in contains)
        if id_contains:
            filtered_df = full_prod_df[full_prod_df["identifier"].str.contains(contains, case=False)]
        else:
            filtered_df = full_prod_df[
                full_prod_df["identifier"].str.contains(contains, case=False)
                | full_prod_df["description"].str.contains(contains, case=False)
            ]
    else:
        filtered_df = full_prod_df

    # BUG FIX: copy before mutating. When the contains filter applied, filtered_df
    # was a slice of full_prod_df and the column assignments below triggered
    # SettingWithCopyWarning (and were not guaranteed to stick).
    filtered_df = filtered_df.copy()
    filtered_df["category"] = filtered_df.category.str.join(", ")
    filtered_df["region"] = filtered_df.region.str.join(", ")
    if not display_all_columns:
        filtered_df = filtered_df[
            filtered_df.columns.intersection(
                [
                    "identifier",
                    "title",
                    "region",
                    "category",
                    "status",
                    "description",
                ]
            )
        ]

    if max_results > -1:
        filtered_df = filtered_df[0:max_results]

    if output:
        pass

    return filtered_df

list_registered_attributes(catalog=None, output=False, display_all_columns=False)

Returns the list of attributes in a catalog.

Parameters:

Name Type Description Default
catalog str

A catalog identifier. Defaults to 'common'.

None
output bool

If True then print the dataframe. Defaults to False.

False
display_all_columns bool

If True displays all columns returned by the API, otherwise only the key columns are displayed

False

Returns:

Name Type Description
class DataFrame

pandas.DataFrame: A dataframe with a row for each attribute

Source code in py_src/fusion/fusion.py
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
def list_registered_attributes(
    self,
    catalog: str | None = None,
    output: bool = False,
    display_all_columns: bool = False,
) -> pd.DataFrame:
    """Returns the list of attributes in a catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        output (bool, optional): If True then print the dataframe. Defaults to False.
        display_all_columns (bool, optional): If True displays all columns returned by the API,
            otherwise only the key columns are displayed

    Returns:
        :class:`pandas.DataFrame`: A dataframe with a row for each attribute
    """
    catalog = self._use_catalog(catalog)

    attributes_url = f"{self.root_url}catalogs/{catalog}/attributes"
    attributes_df = Fusion._call_for_dataframe(attributes_url, self.session).reset_index(drop=True)

    if not display_all_columns:
        # Restrict the output to the key columns (only those present in the response).
        key_columns = [
            "identifier",
            "title",
            "dataType",
            "description",
            "publisher",
            "applicationId",
        ]
        attributes_df = attributes_df[attributes_df.columns.intersection(key_columns)]

    if output:
        pass

    return attributes_df

listen_to_events(last_event_id=None, catalog=None, url='https://fusion.jpmorgan.com/api/v1/')

Run a server-sent event listener in the background. Retrieve results by running get_events.

Parameters:

Name Type Description Default
last_event_id str

Last event ID (exclusive).

None
catalog str

catalog.

None
url str

subscription url.

'https://fusion.jpmorgan.com/api/v1/'
Source code in py_src/fusion/fusion.py
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
def listen_to_events(
    self,
    last_event_id: str | None = None,
    catalog: str | None = None,
    url: str = "https://fusion.jpmorgan.com/api/v1/",
) -> None | pd.DataFrame:
    """Run server sent event listener in the background. Retrieve results by running get_events.

    Args:
        last_event_id (str): Last event ID (exclusive).
        catalog (str): catalog.
        url (str): subscription url.
    Returns:
        Union[None, class:`pandas.DataFrame`]: If in_background is True then the function returns no output.
            If in_background is set to False then pandas DataFrame is output upon keyboard termination.
    """

    catalog = self._use_catalog(catalog)
    import asyncio
    import json
    import threading

    from aiohttp_sse_client import client as sse_client

    from .utils import get_client

    kwargs: dict[str, Any] = {}
    if last_event_id:
        kwargs = {"headers": {"Last-Event-ID": last_event_id}}

    async def async_events() -> None:
        """Consume the SSE stream and accumulate events into self.events.

        Returns:
            None
        """
        timeout = 1e100  # effectively no timeout; the listener runs for the life of the daemon thread
        session = await get_client(self.credentials, timeout=timeout)
        async with sse_client.EventSource(
            f"{url}catalogs/{catalog}/notifications/subscribe",
            session=session,
            **kwargs,
        ) as messages:
            try:
                async for msg in messages:
                    event = json.loads(msg.data)
                    if self.events is None:
                        self.events = pd.DataFrame()
                    # BUG FIX: append only the newly received event. Previously every
                    # message re-concatenated the entire accumulated list onto
                    # self.events, duplicating earlier events on each iteration, and
                    # the very first event was dropped (it only initialised the frame).
                    self.events = pd.concat([self.events, pd.DataFrame([event])], ignore_index=True)
            except TimeoutError as ex:
                raise ex from None

    _ = self.list_catalogs()  # refresh token
    if "headers" in kwargs:
        kwargs["headers"].update({"authorization": f"bearer {self.credentials.bearer_token}"})
    else:
        kwargs["headers"] = {
            "authorization": f"bearer {self.credentials.bearer_token}",
        }
    # Route the subscription through the configured proxy, if any.
    if "http" in self.credentials.proxies:
        kwargs["proxy"] = self.credentials.proxies["http"]
    elif "https" in self.credentials.proxies:
        kwargs["proxy"] = self.credentials.proxies["https"]
    th = threading.Thread(target=asyncio.run, args=(async_events(),), daemon=True)
    th.start()
    return None

output_dataflow(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Flow', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, producer_application_id=None, consumer_application_id=None, flow_details=None, **kwargs)

Instantiate an Output Dataflow object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Flow".

'Flow'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
producer_application_id dict[str, str] | None

The producer application ID (upstream application producing the flow).

None
consumer_application_id list[dict[str, str]] | dict[str, str] | None

The consumer application ID (downstream application, consuming the flow).

None
flow_details dict[str, str] | None

The flow details. Specifies input versus output flow. Defaults to {"flowDirection": "Output"}.

None

Returns:

Name Type Description
Dataset OutputDataFlow

Fusion OutputDataFlow class.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")
Note

See the dataset module for more information on functionalities of output dataflow objects.

Source code in py_src/fusion/fusion.py
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
def output_dataflow(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Flow",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    producer_application_id: dict[str, str] | None = None,
    consumer_application_id: list[dict[str, str]] | dict[str, str] | None = None,
    flow_details: dict[str, str] | None = None,
    **kwargs: Any,
) -> OutputDataFlow:
    """Instantiate an Output Dataflow object with this client for metadata creation.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Flow".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        producer_application_id (dict[str, str] | None, optional): The producer application ID (upstream application
            producing the flow).
        consumer_application_id (list[dict[str, str]] | dict[str, str] | None, optional): The consumer application
            ID (downstream application, consuming the flow).
        flow_details (dict[str, str] | None, optional): The flow details. Specifies input versus output flow.
            Defaults to {"flowDirection": "Output"}.

    Returns:
        OutputDataFlow: Fusion OutputDataFlow class.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.output_dataflow(identifier="MY_DATAFLOW")

    Note:
        See the dataset module for more information on functionalities of output dataflow objects.

    """
    # Default to an output-direction flow when the caller supplies no flow details.
    flow_details = {"flowDirection": "Output"} if flow_details is None else flow_details
    # Forward every argument unchanged to the OutputDataFlow metadata object.
    dataflow_obj = OutputDataFlow(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        producer_application_id=producer_application_id,
        consumer_application_id=consumer_application_id,
        flow_details=flow_details,
        **kwargs,
    )
    # Bind this Fusion client to the new object so it carries the API session/credentials.
    dataflow_obj.client = self
    return dataflow_obj

product(identifier, title='', category=None, short_abstract='', description='', is_active=True, is_restricted=None, maintainer=None, region='Global', publisher='J.P. Morgan', sub_category=None, tag=None, delivery_channel='API', theme=None, release_date=None, language='English', status='Available', image='', logo='', dataset=None, **kwargs)

Instantiate a Product object with this client for metadata creation.

Parameters:

Name Type Description Default
identifier str

Product identifier.

required
title str

Product title. If not provided, defaults to identifier.

''
category str | list[str] | None

Category. Defaults to None.

None
short_abstract str

Short description. Defaults to "".

''
description str

Description. If not provided, defaults to identifier.

''
is_active bool

Boolean for Active status. Defaults to True.

True
is_restricted bool | None

Flag for restricted products. Defaults to None.

None
maintainer str | list[str] | None

Product maintainer. Defaults to None.

None
region str | list[str] | None

Product region. Defaults to None.

'Global'
publisher str | None

Name of vendor that publishes the data. Defaults to None.

'J.P. Morgan'
sub_category str | list[str] | None

Product sub-category. Defaults to None.

None
tag str | list[str] | None

Tags used for search purposes. Defaults to None.

None
delivery_channel str | list[str]

Product delivery channel. Defaults to "API".

'API'
theme str | None

Product theme. Defaults to None.

None
release_date str | None

Product release date. Defaults to None.

None
language str

Product language. Defaults to "English".

'English'
status str

Product status. Defaults to "Available".

'Available'
image str

Product image. Defaults to "".

''
logo str

Product logo. Defaults to "".

''
dataset str | list[str] | None

Product datasets. Defaults to None.

None

Returns:

Name Type Description
Product Product

Fusion Product class instance.

Examples:

>>> fusion = Fusion()
>>> fusion.product(identifier="PRODUCT_1", title="Product")
Note

See the product module for more information on functionalities of product objects.

Source code in py_src/fusion/fusion.py
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
def product(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    short_abstract: str = "",
    description: str = "",
    is_active: bool = True,
    is_restricted: bool | None = None,
    maintainer: str | list[str] | None = None,
    region: str | list[str] = "Global",
    publisher: str = "J.P. Morgan",
    sub_category: str | list[str] | None = None,
    tag: str | list[str] | None = None,
    delivery_channel: str | list[str] = "API",
    theme: str | None = None,
    release_date: str | None = None,
    language: str = "English",
    status: str = "Available",
    image: str = "",
    logo: str = "",
    dataset: str | list[str] | None = None,
    **kwargs: Any,
) -> Product:
    """Instantiate a Product object with this client for metadata creation.

    Args:
        identifier (str): Product identifier.
        title (str, optional): Product title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): Category. Defaults to None.
        short_abstract (str, optional): Short description. Defaults to "".
        description (str, optional): Description. If not provided, defaults to identifier.
        is_active (bool, optional): Boolean for Active status. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted products. Defaults to None.
        maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
        region (str | list[str], optional): Product region. Defaults to "Global".
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        sub_category (str | list[str] | None, optional): Product sub-category. Defaults to None.
        tag (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        delivery_channel (str | list[str], optional): Product delivery channel. Defaults to "API".
        theme (str | None, optional): Product theme. Defaults to None.
        release_date (str | None, optional): Product release date. Defaults to None.
        language (str, optional): Product language. Defaults to "English".
        status (str, optional): Product status. Defaults to "Available".
        image (str, optional): Product image. Defaults to "".
        logo (str, optional): Product logo. Defaults to "".
        dataset (str | list[str] | None, optional): Product datasets. Defaults to None.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Product constructor.

    Returns:
        Product: Fusion Product class instance.

    Examples:
        >>> fusion = Fusion()
        >>> fusion.product(identifier="PRODUCT_1", title="Product")

    Note:
        See the product module for more information on functionalities of product objects.

    """
    product_obj = Product(
        identifier=identifier,
        title=title,
        category=category,
        short_abstract=short_abstract,
        description=description,
        is_active=is_active,
        is_restricted=is_restricted,
        maintainer=maintainer,
        region=region,
        publisher=publisher,
        sub_category=sub_category,
        tag=tag,
        delivery_channel=delivery_channel,
        theme=theme,
        release_date=release_date,
        language=language,
        status=status,
        image=image,
        logo=logo,
        dataset=dataset,
        **kwargs,
    )
    # Attach this client so the returned Product can make API calls through it.
    product_obj.client = self
    return product_obj

report(identifier, title='', category=None, description='', frequency='Once', is_internal_only_dataset=False, is_third_party_data=True, is_restricted=None, is_raw_data=True, maintainer='J.P. Morgan Fusion', source=None, region=None, publisher='J.P. Morgan', product=None, sub_category=None, tags=None, created_date=None, modified_date=None, delivery_channel='API', language='English', status='Available', type_='Report', container_type='Snapshot-Full', snowflake=None, complexity=None, is_immutable=None, is_mnpi=None, is_pci=None, is_pii=None, is_client=None, is_public=None, is_internal=None, is_confidential=None, is_highly_confidential=None, is_active=None, owners=None, application_id=None, report=None, **kwargs)

Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

Parameters:

Name Type Description Default
identifier str

Dataset identifier.

required
title str

Dataset title. If not provided, defaults to identifier.

''
category str | list[str] | None

A category or list of categories for the dataset.

None
description str

Dataset description. If not provided, defaults to identifier.

''
frequency str

The frequency of the dataset. Defaults to "Once".

'Once'
is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

False
is_third_party_data bool

Flag for third party data. Defaults to True.

True
is_restricted bool | None

Flag for restricted datasets. Defaults to None.

None
is_raw_data bool

Flag for raw datasets. Defaults to True.

True
maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

'J.P. Morgan Fusion'
source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

None
region str | list[str] | None

Region. Defaults to None.

None
publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

'J.P. Morgan'
product str | list[str] | None

Product to associate dataset with. Defaults to None.

None
sub_category str | list[str] | None

Sub-category. Defaults to None.

None
tags str | list[str] | None

Tags used for search purposes. Defaults to None.

None
created_date str | None

Created date. Defaults to None.

None
modified_date str | None

Modified date. Defaults to None.

None
delivery_channel str | list[str]

Delivery channel. Defaults to "API".

'API'
language str

Language. Defaults to "English".

'English'
status str

Status. Defaults to "Available".

'Available'
type_ str | None

Dataset type. Defaults to "Report".

'Report'
container_type str | None

Container type. Defaults to "Snapshot-Full".

'Snapshot-Full'
snowflake str | None

Snowflake account connection. Defaults to None.

None
complexity str | None

Complexity. Defaults to None.

None
is_immutable bool | None

Flag for immutable datasets. Defaults to None.

None
is_mnpi bool | None

is_mnpi. Defaults to None.

None
is_pci bool | None

is_pci. Defaults to None.

None
is_pii bool | None

is_pii. Defaults to None.

None
is_client bool | None

is_client. Defaults to None.

None
is_public bool | None

is_public. Defaults to None.

None
is_internal bool | None

is_internal. Defaults to None.

None
is_confidential bool | None

is_confidential. Defaults to None.

None
is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

None
is_active bool | None

is_active. Defaults to None.

None
owners list[str] | None

The owners of the dataset. Defaults to None.

None
application_id str | None

The application ID of the dataset. Defaults to None.

None
report dict[str, str] | None

The report metadata. Specifies the tier of the report. Required for registered reports to the catalog.

None

Returns:

Name Type Description
Dataset Report

Fusion Report class instance.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.report(identifier="DATASET_1")
Note

See the dataset module for more information on functionalities of report objects.

Source code in py_src/fusion/fusion.py
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
def report(  # noqa: PLR0913
    self,
    identifier: str,
    title: str = "",
    category: str | list[str] | None = None,
    description: str = "",
    frequency: str = "Once",
    is_internal_only_dataset: bool = False,
    is_third_party_data: bool = True,
    is_restricted: bool | None = None,
    is_raw_data: bool = True,
    maintainer: str | None = "J.P. Morgan Fusion",
    source: str | list[str] | None = None,
    region: str | list[str] | None = None,
    publisher: str = "J.P. Morgan",
    product: str | list[str] | None = None,
    sub_category: str | list[str] | None = None,
    tags: str | list[str] | None = None,
    created_date: str | None = None,
    modified_date: str | None = None,
    delivery_channel: str | list[str] = "API",
    language: str = "English",
    status: str = "Available",
    type_: str | None = "Report",
    container_type: str | None = "Snapshot-Full",
    snowflake: str | None = None,
    complexity: str | None = None,
    is_immutable: bool | None = None,
    is_mnpi: bool | None = None,
    is_pci: bool | None = None,
    is_pii: bool | None = None,
    is_client: bool | None = None,
    is_public: bool | None = None,
    is_internal: bool | None = None,
    is_confidential: bool | None = None,
    is_highly_confidential: bool | None = None,
    is_active: bool | None = None,
    owners: list[str] | None = None,
    application_id: str | dict[str, str] | None = None,
    report: dict[str, str] | None = None,
    **kwargs: Any,
) -> Report:
    """Instantiate Report object with this client for metadata creation for managing regulatory reporting metadata.

    Args:
        identifier (str): Dataset identifier.
        title (str, optional): Dataset title. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset.
            Defaults to None.
        description (str, optional): Dataset description. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Report".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application ID of the dataset.
            Defaults to None.
        report (dict[str, str] | None, optional): The report metadata. Specifies the tier of the report.
            Required for registered reports to the catalog.
        **kwargs (Any): Additional keyword arguments forwarded unchanged to the Report constructor.

    Returns:
        Report: Fusion Report class instance.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.report(identifier="DATASET_1")

    Note:
        See the dataset module for more information on functionalities of report objects.

    """
    report_obj = Report(
        identifier=identifier,
        title=title,
        category=category,
        description=description,
        frequency=frequency,
        is_internal_only_dataset=is_internal_only_dataset,
        is_third_party_data=is_third_party_data,
        is_restricted=is_restricted,
        is_raw_data=is_raw_data,
        maintainer=maintainer,
        source=source,
        region=region,
        publisher=publisher,
        product=product,
        sub_category=sub_category,
        tags=tags,
        created_date=created_date,
        modified_date=modified_date,
        delivery_channel=delivery_channel,
        language=language,
        status=status,
        type_=type_,
        container_type=container_type,
        snowflake=snowflake,
        complexity=complexity,
        is_immutable=is_immutable,
        is_mnpi=is_mnpi,
        is_pci=is_pci,
        is_pii=is_pii,
        is_client=is_client,
        is_public=is_public,
        is_internal=is_internal,
        is_confidential=is_confidential,
        is_highly_confidential=is_highly_confidential,
        is_active=is_active,
        owners=owners,
        application_id=application_id,
        report=report,
        **kwargs,
    )
    # Attach this client so the returned Report can make API calls through it.
    report_obj.client = self
    return report_obj

to_bytes(dataset, series_member, dataset_format='parquet', catalog=None)

Returns an instance of dataset (the distribution) as a bytes object.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
series_member (str)

A dataset series member identifier

required
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
Source code in py_src/fusion/fusion.py
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
def to_bytes(
    self,
    dataset: str,
    series_member: str,
    dataset_format: str = "parquet",
    catalog: str | None = None,
) -> BytesIO:
    """Return a single distribution of a dataset as an in-memory bytes object.

    Args:
        dataset (str): A dataset identifier.
        series_member (str): A dataset series member identifier.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.

    Returns:
        BytesIO: The raw distribution content.
    """
    resolved_catalog = self._use_catalog(catalog)

    # Build the distribution URL, then fetch it over the client session.
    distribution_url = distribution_to_url(
        self.root_url,
        dataset,
        series_member,
        dataset_format,
        resolved_catalog,
    )
    return Fusion._call_for_bytes_object(distribution_url, self.session)

to_df(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, columns=None, filters=None, force_download=False, download_folder=None, dataframe_type='pandas', **kwargs)

Gets distributions for a specified date or date range and returns the data as a dataframe.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
columns List

A list of columns to return from a parquet file. Defaults to None

None
filters List

List[Tuple] or List[List[Tuple]] or None (default) Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. If use_legacy_dataset is True, filters can only reference partition keys and only a hive-style directory structure is supported. When setting use_legacy_dataset to False, also within-file level filtering and different partitioning schemes are supported. More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

None
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
dataframe_type str

Type

'pandas'
Source code in py_src/fusion/fusion.py
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
def to_df(  # noqa: PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    columns: list[str] | None = None,
    filters: PyArrowFilterT | None = None,
    force_download: bool = False,
    download_folder: str | None = None,
    dataframe_type: str = "pandas",
    **kwargs: Any,
) -> pd.DataFrame:
    """Gets distributions for a specified date or date range and returns the data as a dataframe.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): Either a single date or a range identified by a start or end date,
            or both separated with a ":". Defaults to 'latest' which will return the most recent
            instance of the dataset.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to download in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data download Defaults to True.
        columns (List, optional): A list of columns to return from a parquet file. Defaults to None
        filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default)
            Rows which do not match the filter predicate will be removed from scanned data.
            Partition keys embedded in a nested directory structure will be exploited to avoid
            loading files at all if they contain no matching rows. If use_legacy_dataset is True,
            filters can only reference partition keys and only a hive-style directory structure
            is supported. When setting use_legacy_dataset to False, also within-file level filtering
            and different partitioning schemes are supported.
            More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        force_download (bool, optional): If True then will always download a file even
            if it is already on disk. Defaults to False.
        download_folder (str, optional): The path, absolute or relative, where downloaded files are saved.
            Defaults to download_folder as set in __init__
        dataframe_type (str, optional): Which dataframe library to return the data in:
            "pandas" or "polars". Defaults to "pandas".
        **kwargs (Any): Extra keyword arguments passed through to the underlying file reader.

    Returns:
        class:`pandas.DataFrame`: a dataframe containing the requested data.
            If multiple dataset instances are retrieved then these are concatenated first.

    Raises:
        ValueError: If the download call returned no path information, or if `dataframe_type`
            is not one of "pandas" or "polars".
        Exception: If any distribution failed to download, or no reader exists for `dataset_format`.
        APIResponseError: If no series members match the requested date (range) and format.
    """
    catalog = self._use_catalog(catalog)

    # sample data is limited to csv
    if dt_str == "sample":
        dataset_format = "csv"

    if not download_folder:
        download_folder = self.download_folder
    download_res = self.download(
        dataset,
        dt_str,
        dataset_format,
        catalog,
        n_par,
        show_progress,
        force_download,
        download_folder,
        return_paths=True,
    )

    if not download_res:
        raise ValueError("Must specify 'return_paths=True' in download call to use this function")

    if not all(res[0] for res in download_res):
        failed_res = [res for res in download_res if not res[0]]
        raise Exception(
            f"Not all downloads were successfully completed. "
            f"Re-run to collect missing files. The following failed:\n{failed_res}"
        )

    files = [res[1] for res in download_res]

    pd_read_fn_map = {
        "csv": read_csv,
        "parquet": read_parquet,
        "parq": read_parquet,
        "json": read_json,
        "raw": read_csv,
    }

    # Every format shares the same default reader kwargs; "parq" aliases "parquet".
    pd_read_default_kwargs: dict[str, dict[str, object]] = {
        fmt: {
            "columns": columns,
            "filters": filters,
            "fs": self.fs,
            "dataframe_type": dataframe_type,
        }
        for fmt in ("csv", "parquet", "json", "raw")
    }
    pd_read_default_kwargs["parq"] = pd_read_default_kwargs["parquet"]

    pd_reader = pd_read_fn_map.get(dataset_format)
    pd_read_kwargs = pd_read_default_kwargs.get(dataset_format, {})
    if not pd_reader:
        raise Exception(f"No pandas function to read file in format {dataset_format}")

    # Caller-supplied reader kwargs override the defaults.
    pd_read_kwargs.update(kwargs)

    if len(files) == 0:
        raise APIResponseError(
            f"No series members for dataset: {dataset} "
            f"in date or date range: {dt_str} and format: {dataset_format}"
        )
    if dataset_format in ["parquet", "parq"]:
        # Parquet readers accept the full list of files in one call.
        data_df = pd_reader(files, **pd_read_kwargs)  # type: ignore
    elif dataset_format == "raw":

        def _read_zip(path: str) -> pd.DataFrame:
            # Read every member of one zip archive and concatenate them.
            # Open the archive once (previously it was re-opened for each member).
            zf = ZipFile(path)
            return pd.concat(
                [pd_reader(zf.open(name), **pd_read_kwargs) for name in zf.namelist()],  # type: ignore
                ignore_index=True,
            )

        data_df = pd.concat((_read_zip(f) for f in files), ignore_index=True)
    else:
        dataframes = (pd_reader(f, **pd_read_kwargs) for f in files)  # type: ignore
        if dataframe_type == "pandas":
            data_df = pd.concat(dataframes, ignore_index=True)
        elif dataframe_type == "polars":
            import polars as pl

            data_df = pl.concat(dataframes, how="diagonal")  # type: ignore
        else:
            # Previously an unrecognised dataframe_type fell through and raised
            # NameError on the return; fail explicitly instead.
            raise ValueError(f"Unsupported dataframe_type: {dataframe_type!r}; use 'pandas' or 'polars'.")

    return data_df

to_table(dataset, dt_str='latest', dataset_format='parquet', catalog=None, n_par=None, show_progress=True, columns=None, filters=None, force_download=False, download_folder=None, **kwargs)

Gets distributions for a specified date or date range and returns the data as an arrow table.

Parameters:

Name Type Description Default
dataset str

A dataset identifier

required
dt_str str

Either a single date or a range identified by a start or end date, or both separated with a ":". Defaults to 'latest' which will return the most recent instance of the dataset.

'latest'
dataset_format str

The file format, e.g. CSV or Parquet. Defaults to 'parquet'.

'parquet'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data download Defaults to True.

True
columns List

A list of columns to return from a parquet file. Defaults to None

None
filters List

List[Tuple] or List[List[Tuple]] or None (default) Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. If use_legacy_dataset is True, filters can only reference partition keys and only a hive-style directory structure is supported. When setting use_legacy_dataset to False, also within-file level filtering and different partitioning schemes are supported. More on https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html

None
force_download bool

If True then will always download a file even if it is already on disk. Defaults to False.

False
download_folder str

The path, absolute or relative, where downloaded files are saved. Defaults to download_folder as set in init

None
Source code in py_src/fusion/fusion.py
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
def to_table(  # noqa: PLR0913
    self,
    dataset: str,
    dt_str: str = "latest",
    dataset_format: str = "parquet",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    columns: list[str] | None = None,
    filters: PyArrowFilterT | None = None,
    force_download: bool = False,
    download_folder: str | None = None,
    **kwargs: Any,
) -> pa.Table:
    """Download distributions for a date or date range and load them into an arrow table.

    Args:
        dataset (str): A dataset identifier
        dt_str (str, optional): A single date, or a start/end range separated by ":".
            Defaults to 'latest', which resolves to the most recent instance of the dataset.
        dataset_format (str, optional): The file format, e.g. CSV or Parquet. Defaults to 'parquet'.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Number of distributions to download in parallel.
            Defaults to all available cpus.
        show_progress (bool, optional): Display a progress bar during download. Defaults to True.
        columns (List, optional): Columns to return from a parquet file. Defaults to None.
        filters (List, optional): List[Tuple] or List[List[Tuple]] or None (default).
            Rows failing the filter predicate are dropped from scanned data; partition keys
            encoded in the directory layout are used to skip non-matching files entirely.
            See https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        force_download (bool, optional): Always re-download even if the file is already
            on disk. Defaults to False.
        download_folder (str, optional): Absolute or relative folder for downloaded files.
            Defaults to the download_folder set in __init__.

    Returns:
        class:`pyarrow.Table`: a table containing the requested data. Multiple dataset
            instances are concatenated into a single table.
    """
    catalog = self._use_catalog(catalog)
    n_par = cpu_count(n_par)
    # Fall back to the instance-level default location when none is supplied.
    download_folder = download_folder or self.download_folder
    results = self.download(
        dataset,
        dt_str,
        dataset_format,
        catalog,
        n_par,
        show_progress,
        force_download,
        download_folder,
        return_paths=True,
    )

    if not results:
        raise ValueError("Must specify 'return_paths=True' in download call to use this function")

    # Every entry is a (success, path, ...) tuple; abort if any download failed.
    failures = [entry for entry in results if not entry[0]]
    if failures:
        raise RuntimeError(
            f"Not all downloads were successfully completed. "
            f"Re-run to collect missing files. The following failed:\n{failures}"
        )

    paths = [entry[1] for entry in results]

    format_readers = {
        "csv": csv_to_table,
        "parquet": parquet_to_table,
        "parq": parquet_to_table,
        "json": json_to_table,
        "raw": csv_to_table,
    }

    common_args: dict[str, object] = {"columns": columns, "filters": filters, "fs": self.fs}
    base_kwargs: dict[str, dict[str, object]] = {
        "csv": dict(common_args),
        "parquet": dict(common_args),
        "json": dict(common_args),
        "raw": dict(common_args),
    }
    # "parq" is an alias for the parquet reader configuration.
    base_kwargs["parq"] = base_kwargs["parquet"]

    read_fn = format_readers.get(dataset_format)
    reader_args = base_kwargs.get(dataset_format, {})
    if not read_fn:
        raise AssertionError(f"No function to read file in format {dataset_format}")

    # Caller-supplied kwargs win over the defaults.
    reader_args.update(kwargs)

    if len(paths) == 0:
        raise APIResponseError(
            f"No series members for dataset: {dataset} "
            f"in date or date range: {dt_str} and format: {dataset_format}"
        )
    if dataset_format in ["parquet", "parq"]:
        # The parquet reader accepts the whole file list at once.
        table = read_fn(paths, **reader_args)  # type: ignore
    else:
        # Other formats are read file-by-file and concatenated.
        table = pa.concat_tables(read_fn(p, **reader_args) for p in paths)  # type: ignore

    return table

upload(path, dataset=None, dt_str='latest', catalog=None, n_par=None, show_progress=True, return_paths=False, multipart=True, chunk_size=5 * 2 ** 20, from_date=None, to_date=None, preserve_original_name=False, additional_headers=None)

Uploads the requested files/files to Fusion.

Parameters:

Name Type Description Default
path str

path to a file or a folder with files

required
dataset str

Dataset identifier to which the file will be uploaded (for single file only). If not provided the dataset will be implied from file's name.

None
dt_str str

A file name. Can be any string but is usually a date. Defaults to 'latest' which will return the most recent. Relevant for a single file upload only. If not provided the dataset will be implied from file's name.

'latest'
catalog str

A catalog identifier. Defaults to 'common'.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all cpus available.

None
show_progress bool

Display a progress bar during data upload. Defaults to True.

True
return_paths bool

Return paths and success statuses of the downloaded files.

False
multipart bool

Is multipart upload.

True
chunk_size int

Maximum chunk size.

5 * 2 ** 20
from_date str

start of the data date range contained in the distribution, defaults to upload date.

None
to_date str

end of the data date range contained in the distribution, defaults to upload date.

None
preserve_original_name bool

Preserve the original name of the file. Defaults to False.

False
Source code in py_src/fusion/fusion.py
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
def upload(  # noqa: PLR0913
    self,
    path: str,
    dataset: str | None = None,
    dt_str: str = "latest",
    catalog: str | None = None,
    n_par: int | None = None,
    show_progress: bool = True,
    return_paths: bool = False,
    multipart: bool = True,
    chunk_size: int = 5 * 2**20,
    from_date: str | None = None,
    to_date: str | None = None,
    preserve_original_name: bool | None = False,
    additional_headers: dict[str, str] | None = None,
) -> list[tuple[bool, str, str | None]] | None:
    """Uploads the requested file/files to Fusion.

    Args:
        path (str): path to a file or a folder with files
        dataset (str, optional): Dataset identifier to which the file will be uploaded (for single file only).
                                If not provided the dataset will be implied from file's name.
        dt_str (str, optional): A file name. Can be any string but is usually a date.
                                Defaults to 'latest' which will return the most recent.
                                Relevant for a single file upload only. If not provided the dataset will
                                be implied from file's name.
        catalog (str, optional): A catalog identifier. Defaults to 'common'.
        n_par (int, optional): Specify how many distributions to upload in parallel.
            Defaults to all cpus available.
        show_progress (bool, optional): Display a progress bar during data upload. Defaults to True.
        return_paths (bool, optional): Return paths and success statuses of the uploaded files.
        multipart (bool, optional): Is multipart upload.
        chunk_size (int, optional): Maximum chunk size.
        from_date (str, optional): start of the data date range contained in the distribution,
            defaults to upload date
        to_date (str, optional): end of the data date range contained in the distribution,
            defaults to upload date.
        preserve_original_name (bool, optional): Preserve the original name of the file. Defaults to False.
        additional_headers (dict, optional): Additional HTTP headers passed through to the upload
            requests. Defaults to None.

    Returns:
        list[tuple[bool, str, str | None]] | None: a list of (success, path, error message)
            tuples when `return_paths` is True, otherwise None.
    """
    catalog = self._use_catalog(catalog)

    if not self.fs.exists(path):
        raise RuntimeError("The provided path does not exist")

    fs_fusion = self.get_fusion_filesystem()
    if self.fs.info(path)["type"] == "directory":
        # Folder upload: dataset/date are implied from each file's name, so only
        # files whose names validate against the catalog are kept.
        file_path_lst = self.fs.find(path)
        local_file_validation = validate_file_names(file_path_lst, fs_fusion)
        file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
        file_name = [f.split("/")[-1] for f in file_path_lst]
        is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
        local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
    else:
        file_path_lst = [path]
        if not catalog or not dataset:
            # Without an explicit catalog/dataset, both are inferred from the file name.
            local_file_validation = validate_file_names(file_path_lst, fs_fusion)
            file_path_lst = [f for flag, f in zip(local_file_validation, file_path_lst) if flag]
            is_raw_lst = is_dataset_raw(file_path_lst, fs_fusion)
            local_url_eqiv = [path_to_url(i, r) for i, r in zip(file_path_lst, is_raw_lst)]
            if preserve_original_name:
                raise ValueError("preserve_original_name can only be used when catalog and dataset are provided.")
        else:
            # Normalize an 8-digit dt_str (YYYYMMDD) through pandas to validate it.
            # NOTE: inside this branch dt_str has matched ^\d{8}$, so it can never
            # be "latest"; the old "latest" special-case here was dead code.
            date_identifier = re.compile(r"^(\d{4})(\d{2})(\d{2})$")
            if date_identifier.match(dt_str):
                dt_str = pd.Timestamp(dt_str).date().strftime("%Y%m%d")

            if catalog not in fs_fusion.ls("") or dataset not in [
                i.split("/")[-1] for i in fs_fusion.ls(f"{catalog}/datasets")
            ]:
                msg = (
                    f"The file has not been uploaded, one of the catalog: {catalog} "
                    f"or dataset: {dataset} does not exist."
                )
                warnings.warn(msg, stacklevel=2)
                return [(False, path, msg)]
            file_format = path.split(".")[-1]
            file_name = [path.split("/")[-1]]
            # Unrecognized extensions are uploaded as raw distributions.
            file_format = "raw" if file_format not in RECOGNIZED_FORMATS else file_format

            local_url_eqiv = [
                "/".join(distribution_to_url("", dataset, dt_str, file_format, catalog, False).split("/")[1:])
            ]

    # Map each local path to its Fusion distribution URL (and original name if kept).
    if not preserve_original_name:
        data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv]).T
        data_map_df.columns = pd.Index(["path", "url"])
    else:
        data_map_df = pd.DataFrame([file_path_lst, local_url_eqiv, file_name]).T
        data_map_df.columns = pd.Index(["path", "url", "file_name"])

    n_par = cpu_count(n_par)
    parallel = len(data_map_df) > 1
    res = upload_files(
        fs_fusion,
        self.fs,
        data_map_df,
        parallel=parallel,
        n_par=n_par,
        multipart=multipart,
        chunk_size=chunk_size,
        show_progress=show_progress,
        from_date=from_date,
        to_date=to_date,
        additional_headers=additional_headers,
    )

    # Surface partial failures as a warning rather than an exception, so the
    # successful uploads are not discarded.
    if not all(r[0] for r in res):
        failed_res = [r for r in res if not r[0]]
        msg = f"Not all uploads were successfully completed. The following failed:\n{failed_res}"
        logger.warning(msg)
        warnings.warn(msg, stacklevel=2)

    return res if return_paths else None

Synchronisation between the local filesystem and Fusion.

Parameters:

Name Type Description Default
fs_fusion filesystem

Fusion filesystem.

required
fs_local filesystem

Local filesystem.

required
products list

List of products.

None
datasets list

List of datasets.

None
catalog str

Fusion catalog.

None
direction str

Direction of synchronisation: upload/download.

'upload'
flatten bool

Flatten the folder structure.

False
dataset_format str

Dataset format for upload/download.

None
n_par int

Specify how many distributions to download in parallel. Defaults to all.

None
show_progress bool

Display a progress bar during data download. Defaults to True.

True
local_path str

path to files in the local filesystem, e.g., "s3a://my_bucket/"

''
log_level int

Logging level. Error level by default.

ERROR
log_path str

The folder path where the log is stored. Defaults to ".".

'.'
Source code in py_src/fusion/fs_sync.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
def fsync(  # noqa: PLR0913
    fs_fusion: fsspec.filesystem,
    fs_local: fsspec.filesystem,
    products: Optional[list[str]] = None,
    datasets: Optional[list[str]] = None,
    catalog: Optional[str] = None,
    direction: str = "upload",
    flatten: bool = False,
    dataset_format: Optional[str] = None,
    n_par: Optional[int] = None,
    show_progress: bool = True,
    local_path: str = "",
    log_level: int = logging.ERROR,
    log_path: str = ".",
) -> None:
    """Synchronisation between the local filesystem and Fusion.

    Runs an endless polling loop: it diffs local and Fusion state, transfers any
    differences in the requested direction, and sleeps 10 seconds when nothing
    changed. The loop only exits when the user confirms via KeyboardInterrupt.

    Args:
        fs_fusion (fsspec.filesystem): Fusion filesystem.
        fs_local (fsspec.filesystem): Local filesystem.
        products (list): List of products.
        datasets (list): List of datasets.
        catalog (str): Fusion catalog.
        direction (str): Direction of synchronisation: upload/download.
        flatten (bool): Flatten the folder structure.
        dataset_format (str): Dataset format for upload/download.
        n_par (int, optional): Specify how many distributions to download in parallel. Defaults to all.
        show_progress (bool): Display a progress bar during data download Defaults to True.
        local_path (str): path to files in the local filesystem, e.g., "s3a://my_bucket/"
        log_level (int): Logging level. Error level by default.
        log_path (str): The folder path where the log is stored. Defaults to ".".

    Returns:
        None: loops until interrupted by the user (KeyboardInterrupt + typing "exit").
    """

    # Reset the module logger so repeated calls don't stack duplicate handlers,
    # then log to both stdout and a file under log_path.
    if logger.hasHandlers():
        logger.handlers.clear()
    file_handler = logging.FileHandler(filename="{}/{}".format(log_path, "fusion_fsync.log"))
    logging.addLevelName(VERBOSE_LVL, "VERBOSE")
    stdout_handler = logging.StreamHandler(sys.stdout)
    formatter = logging.Formatter(
        "%(asctime)s.%(msecs)03d %(name)s:%(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    stdout_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    logger.addHandler(file_handler)
    logger.setLevel(log_level)

    catalog = catalog if catalog else "common"
    datasets = datasets if datasets else []
    products = products if products else []

    # NOTE: validation via assert is stripped when Python runs with -O.
    assert len(products) > 0 or len(datasets) > 0, "At least one list products or datasets should be non-empty."
    assert direction in [
        "upload",
        "download",
    ], "The direction must be either upload or download."

    # Normalise local_path to end with "/" so it can be prefixed verbatim.
    if len(local_path) > 0 and local_path[-1] != "/":
        local_path += "/"

    # Expand each product into its member datasets.
    # NOTE(review): `datasets +=` mutates the caller's list in place when one was
    # passed in — confirm this side effect is intended.
    for product in products:
        res = json.loads(fs_fusion.cat(f"{catalog}/products/{product}").decode())
        datasets += [r["identifier"] for r in res["resources"]]

    assert len(datasets) > 0, "The supplied products did not contain any datasets."

    # Poll forever: re-snapshot both sides and transfer only when state changed.
    local_state = pd.DataFrame()
    fusion_state = pd.DataFrame()
    while True:
        try:
            local_state_temp = _get_local_state(
                fs_local,
                fs_fusion,
                datasets,
                catalog,
                dataset_format,
                local_state,
                local_path,
            )
            fusion_state_temp = _get_fusion_df(fs_fusion, datasets, catalog, flatten, dataset_format)
            if not local_state_temp.equals(local_state) or not fusion_state_temp.equals(fusion_state):
                res = _synchronize(
                    fs_fusion,
                    fs_local,
                    local_state_temp,
                    fusion_state_temp,
                    direction,
                    n_par,
                    show_progress,
                    local_path,
                )
                # Only commit the new snapshots when every transfer succeeded, so
                # failed items are retried on the next pass.
                if len(res) == 0 or all(i[0] for i in res):
                    local_state = local_state_temp
                    fusion_state = fusion_state_temp

                if not all(r[0] for r in res):
                    failed_res = [r for r in res if not r[0]]
                    msg = f"Not all {direction}s were successfully completed. The following failed:\n{failed_res}"
                    errs = [r for r in res if not r[2]]
                    logger.warning(msg)
                    logger.warning(errs)
                    warnings.warn(msg, stacklevel=2)

            else:
                logger.info("All synced, sleeping")
                time.sleep(10)

        except KeyboardInterrupt:  # noqa: PERF203
            # Ctrl-C prompts for confirmation instead of exiting immediately.
            if input("Type exit to exit: ") != "exit":
                continue
            break

        except Exception as _:
            # Best-effort loop: log any failure and keep syncing.
            logger.error("Exception thrown", exc_info=True)
            continue

Fusion Product class and functions.

Product dataclass

Fusion Product class for managing product metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

A unique identifier for the product.

title str

Product title. Defaults to "".

category str | list[str] | None

Product category. Defaults to None.

short_abstract str

Short abstract of the product. Defaults to "".

description str

Product description. If not provided, defaults to identifier.

is_active bool

Boolean for Active status. Defaults to True.

is_restricted bool | None

Flag for restricted products. Defaults to None.

maintainer str | list[str] | None

Product maintainer. Defaults to None.

region str | list[str] | None

Product region. Defaults to None.

publisher str | None

Name of vendor that publishes the data. Defaults to None.

sub_category str | list[str] | None

Product sub-category. Defaults to None.

tag str | list[str] | None

Tags used for search purposes. Defaults to None.

delivery_channel str | list[str]

Product delivery channel. Defaults to ["API"].

theme str | None

Product theme. Defaults to None.

release_date str | None

Product release date. Defaults to None.

language str

Product language. Defaults to "English".

status str

Product status. Defaults to "Available".

image str

Product image. Defaults to "".

logo str

Product logo. Defaults to "".

dataset str | list[str] | None

Product datasets. Defaults to None.

_client Any

Fusion client object. Defaults to None.

Source code in py_src/fusion/product.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
@dataclass
class Product(metaclass=CamelCaseMeta):
    """Fusion Product class for managing product metadata in a Fusion catalog.

    Attributes:
        identifier (str): A unique identifier for the product.
        title (str, optional): Product title. Defaults to "".
        category (str | list[str] | None, optional): Product category. Defaults to None.
        short_abstract (str, optional): Short abstract of the product. Defaults to "".
        description (str, optional): Product description. If not provided, defaults to the title.
        is_active (bool, optional): Boolean for Active status. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted products. Defaults to None.
        maintainer (str | list[str] | None, optional): Product maintainer. Defaults to None.
        region (str | list[str], optional): Product region. Defaults to ["Global"].
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        sub_category (str | list[str] | None, optional): Product sub-category. Defaults to None.
        tag (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        delivery_channel (str | list[str], optional): Product delivery channel. Defaults to ["API"].
        theme (str | None, optional): Product theme. Defaults to None.
        release_date (str | None, optional): Product release date. Defaults to None.
        language (str, optional): Product language. Defaults to "English".
        status (str, optional): Product status. Defaults to "Available".
        image (str, optional): Product image. Defaults to "".
        logo (str, optional): Product logo. Defaults to "".
        dataset (str | list[str] | None, optional): Product datasets. Defaults to None.
        _client (Any, optional): Fusion client object. Defaults to None.

    """

    identifier: str
    title: str = ""
    category: str | list[str] | None = None
    short_abstract: str = ""
    description: str = ""
    is_active: bool = True
    is_restricted: bool | None = None
    maintainer: str | list[str] | None = None
    region: str | list[str] = field(default_factory=lambda: ["Global"])
    publisher: str = "J.P. Morgan"
    sub_category: str | list[str] | None = None
    tag: str | list[str] | None = None
    delivery_channel: str | list[str] = field(default_factory=lambda: ["API"])
    theme: str | None = None
    release_date: str | None = None
    language: str = "English"
    status: str = "Available"
    image: str = ""
    logo: str = ""
    dataset: str | list[str] | None = None

    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __repr__(self: Product) -> str:
        """Return an object representation of the Product object.

        Returns:
            str: Object representation of the product.

        """
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        # Plain string (not an f-string): there are no placeholders in the prefix.
        return "Product(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __post_init__(self: Product) -> None:
        """Format Product metadata fields after object instantiation."""
        self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
        # Derive fallbacks: title from identifier, description/short_abstract from title.
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description != "" else self.title
        self.short_abstract = tidy_string(self.short_abstract) if self.short_abstract != "" else self.title
        self.category = (
            self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
        )
        self.tag = self.tag if isinstance(self.tag, list) or self.tag is None else make_list(self.tag)
        self.dataset = (
            self.dataset if isinstance(self.dataset, list) or self.dataset is None else make_list(self.dataset)
        )
        self.sub_category = (
            self.sub_category
            if isinstance(self.sub_category, list) or self.sub_category is None
            else make_list(self.sub_category)
        )
        self.is_active = self.is_active if isinstance(self.is_active, bool) else make_bool(self.is_active)
        self.is_restricted = (
            self.is_restricted
            if isinstance(self.is_restricted, bool) or self.is_restricted is None
            else make_bool(self.is_restricted)
        )
        self.maintainer = (
            self.maintainer
            if isinstance(self.maintainer, list) or self.maintainer is None
            else make_list(self.maintainer)
        )
        self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
        self.delivery_channel = (
            self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
        )
        self.release_date = convert_date_format(self.release_date) if self.release_date else None

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version so camelCase
        # attribute names (as used by the API) resolve transparently.
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # Normalize camelCase assignments onto the snake_case field.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Product. Set automatically, if the Product is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product")
            >>> product.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client.

        Prefers an explicitly passed client over the instance's stored client.

        Raises:
            ValueError: If neither an explicit nor a stored client is available.

        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(cls: type[Product], series: pd.Series[Any]) -> Product:
        """Instantiate a Product object from a pandas Series.

        Args:
            series (pd.Series[Any]): Product metadata as a pandas Series.

        Returns:
            Product: Product object.

        """
        # Normalize labels (strip spaces/underscores, lowercase) so variant
        # column headings all map onto the expected keys.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower())
        series = series.rename({"tag": "tags", "dataset": "datasets"})
        # Prefer "abstract"; fall back to "shortabstract" when absent.
        # (Default to None so the fallback can actually trigger.)
        short_abstract = series.get("abstract", None)
        short_abstract = series.get("shortabstract", "") if short_abstract is None else short_abstract

        return cls(
            title=series.get("title", ""),
            identifier=series.get("identifier", ""),
            category=series.get("category", None),
            short_abstract=short_abstract,
            description=series.get("description", ""),
            theme=series.get("theme", None),
            release_date=series.get("releasedate", None),
            is_active=series.get("isactive", True),
            is_restricted=series.get("isrestricted", None),
            maintainer=series.get("maintainer", None),
            region=series.get("region", "Global"),
            publisher=series.get("publisher", "J.P. Morgan"),
            sub_category=series.get("subcategory", None),
            tag=series.get("tags", None),
            delivery_channel=series.get("deliverychannel", "API"),
            language=series.get("language", "English"),
            status=series.get("status", "Available"),
            dataset=series.get("datasets", None),
        )

    @classmethod
    def _from_dict(cls: type[Product], data: dict[str, Any]) -> Product:
        """Instantiate a Product object from a dictionary.

        Unknown keys are silently dropped; camelCase keys are accepted.

        Args:
            data (dict[str, Any]): Product metadata as a dictionary.

        Returns:
            Product: Product object.

        """
        keys = [f.name for f in fields(cls)]
        data = {camel_to_snake(k): v for k, v in data.items()}
        data = {k: v for k, v in data.items() if k in keys}
        return cls(**data)

    @classmethod
    def _from_csv(cls: type[Product], file_path: str, identifier: str | None = None) -> Product:
        """Instantiate a Product object from a CSV file.

        Args:
            file_path (str): Path to the CSV file.
            identifier (str | None, optional): Product identifier for filtering if multiple products are defined
                in the csv. Defaults to None, in which case the first row is used.

        Returns:
            Product: Product object.

        """
        data = pd.read_csv(file_path)

        return (
            Product._from_series(data[data["identifier"] == identifier].reset_index(drop=True).iloc[0])
            if identifier
            else Product._from_series(data.reset_index(drop=True).iloc[0])
        )

    def from_object(
        self,
        product_source: Product | dict[str, Any] | str | pd.Series[Any],
    ) -> Product:
        """Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

        Args:
            product_source (Product | dict[str, Any] | str | pd.Series[Any]): Product metadata source.

        Raises:
            TypeError: If the object provided is not a Product, dictionary, path to CSV file, JSON string,
            or pandas Series.

        Returns:
            Product: Product object.

        Examples:
            Instantiating a Product object from a dictionary:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_dict = {
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available"
            ... }
            >>> product = fusion.product("my_product").from_object(product_dict)

            Instantiating a Product object from a JSON string:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_json = '{
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available",
            ... }'
            >>> product = fusion.product("my_product").from_object(product_json)

            Instantiating a Product object from a CSV file:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_object("path/to/product.csv")

            Instantiating a Product object from a pandas Series:

            >>> from fusion import Fusion
            >>> from fusion.product import Product
            >>> fusion = Fusion()
            >>> product_series = pd.Series({
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data",
            ...     "short_abstract": "My product is awesome",
            ...     "description": "My product is very awesome",
            ...     "is_active": True,
            ...     "is_restricted": False,
            ...     "maintainer": "My Company",
            ...     "region": "Global",
            ...     "publisher": "My Company",
            ...     "sub_category": "Data",
            ...     "tag": "My Company",
            ...     "delivery_channel": "API",
            ...     "theme": "Data",
            ...     "release_date": "2021-01-01",
            ...     "language": "English",
            ...     "status": "Available",
            ... })
            >>> product = fusion.product("my_product").from_object(product_series)

        """
        if isinstance(product_source, Product):
            product = product_source
        elif isinstance(product_source, dict):
            product = Product._from_dict(product_source)
        elif isinstance(product_source, str):
            # A string is either inline JSON or a path to a CSV file.
            if _is_json(product_source):
                product = Product._from_dict(js.loads(product_source))
            else:
                product = Product._from_csv(product_source)
        elif isinstance(product_source, pd.Series):
            product = Product._from_series(product_source)
        else:
            raise TypeError(f"Could not resolve the object provided: {product_source}")
        product.client = self._client
        return product

    def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Product:
        """Instantiate a Product object from a Fusion catalog.

        Args:
            catalog (str | None, optional): Catalog identifer. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Product: Product object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        resp = client.session.get(f"{client.root_url}catalogs/{catalog}/products")
        requests_raise_for_status(resp)
        list_products = resp.json()["resources"]
        # Pick the catalog entry matching this product's identifier.
        dict_ = [dict_ for dict_ in list_products if dict_["identifier"] == self.identifier][0]
        product_obj = Product._from_dict(dict_)
        product_obj.client = client

        return product_obj

    def to_dict(self: Product) -> dict[str, Any]:
        """Convert the Product instance to a dictionary.

        Private attributes (leading underscore, e.g. the client) are excluded;
        keys are emitted in camelCase as expected by the API.

        Returns:
            dict[str, Any]: Product metadata as a dictionary.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product")
            >>> product_dict = product.to_dict()

        """
        product_dict = {
            snake_to_camel(k): v
            for k, v in self.__dict__.items()
            if not k.startswith("_")
        }
        return product_dict

    def create(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new product to a Fusion catalog.

        Args:
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product(
            ...     identifier="my_product",
            ...     title="My Product",
            ...     category="Data",
            ...     short_abstract="My product is awesome",
            ...     description="My product is very awesome",
            ...     )
            >>> product.create(catalog="my_catalog")

            From a dictionary:

            >>> product_dict = {
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     }
            >>> product = fusion.product("my_product").from_object(product_dict)
            >>> product.create(catalog="my_catalog")

            From a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product_json = '{
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     }'
            >>> product = fusion.product("my_product").from_object(product_json)
            >>> product.create(catalog="my_catalog")

            From a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_object("path/to/product.csv")
            >>> product.create(catalog="my_catalog")

            From a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product_series = pd.Series({
            ...     "identifier": "my_product",
            ...     "title": "My Product",
            ...     "category": "Data"
            ...     })
            >>> product = fusion.product("my_product").from_object(product_series)
            >>> product.create(catalog="my_catalog")

            From existing product in a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog()
            >>> product.identifier = "my_new_product"
            >>> product.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # API requires both fields: default release date to today, channel to API.
        release_date = self.release_date if self.release_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        delivery_channel = self.delivery_channel if self.delivery_channel else ["API"]

        self.release_date = release_date
        self.delivery_channel = delivery_channel

        data = self.to_dict()

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.post(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def update(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Update an existing product in a Fusion catalog.

        Args:
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
            >>> product.title = "My Updated Product Title"
            >>> product.update(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # API requires both fields: default release date to today, channel to API.
        release_date = self.release_date if self.release_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        delivery_channel = self.delivery_channel if self.delivery_channel else ["API"]

        self.release_date = release_date
        self.delivery_channel = delivery_channel

        data = self.to_dict()

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete a product from a Fusion catalog.

        Args:
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

         Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.product("my_product").delete(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        url = f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
        resp: requests.Response = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def copy(
        self,
        catalog_to: str,
        catalog_from: str | None = None,
        client: Fusion | None = None,
        client_to: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Copy product from one Fusion catalog and/or environment to another.

        Args:
            catalog_to (str): Catalog identifier to which to copy product.
            catalog_from (str, optional): A catalog identifier from which to copy product. Defaults to "common".
            client (Fusion): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

        """
        client = self._use_client(client)
        catalog_from = client._use_catalog(catalog_from)
        if client_to is None:
            client_to = client
        # Fetch from the source catalog/environment, then re-create in the target.
        product_obj = self.from_catalog(catalog=catalog_from, client=client)
        product_obj.client = client_to
        resp = product_obj.create(catalog=catalog_to, return_resp_obj=True)
        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Product metadata fields after object instantiation.

Source code in py_src/fusion/product.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def __post_init__(self: Product) -> None:
    """Format Product metadata fields after object instantiation."""
    self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description != "" else self.title
    self.short_abstract = tidy_string(self.short_abstract) if self.short_abstract != "" else self.title
    self.description = tidy_string(self.description)
    self.category = (
        self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
    )
    self.tag = self.tag if isinstance(self.tag, list) or self.tag is None else make_list(self.tag)
    self.dataset = (
        self.dataset if isinstance(self.dataset, list) or self.dataset is None else make_list(self.dataset)
    )
    self.sub_category = (
        self.sub_category
        if isinstance(self.sub_category, list) or self.sub_category is None
        else make_list(self.sub_category)
    )
    self.is_active = self.is_active if isinstance(self.is_active, bool) else make_bool(self.is_active)
    self.is_restricted = (
        self.is_restricted
        if isinstance(self.is_restricted, bool) or self.is_restricted is None
        else make_bool(self.is_restricted)
    )
    self.maintainer = (
        self.maintainer
        if isinstance(self.maintainer, list) or self.maintainer is None
        else make_list(self.maintainer)
    )
    self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
    self.delivery_channel = (
        self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
    )
    self.release_date = convert_date_format(self.release_date) if self.release_date else None

__repr__()

Return an object representation of the Product object.

Returns:

Name Type Description
str str

Object representation of the product.

Source code in py_src/fusion/product.py
81
82
83
84
85
86
87
88
89
def __repr__(self: Product) -> str:
    """Return an object representation of the Product object.

    Returns:
        str: Object representaiton of the product.

    """
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    return f"Product(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

copy(catalog_to, catalog_from=None, client=None, client_to=None, return_resp_obj=False)

Copy product from one Fusion catalog and/or environment to another.

Parameters:

Name Type Description Default
catalog_to str

Catalog identifier to which to copy product.

required
catalog_from str

A catalog identifier from which to copy product. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
client_to Fusion | None

Fusion client object. Defaults to current instance.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")
Source code in py_src/fusion/product.py
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
def copy(
    self,
    catalog_to: str,
    catalog_from: str | None = None,
    client: Fusion | None = None,
    client_to: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Copy product from one Fusion catalog and/or environment to another by copy.

    Args:
        catalog_to (str): Catalog identifier to which to copy product.
        catalog_from (str, optional): A catalog identifier from which to copy product. Defaults to "common".
        client (Fusion): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.product("my_product").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

    """
    client = self._use_client(client)
    catalog_from = client._use_catalog(catalog_from)
    if client_to is None:
        client_to = client
    product_obj = self.from_catalog(catalog=catalog_from, client=client)
    product_obj.client = client_to
    resp = product_obj.create(catalog=catalog_to, return_resp_obj=True)
    return resp if return_resp_obj else None

create(catalog=None, client=None, return_resp_obj=False)

Upload a new product to a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product(
...     identifier="my_product",
...     title="My Product",
...     category="Data",
...     short_abstract="My product is awesome",
...     description="My product is very awesome",
...     )
>>> product.create(catalog="my_catalog")

From a dictionary:

>>> product_dict = {
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     }
>>> product = fusion.product("my_product").from_object(product_dict)
>>> product.create(catalog="my_catalog")

From a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product_json = '{
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     }'
>>> product = fusion.product("my_product").from_object(product_json)
>>> product.create(catalog="my_catalog")

From a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_object("path/to/product.csv")
>>> product.create(catalog="my_catalog")

From a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product_series = pd.Series({
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data"
...     })
>>> product = fusion.product("my_product").from_object(product_series)
>>> product.create(catalog="my_catalog")

From existing product in a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog()
>>> product.identifier = "my_new_product"
>>> product.create(catalog="my_catalog")
Source code in py_src/fusion/product.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
def create(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new product to a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product(
        ...     identifier="my_product",
        ...     title="My Product",
        ...     category="Data",
        ...     short_abstract="My product is awesome",
        ...     description="My product is very awesome",
        ...     )
        >>> product.create(catalog="my_catalog")

        From a dictionary:

        >>> product_dict = {"identifier": "my_product", "title": "My Product", "category": "Data"}
        >>> product = fusion.product("my_product").from_object(product_dict)
        >>> product.create(catalog="my_catalog")

        From a JSON string:

        >>> product_json = '{"identifier": "my_product", "title": "My Product", "category": "Data"}'
        >>> product = fusion.product("my_product").from_object(product_json)
        >>> product.create(catalog="my_catalog")

        From a CSV file:

        >>> product = fusion.product("my_product").from_object("path/to/product.csv")
        >>> product.create(catalog="my_catalog")

        From a pandas Series:

        >>> product_series = pd.Series({"identifier": "my_product", "title": "My Product", "category": "Data"})
        >>> product = fusion.product("my_product").from_object(product_series)
        >>> product.create(catalog="my_catalog")

        From an existing product in a catalog:

        >>> product = fusion.product("my_product").from_catalog()
        >>> product.identifier = "my_new_product"
        >>> product.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Backfill fields the API requires: default the release date to today
    # and the delivery channel to the API channel when not already set.
    self.release_date = self.release_date or pd.Timestamp("today").strftime("%Y-%m-%d")
    self.delivery_channel = self.delivery_channel or ["API"]

    payload = self.to_dict()

    resp: requests.Response = client.session.post(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}",
        json=payload,
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(catalog=None, client=None, return_resp_obj=False)

Delete a product from a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.product("my_product").delete(catalog="my_catalog")
Source code in py_src/fusion/product.py
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
def delete(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a product from a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.product("my_product").delete(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    resp: requests.Response = client.session.delete(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}"
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

from_catalog(catalog=None, client=None)

Instantiate a Product object from a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

Catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Product Product

Product object.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
Source code in py_src/fusion/product.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Product:
    """Instantiate a Product object from a Fusion catalog.

    Args:
        catalog (str | None, optional): Catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Product: Product object.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    resp = client.session.get(f"{client.root_url}catalogs/{catalog}/products")
    requests_raise_for_status(resp)
    # Pick the catalog entry matching this product's identifier; indexing the
    # first match mirrors the original lookup (IndexError if no match exists).
    matching = [entry for entry in resp.json()["resources"] if entry["identifier"] == self.identifier]
    product_obj = Product._from_dict(matching[0])
    product_obj.client = client

    return product_obj

from_object(product_source)

Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

Parameters:

Name Type Description Default
product_source Product | dict[str, Any] | str | Series[Any]

Product metadata source.

required

Raises:

Type Description
TypeError

If the object provided is not a Product, dictionary, path to CSV file, JSON string, or pandas Series.

Returns:

Name Type Description
Product Product

Product object.

Examples:

Instantiating a Product object from a dictionary:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_dict = {
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available"
... }
>>> product = fusion.product("my_product").from_object(product_dict)

Instantiating a Product object from a JSON string:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_json = '{
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available",
... }'
>>> product = fusion.product("my_product").from_object(product_json)

Instantiating a Product object from a CSV file:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_object("path/to/product.csv")

Instantiating a Product object from a pandas Series:

>>> from fusion import Fusion
>>> from fusion.product import Product
>>> fusion = Fusion()
>>> product_series = pd.Series({
...     "identifier": "my_product",
...     "title": "My Product",
...     "category": "Data",
...     "short_abstract": "My product is awesome",
...     "description": "My product is very awesome",
...     "is_active": True,
...     "is_restricted": False,
...     "maintainer": "My Company",
...     "region": "Global",
...     "publisher": "My Company",
...     "sub_category": "Data",
...     "tag": "My Company",
...     "delivery_channel": "API",
...     "theme": "Data",
...     "release_date": "2021-01-01",
...     "language": "English",
...     "status": "Available",
... })
>>> product = fusion.product("my_product").from_object(product_series)
Source code in py_src/fusion/product.py
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def from_object(
    self,
    product_source: Product | dict[str, Any] | str | pd.Series[Any],
) -> Product:
    """Instantiate a Product object from a Product object, dictionary, path to CSV, JSON string, or pandas Series.

    Args:
        product_source (Product | dict[str, Any] | str | pd.Series[Any]): Product metadata source.

    Raises:
        TypeError: If the object provided is not a Product, dictionary, path to CSV file,
            JSON string, or pandas Series.

    Returns:
        Product: Product object.

    Examples:
        Instantiating a Product object from a dictionary:

        >>> from fusion import Fusion
        >>> from fusion.product import Product
        >>> fusion = Fusion()
        >>> product_dict = {
        ...     "identifier": "my_product",
        ...     "title": "My Product",
        ...     "category": "Data",
        ...     "short_abstract": "My product is awesome",
        ...     "description": "My product is very awesome",
        ...     "release_date": "2021-01-01",
        ...     "language": "English",
        ...     "status": "Available",
        ... }
        >>> product = fusion.product("my_product").from_object(product_dict)

        Instantiating a Product object from a JSON string:

        >>> product_json = '{"identifier": "my_product", "title": "My Product", "category": "Data"}'
        >>> product = fusion.product("my_product").from_object(product_json)

        Instantiating a Product object from a CSV file:

        >>> product = fusion.product("my_product").from_object("path/to/product.csv")

        Instantiating a Product object from a pandas Series:

        >>> product_series = pd.Series({
        ...     "identifier": "my_product",
        ...     "title": "My Product",
        ...     "category": "Data",
        ... })
        >>> product = fusion.product("my_product").from_object(product_series)

    """
    if isinstance(product_source, Product):
        resolved = product_source
    elif isinstance(product_source, dict):
        resolved = Product._from_dict(product_source)
    elif isinstance(product_source, str):
        # A string is either inline JSON or a filesystem path to a CSV file.
        resolved = (
            Product._from_dict(js.loads(product_source))
            if _is_json(product_source)
            else Product._from_csv(product_source)
        )
    elif isinstance(product_source, pd.Series):
        resolved = Product._from_series(product_source)
    else:
        raise TypeError(f"Could not resolve the object provided: {product_source}")
    resolved.client = self._client
    return resolved

to_dict()

Convert the Product instance to a dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Product metadata as a dictionary.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product")
>>> product_dict = product.to_dict()
Source code in py_src/fusion/product.py
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def to_dict(self: Product) -> dict[str, Any]:
    """Convert the Product instance to a dictionary.

    Returns:
        dict[str, Any]: Product metadata as a dictionary.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product")
        >>> product_dict = product.to_dict()

    """
    # Serialize only public attributes (no leading underscore), converting
    # each snake_case field name to the camelCase form the API expects.
    result: dict[str, Any] = {}
    for attr_name, attr_value in self.__dict__.items():
        if attr_name.startswith("_"):
            continue
        result[snake_to_camel(attr_name)] = attr_value
    return result

update(catalog=None, client=None, return_resp_obj=False)

Update an existing product in a Fusion catalog.

Parameters:

Name Type Description Default
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
>>> product.title = "My Updated Product Title"
>>> product.update(catalog="my_catalog")
Source code in py_src/fusion/product.py
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
def update(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Update an existing product in a Fusion catalog.

    Args:
        catalog (str, optional): A catalog identifier. Defaults to None,
            in which case the client's default catalog is used.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> product = fusion.product("my_product").from_catalog(catalog="my_catalog")
        >>> product.title = "My Updated Product Title"
        >>> product.update(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Backfill fields the API requires: default the release date to today
    # and the delivery channel to the API channel when not already set.
    self.release_date = self.release_date or pd.Timestamp("today").strftime("%Y-%m-%d")
    self.delivery_channel = self.delivery_channel or ["API"]

    payload = self.to_dict()

    resp: requests.Response = client.session.put(
        f"{client.root_url}catalogs/{catalog}/products/{self.identifier}",
        json=payload,
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

Fusion Dataset class and functions.

Dataset dataclass

Fusion Dataset class for managing dataset metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

A unique identifier for the dataset.

title str

A title for the dataset. If not provided, defaults to identifier.

category str | list[str] | None

A category or list of categories for the dataset. Defaults to None.

description str

A description of the dataset. If not provided, defaults to identifier.

frequency str

The frequency of the dataset. Defaults to "Once".

is_internal_only_dataset bool

Flag for internal datasets. Defaults to False.

is_third_party_data bool

Flag for third party data. Defaults to True.

is_restricted bool | None

Flag for restricted datasets. Defaults to None.

is_raw_data bool

Flag for raw datasets. Defaults to True.

maintainer str | None

Dataset maintainer. Defaults to "J.P. Morgan Fusion".

source str | list[str] | None

Name of data vendor which provided the data. Defaults to None.

region str | list[str] | None

Region. Defaults to None.

publisher str

Name of vendor that publishes the data. Defaults to "J.P. Morgan".

product str | list[str] | None

Product to associate dataset with. Defaults to None.

sub_category str | list[str] | None

Sub-category. Defaults to None.

tags str | list[str] | None

Tags used for search purposes. Defaults to None.

created_date str | None

Created date. Defaults to None.

modified_date str | None

Modified date. Defaults to None.

delivery_channel str | list[str]

Delivery channel. Defaults to "API".

language str

Language. Defaults to "English".

status str

Status. Defaults to "Available".

type_ str | None

Dataset type. Defaults to "Source".

container_type str | None

Container type. Defaults to "Snapshot-Full".

snowflake str | None

Snowflake account connection. Defaults to None.

complexity str | None

Complexity. Defaults to None.

is_immutable bool | None

Flag for immutable datasets. Defaults to None.

is_mnpi bool | None

is_mnpi. Defaults to None.

is_pci bool | None

is_pci. Defaults to None.

is_pii bool | None

is_pii. Defaults to None.

is_client bool | None

is_client. Defaults to None.

is_public bool | None

is_public. Defaults to None.

is_internal bool | None

is_internal. Defaults to None.

is_confidential bool | None

is_confidential. Defaults to None.

is_highly_confidential bool | None

is_highly_confidential. Defaults to None.

is_active bool | None

is_active. Defaults to None.

owners list[str] | None

The owners of the dataset. Defaults to None.

application_id str | dict[str, str] | None

The application (most commonly seal ID) that the dataset/report/flow is owned by. Accepts string format for seal IDs, or a dictionary containing 'id' and 'type' as keys. Defaults to None.

_client Any

A Fusion client object. Defaults to None.

Source code in py_src/fusion/dataset.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
@dataclass
class Dataset(metaclass=CamelCaseMeta):
    """Fusion Dataset class for managing dataset metadata in a Fusion catalog.

    Attributes:
        identifier (str): A unique identifier for the dataset.
        title (str, optional): A title for the dataset. If not provided, defaults to identifier.
        category (str | list[str] | None, optional): A category or list of categories for the dataset. Defaults to None.
        description (str, optional): A description of the dataset. If not provided, defaults to identifier.
        frequency (str, optional): The frequency of the dataset. Defaults to "Once".
        is_internal_only_dataset (bool, optional): Flag for internal datasets. Defaults to False.
        is_third_party_data (bool, optional): Flag for third party data. Defaults to True.
        is_restricted (bool | None, optional): Flag for restricted datasets. Defaults to None.
        is_raw_data (bool, optional): Flag for raw datasets. Defaults to True.
        maintainer (str | None, optional): Dataset maintainer. Defaults to "J.P. Morgan Fusion".
        source (str | list[str] | None, optional): Name of data vendor which provided the data. Defaults to None.
        region (str | list[str] | None, optional): Region. Defaults to None.
        publisher (str, optional): Name of vendor that publishes the data. Defaults to "J.P. Morgan".
        product (str | list[str] | None, optional): Product to associate dataset with. Defaults to None.
        sub_category (str | list[str] | None, optional): Sub-category. Defaults to None.
        tags (str | list[str] | None, optional): Tags used for search purposes. Defaults to None.
        created_date (str | None, optional): Created date. Defaults to None.
        modified_date (str | None, optional): Modified date. Defaults to None.
        delivery_channel (str | list[str], optional): Delivery channel. Defaults to "API".
        language (str, optional): Language. Defaults to "English".
        status (str, optional): Status. Defaults to "Available".
        type_ (str | None, optional): Dataset type. Defaults to "Source".
        container_type (str | None, optional): Container type. Defaults to "Snapshot-Full".
        snowflake (str | None, optional): Snowflake account connection. Defaults to None.
        complexity (str | None, optional): Complexity. Defaults to None.
        is_immutable (bool | None, optional): Flag for immutable datasets. Defaults to None.
        is_mnpi (bool | None, optional): is_mnpi. Defaults to None.
        is_pci (bool | None, optional): is_pci. Defaults to None.
        is_pii (bool | None, optional): is_pii. Defaults to None.
        is_client (bool | None, optional): is_client. Defaults to None.
        is_public (bool | None, optional): is_public. Defaults to None.
        is_internal (bool | None, optional): is_internal. Defaults to None.
        is_confidential (bool | None, optional): is_confidential. Defaults to None.
        is_highly_confidential (bool | None, optional): is_highly_confidential. Defaults to None.
        is_active (bool | None, optional): is_active. Defaults to None.
        owners (list[str] | None, optional): The owners of the dataset. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The application (most commonly seal ID) that the 
            dataset/report/flow is owned by. Accepts string format for seal IDs, or a dictionary containing 'id' and
            'type' as keys. Defaults to None.
        _client (Any, optional): A Fusion client object. Defaults to None.

    """

    identifier: str
    title: str = ""
    category: str | list[str] | None = None
    description: str = ""
    frequency: str = "Once"
    is_internal_only_dataset: bool = False
    is_third_party_data: bool = True
    is_restricted: bool | None = None
    is_raw_data: bool = True
    maintainer: str | None = "J.P. Morgan Fusion"
    source: str | list[str] | None = None
    region: str | list[str] | None = None
    publisher: str = "J.P. Morgan"
    product: str | list[str] | None = None
    sub_category: str | list[str] | None = None
    tags: str | list[str] | None = None
    created_date: str | None = None
    modified_date: str | None = None
    delivery_channel: str | list[str] = field(default_factory=lambda: ["API"])
    language: str = "English"
    status: str = "Available"
    type_: str | None = "Source"
    container_type: str | None = "Snapshot-Full"
    snowflake: str | None = None
    complexity: str | None = None
    is_immutable: bool | None = None
    is_mnpi: bool | None = None
    is_pci: bool | None = None
    is_pii: bool | None = None
    is_client: bool | None = None
    is_public: bool | None = None
    is_internal: bool | None = None
    is_confidential: bool | None = None
    is_highly_confidential: bool | None = None
    is_active: bool | None = None
    owners: list[str] | None = None
    application_id: str | dict[str, str] | None = None

    # Fusion session handle; set via the ``client`` property, never by __init__,
    # and excluded from repr/equality so two metadata-equal datasets compare equal.
    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __repr__(self: Dataset) -> str:
        """Return an object representation of the Dataset object.

        Returns:
            str: Object representation of the dataset.

        """
        # Only public metadata fields are shown; private (underscore) state is omitted.
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        return f"Dataset(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __post_init__(self: Dataset) -> None:
        """Format Dataset metadata fields after object initialization."""
        # Normalize the identifier to the catalog convention (UPPER_SNAKE_CASE)
        # and derive title/description from it when they were not supplied.
        self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description != "" else self.title
        # Coerce scalar values of list-like fields into one-element lists; None is preserved.
        self.category = (
            self.category if isinstance(self.category, list) or self.category is None else make_list(self.category)
        )
        self.delivery_channel = (
            self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
        )
        self.source = self.source if isinstance(self.source, list) or self.source is None else make_list(self.source)
        self.region = self.region if isinstance(self.region, list) or self.region is None else make_list(self.region)
        self.product = (
            self.product if isinstance(self.product, list) or self.product is None else make_list(self.product)
        )
        self.sub_category = (
            self.sub_category
            if isinstance(self.sub_category, list) or self.sub_category is None
            else make_list(self.sub_category)
        )
        self.tags = self.tags if isinstance(self.tags, list) or self.tags is None else make_list(self.tags)
        self.is_internal_only_dataset = (
            self.is_internal_only_dataset
            if isinstance(self.is_internal_only_dataset, bool)
            else make_bool(self.is_internal_only_dataset)
        )
        self.created_date = convert_date_format(self.created_date) if self.created_date else None
        self.modified_date = convert_date_format(self.modified_date) if self.modified_date else None
        self.owners = self.owners if isinstance(self.owners, list) or self.owners is None else make_list(self.owners)
        # A plain string application_id is treated as a SEAL ID and wrapped in the dict form.
        self.application_id = (
            {"id": str(self.application_id), "type": "Application (SEAL)"}
            if isinstance(self.application_id, str)
            else self.application_id
        )

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version
        # (lets callers read camelCase names set via the API payloads).
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        # "client" must go through object.__setattr__ so the property setter
        # (a data descriptor on the class) is invoked rather than writing
        # a plain instance attribute.
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # All other attributes are stored under their snake_case name,
            # so camelCase and snake_case writes land on the same slot.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Dataset. Set automatically, if the Dataset is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset")
            >>> dataset.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client.

        Prefers an explicitly passed client over the instance's stored one;
        raises if neither is available.

        Raises:
            ValueError: If no Fusion client is available.

        """

        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(cls: type[Dataset], series: pd.Series[Any]) -> Dataset:
        """Instantiate a Dataset object from a pandas Series.

        Args:
            series (pd.Series[Any]): Dataset metadata as a pandas Series.

        Returns:
            Dataset: Dataset object.

        """
        # Normalize index labels: strip spaces/underscores and lowercase,
        # so e.g. "isRawData", "is_raw_data" and "Is Raw Data" all match.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower())
        series = series.rename({"tag": "tags"})
        # NOTE(review): after the normalization above, labels contain no underscores or
        # uppercase letters, so "type_" and "productId" can never match — these two
        # renames look like no-ops; confirm whether "type" / "productid" were intended.
        series = series.rename({"type_": "type"})
        series = series.rename({"productId": "product"})

        # Boolean-ish fields may arrive as strings ("true"/"false"); coerce
        # only when present so missing values stay None.
        is_internal_only_dataset = series.get("isinternalonlydataset", None)
        is_internal_only_dataset = (
            make_bool(is_internal_only_dataset) if is_internal_only_dataset is not None else is_internal_only_dataset
        )
        is_restricted = series.get("isrestricted", None)
        is_restricted = make_bool(is_restricted) if is_restricted is not None else is_restricted
        is_immutable = series.get("isimmutable", None)
        is_immutable = make_bool(is_immutable) if is_immutable is not None else is_immutable
        is_mnpi = series.get("ismnpi", None)
        is_mnpi = make_bool(is_mnpi) if is_mnpi is not None else is_mnpi
        is_pci = series.get("ispci", None)
        is_pci = make_bool(is_pci) if is_pci is not None else is_pci
        is_pii = series.get("ispii", None)
        is_pii = make_bool(is_pii) if is_pii is not None else is_pii
        is_client = series.get("isclient", None)
        is_client = make_bool(is_client) if is_client is not None else is_client
        is_public = series.get("ispublic", None)
        is_public = make_bool(is_public) if is_public is not None else is_public
        is_internal = series.get("isinternal", None)
        is_internal = make_bool(is_internal) if is_internal is not None else is_internal
        is_confidential = series.get("isconfidential", None)
        is_confidential = make_bool(is_confidential) if is_confidential is not None else is_confidential
        is_highly_confidential = series.get("ishighlyconfidential", None)
        is_highly_confidential = (
            make_bool(is_highly_confidential) if is_highly_confidential is not None else is_highly_confidential
        )
        is_active = series.get("isactive", None)
        is_active = make_bool(is_active) if is_active is not None else is_active

        dataset = cls(
            identifier=series.get("identifier", ""),
            category=series.get("category", None),
            delivery_channel=series.get("deliverychannel", ["API"]),
            title=series.get("title", ""),
            description=series.get("description", ""),
            frequency=series.get("frequency", "Once"),
            is_internal_only_dataset=is_internal_only_dataset,  # type: ignore
            is_third_party_data=series.get("isthirdpartydata", True),
            is_restricted=is_restricted,
            is_raw_data=series.get("israwdata", True),
            maintainer=series.get("maintainer", "J.P. Morgan Fusion"),
            source=series.get("source", None),
            region=series.get("region", None),
            publisher=series.get("publisher", "J.P. Morgan"),
            product=series.get("product", None),
            sub_category=series.get("subcategory", None),
            tags=series.get("tags", None),
            container_type=series.get("containertype", "Snapshot-Full"),
            language=series.get("language", "English"),
            status=series.get("status", "Available"),
            type_=series.get("type", "Source"),
            created_date=series.get("createddate", None),
            modified_date=series.get("modifieddate", None),
            snowflake=series.get("snowflake", None),
            complexity=series.get("complexity", None),
            owners=series.get("owners", None),
            application_id=series.get("applicationid", None),
            is_immutable=is_immutable,
            is_mnpi=is_mnpi,
            is_pci=is_pci,
            is_pii=is_pii,
            is_client=is_client,
            is_public=is_public,
            is_internal=is_internal,
            is_confidential=is_confidential,
            is_highly_confidential=is_highly_confidential,
            is_active=is_active,
        )
        return dataset

    @classmethod
    def _from_dict(cls: type[Dataset], data: dict[str, Any]) -> Dataset:
        """Instantiate a Dataset object from a dictionary.

        Args:
            data (dict[str, Any]): Dataset metadata as a dictionary.

        Returns:
            Dataset: Dataset object.

        """
        # Accept the API's "type" key for the "type_" dataclass field; drop
        # any keys that are not dataclass fields so cls(**data) cannot fail
        # on unexpected payload entries.
        keys = [f.name for f in fields(cls)]
        keys = ["type" if key == "type_" else key for key in keys]
        data = {camel_to_snake(k): v for k, v in data.items()}
        data = {k: v for k, v in data.items() if k in keys}
        if "type" in data:
            data["type_"] = data.pop("type")
        return cls(**data)

    @classmethod
    def _from_csv(cls: type[Dataset], file_path: str, identifier: str | None = None) -> Dataset:
        """Instantiate a Dataset object from a CSV file.

        Args:
            file_path (str): Path to the CSV file.
            identifier (str | None, optional): Dataset identifier for filtering if multiple datasets are defined in csv.
                Defaults to None.

        Returns:
            Dataset: Dataset object.

        """
        data = pd.read_csv(file_path)

        # When no identifier is given, the first row of the CSV is used.
        return (
            cls._from_series(data[data["identifier"] == identifier].reset_index(drop=True).iloc[0])
            if identifier
            else cls._from_series(data.reset_index(drop=True).iloc[0])
        )

    def from_object(
        self,
        dataset_source: Dataset | dict[str, Any] | str | pd.Series[Any],
    ) -> Dataset:
        """Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

        Args:
            dataset_source (Dataset | dict[str, Any] | str | pd.Series[Any]): Dataset metadata source.

        Raises:
            TypeError: If the object provided is not a Dataset, dictionary, JSON string, path to CSV file,
                or pandas Series.

        Returns:
            Dataset: Dataset object.

        Examples:
            Instantiate a Dataset object from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_dict = {
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan",
            ...     }
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

            Instantiate a Dataset object from a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_json = '{
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan"
            ...     }'
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

            Instantiate a Dataset object from a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

            Instantiate a Dataset object from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_series = pd.Series({
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False,
            ...     "is_raw_data": True,
            ...     "maintainer": "J.P. Morgan Fusion",
            ...     "source": "J.P. Morgan"
            ...     })
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

        """
        # Dispatch on the source type; a str is treated as JSON when it parses
        # as JSON, otherwise as a CSV file path.
        if isinstance(dataset_source, Dataset):
            dataset = dataset_source
        elif isinstance(dataset_source, dict):
            dataset = self._from_dict(dataset_source)
        elif isinstance(dataset_source, str):
            if _is_json(dataset_source):
                dataset = self._from_dict(js.loads(dataset_source))
            else:
                dataset = self._from_csv(dataset_source)
        elif isinstance(dataset_source, pd.Series):
            dataset = self._from_series(dataset_source)
        else:
            raise TypeError(f"Could not resolve the object provided: {dataset_source}")

        # Propagate this instance's client so the new object can make API calls.
        dataset.client = self._client

        return dataset

    def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Dataset:
        """Instantiate a Dataset object from a Fusion catalog.

        Args:
            catalog (str | None, optional): Catalog identifier. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Dataset: Dataset object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        dataset = self.identifier
        # Fetch the full dataset listing and pick the entry matching this identifier.
        # NOTE(review): raises IndexError if the identifier is not in the catalog — confirm intended.
        resp = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets")
        requests_raise_for_status(resp)
        list_datasets = resp.json()["resources"]
        dict_ = [dict_ for dict_ in list_datasets if dict_["identifier"] == dataset][0]
        dataset_obj = self._from_dict(dict_)
        dataset_obj.client = client

        # Enrich with the product mapping, if one exists for this dataset
        # (matched case-insensitively; only the first mapped product is kept).
        prod_df = client.list_product_dataset_mapping(catalog=catalog)

        if dataset.lower() in list(prod_df.dataset.str.lower()):
            product = [prod_df[prod_df["dataset"].str.lower() == dataset.lower()]["product"].iloc[0]]
            dataset_obj.product = product

        return dataset_obj

    def to_dict(self) -> dict[str, Any]:
        """Convert the Dataset instance to a dictionary.

        Keys are emitted in camelCase (the API payload convention); private
        underscore-prefixed state is excluded.

        Returns:
            dict[str, Any]: Dataset metadata as a dictionary.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset")
            >>> dataset_dict = dataset.to_dict()

        """
        dataset_dict = {snake_to_camel(k): v for k, v in self.__dict__.items() if not k.startswith("_")}

        return dataset_dict

    def create(
        self,
        catalog: str | None = None,
        product: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new dataset to a Fusion catalog.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            product (str | None, optional): A product identifier to upload dataset to. If dataset object already has
                product attribute populated, the attribute will be overwritten by this value. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Raises:
            ValueError: If the payload carries a report entry with a blank tier.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset(
            ...     identifier= "my_dataset",
            ...     title= "My Dataset",
            ...     description= "My dataset description",
            ...     category= "Finance",
            ...     frequency= "Daily",
            ...     is_restricted= False
            ...     )
            >>> dataset.create(catalog="my_catalog")

            From a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_dict = {
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     }
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
            >>> dataset.create(catalog="my_catalog")

            From a JSON string:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_json = '{
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     }'
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
            >>> dataset.create(catalog="my_catalog")

            From a CSV file:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
            >>> dataset.create(catalog="my_catalog")

            From a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset_series = pd.Series({
            ...     "identifier": "my_dataset",
            ...     "title": "My Dataset",
            ...     "description": "My dataset description",
            ...     "category": "Finance",
            ...     "frequency": "Daily",
            ...     "is_restricted": False
            ...     })
            >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

            From existing dataset in a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
            >>> dataset.identifier = "my_new_dataset"
            >>> dataset.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # Stamp today's date on missing audit fields before upload.
        self.created_date = self.created_date if self.created_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        self.modified_date = self.modified_date if self.modified_date else pd.Timestamp("today").strftime("%Y-%m-%d")

        self.product = [product] if product else self.product

        data = self.to_dict()

        # "report" is not a Dataset field here — presumably populated by a
        # subclass/report-style dataset; a blank tier is rejected before upload.
        if data.get("report", None) and data["report"]["tier"] == "":
            raise ValueError("Tier cannot be blank for reports.")


        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.post(url, json=data)
        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

    def update(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Updates a dataset via API from dataset object.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
            >>> dataset.title = "My Updated Dataset"
            >>> dataset.update(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        # Stamp today's date on missing audit fields before upload.
        self.created_date = self.created_date if self.created_date else pd.Timestamp("today").strftime("%Y-%m-%d")
        self.modified_date = self.modified_date if self.modified_date else pd.Timestamp("today").strftime("%Y-%m-%d")

        data = self.to_dict()

        # PUT replaces the existing dataset metadata under this identifier.
        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete a dataset via API from its dataset identifier.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").delete(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
        resp: requests.Response = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def copy(
        self,
        catalog_to: str,
        catalog_from: str | None = None,
        client: Fusion | None = None,
        client_to: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Copy dataset from one catalog and/or environment to another by copy.

        Args:
            catalog_to (str): A catalog identifier to which to copy dataset.
            catalog_from (str, optional): A catalog identifier from which to copy dataset. Defaults to "common".
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

        """
        client = self._use_client(client)
        catalog_from = client._use_catalog(catalog_from)

        # Same-environment copy when no destination client is given.
        if client_to is None:
            client_to = client
        # Read the current metadata from the source catalog, then re-create it
        # in the destination catalog (possibly against a different environment).
        dataset_obj = self.from_catalog(catalog=catalog_from, client=client)
        dataset_obj.client = client_to
        resp = dataset_obj.create(client=client_to, catalog=catalog_to, return_resp_obj=True)
        return resp if return_resp_obj else None

    def activate(
        self,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Activate a dataset by setting the isActive flag to True.

        Args:
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").activate(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        # Read-modify-write: fetch the catalog's current metadata, flip the
        # flag, and push the update back.
        dataset_obj = self.from_catalog(catalog=catalog, client=client)
        dataset_obj.is_active = True
        resp = dataset_obj.update(catalog=catalog, client=client, return_resp_obj=return_resp_obj)

        return resp if return_resp_obj else None

    def add_to_product(
        self,
        product: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Map dataset to a product.

        Args:
            product (str): A product identifier.
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/productDatasets"
        data = {"product": product, "datasets": [self.identifier]}
        resp = client.session.put(url=url, json=data)

        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

    def remove_from_product(
        self,
        product: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete dataset to product mapping.

        Args:
            product (str): A product identifier.
            catalog (str | None, optional): A catalog identifier. Defaults to "common".
            client (Fusion | None, optional):  A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        dataset = self.identifier
        url = f"{client.root_url}catalogs/{catalog}/productDatasets/{product}/{dataset}"
        resp = client.session.delete(url=url)

        requests_raise_for_status(resp)

        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Dataset metadata fields after object initialization.

Source code in py_src/fusion/dataset.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def __post_init__(self: Dataset) -> None:
    """Format Dataset metadata fields after object initialization."""

    def _listify(value):  # noqa: ANN001, ANN202
        # Lists and None pass through untouched; scalars are wrapped via make_list.
        return value if isinstance(value, list) or value is None else make_list(value)

    self.identifier = tidy_string(self.identifier).upper().replace(" ", "_")
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description != "" else self.title

    # delivery_channel has no None pass-through: None is also coerced through make_list.
    self.delivery_channel = (
        self.delivery_channel if isinstance(self.delivery_channel, list) else make_list(self.delivery_channel)
    )

    # These fields share the same normalization contract: list or None kept, scalar listified.
    for field_name in ("category", "source", "region", "product", "sub_category", "tags", "owners"):
        setattr(self, field_name, _listify(getattr(self, field_name)))

    self.is_internal_only_dataset = (
        self.is_internal_only_dataset
        if isinstance(self.is_internal_only_dataset, bool)
        else make_bool(self.is_internal_only_dataset)
    )
    self.created_date = convert_date_format(self.created_date) if self.created_date else None
    self.modified_date = convert_date_format(self.modified_date) if self.modified_date else None
    # A bare string application id is promoted to the structured SEAL reference form.
    self.application_id = (
        {"id": str(self.application_id), "type": "Application (SEAL)"}
        if isinstance(self.application_id, str)
        else self.application_id
    )

__repr__()

Return an object representation of the Dataset object.

Returns:

Name Type Description
str str

Object representation of the dataset.

Source code in py_src/fusion/dataset.py
117
118
119
120
121
122
123
124
125
def __repr__(self: Dataset) -> str:
    """Return an object representation of the Dataset object.

    Returns:
        str: Object representation of the dataset.

    """
    # Only public (non-underscore-prefixed) attributes participate in the repr.
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    body = ",\n ".join(f"{k}={v!r}" for k, v in attrs.items())
    # Use the runtime class name (not a hard-coded "Dataset") so subclasses
    # report themselves correctly; the original also had an f-string with no
    # placeholders on the literal prefix.
    return f"{type(self).__name__}(\n" + body + "\n)"

activate(catalog=None, client=None, return_resp_obj=False)

Activate a dataset by setting the isActive flag to True.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").activate(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
def activate(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Activate a dataset by setting the isActive flag to True.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").activate(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # Fetch the current catalog copy, flip the flag, then push the update.
    fetched = self.from_catalog(catalog=catalog, client=client)
    fetched.is_active = True
    resp = fetched.update(catalog=catalog, client=client, return_resp_obj=True)
    return resp if return_resp_obj else None

add_to_product(product, catalog=None, client=None, return_resp_obj=False)

Map dataset to a product.

Parameters:

Name Type Description Default
product str

A product identifier.

required
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")
Source code in py_src/fusion/dataset.py
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
def add_to_product(
    self,
    product: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Map dataset to a product.

    Args:
        product (str): A product identifier.
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").add_to_product(product="MY_PRODUCT", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # PUT the mapping payload against the catalog's productDatasets resource.
    payload = {"product": product, "datasets": [self.identifier]}
    resp = client.session.put(
        url=f"{client.root_url}catalogs/{catalog}/productDatasets",
        json=payload,
    )
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

copy(catalog_to, catalog_from=None, client=None, client_to=None, return_resp_obj=False)

Copy a dataset from one catalog and/or environment to another.

Parameters:

Name Type Description Default
catalog_to str

A catalog identifier to which to copy dataset.

required
catalog_from str

A catalog identifier from which to copy dataset. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
client_to Fusion | None

Fusion client object. Defaults to current instance.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")
Source code in py_src/fusion/dataset.py
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
def copy(
    self,
    catalog_to: str,
    catalog_from: str | None = None,
    client: Fusion | None = None,
    client_to: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Copy a dataset from one catalog and/or environment to another.

    Args:
        catalog_to (str): A catalog identifier to which to copy dataset.
        catalog_from (str, optional): A catalog identifier from which to copy dataset. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        client_to (Fusion | None, optional): Fusion client object. Defaults to current instance.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").copy(catalog_from="my_catalog", catalog_to="my_new_catalog")

    """
    client = self._use_client(client)
    catalog_from = client._use_catalog(catalog_from)
    # Default the destination client to the source client.
    target_client = client if client_to is None else client_to
    # Read the dataset from the source catalog, then re-create it at the target.
    source_dataset = self.from_catalog(catalog=catalog_from, client=client)
    source_dataset.client = target_client
    resp = source_dataset.create(client=target_client, catalog=catalog_to, return_resp_obj=True)
    return resp if return_resp_obj else None

create(catalog=None, product=None, client=None, return_resp_obj=False)

Upload a new dataset to a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
product str | None

A product identifier to upload dataset to. If dataset object already has product attribute populated, the attribute will be overwritten by this value. Defaults to None.

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset(
...     identifier= "my_dataset",
...     title= "My Dataset",
...     description= "My dataset description",
...     category= "Finance",
...     frequency= "Daily",
...     is_restricted= False
...     )
>>> dataset.create(catalog="my_catalog")

From a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_dict = {
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     }
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
>>> dataset.create(catalog="my_catalog")

From a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_json = '{
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     }'
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
>>> dataset.create(catalog="my_catalog")

From a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
>>> dataset.create(catalog="my_catalog")

From a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_series = pd.Series({
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False
...     })
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

From existing dataset in a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
>>> dataset.identifier = "my_new_dataset"
>>> dataset.create(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def create(
    self,
    catalog: str | None = None,
    product: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new dataset to a Fusion catalog.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        product (str | None, optional): A product identifier to upload dataset to. If dataset object already has
            product attribute populated, the attribute will be overwritten by this value. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset(
        ...     identifier= "my_dataset",
        ...     title= "My Dataset",
        ...     description= "My dataset description",
        ...     category= "Finance",
        ...     frequency= "Daily",
        ...     is_restricted= False
        ...     )
        >>> dataset.create(catalog="my_catalog")

        From a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_dict = {
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     }
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)
        >>> dataset.create(catalog="my_catalog")

        From a JSON string:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_json = '{
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     }'
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)
        >>> dataset.create(catalog="my_catalog")

        From a CSV file:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")
        >>> dataset.create(catalog="my_catalog")

        From a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_series = pd.Series({
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False
        ...     })
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

        From existing dataset in a catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
        >>> dataset.identifier = "my_new_dataset"
        >>> dataset.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Compute today's date once so created/modified can never disagree
    # (the original computed it twice, which could straddle midnight).
    today = pd.Timestamp("today").strftime("%Y-%m-%d")
    self.created_date = self.created_date if self.created_date else today
    self.modified_date = self.modified_date if self.modified_date else today

    # An explicit product argument overrides any product already on the object.
    self.product = [product] if product else self.product

    data = self.to_dict()

    # Reports must declare a tier before they can be registered.
    if data.get("report", None) and data["report"]["tier"] == "":
        raise ValueError("Tier cannot be blank for reports.")

    url = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = client.session.post(url, json=data)
    requests_raise_for_status(resp)

    return resp if return_resp_obj else None

delete(catalog=None, client=None, return_resp_obj=False)

Delete a dataset via API from its dataset identifier.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").delete(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
def delete(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a dataset via API from its dataset identifier.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").delete(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Issue the DELETE against the dataset resource; raises on HTTP error.
    endpoint = f"{client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = client.session.delete(endpoint)
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

from_catalog(catalog=None, client=None)

Instantiate a Dataset object from a Fusion catalog.

Parameters:

Name Type Description Default
catalog str | None

Catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Dataset Dataset

Dataset object.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
def from_catalog(self, catalog: str | None = None, client: Fusion | None = None) -> Dataset:
    """Instantiate a Dataset object from a Fusion catalog.

    Args:
        catalog (str | None, optional): Catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Dataset: Dataset object.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    identifier = self.identifier

    # List all datasets in the catalog and pick the matching entry.
    # NOTE: raises IndexError if the identifier is not present in the listing.
    resp = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets")
    requests_raise_for_status(resp)
    matching = [entry for entry in resp.json()["resources"] if entry["identifier"] == identifier]
    dataset_obj = self._from_dict(matching[0])
    dataset_obj.client = client

    # Attach the mapped product, if the catalog records one for this dataset.
    prod_df = client.list_product_dataset_mapping(catalog=catalog)
    lowered = identifier.lower()
    if lowered in list(prod_df.dataset.str.lower()):
        dataset_obj.product = [prod_df[prod_df["dataset"].str.lower() == lowered]["product"].iloc[0]]

    return dataset_obj

from_object(dataset_source)

Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

Parameters:

Name Type Description Default
dataset_source Dataset | dict[str, Any] | str | Series[Any]

Dataset metadata source.

required

Raises:

Type Description
TypeError

If the object provided is not a Dataset, dictionary, JSON string, path to CSV file, or pandas Series.

Returns:

Name Type Description
Dataset Dataset

Dataset object.

Examples:

Instantiate a Dataset object from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_dict = {
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan",
...     }
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

Instantiate a Dataset object from a JSON string:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_json = '{
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan"
...     }'
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

Instantiate a Dataset object from a CSV file:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

Instantiate a Dataset object from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset_series = pd.Series({
...     "identifier": "my_dataset",
...     "title": "My Dataset",
...     "description": "My dataset description",
...     "category": "Finance",
...     "frequency": "Daily",
...     "is_restricted": False,
...     "is_raw_data": True,
...     "maintainer": "J.P. Morgan Fusion",
...     "source": "J.P. Morgan"
...     })
>>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)
Source code in py_src/fusion/dataset.py
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
def from_object(
    self,
    dataset_source: Dataset | dict[str, Any] | str | pd.Series[Any],
) -> Dataset:
    """Instantiate a Dataset object from a Dataset object, dictionary, JSON string, path to CSV, or pandas Series.

    Args:
        dataset_source (Dataset | dict[str, Any] | str | pd.Series[Any]): Dataset metadata source.

    Raises:
        TypeError: If the object provided is not a Dataset, dictionary, JSON string, path to CSV file,
            or pandas Series.

    Returns:
        Dataset: Dataset object.

    Examples:
        Instantiate a Dataset object from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_dict = {
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan",
        ...     }
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_dict)

        Instantiate a Dataset object from a JSON string:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_json = '{
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan"
        ...     }'
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_json)

        Instantiate a Dataset object from a CSV file:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_object("path/to/dataset.csv")

        Instantiate a Dataset object from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset_series = pd.Series({
        ...     "identifier": "my_dataset",
        ...     "title": "My Dataset",
        ...     "description": "My dataset description",
        ...     "category": "Finance",
        ...     "frequency": "Daily",
        ...     "is_restricted": False,
        ...     "is_raw_data": True,
        ...     "maintainer": "J.P. Morgan Fusion",
        ...     "source": "J.P. Morgan"
        ...     })
        >>> dataset = fusion.dataset("my_dataset").from_object(dataset_series)

    """
    if isinstance(dataset_source, Dataset):
        dataset = dataset_source
    elif isinstance(dataset_source, dict):
        dataset = self._from_dict(dataset_source)
    elif isinstance(dataset_source, str):
        if _is_json(dataset_source):
            dataset = self._from_dict(js.loads(dataset_source))
        else:
            dataset = self._from_csv(dataset_source)
    elif isinstance(dataset_source, pd.Series):
        dataset = self._from_series(dataset_source)
    else:
        raise TypeError(f"Could not resolve the object provided: {dataset_source}")

    dataset.client = self._client

    return dataset

remove_from_product(product, catalog=None, client=None, return_resp_obj=False)

Delete a dataset-to-product mapping.

Parameters:

Name Type Description Default
product str

A product identifier.

required
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion | None

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")
Source code in py_src/fusion/dataset.py
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
def remove_from_product(
    self,
    product: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete a dataset-to-product mapping.

    Args:
        product (str): A product identifier.
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion | None, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.dataset("my_dataset").remove_from_product(product="MY_PRODUCT", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    # DELETE the mapping resource identified by product and dataset identifiers.
    endpoint = f"{client.root_url}catalogs/{catalog}/productDatasets/{product}/{self.identifier}"
    resp = client.session.delete(url=endpoint)
    requests_raise_for_status(resp)
    if return_resp_obj:
        return resp
    return None

to_dict()

Convert the Dataset instance to a dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Dataset metadata as a dictionary.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset")
>>> dataset_dict = dataset.to_dict()
Source code in py_src/fusion/dataset.py
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
def to_dict(self) -> dict[str, Any]:
    """Convert the Dataset instance to a dictionary.

    Returns:
        dict[str, Any]: Dataset metadata as a dictionary, with keys converted
            from snake_case attribute names to camelCase.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset")
        >>> dataset_dict = dataset.to_dict()

    """
    # Private (underscore-prefixed) attributes are excluded from the payload.
    return {
        snake_to_camel(field): value
        for field, value in vars(self).items()
        if not field.startswith("_")
    }

update(catalog=None, client=None, return_resp_obj=False)

Updates a dataset via API from dataset object.

Parameters:

Name Type Description Default
catalog str | None

A catalog identifier. Defaults to "common".

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
>>> dataset.title = "My Updated Dataset"
>>> dataset.update(catalog="my_catalog")
Source code in py_src/fusion/dataset.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
def update(
    self,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Updates a dataset via API from dataset object.

    Args:
        catalog (str | None, optional): A catalog identifier. Defaults to "common".
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> dataset = fusion.dataset("my_dataset").from_catalog(catalog="my_catalog")
        >>> dataset.title = "My Updated Dataset"
        >>> dataset.update(catalog="my_catalog")

    """
    fusion_client = self._use_client(client)
    catalog = fusion_client._use_catalog(catalog)

    # Backfill audit dates with today's date when they were never set.
    today = pd.Timestamp("today").strftime("%Y-%m-%d")
    if not self.created_date:
        self.created_date = today
    if not self.modified_date:
        self.modified_date = today

    payload = self.to_dict()

    url = f"{fusion_client.root_url}catalogs/{catalog}/datasets/{self.identifier}"
    resp: requests.Response = fusion_client.session.put(url, json=payload)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

Fusion Product class and functions.

Attribute dataclass

Fusion Attribute class for managing attributes metadata in a Fusion catalog.

Attributes:

Name Type Description
identifier str

The unique identifier for the attribute.

index int

Attribute index.

data_type str | Types

Datatype of attribute. Defaults to "String".

title str

Attribute title. If not provided, defaults to identifier.

description str

Attribute description. If not provided, defaults to identifier.

is_dataset_key bool

Flag for primary keys. Defaults to False.

source str | None

Name of data vendor which provided the data. Defaults to None.

source_field_id str | None

Original identifier of attribute, if attribute has been renamed. If not provided, defaults to identifier.

is_internal_dataset_key bool | None

Flag for internal primary keys. Defaults to None.

is_externally_visible bool | None

Flag for externally visible attributes. Defaults to True.

unit Any | None

Unit of attribute. Defaults to None.

multiplier float

Multiplier for unit. Defaults to 1.0.

is_propagation_eligible bool | None

Flag for propagation eligibility. Defaults to None.

is_metric bool | None

Flag for attributes that are metrics. Defaults to None.

available_from str | None

Date from which the attribute is available. Defaults to None.

deprecated_from str | None

Date from which the attribute is deprecated. Defaults to None.

term str

Term. Defaults to "bizterm1".

dataset int | None

Dataset. Defaults to None.

attribute_type str | None

Attribute type. Defaults to None.

application_id str | dict[str, str] | None

The seal ID of the dataset in string format, or a dictionary containing 'id' and 'type'. Used for catalog attributes. Defaults to None.

publisher str | None

Publisher of the attribute. Used for catalog attributes. Defaults to None.

is_key_data_element bool | None

Flag for key data elements. Used for attributes registered to Reports. Defaults to None.

_client Fusion | None

Fusion client object. Defaults to None.

Source code in py_src/fusion/attributes.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
@dataclass
class Attribute(metaclass=CamelCaseMeta):
    """Fusion Attribute class for managing attributes metadata in a Fusion catalog.

    Attributes:
        identifier (str): The unique identifier for the attribute.
        index (int): Attribute index.
        data_type (str | Types, optional): Datatype of attribute. Defaults to "String".
        title (str, optional): Attribute title. If not provided, defaults to identifier.
        description (str, optional): Attribute description. If not provided, defaults to identifier.
        is_dataset_key (bool, optional): Flag for primary keys. Defaults to False.
        source (str | None, optional): Name of data vendor which provided the data. Defaults to None.
        source_field_id (str | None, optional): Original identifier of attribute, if attribute has been renamed.
            If not provided, defaults to identifier.
        is_internal_dataset_key (bool | None, optional): Flag for internal primary keys. Defaults to None.
        is_externally_visible (bool | None, optional): Flag for externally visible attributes. Defaults to True.
        unit (Any | None, optional): Unit of attribute. Defaults to None.
        multiplier (float, optional): Multiplier for unit. Defaults to 1.0.
        is_propagation_eligible (bool | None, optional): Flag for propagation eligibility. Defaults to None.
        is_metric (bool | None, optional): Flag for attributes that are metrics. Defaults to None.
        available_from (str | None, optional): Date from which the attribute is available. Defaults to None.
        deprecated_from (str | None, optional): Date from which the attribute is deprecated. Defaults to None.
        term (str, optional): Term. Defaults to "bizterm1".
        dataset (int | None, optional): Dataset. Defaults to None.
        attribute_type (str | None, optional): Attribute type. Defaults to None.
        application_id (str | dict[str, str] | None, optional): The seal ID of the dataset in string format,
            or a dictionary containing 'id' and 'type'. Used for catalog attributes. Defaults to None.
        publisher (str | None, optional): Publisher of the attribute. Used for catalog attributes. Defaults to None.
        is_key_data_element (bool | None, optional): Flag for key data elements. Used for attributes registered to
            Reports. Defaults to None.
        _client (Fusion | None, optional): Fusion client object. Defaults to None.

    """

    identifier: str
    index: int
    data_type: Types = cast(Types, Types.String)
    title: str = ""
    description: str = ""
    is_dataset_key: bool = False
    source: str | None = None
    source_field_id: str | None = None
    is_internal_dataset_key: bool | None = None
    is_externally_visible: bool | None = True
    unit: Any | None = None
    multiplier: float = 1.0
    is_propagation_eligible: bool | None = None
    is_metric: bool | None = None
    available_from: str | None = None
    deprecated_from: str | None = None
    term: str = "bizterm1"
    dataset: int | None = None
    attribute_type: str | None = None
    application_id: str | dict[str, str] | None = None
    publisher: str | None = None
    is_key_data_element: bool | None = None

    # Excluded from __init__/repr/compare; set through the `client` property instead.
    _client: Fusion | None = field(init=False, repr=False, compare=False, default=None)

    def __str__(self: Attribute) -> str:
        """Format string representation."""
        attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
        # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
        return "Attribute(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

    def __repr__(self: Attribute) -> str:
        """Format object representation."""
        s = ", ".join(f"{getattr(self, f.name)!r}" for f in fields(self) if not f.name.startswith("_"))
        return "(" + s + ")"

    def __post_init__(self: Attribute) -> None:
        """Format Attribute metadata fields after object initialization.

        NOTE: order matters — `title` must be normalized before `description`,
        which falls back to `title` when empty.
        """
        self.is_dataset_key = make_bool(self.is_dataset_key)
        self.identifier = tidy_string(self.identifier).lower().replace(" ", "_")
        self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
        self.description = tidy_string(self.description) if self.description and self.description != "" else self.title
        self.source_field_id = (
            tidy_string(self.source_field_id).lower().replace(" ", "_") if self.source_field_id else self.identifier
        )
        self.available_from = convert_date_format(self.available_from) if self.available_from else None
        self.deprecated_from = convert_date_format(self.deprecated_from) if self.deprecated_from else None
        # Accept "String", "Types.String", or a Types member; keep only the member name.
        self.data_type = Types[str(self.data_type).strip().rsplit(".", maxsplit=1)[-1].title()]
        # A bare string application_id is promoted to the SEAL dict shape the API expects.
        self.application_id = (
            {"id": str(self.application_id), "type": "Application (SEAL)"}
            if isinstance(self.application_id, str)
            else self.application_id
        )

    def __getattr__(self, name: str) -> Any:
        # Redirect attribute access to the snake_case version
        snake_name = camel_to_snake(name)
        if snake_name in self.__dict__:
            return self.__dict__[snake_name]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name: str, value: Any) -> None:
        if name == "client":
            # Use the property setter for client
            object.__setattr__(self, name, value)
        else:
            # Normalize camelCase writes to the canonical snake_case storage key.
            snake_name = camel_to_snake(name)
            self.__dict__[snake_name] = value

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Attribute. Set automatically, if the Attribute is instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attribute.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client: an explicit argument wins over the stored one.

        Raises:
            ValueError: If neither an explicit nor a stored client is available.
        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    @classmethod
    def _from_series(
        cls: type[Attribute],
        series: pd.Series[Any],
    ) -> Attribute:
        """Instantiate an Attribute object from a pandas Series.

        Args:
            series (pd.Series[Any]): Attribute metadata as a pandas Series.

        Returns:
            Attribute: Attribute object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)._from_series(series)

        """
        # Normalize index labels ("Data Type", "data_type", ...) and map NaN to None.
        series = series.rename(lambda x: x.replace(" ", "").replace("_", "").lower()).replace(
            to_replace=np.nan, value=None
        )
        data_type = series.get("datatype", cast(Types, Types.String))
        data_type = series.get("type", cast(Types, Types.String)) if data_type is None else data_type
        source = series.get("source", None)
        source = source.strip() if isinstance(source, str) else source

        is_propagation_eligible = series.get("ispropagationeligible", None)
        is_propagation_eligible = (
            make_bool(is_propagation_eligible) if is_propagation_eligible is not None else is_propagation_eligible
        )
        is_metric = series.get("ismetric", None)
        is_metric = make_bool(is_metric) if is_metric is not None else is_metric
        is_internal_dataset_key = series.get("isinternaldatasetkey", None)
        is_internal_dataset_key = (
            make_bool(is_internal_dataset_key) if is_internal_dataset_key is not None else is_internal_dataset_key
        )
        is_externally_visible = series.get("isexternallyvisible", True)
        is_externally_visible = (
            make_bool(is_externally_visible) if is_externally_visible is not None else is_externally_visible
        )

        return cls(
            identifier=series.get("identifier", "").strip(),
            index=series.get("index", -1),
            data_type=Types[data_type.strip().split(".")[-1].title()],
            title=series.get("title", ""),
            description=series.get("description", ""),
            is_dataset_key=series.get("isdatasetkey", False),
            source=source,
            source_field_id=series.get("sourcefieldid", None),
            is_internal_dataset_key=is_internal_dataset_key,
            is_externally_visible=is_externally_visible,
            unit=series.get("unit", None),
            multiplier=series.get("multiplier", 1.0),
            is_propagation_eligible=is_propagation_eligible,
            is_metric=is_metric,
            available_from=series.get("availablefrom", None),
            deprecated_from=series.get("deprecatedfrom", None),
            term=series.get("term", "bizterm1"),
            dataset=series.get("dataset", None),
            attribute_type=series.get("attributetype", None),
        )

    @classmethod
    def _from_dict(cls: type[Attribute], data: dict[str, Any]) -> Attribute:
        """Instantiate an Attribute object from a dictionary.

        Args:
            data (dict[str, Any]): Attribute metadata as a dictionary.

        Returns:
            Attribute: Attribute object.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)._from_dict(data)

        """
        keys = [f.name for f in fields(cls)]
        data = {camel_to_snake(k): v for k, v in data.items()}
        # Drop unknown keys; map NaN to None.
        # NOTE(review): pd.isna raises on list-like values — presumably inputs here are scalars; verify.
        data = {k: (None if pd.isna(v) else v) for k, v in data.items() if k in keys}
        if "data_type" in data:
            data["data_type"] = Types[data["data_type"].strip().rsplit(".", maxsplit=1)[-1].title()]
        return cls(**data)

    def from_object(
        self,
        attribute_source: Attribute | dict[str, Any] | pd.Series[Any],
    ) -> Attribute:
        """Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

        Args:
            attribute_source (Attribute | dict[str, Any] | pd.Series[Any]): Attribute metadata source.

        Raises:
            ValueError: If the object provided is not an Attribute object, dictionary or pandas Series.

        Returns:
            Attribute: Attribute object.

        Examples:

            Instantiating an Attribute from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

            Instantiating an Attribute from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)

        """
        if isinstance(attribute_source, Attribute):
            attribute = attribute_source
        elif isinstance(attribute_source, dict):
            attribute = self._from_dict(attribute_source)
        elif isinstance(attribute_source, pd.Series):
            attribute = self._from_series(attribute_source)
        else:
            raise ValueError(f"Could not resolve the object provided: {attribute_source}")
        attribute.client = self._client
        return attribute

    def to_dict(self: Attribute) -> dict[str, Any]:
        """Convert object to dictionary.

        Returns:
            dict[str, Any]: Attribute metadata as a dictionary.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attribute_dict = attribute.to_dict()

        """
        result = {snake_to_camel(k): v for k, v in self.__dict__.items() if not k.startswith("_")}
        result["unit"] = str(self.unit) if self.unit is not None else None
        result["dataType"] = self.data_type.name
        # The API field is named isCriticalDataElement, not isKeyDataElement.
        if "isKeyDataElement" in result:
            result["isCriticalDataElement"] = result.pop("isKeyDataElement")
        return result

    def create(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload a new attribute to a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            Individually, from scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")
            >>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

            Individually, from a dictionary:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = {
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ...    }
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

            Individually, from a pandas Series:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> series = pd.Series({
            ...     "identifier": "my_attribute",
            ...     "index": 0,
            ...     "data_type": "String",
            ...     "title": "My Attribute",
            ...     "description": "My attribute description"
            ... })
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
            >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        data = self.to_dict()
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
        resp = client.session.put(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def delete(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Delete an Attribute from a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
        resp = client.session.delete(url)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    def set_lineage(
        self,
        attributes: list[Attribute],
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow
            can be mapped to existing registered input data flow attributes. This supports the case in which the
            generating application and receiving application store their attributes with different names.

        Args:
            attributes (str): List of Attribute objects to establish upstream lineage from.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            catalog (str, optional): A catalog identifier. Defaults to None.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Raises:
            ValueError: If this attribute or any target attribute lacks an application_id.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
            >>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
            >>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
            >>> attrs = [my_attr1, my_attr2]
            >>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)

        if self.application_id is None:
            raise ValueError("The 'application_id' attribute is required for setting lineage.")
        target_attributes = []
        for attribute in attributes:
            if attribute.application_id is None:
                # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
                raise ValueError("The 'application_id' attribute is required for setting lineage.")
            attr_dict = {
                "catalog": catalog,
                "attribute": attribute.identifier,
                "applicationId": attribute.application_id,
            }
            target_attributes.append(attr_dict)

        url = f"{client.root_url}catalogs/{catalog}/attributes/lineage"
        data = [
            {
                "source": {
                    "catalog": catalog,
                    "attribute": self.identifier,
                    "applicationId": self.application_id,
                },
                "targets": target_attributes,
            }
        ]
        resp = client.session.post(url, json=data)
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__post_init__()

Format Attribute metadata fields after object initialization.

Source code in py_src/fusion/attributes.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def __post_init__(self: Attribute) -> None:
    """Format Attribute metadata fields after object initialization.

    NOTE: statement order is load-bearing — `title` must be normalized before
    `description`, which falls back to `title` when empty.
    """
    # Normalize the primary-key flag to a real bool.
    self.is_dataset_key = make_bool(self.is_dataset_key)
    # Canonical identifier form: trimmed, lowercased, underscores for spaces.
    self.identifier = tidy_string(self.identifier).lower().replace(" ", "_")
    # Empty title falls back to a title-cased version of the identifier.
    self.title = tidy_string(self.title) if self.title != "" else self.identifier.replace("_", " ").title()
    self.description = tidy_string(self.description) if self.description and self.description != "" else self.title
    # source_field_id defaults to the (already normalized) identifier.
    self.source_field_id = (
        tidy_string(self.source_field_id).lower().replace(" ", "_") if self.source_field_id else self.identifier
    )
    self.available_from = convert_date_format(self.available_from) if self.available_from else None
    self.deprecated_from = convert_date_format(self.deprecated_from) if self.deprecated_from else None
    # Accepts "String", "Types.String", or a Types member; keeps only the member name.
    self.data_type = Types[str(self.data_type).strip().rsplit(".", maxsplit=1)[-1].title()]
    # A bare string application_id is promoted to the SEAL dict shape the API expects.
    self.application_id = (
        {"id": str(self.application_id), "type": "Application (SEAL)"}
        if isinstance(self.application_id, str)
        else self.application_id
    )

__repr__()

Format object representation.

Source code in py_src/fusion/attributes.py
92
93
94
95
def __repr__(self: Attribute) -> str:
    """Format object representation: a parenthesized, comma-separated tuple of public field reprs."""
    public_names = (f.name for f in fields(self) if not f.name.startswith("_"))
    body = ", ".join(repr(getattr(self, name)) for name in public_names)
    return "(" + body + ")"

__str__()

Format string representation.

Source code in py_src/fusion/attributes.py
87
88
89
90
def __str__(self: Attribute) -> str:
    """Format string representation: one public field per line inside Attribute(...)."""
    # Only public (non-underscore) fields are shown.
    attrs = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
    # Plain literal: the former "f" prefix carried no placeholders (ruff F541).
    return "Attribute(\n" + ",\n ".join(f"{k}={v!r}" for k, v in attrs.items()) + "\n)"

create(dataset, catalog=None, client=None, return_resp_obj=False)

Upload a new attribute to a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

Individually, from scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")
>>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")

Individually, from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = {
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
...    }
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")

Individually, from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> series = pd.Series({
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... })
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
>>> attribute.create(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
def create(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload a new attribute to a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        Individually, from scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute0 = fusion.attribute(identifier="my_attribute_0", index=0)
        >>> attribute0.create(dataset="my_dataset", catalog="my_catalog")
        >>> attribute1 = fusion.attribute(identifier="my_attribute_1", index=1)
        >>> attribute1.create(dataset="my_dataset", catalog="my_catalog")

        Individually, from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = {
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ...    }
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)
        >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

        Individually, from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> series = pd.Series({
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... })
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
        >>> attribute.create(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    data = self.to_dict()
    # PUT against the attribute identifier upserts the attribute metadata.
    url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
    resp = client.session.put(url, json=data)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(dataset, catalog=None, client=None, return_resp_obj=False)

Delete an Attribute from a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
def delete(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Delete an Attribute from a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> fusion.attribute(identifier="my_attribute", index=0).delete(dataset="my_dataset", catalog="my_catalog")

    """
    fusion_client = self._use_client(client)
    catalog = fusion_client._use_catalog(catalog)
    attribute_url = (
        f"{fusion_client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{self.identifier}"
    )
    response = fusion_client.session.delete(attribute_url)
    requests_raise_for_status(response)
    if return_resp_obj:
        return response
    return None

from_object(attribute_source)

Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

Parameters:

Name Type Description Default
attribute_source Attribute | dict[str, Any] | Series[Any]

Attribute metadata source.

required

Raises:

Type Description
ValueError

If the object provided is not an Attribute object, dictionary or pandas Series.

Returns:

Name Type Description
Attribute Attribute

Attribute object.

Instantiating an Attribute from a dictionary:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = {
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... }
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

Instantiating an Attribute from a pandas Series:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> series = pd.Series({
...     "identifier": "my_attribute",
...     "index": 0,
...     "data_type": "String",
...     "title": "My Attribute",
...     "description": "My attribute description"
... })
>>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)
Source code in py_src/fusion/attributes.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
def from_object(
    self,
    attribute_source: Attribute | dict[str, Any] | pd.Series[Any],
) -> Attribute:
    """Instantiate an Attribute from an Attribute object, dictionary or pandas Series.

    Args:
        attribute_source (Attribute | dict[str, Any] | pd.Series[Any]): Attribute metadata source.

    Raises:
        ValueError: If the object provided is not an Attribute object, dictionary or pandas Series.

    Returns:
        Attribute: Attribute object.

    Examples:

        Instantiating an Attribute from a dictionary:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = {
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... }
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(data)

        Instantiating an Attribute from a pandas Series:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> series = pd.Series({
        ...     "identifier": "my_attribute",
        ...     "index": 0,
        ...     "data_type": "String",
        ...     "title": "My Attribute",
        ...     "description": "My attribute description"
        ... })
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0).from_object(series)

    """
    if isinstance(attribute_source, Attribute):
        attribute = attribute_source
    elif isinstance(attribute_source, dict):
        attribute = self._from_dict(attribute_source)
    elif isinstance(attribute_source, pd.Series):
        attribute = self._from_series(attribute_source)
    else:
        raise ValueError(f"Could not resolve the object provided: {attribute_source}")
    # Propagate this instance's client so the resolved attribute can make API calls.
    attribute.client = self._client
    return attribute

set_lineage(attributes, catalog=None, client=None, return_resp_obj=False)

Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow can be mapped to existing registered input data flow attributes. This supports the case in which the generating application and receiving application store their attributes with different names.

Parameters:

Name Type Description Default
attributes list[Attribute]

List of Attribute objects to establish upstream lineage from.

required
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
>>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
>>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
>>> attrs = [my_attr1, my_attr2]
>>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")
Source code in py_src/fusion/attributes.py
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
def set_lineage(
    self,
    attributes: list[Attribute],
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Map an attribute to existing registered attributes in a Fusion catalog. Attributes from an output data flow
        can be mapped to existing registered input data flow attributes. This supports the case in which the
        generating application and receiving application store their attributes with different names.

    Args:
        attributes (list[Attribute]): List of Attribute objects to establish upstream lineage from.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Raises:
        ValueError: If this attribute, or any target attribute, has no application_id set.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> my_attr1 = fusion.attribute(identifier="my_attribute1", index=0, application_id="12345")
        >>> my_attr2 = fusion.attribute(identifier="my_attribute2", index=0, application_id="12345")
        >>> my_attr3 = fusion.attribute(identifier="my_attribute3", index=0, application_id="12345")
        >>> attrs = [my_attr1, my_attr2]
        >>> my_attr3.set_lineage(attributes=attrs, catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)

    # Both the source (self) and every target attribute must be registered
    # against an application for lineage to be established.
    if self.application_id is None:
        raise ValueError("The 'application_id' attribute is required for setting lineage.")
    target_attributes = []
    for attribute in attributes:
        if attribute.application_id is None:
            raise ValueError("The 'application_id' attribute is required for setting lineage.")
        target_attributes.append(
            {
                "catalog": catalog,
                "attribute": attribute.identifier,
                "applicationId": attribute.application_id,
            }
        )

    url = f"{client.root_url}catalogs/{catalog}/attributes/lineage"
    data = [
        {
            "source": {
                "catalog": catalog,
                "attribute": self.identifier,
                "applicationId": self.application_id,
            },
            "targets": target_attributes,
        }
    ]
    resp = client.session.post(url, json=data)
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

to_dict()

Convert object to dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Attribute metadata as a dictionary.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attribute_dict = attribute.to_dict()
Source code in py_src/fusion/attributes.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def to_dict(self: Attribute) -> dict[str, Any]:
    """Convert object to dictionary.

    Returns:
        dict[str, Any]: Attribute metadata as a dictionary.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attribute_dict = attribute.to_dict()

    """
    # Public (non-underscore) fields are exported with camelCase keys.
    result: dict[str, Any] = {}
    for key, value in self.__dict__.items():
        if key.startswith("_"):
            continue
        result[snake_to_camel(key)] = value
    result["unit"] = None if self.unit is None else str(self.unit)
    result["dataType"] = self.data_type.name
    # The API expects 'isCriticalDataElement' rather than 'isKeyDataElement'.
    if "isKeyDataElement" in result:
        result["isCriticalDataElement"] = result.pop("isKeyDataElement")
    return result

Attributes dataclass

Class representing a collection of Attribute instances for managing attribute metadata in a Fusion catalog.

Attributes:

Name Type Description
attributes list[Attribute]

List of Attribute instances.

_client Fusion | None

Fusion client object.

Source code in py_src/fusion/attributes.py
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
@dataclass
class Attributes:
    """Class representing a collection of Attribute instances for managing attribute metadata in a Fusion catalog.

    Attributes:
        attributes (list[Attribute]): List of Attribute instances.
        _client (Fusion | None): Fusion client object.

    """

    attributes: list[Attribute] = field(default_factory=list)

    _client: Fusion | None = None

    def __str__(self) -> str:
        """String representation of the Attributes collection."""
        return (
            "[\n" + ",\n ".join(f"{attr!r}" for attr in self.attributes) + "\n]" if self.attributes else "[]"
        )

    def __repr__(self) -> str:
        """Object representation of the Attributes collection."""
        return self.__str__()

    @property
    def client(self) -> Fusion | None:
        """Return the client."""
        return self._client

    @client.setter
    def client(self, client: Fusion | None) -> None:
        """Set the client for the Attributes collection. Set automatically, if the collection is
            instantiated from a Fusion object.

        Args:
            client (Any): Fusion client object.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes()
            >>> attributes.client = fusion

        """
        self._client = client

    def _use_client(self, client: Fusion | None) -> Fusion:
        """Determine client. Prefer the explicit argument, fall back to the instance's client.

        Raises:
            ValueError: If neither an explicit client nor an instance client is available.
        """
        res = self._client if client is None else client
        if res is None:
            raise ValueError("A Fusion client object is required.")
        return res

    def add_attribute(self, attribute: Attribute) -> None:
        """Add an Attribute instance to the collection.

        Args:
            attribute (Attribute): Attribute instance to add.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes()
            >>> attributes.add_attribute(attribute)

        """
        self.attributes.append(attribute)

    def remove_attribute(self, identifier: str) -> bool:
        """Remove an Attribute instance from the collection by identifier.

        Args:
            identifier (str): Identifier of the Attribute to remove.

        Returns:
            bool: True if the Attribute was removed, False otherwise.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.remove_attribute("my_attribute")

        """
        for attr in self.attributes:
            if attr.identifier == identifier:
                self.attributes.remove(attr)
                return True
        return False

    def get_attribute(self, identifier: str) -> Attribute | None:
        """Get an Attribute instance from the collection by identifier.

        Args:
            identifier (str): Identifier of the Attribute to retrieve.

        Returns:
            Attribute | None: The Attribute instance if found, None otherwise.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> retrieved_attribute = attributes.get_attribute("my_attribute")

        """
        for attr in self.attributes:
            if attr.identifier == identifier:
                return attr
        return None

    def to_dict(self) -> dict[str, list[dict[str, Any]]]:
        """Convert the collection of Attribute instances to a list of dictionaries.

        Returns:
            dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes_dict = attributes.to_dict()

        """
        dict_out = {"attributes": [attr.to_dict() for attr in self.attributes]}
        return dict_out

    @classmethod
    def _from_dict_list(cls: type[Attributes], data: list[dict[str, Any]]) -> Attributes:
        """Create an Attributes instance from a list of dictionaries.

        Args:
            data (list[dict[str, Any]]): List of dictionaries representing Attribute instances.

        Returns:
            Attributes: Attributes instance.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes()._from_dict_list(data)

        """
        attributes = [Attribute._from_dict(attr_data) for attr_data in data]
        return Attributes(attributes=attributes)

    @classmethod
    def _from_dataframe(cls: type[Attributes], data: pd.DataFrame) -> Attributes:
        """Create an Attributes instance from a pandas DataFrame.

        Args:
            data (pd.DataFrame): DataFrame representing Attribute instances.

        Returns:
            Attributes: Attributes instance.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes()._from_dataframe(data)

        """
        # NaN cells would otherwise propagate into Attribute fields; normalise to None.
        data = data.replace(to_replace=np.nan, value=None)
        data = data.reset_index() if "index" not in data.columns else data
        attributes = [Attribute._from_series(series) for _, series in data.iterrows()]
        return Attributes(attributes=attributes)

    def from_object(
        self,
        attributes_source: list[Attribute] | list[dict[str, Any]] | pd.DataFrame,
    ) -> Attributes:
        """Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

        Args:
            attributes_source (list[Attribute] | list[dict[str, Any]] | pd.DataFrame): Attributes metadata source.

        Raises:
            ValueError: If the object provided is not a list of Attribute objects, a list of dictionaries
                or a pandas DataFrame.

        Returns:
            Attributes: Attributes object.

        Examples:

            Instantiating Attributes from a list of dictionaries:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes().from_object(data)

            Instantiating Attributes from a pandas DataFrame:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes().from_object(data)

        """
        if isinstance(attributes_source, list):
            if all(isinstance(attr, Attribute) for attr in attributes_source):
                attributes = Attributes(cast(list[Attribute], attributes_source))
            elif all(isinstance(attr, dict) for attr in attributes_source):
                attributes = Attributes._from_dict_list(cast(list[dict[str, Any]], attributes_source))
            else:
                # A list of mixed element types previously fell through with
                # 'attributes' unbound, raising UnboundLocalError further down;
                # raise the intended error explicitly instead.
                raise ValueError(f"Could not resolve the object provided: {attributes_source}")
        elif isinstance(attributes_source, pd.DataFrame):
            attributes = Attributes._from_dataframe(attributes_source)
        else:
            raise ValueError(f"Could not resolve the object provided: {attributes_source}")
        attributes.client = self._client
        return attributes

    def to_dataframe(self) -> pd.DataFrame:
        """Convert the collection of Attribute instances to a pandas DataFrame.

        Returns:
            pd.DataFrame: DataFrame representing the collection of Attribute instances. If the
                collection is empty, a single example attribute row is returned as a template.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes_df = attributes.to_dataframe()

        """
        # Use a local placeholder for an empty collection rather than mutating
        # self.attributes (the previous behaviour), so converting an empty
        # collection does not alter its state.
        attrs = self.attributes if self.attributes else [Attribute(identifier="example_attribute", index=0)]
        data = [attr.to_dict() for attr in attrs]
        return pd.DataFrame(data)

    def from_catalog(self, dataset: str, catalog: str | None = None, client: Fusion | None = None) -> Attributes:
        """Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

        Args:
            dataset (str): The dataset identifier.
            catalog (str | None, optional): The catalog identifier. Defaults to None.
            client (Fusion | None, optional): Fusion session. Defaults to None.
                If instantiated from a Fusion object, then the client is set automatically.

        Returns:
            Attributes: An instance of the Attributes class with the attributes from the catalog.

        Examples:
            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
        response = client.session.get(url)
        requests_raise_for_status(response)
        list_attributes = response.json()["resources"]
        # Preserve the declared column ordering of the dataset.
        list_attributes = sorted(list_attributes, key=lambda x: x["index"])

        self.attributes = [Attribute._from_dict(attr_data) for attr_data in list_attributes]
        return self

    def create(
        self,
        dataset: str | None = None,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> requests.Response | None:
        """Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided,
            attributes are registered to the catalog.

        Args:
            dataset (str, optional): Dataset identifier. If None, attributes are registered
                to the catalog instead. Defaults to None.
            catalog (str, optional): A catalog identifier. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
                If instantiated from a Fusion object, then the client is set automatically.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Raises:
            ValueError: If registering to a catalog and any attribute lacks a publisher or application_id.

        Returns:
            requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

        Examples:

            From scratch:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From a list of dictionaries:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> data = [
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ]
            >>> attributes = fusion.attributes().from_object(data)
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From a pandas DataFrame:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> import pandas as pd
            >>> data = pd.DataFrame([
            ...     {
            ...         "identifier": "my_attribute",
            ...         "index": 0,
            ...         "data_type": "String",
            ...         "title": "My Attribute",
            ...         "description": "My attribute description"
            ...     }
            ... ])
            >>> attributes = fusion.attributes().from_object(data)
            >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

            From existing dataset's attributes in a Fusion catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
            >>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

            Register attributes to a catalog:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
            >>> attributes = fusion.attributes(attributes=[attribute])
            >>> attributes.create(catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        data = self.to_dict()
        if dataset:
            # Dataset-scoped upload: PUT replaces the dataset's attribute set.
            url = f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes"
            resp = client.session.put(url, json=data)
            requests_raise_for_status(resp)
            return resp if return_resp_obj else None
        else:
            # Catalog-level registration requires publisher and application_id on every attribute.
            for attr in self.attributes:
                if attr.publisher is None:
                    raise ValueError("The 'publisher' attribute is required for catalog attributes.")
                if attr.application_id is None:
                    raise ValueError("The 'application_id' attribute is required for catalog attributes.")
            url = f"{client.root_url}catalogs/{catalog}/attributes"
            data_ = data.get("attributes", None)
            resp = client.session.post(url, json=data_)
            requests_raise_for_status(resp)
            return resp if return_resp_obj else None

    def delete(
        self,
        dataset: str,
        catalog: str | None = None,
        client: Fusion | None = None,
        return_resp_obj: bool = False,
    ) -> list[requests.Response] | None:
        """Delete the Attributes from a Fusion catalog.

        Args:
            dataset (str): Dataset identifier.
            catalog (str, optional): A catalog identifier. Defaults to None.
            client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

        Returns:
            list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True,
                otherwise None.

        Examples:

            >>> from fusion import Fusion
            >>> fusion = Fusion()
            >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
            >>> attributes.delete(dataset="my_dataset", catalog="my_catalog")

        """
        client = self._use_client(client)
        catalog = client._use_catalog(catalog)
        responses = []
        # One DELETE per attribute; the first failure raises and aborts the rest.
        for attr in self.attributes:
            resp = client.session.delete(
                f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{attr.identifier}"
            )
            requests_raise_for_status(resp)
            responses.append(resp)

        return responses if return_resp_obj else None

client: Fusion | None property writable

Return the client.

__repr__()

Object representation of the Attributes collection.

Source code in py_src/fusion/attributes.py
524
525
526
def __repr__(self) -> str:
    """Object representation of the Attributes collection."""
    # Delegate to __str__ so both representations stay in sync.
    return str(self)

__str__()

String representation of the Attributes collection.

Source code in py_src/fusion/attributes.py
518
519
520
521
522
def __str__(self) -> str:
    """String representation of the Attributes collection."""
    if not self.attributes:
        return "[]"
    body = ",\n ".join(repr(attr) for attr in self.attributes)
    return "[\n" + body + "\n]"

add_attribute(attribute)

Add an Attribute instance to the collection.

Parameters:

Name Type Description Default
attribute Attribute

Attribute instance to add.

required
>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes()
>>> attributes.add_attribute(attribute)
Source code in py_src/fusion/attributes.py
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
def add_attribute(self, attribute: Attribute) -> None:
    """Add an Attribute instance to the collection.

    Args:
        attribute (Attribute): Attribute instance to add.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes()
        >>> attributes.add_attribute(attribute)

    """
    # Insert at the end in place (equivalent to append; keeps list identity).
    self.attributes.insert(len(self.attributes), attribute)

create(dataset=None, catalog=None, client=None, return_resp_obj=False)

Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided, attributes are registered to the catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

None
client Fusion

A Fusion client object. Defaults to the instance's _client. If instantiated from a Fusion object, then the client is set automatically.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
Response | None

requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

From scratch:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From a list of dictionaries:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = [
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ]
>>> attributes = fusion.attributes().from_dict_list(data)
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From a pandas DataFrame:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> data = pd.DataFrame([
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ])
>>> attributes = fusion.attributes().from_dataframe(data)
>>> attributes.create(dataset="my_dataset", catalog="my_catalog")

From existing dataset's attributes in a Fusion catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
>>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

Register attributes to a catalog:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.create(catalog="my_catalog")
Source code in py_src/fusion/attributes.py
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
def create(
    self,
    dataset: str | None = None,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> requests.Response | None:
    """Upload the Attributes to a dataset in a Fusion catalog. If no dataset is provided,
        attributes are registered to the catalog.

    Args:
        dataset (str, optional): Dataset identifier. When omitted, the attributes are
            registered directly to the catalog instead of a dataset.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
            If instantiated from a Fusion object, then the client is set automatically.
        return_resp_obj (bool, optional): If True then return the response object. Defaults to False.

    Returns:
        requests.Response | None: The response object from the API call if return_resp_obj is True, otherwise None.

    Examples:

        From scratch:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From a list of dictionaries:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = [
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ]
        >>> attributes = fusion.attributes().from_dict_list(data)
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From a pandas DataFrame:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> data = pd.DataFrame([
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ])
        >>> attributes = fusion.attributes().from_dataframe(data)
        >>> attributes.create(dataset="my_dataset", catalog="my_catalog")

        From existing dataset's attributes in a Fusion catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
        >>> attributes.create(dataset="my_new_dataset", catalog="my_catalog")

        Register attributes to a catalog:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0, application_id="123", publisher="JPM")
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.create(catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    payload = self.to_dict()

    if dataset:
        # Dataset-level upload: PUT the full {"attributes": [...]} payload.
        resp = client.session.put(
            f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes", json=payload
        )
        requests_raise_for_status(resp)
        return resp if return_resp_obj else None

    # Catalog-level registration requires publisher and application_id on every attribute.
    for attr in self.attributes:
        if attr.publisher is None:
            raise ValueError("The 'publisher' attribute is required for catalog attributes.")
        if attr.application_id is None:
            raise ValueError("The 'application_id' attribute is required for catalog attributes.")
    resp = client.session.post(
        f"{client.root_url}catalogs/{catalog}/attributes", json=payload.get("attributes", None)
    )
    requests_raise_for_status(resp)
    return resp if return_resp_obj else None

delete(dataset, catalog=None, client=None, return_resp_obj=False)

Delete the Attributes from a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

Dataset identifier.

required
client Fusion

A Fusion client object. Defaults to the instance's _client.

None
catalog str

A catalog identifier. Defaults to None.

None
return_resp_obj bool

If True then return the response object. Defaults to False.

False

Returns:

Type Description
list[Response] | None

list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True, otherwise None.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
>>> attributes.delete(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
def delete(
    self,
    dataset: str,
    catalog: str | None = None,
    client: Fusion | None = None,
    return_resp_obj: bool = False,
) -> list[requests.Response] | None:
    """Delete the Attributes from a Fusion catalog.

    Args:
        dataset (str): Dataset identifier.
        catalog (str, optional): A catalog identifier. Defaults to None.
        client (Fusion, optional): A Fusion client object. Defaults to the instance's _client.
        return_resp_obj (bool, optional): If True then return the response objects. Defaults to False.

    Returns:
        list[requests.Response] | None: List of response objects from the API calls if return_resp_obj is True,
            otherwise None.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
        >>> attributes.delete(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    results: list[requests.Response] = []
    for attribute in self.attributes:
        # Issue one DELETE per attribute; stop immediately on an HTTP error.
        response = client.session.delete(
            f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes/{attribute.identifier}"
        )
        requests_raise_for_status(response)
        results.append(response)
    return results if return_resp_obj else None

from_catalog(dataset, catalog=None, client=None)

Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

Parameters:

Name Type Description Default
dataset str

The dataset identifier.

required
catalog str | None

The catalog identifier. Defaults to None.

None
client Fusion | None

Fusion session. Defaults to None. If instantiated from a Fusion object, then the client is set automatically.

None

Returns:

Name Type Description
Attributes Attributes

An instance of the Attributes class with the attributes from the catalog.

Examples:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")
Source code in py_src/fusion/attributes.py
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
def from_catalog(self, dataset: str, catalog: str | None = None, client: Fusion | None = None) -> Attributes:
    """Instantiate an Attributes object from a dataset's attributes in a Fusion catalog.

    Args:
        dataset (str): The dataset identifier.
        catalog (str | None, optional): The catalog identifier. Defaults to None.
        client (Fusion | None, optional): Fusion session. Defaults to None.
            If instantiated from a Fusion object, then the client is set automatically.

    Returns:
        Attributes: An instance of the Attributes class with the attributes from the catalog.

    Examples:
        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attributes = fusion.attributes().from_catalog(dataset="my_dataset", catalog="my_catalog")

    """
    client = self._use_client(client)
    catalog = client._use_catalog(catalog)
    response = client.session.get(f"{client.root_url}catalogs/{catalog}/datasets/{dataset}/attributes")
    requests_raise_for_status(response)
    # Keep attributes ordered by their declared index before rebuilding the collection.
    resources = sorted(response.json()["resources"], key=lambda entry: entry["index"])
    self.attributes = [Attribute._from_dict(entry) for entry in resources]
    return self

from_object(attributes_source)

Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

Parameters:

Name Type Description Default
attributes_source list[Attribute] | list[dict[str, Any]] | DataFrame

Attributes metadata source.

required

Raises:

Type Description
ValueError

If the object provided is not a list of Attribute objects, dictionaries or pandas DataFrame.

Returns:

Name Type Description
Attributes Attributes

Attributes object.

Instantiating Attributes from a list of dictionaries:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> data = [
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ]
>>> attributes = fusion.attributes().from_object(data)

Instantiating Attributes from a pandas DataFrame:

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> data = pd.DataFrame([
...     {
...         "identifier": "my_attribute",
...         "index": 0,
...         "data_type": "String",
...         "title": "My Attribute",
...         "description": "My attribute description"
...     }
... ])
>>> attributes = fusion.attributes().from_object(data)
Source code in py_src/fusion/attributes.py
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
def from_object(
    self,
    attributes_source: list[Attribute] | list[dict[str, Any]] | pd.DataFrame,
) -> Attributes:
    """Instantiate an Attributes object from a list of Attribute objects, dictionaries or pandas DataFrame.

    Args:
        attributes_source (list[Attribute] | list[dict[str, Any]] | pd.DataFrame): Attributes metadata source.

    Raises:
        ValueError: If the object provided is not a list of Attribute objects, a list of
            dictionaries, or a pandas DataFrame.

    Returns:
        Attributes: Attributes object.

    Examples:

        Instantiating Attributes from a list of dictionaries:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> data = [
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ]
        >>> attributes = fusion.attributes().from_object(data)

        Instantiating Attributes from a pandas DataFrame:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> data = pd.DataFrame([
        ...     {
        ...         "identifier": "my_attribute",
        ...         "index": 0,
        ...         "data_type": "String",
        ...         "title": "My Attribute",
        ...         "description": "My attribute description"
        ...     }
        ... ])
        >>> attributes = fusion.attributes().from_object(data)

    """
    if isinstance(attributes_source, list):
        if all(isinstance(attr, Attribute) for attr in attributes_source):
            attributes = Attributes(cast(list[Attribute], attributes_source))
        elif all(isinstance(attr, dict) for attr in attributes_source):
            attributes = Attributes._from_dict_list(cast(list[dict[str, Any]], attributes_source))
        else:
            # A mixed list (or a list of unsupported element types) previously matched
            # neither branch and crashed with UnboundLocalError; fail explicitly instead.
            raise ValueError(f"Could not resolve the object provided: {attributes_source}")
    elif isinstance(attributes_source, pd.DataFrame):
        attributes = Attributes._from_dataframe(attributes_source)
    else:
        raise ValueError(f"Could not resolve the object provided: {attributes_source}")
    attributes.client = self._client
    return attributes

get_attribute(identifier)

Get an Attribute instance from the collection by identifier.

Parameters:

Name Type Description Default
identifier str

Identifier of the Attribute to retrieve.

required

Returns:

Type Description
Attribute | None

Attribute | None: The Attribute instance if found, None otherwise.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> retrieved_attribute = attributes.get_attribute("my_attribute")
Source code in py_src/fusion/attributes.py
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
def get_attribute(self, identifier: str) -> Attribute | None:
    """Get an Attribute instance from the collection by identifier.

    Args:
        identifier (str): Identifier of the Attribute to retrieve.

    Returns:
        Attribute | None: The Attribute instance if found, None otherwise.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> retrieved_attribute = attributes.get_attribute("my_attribute")

    """
    # First match wins; None when no attribute carries the identifier.
    return next((attr for attr in self.attributes if attr.identifier == identifier), None)

remove_attribute(identifier)

Remove an Attribute instance from the collection by identifier.

Parameters:

Name Type Description Default
identifier str

Identifier of the Attribute to remove.

required

Returns:

Name Type Description
bool bool

True if the Attribute was removed, False otherwise.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes.remove_attribute("my_attribute")
Source code in py_src/fusion/attributes.py
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
def remove_attribute(self, identifier: str) -> bool:
    """Remove an Attribute instance from the collection by identifier.

    Args:
        identifier (str): Identifier of the Attribute to remove.

    Returns:
        bool: True if the Attribute was removed, False otherwise.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes.remove_attribute("my_attribute")

    """
    # Delete the first attribute whose identifier matches, if any.
    for position, attr in enumerate(self.attributes):
        if attr.identifier == identifier:
            del self.attributes[position]
            return True
    return False

to_dataframe()

Convert the collection of Attribute instances to a pandas DataFrame.

Returns:

Type Description
DataFrame

pd.DataFrame: DataFrame representing the collection of Attribute instances.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> import pandas as pd
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes_df = attributes.to_dataframe()
Source code in py_src/fusion/attributes.py
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
def to_dataframe(self) -> pd.DataFrame:
    """Convert the collection of Attribute instances to a pandas DataFrame.

    Returns:
        pd.DataFrame: DataFrame representing the collection of Attribute instances.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> import pandas as pd
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes_df = attributes.to_dataframe()

    """
    # An empty collection is seeded with a single placeholder attribute so the
    # resulting frame always carries the expected columns.
    if not self.attributes:
        self.attributes = [Attribute(identifier="example_attribute", index=0)]
    rows = [attr.to_dict() for attr in self.attributes]
    return pd.DataFrame(rows)

to_dict()

Convert the collection of Attribute instances to a list of dictionaries.

Returns:

Type Description
dict[str, list[dict[str, Any]]]

dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

>>> from fusion import Fusion
>>> fusion = Fusion()
>>> attribute = fusion.attribute(identifier="my_attribute", index=0)
>>> attributes = fusion.attributes(attributes=[attribute])
>>> attributes_dict = attributes.to_dict()
Source code in py_src/fusion/attributes.py
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
def to_dict(self) -> dict[str, list[dict[str, Any]]]:
    """Convert the collection of Attribute instances to a list of dictionaries.

    Returns:
        dict[str, list[dict[str, Any]]]: Collection of Attribute instances as a dictionary.

    Examples:

        >>> from fusion import Fusion
        >>> fusion = Fusion()
        >>> attribute = fusion.attribute(identifier="my_attribute", index=0)
        >>> attributes = fusion.attributes(attributes=[attribute])
        >>> attributes_dict = attributes.to_dict()

    """
    return {"attributes": [attr.to_dict() for attr in self.attributes]}