Skip to content

Store

KoshStore

Bases: object

Kosh store, relies on Sina

Source code in kosh/store.py
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
class KoshStore(object):
    """Kosh store, relies on Sina"""

    def __init__(self, db_uri=None, username=os.environ.get("USER", "default"), db=None,
                 keyspace=None, sync=True, dataset_record_type="dataset",
                 verbose=True, use_lock_file=False, kosh_reserved_record_types=[],
                 read_only=False, allow_connection_pooling=False, ensemble_predicate=None,
                 execution_options={}, connection_type='write', lock_strategy=None,
                 session_sort_by=None, session_sort_by_descending=False, verbose_attributes=False):
        """__init__ initialize a new Sina-based store

        Connects to the underlying Sina store, reads the store info record,
        resolves the current user and registers the built-in loaders as well
        as any loader saved in the store.

        :param db_uri: uri to sql file or list of cassandra node ips, defaults to None
        :type db_uri: str or list, optional
        :param username: user name, defaults to the ``USER`` environment variable
                         (or "default" if unset), evaluated when the class is defined
        :type username: str
        :param db: type of database, defaults to 'sql', can be 'cass'
        :type db: str, optional
        :param keyspace: cassandra keyspace, defaults to None
        :type keyspace: str, optional
        :param sync: Does Kosh sync automatically to the db (True) or on demand (False).
                     Forced to False when the store is read only.
        :type sync: bool
        :param dataset_record_type: Kosh element type is "dataset" this can change the default
                                    This is useful if reading in other sina db
        :type dataset_record_type: str
        :param verbose: verbose message
        :type verbose: bool
        :param use_lock_file: If you receive sqlite threads access error, turning this on might help.
                              Turned off (with a warning) when `db_uri` contains "://".
        :type use_lock_file: bool
        :param kosh_reserved_record_types: list of record types that are reserved for Kosh internal
                                           use, will be ignored when searching store
        :type kosh_reserved_record_types: list of strings
        :param read_only: Can we modify the database source?
                          Forced to True when `connection_type` is 'read'.
        :type read_only: bool
        :param allow_connection_pooling: Allow "pooling" behavior that recycles connections,
                                        which may prevent them from closing fully on .close().
                                        Only used for the sql backend.
        :type allow_connection_pooling: bool
        :param ensemble_predicate: The predicate for the relationship to an ensemble
        :type ensemble_predicate: str
        :param execution_options: execution options keyword to pass to sina store record_dao at creation time
        :type execution_options: dict
        :param connection_type: whether to create a write, append, or read only store;
                                must be one of 'write', 'append' or 'read'
        :type connection_type: str
        :param lock_strategy: The LockStrategy to apply to class methods,
                              defaults to ``lock_strategies.NoLocking()``
        :type lock_strategy: LockStrategy
        :param session_sort_by: A default sort_by key for find operations for this store
        :type session_sort_by: str
        :param session_sort_by_descending: A default sort_by_descending flag for this store
        :type session_sort_by_descending: bool
        :param verbose_attributes: Should we print attribute values of over length 30?
        :type verbose_attributes: bool
        :raises ValueError: `connection_type` is not one of 'write', 'append', 'read',
                            or `db_uri` is neither an existing path nor a "://" uri (sql backend)
        :raises SystemError: more than one user match.
        :raises ConnectionRefusedError: presumably propagated by Sina when cassandra cannot
                                        be reached; it is not raised directly by this method
        """
        # NOTE(review): `kosh_reserved_record_types=[]` and `execution_options={}` are
        # mutable defaults shared across calls. The list is only concatenated (never
        # mutated) below; whether Sina mutates `execution_options` is not visible here.

        # NOTE(review): registered before anything else, so `_clean_up` can run at
        # interpreter exit even when the rest of this constructor fails part-way.
        atexit.register(self._clean_up)
        from sina import connect as sina_connect
        from sina.utils import Negation
        if lock_strategy is None:
            lock_strategy = lock_strategies.NoLocking()
        self.lock_strategy = lock_strategy
        self.session_sort_by = session_sort_by
        self.session_sort_by_descending = session_sort_by_descending
        self.verbose_attributes = verbose_attributes

        with lock_strategy:
            # Lock files only make sense for file-based databases
            if db_uri is not None and "://" in db_uri and use_lock_file:
                warnings.warn("You cannot use `lock_file` on non file-based db, turning it off", ResourceWarning)
                use_lock_file = False
            if connection_type not in ['write', 'append', 'read']:
                raise ValueError(
                        "`connection_type` must be one of ['write', 'append', 'read']")
            self.__connection_type__ = connection_type
            self.use_lock_file = use_lock_file
            # A 'read' connection implies read only, and read only implies no auto-sync
            if connection_type == 'read':
                read_only = True
            if read_only:
                sync = False
            self.__read_only__ = read_only
            self.__sync__ = sync
            # Records fetched (and records deleted) while not auto-syncing
            self.__sync__dict__ = {}
            self.__sync__deleted__ = {}

            if db is None:
                db = 'sql'
            self._dataset_record_type = dataset_record_type
            self.db_uri = db_uri
            if db == "sql":
                if db_uri is not None and not os.path.exists(db_uri):
                    # Not a file on disk: only acceptable if it looks like a database url
                    if "://" in db_uri:
                        self.__sina_store = sina_connect(
                            db_uri, read_only=read_only, connection_type=connection_type)
                        self.__sina_store._record_dao.session.connection(execution_options=execution_options)
                    else:
                        raise ValueError(
                            "Kosh store could not be found at: {}".format(db_uri))
                else:
                    # Existing file, or no uri at all (db_pth is None in that case)
                    self.lock()
                    if db_uri is not None:
                        db_pth = os.path.abspath(db_uri)
                    else:
                        db_pth = None
                    self.__sina_store = sina_connect(database=db_pth,
                                                     read_only=read_only,
                                                     database_type=db,
                                                     allow_connection_pooling=allow_connection_pooling,
                                                     connection_type=connection_type)
                    self.__sina_store._record_dao.session.connection(execution_options=execution_options)
                    self.unlock()
            elif db.lower().startswith('cass'):
                self.__sina_store = sina_connect(
                    keyspace=keyspace, database=db_uri,
                    database_type='cassandra', read_only=read_only,
                    allow_connection_pooling=allow_connection_pooling,
                    connection_type=connection_type)

            # The store info record carries the record type names used by this store
            rec = update_store_and_get_info_record(self.__sina_store.records, ensemble_predicate)

            self._cached_features_ = rec["user_defined"]["cached_features"]
            self._sources_type = rec["data"]["sources_type"]["value"]
            self._users_type = rec["data"]["users_type"]["value"]
            self._groups_type = rec["data"]["groups_type"]["value"]
            self._loaders_type = rec["data"]["loaders_type"]["value"]
            self._ensembles_type = rec["data"]["ensembles_type"]["value"]
            self._ensemble_predicate = rec["data"]["ensemble_predicate"]["value"]
            # Concatenation builds a new list, the caller's list is left untouched
            self._kosh_reserved_record_types = kosh_reserved_record_types + \
                rec["data"]["reserved_types"]["value"]
            # Everything that is not reserved, plus the sources type
            kosh_reserved = list(self._kosh_reserved_record_types)
            kosh_reserved.remove(self._sources_type)
            self._kosh_datasets_and_sources = Negation(kosh_reserved)

            # Associated stores
            self._associated_stores_ = []
            if "associated_stores" in rec["data"]:
                for store in rec["data"]["associated_stores"]["value"]:
                    try:
                        self._associated_stores_.append(kosh.connect(store, read_only=read_only, sync=sync,
                                                                     connection_type=connection_type))
                    except Exception:  # most likely a sqlalchemy.exc.DatabaseError
                        warnings.warn("Could not open associated store: {}".format(store))

            self.lock()
            self.__dict__["__record_handler__"] = self.__sina_store.records
            self.unlock()
            # The current user is the record that is both of the users type
            # and carries the requested username
            users_filter = list(self.__record_handler__.find_with_type(
                self._users_type, ids_only=True))
            names_filter = list(
                self.__record_handler__.find_with_data(
                    username=username))
            inter_recs = set(users_filter).intersection(set(names_filter))
            if len(inter_recs) == 0:
                # raise ConnectionRefusedError("Unknown user: {}".format(username))
                # For now just letting anyone log in as anonymous
                warnings.warn("Unknown user, you will be logged as anonymous user")
                # NOTE(review): this result is not used afterwards in this method
                names_filter = self.__record_handler__.find_with_data(
                    username="anonymous")
                self.__user_id__ = "anonymous"
            elif len(inter_recs) > 1:
                raise SystemError("Internal error, more than one user match!")
            else:
                self.__user_id__ = list(inter_recs)[0]
            self.storeLoader = KoshSinaLoader
            self.loaders = {}
            self.add_loader(self.storeLoader)

            # self.storeLoader = KoshLoader
            self.add_loader(KoshFileLoader)
            self.add_loader(JSONLoader)
            self.add_loader(NpyLoader)
            self.add_loader(NumpyTxtLoader)
            self.add_loader(PandasLoader)
            # The loaders below are optional: failing to add one only warns (if verbose)
            try:
                self.add_loader(HDF5Loader)
            except Exception:  # no h5py module?
                if verbose:
                    warnings.warn("Could not add hdf5 loader, check if you have h5py installed."
                                  " Pass verbose=False when creating the store to turn this message off")
            try:
                self.add_loader(PILLoader)
            except Exception:  # no PIL?
                if verbose:
                    warnings.warn("Could not add pil loader, check if you have pillow installed."
                                  " Pass verbose=False when creating the store to turn this message off")
            self.add_loader(PGMLoader)
            try:
                self.add_loader(UltraLoader)
            except Exception:  # no pydv?
                if verbose:
                    warnings.warn("Could not add ultra files loader, check if you have pydv installed."
                                  " Pass verbose=False when creating the store to turn this message off")
            try:
                self.add_loader(SidreMeshBlueprintFieldLoader)
            except Exception:  # no conduit?
                if verbose:
                    warnings.warn("Could not add sidre blueprint meshfield loader, check if you have conduit installed."
                                  " Pass verbose=False when creating the store to turn this message off")
            # Now let's add the loaders in the store
            # NOTE(review): loader code is deserialized from store records; presumably
            # pickle-like, so only open stores you trust -- confirm against kosh_pickler
            for rec_loader in self.__record_handler__.find_with_type("koshloader"):
                loader = kosh_pickler.loads(rec_loader.data["code"]["value"])
                self.add_loader(loader)
            # Store connected with no database argument, passed to _update_record
            # when deleting records while not auto-syncing
            self._added_unsync_mem_store = sina_connect(None)
            self._cached_loaders = collections.OrderedDict()
            # self._cached_features = "cached_features"

            # Ok we need to map the KoshFileLoader back to whatever the source_type is
            # in this store
            ks = self.loaders["file"]
            for loader in ks:
                loader.types[self._sources_type] = loader.types["file"]
            # Both keys share the very same list object
            self.loaders[self._sources_type] = self.loaders["file"]

    @lock_strategies.lock_method
    def __enter__(self):
        """Enter a ``with`` block; the store itself is the context value.

        :return: this store
        :rtype: KoshStore
        """
        return self

    @lock_strategies.lock_method
    def add_loader(self, loader, save=False):
        """Register a loader with the store, optionally persisting it

        The loader is listed under every type found in ``loader.types``;
        a type that already lists this loader is left as is.

        :param loader: The Kosh loader you want to add to the store
        :type loader: KoshLoader
        :param save: Do we also save it in store for later re-use
        :type save: bool

        :return: None
        :rtype: None
        """
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        # A new loader invalidates whatever loaders were cached so far
        self._cached_loaders = collections.OrderedDict()

        for loader_type in loader.types:
            registered = self.loaders.setdefault(loader_type, [])
            if loader not in registered:
                registered.append(loader)

        if save:
            # Persist the loader in the store as well
            self.save_loader(loader)

    @lock_strategies.lock_method
    def delete_loader(self, loader, permanently=False):
        """Removes a loader from the store and possibly from its db

        The loader is taken out of every type list it is registered under
        (types left without any loader are dropped). When it was actually
        registered, cached features whose key starts with the signature of a
        cached instance of this loader are purged. The loader cache is
        always reset.

        :param loader: The Kosh loader you want to remove
        :type loader: KoshLoader
        :param permanently: Do we also remove it from the db if saved there?
        :type permanently: bool

        :return: None
        :rtype: None
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        # Snapshot the cached loader entries before the cache is reset below
        cached_entries = list(self._cached_loaders.values())
        removed = False
        empty_types = []
        for loader_type in loader.types:
            if loader_type in self.loaders and loader in self.loaders[loader_type]:
                self.loaders[loader_type].remove(loader)
                removed = True
                if len(self.loaders[loader_type]) == 0:
                    # ok we took them all out
                    empty_types.append(loader_type)

        if removed:
            # Purge once (not once per type). Each stale key is collected a
            # single time, so a key matched by several cached loader
            # instances cannot be deleted twice (which raised KeyError).
            signatures = [cached.signature for cached, _ in cached_entries
                          if isinstance(cached, loader)]
            features_cache = self._cached_features_
            stale_keys = [key for key in features_cache
                          if any(key.startswith(signature) for signature in signatures)]
            for key in stale_keys:
                del features_cache[key]
            self._cached_features_ = features_cache

        for loader_type in empty_types:
            del self.loaders[loader_type]
        # We deleted a loader we need to clear the cache
        self._cached_loaders = collections.OrderedDict()

        if permanently:  # Remove it from saved in db as well
            pickled = kosh_pickler.dumps(loader)
            rec = next(self.find(types="koshloader", code=pickled, ids_only=True), None)
            if rec is not None:
                _update_record(rec, self, delete=True)

    @lock_strategies.lock_method
    def remove_loader(self, loader):
        """Removes a loader from the store and its db

        Shortcut for ``delete_loader(loader, permanently=True)``.

        :param loader: The Kosh loader you want to remove from the store
        :type loader: KoshLoader

        :return: None
        :rtype: None
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        self.delete_loader(loader, permanently=True)

    @lock_strategies.lock_method
    def lock(self):
        """Attempts to lock the store, helps when many concurrent requests are made to the store

        Takes an exclusive, non-blocking lock on ``<db_uri>.handle`` and
        retries every 0.1 second until the lock is obtained. The open
        handle is kept in ``self.lock_file``.

        Does nothing when lock files are disabled or when ``db_uri`` is not
        a file path (None, a list of nodes, or a "://" uri).

        :raises OSError: the lock file itself cannot be opened
        """
        uri = self.db_uri
        if not self.use_lock_file or not isinstance(uri, str) or "://" in uri:
            return
        lock_path = uri + ".handle"
        while True:
            # Failing to open the lock file is not lock contention: let it
            # propagate rather than retrying forever
            handle = open(lock_path, "w")
            try:
                fcntl.lockf(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
            except OSError:
                # Somebody else holds the lock: release this handle
                # (do not leak one per attempt) and try again shortly
                handle.close()
                time.sleep(0.1)
            else:
                self.lock_file = handle
                return

    @lock_strategies.lock_method
    def unlock(self):
        """Unlocks the store so other can access it

        Releases the lock held on ``self.lock_file``, closes the handle and
        removes the lock file from disk.

        Does nothing when lock files are disabled or when ``db_uri`` is not
        a file path (None, a list of nodes, or a "://" uri).
        """
        uri = self.db_uri
        if not self.use_lock_file or not isinstance(uri, str) or "://" in uri:
            return
        fcntl.lockf(self.lock_file, fcntl.LOCK_UN)
        self.lock_file.close()
        # Wrapping this in a try/except
        # In case concurrency by same user
        # already removed the file
        try:
            os.remove(self.lock_file.name)
        except OSError:
            pass

    @lock_strategies.lock_method
    def _clean_up(self):
        """Close the store and remove its lock file, if any

        Registered with ``atexit`` by ``__init__``. The lock file is
        cleaned up even when closing the store raises; that error still
        propagates afterwards.
        """
        try:
            self.close()
        finally:
            # This may run on a store whose construction failed part-way,
            # so do not assume every attribute exists
            uri = getattr(self, "db_uri", None)
            lock_file = getattr(self, "lock_file", None)
            lockable = (getattr(self, "use_lock_file", False)
                        and isinstance(uri, str) and "://" not in uri)
            if lockable and lock_file is not None:
                name = lock_file.name
                lock_file.close()
                # Same as unlock(): somebody else may already have removed
                # the file, so do not check-then-remove
                try:
                    os.remove(name)
                except OSError:
                    pass

    @lock_strategies.lock_method
    def get_sina_store(self):
        """Returns the sina store object

        :return: the Sina store this Kosh store was connected to in ``__init__``
        """
        return self.__sina_store

    @lock_strategies.lock_method
    def get_sina_records(self):
        """Returns sina store's records

        :return: the ``records`` attribute of the underlying Sina store
        """
        return self.__sina_store.records

    @lock_strategies.lock_method
    def close(self):
        """Save the cached features in the store, then close the sina store

        Saving the features is best effort: any error while doing so is
        ignored and the sina store is closed regardless.
        """
        try:
            # Merge this session's cached features into the ones kept in
            # the store info record
            records = self.get_sina_records()
            info_record = get_store_info_record(records)
            merged_features = get_store_info_record_attribute(records, "cached_features")
            merged_features.update(self._cached_features_)
            info_record["user_defined"]["cached_features"] = merged_features
            _update_record(info_record, self)
        except Exception:  # store is likely closed already
            pass
        self.__sina_store.close()
        gc.collect()

    @lock_strategies.lock_method
    def __exit__(self, exc_type, exc_value, exc_tb):
        """Close the store when leaving a ``with`` block.

        The exception details are not used and nothing is returned, so an
        exception raised inside the block is not suppressed.
        """
        self.close()

    @lock_strategies.lock_method
    def delete_all_contents(self, force=""):
        """
        Delete EVERYTHING in a datastore; this cannot be undone.

        Once Sina is done, the store info record is updated and the Kosh
        users are created again.

        :param force: This function is meant to raise a confirmation prompt. If you
                      want to use it in an automated script (and you're sure of
                      what you're doing), set this to "SKIP PROMPT".
        :type force: str
        :returns: whether the deletion happened.
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        sina_store = self.__sina_store
        deleted = sina_store.delete_all_contents(force=force)
        # Presumably puts back the records Kosh itself needs -- runs whatever
        # the outcome of the deletion was
        update_store_and_get_info_record(sina_store.records)
        create_kosh_users(sina_store.records)
        return deleted

    @lock_strategies.lock_method
    def save_loader(self, loader):
        """Store a loader in the store as a "koshloader" record
        Executed immediately even in async mode

        Nothing is written when a record holding the same pickled loader
        is already in the store.

        :param loader: Loader to save
        :type loader: KoshLoader
        """
        from sina.model import Record
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        pickled = kosh_pickler.dumps(loader)
        already_saved = next(self.find(types="koshloader", code=pickled, ids_only=True), None)
        if already_saved is None:
            # Not in the store yet: create a fresh record carrying the code
            loader_record = Record(id=uuid.uuid4().hex, type="koshloader",
                                   user_defined={'kosh_information': {}})
            loader_record.add_data("code", pickled)
            _update_record(loader_record, self)

    @lock_strategies.lock_method
    def get_record(self, Id):
        """Gets the sina record tied to an id

        When the store is not auto-syncing, a record already fetched in this
        session is returned as is. Otherwise the record is read from the
        record handler and remembered; when not auto-syncing, the freshly
        read record also has its ``*_last_modified`` entries removed from
        ``user_defined['kosh_information']`` (created empty if missing).

        :param Id: record id
        :type Id: str
        :return: sina record
        :rtype: sina.model.Record
        """
        autosync = self.__sync__
        if not autosync and Id in self.__sync__dict__:
            # Already fetched while working off-line
            return self.__sync__dict__[Id]

        record = self.__record_handler__.get(Id)
        self.__sync__dict__[Id] = record
        if autosync:
            return record

        # we are not autosyncing
        try:
            kosh_info = record["user_defined"]['kosh_information']
            stale_keys = [key for key in list(kosh_info.keys())
                          if key.endswith("_last_modified")]
        except KeyError:
            record["user_defined"]['kosh_information'] = {}
            stale_keys = []
        for key in stale_keys:
            del record["user_defined"]['kosh_information'][key]
        return record

    @lock_strategies.lock_method
    def delete(self, Id):
        """Delete a record from the store.
        Records whose type is not reserved by Kosh get all their
        associated data dissociated first.

        When the store is not auto-syncing, the record is also moved from
        the fetched records to the deleted ones and stamped with a
        ``deleted_time``.

        :param Id: unique Id or kosh_obj
        :type Id: str
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        if not isinstance(Id, six.string_types):
            # We were handed a kosh object rather than its id
            Id = Id.id
        record = self.get_record(Id)
        reserved = record.type in self._kosh_reserved_record_types
        if not reserved:
            kosh_obj = self.open(Id)
            # Let's dissociate to remove unused kosh objects as well
            for uri in list(record["files"].keys()):
                kosh_obj.dissociate(uri)

        if self.__sync__:
            _update_record(Id, self, delete=True)
            return

        _update_record(Id, self, self._added_unsync_mem_store, delete=True)
        if Id in self.__sync__dict__:
            del self.__sync__dict__[Id]
            self.__sync__deleted__[Id] = record
            record["user_defined"]['kosh_information']["deleted_time"] = time.time()

    @lock_strategies.lock_method
    def create_ensemble(self, name="Unnamed Ensemble", id=None, metadata={}, schema=None, **kargs):
        """Create a Kosh ensemble object

        Delegates to ``create`` with ``sina_type`` set to this store's
        ensembles type.

        :param name: name for the ensemble, defaults to "Unnamed Ensemble"
        :type name: str, optional
        :param id: unique Id, defaults to None which means use uuid4()
        :type id: str, optional
        :param metadata: dictionary of attribute/value pair for the ensemble, defaults to {}
        :type metadata: dict, optional
        :param schema: a KoshSchema object to validate datasets and when setting attributes
        :type schema: KoshSchema
        :param kargs: extra keyword arguments, forwarded to ``create``
        :type kargs: dict
        :raises RuntimeError: presumably raised by ``create`` when the id already exists
                              (not raised directly here) -- TODO confirm
        :return: KoshEnsemble
        :rtype: KoshEnsemble
        """
        # NOTE(review): `metadata={}` is a mutable default shared across calls; it is
        # only forwarded here, whether `create` mutates it is not visible from this method
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        return self.create(name=name, id=id, metadata=metadata, schema=schema, sina_type=self._ensembles_type, **kargs)

    @lock_strategies.lock_method
    def create(self, name="Unnamed Dataset", id=None,
               metadata={}, schema=None, sina_type=None, **kargs):
        """Create a new (possibly named) dataset or ensemble.

        The passed ``metadata`` is copied before being altered. A
        ``creation_date`` entry is preserved when it is a string matching
        ``"%Y-%m-%d %H:%M:%S.%f"``; otherwise (or when missing) it is set
        to the current time. A ``alias_feature`` metadata entry is stored
        pickled.

        :param name: name for the dataset, defaults to "Unnamed Dataset";
                     only used if ``metadata`` has no "name" entry
        :type name: str, optional
        :param id: unique Id, defaults to None which means use uuid4()
        :type id: str, optional
        :param metadata: dictionary of attribute/value pair for the dataset, defaults to {}
        :type metadata: dict, optional
        :param schema: a KoshSchema object to validate datasets and when setting attributes
        :type schema: KoshSchema
        :param sina_type: sina record type to create, defaults to None
                          which means the store's dataset record type
        :type sina_type: str
        :param kargs: extra keyword arguments (ignored, except for the
                      deprecated 'datasetId' which is used as ``id``)
        :type kargs: dict
        :raises ValueError: both 'id' and the deprecated 'datasetId' were passed
        :raises RuntimeError: Dataset already exists
        :return: KoshDataset (or KoshEnsemble for the ensemble record type)
        :rtype: KoshDataset
        """
        from sina.model import Record
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        if "datasetId" in kargs:
            if id is not None:
                raise ValueError(
                    "'datasetId' is deprecated in favor of 'id' which you already set here")
            warnings.warn(
                "'datasetId' has been deprecated in favor of 'id'",
                DeprecationWarning)
            id = kargs["datasetId"]

        if sina_type is None:
            sina_type = self._dataset_record_type
        if id is None:
            Id = uuid.uuid4().hex
        else:
            if id in self.__record_handler__.find_with_type(
                    sina_type, ids_only=True):
                raise RuntimeError(
                    "Dataset id {} already exists".format(id))
            Id = id

        # Work on a copy so the caller's dict (and the shared default) is never altered
        metadata = metadata.copy()
        metadata["creator"] = self.__user_id__
        if "creation_date" in metadata:  # if statement for self.import_dataset()
            try:
                # Correct format, keep the date we were given.
                # The format must be real strptime directives, a literal
                # placeholder string never matches and drops the date.
                datetime.strptime(metadata["creation_date"], "%Y-%m-%d %H:%M:%S.%f")
            except (TypeError, ValueError):
                metadata["creation_date"] = str(datetime.fromtimestamp(time.time()))
        else:
            metadata["creation_date"] = str(datetime.fromtimestamp(time.time()))

        if "name" not in metadata:
            metadata["name"] = name
        metadata["_associated_data_"] = None
        # Sina expects each data entry wrapped in a {'value': ...} dictionary
        for k in metadata:
            if k == 'alias_feature':
                metadata[k] = {'value':  kosh_pickler.dumps(metadata[k])}
            else:
                metadata[k] = {'value': metadata[k]}
        rec = Record(id=Id, type=sina_type, data=metadata, user_defined={'kosh_information': {}})
        if self.__sync__:
            _update_record(rec, self)
        else:
            self.__sync__dict__[Id] = rec
            _update_record(rec, self, self._added_unsync_mem_store)
        try:
            if sina_type == self._ensembles_type:
                out = KoshEnsemble(Id, store=self, schema=schema, record=rec)
            else:
                out = KoshDataset(Id, store=self, schema=schema, record=rec)
        except Exception:  # probably schema validation error
            # Roll back the record we just stored before reporting the error
            if self.__sync__:
                _update_record(Id, self, delete=True)
            else:
                del self.__sync__dict__[Id]
                _update_record(rec, self, self._added_unsync_mem_store, delete=True)
            raise
        return out

    @lock_strategies.lock_method
    def _find_loader(self, Id, verbose=False, requestorId=None, use_cache=True):
        """_find_loader returns a loader that can open Id

        Results for non-dataset objects are cached in ``self._cached_loaders``
        under the key ``(Id, requestorId)``; a cached entry is reused only
        if its loader still lists at least one feature.

        :param Id: Id of the object to load. An Id of the form
                   ``<record id>__uri__<uri>`` designates a file (uri)
                   stored in a pure sina record.
        :type Id: str
        :param verbose: verbose mode will show errors
        :type verbose: bool
        :param requestorId: The id of the dataset requesting data
        :type requestorId: str
        :param use_cache: do we use cached feature
        :type use_cache: bool
        :return: tuple of (Kosh loader object, sina record type),
                 or (None, None) if no loader is registered for this object
        :rtype: tuple
        """
        # Keep the full Id (including any "__uri__" suffix) for the cache key
        Id_original = str(Id)
        if verbose:
            print(f"Getting loader for Id: {Id}")
        if "__uri__" in Id:
            # Ok this is a pure sina file with mime_type
            Id, uri = Id.split("__uri__")
        else:
            uri = None
        if verbose:
            print("Finding loader for: {}".format(uri))
        if (Id_original, requestorId) in self._cached_loaders:
            # Cached entries are (loader, record type) tuples, hence the [0]
            try:
                feats = self._cached_loaders[Id_original, requestorId][0]._list_features(use_cache=use_cache)  # != []
            except Exception as err:
                feats = []
                if verbose:
                    print("Error opening {} with loader {}: {}".format(
                        uri, self._cached_loaders[Id_original, requestorId], err))
            # Only trust the cached loader if it can still list features,
            # otherwise fall through and look for a loader again
            if feats != []:
                return self._cached_loaders[Id_original, requestorId]
        record = self.get_record(Id)
        obj = self._load(Id)
        # uri not none means it is pure sina record with file and mime_type
        if (record["type"] not in self._kosh_reserved_record_types and uri is None)\
                or record["type"] in [self._ensembles_type, "__kosh_storeinfo__"]:
            # Not reserved means dataset
            # (this result is not stored in the loaders cache)
            return KoshSinaLoader(obj), record["type"]
        # Ok special type
        if uri is None:
            if "mime_type" in record["data"]:
                mime_type = record["data"]["mime_type"]["value"]
            else:
                mime_type = None
            mime_type_passed = mime_type
        else:  # Pure sina with file/mime_type
            mime_type = mime_type_passed = record["files"][uri]["mimetype"]
        if mime_type in self.loaders:
            # Probe each loader registered for this mime_type and stop at the
            # first one able to list features
            for ld in self.loaders[mime_type]:
                try:
                    feats = ld(obj, mime_type=mime_type_passed,
                               uri=uri)._list_features(use_cache=use_cache, verbose=verbose)
                except Exception as err:
                    # Something happened can't list features
                    feats = []
                    if verbose:
                        print("Error opening {} with loader {}: {}".format(obj.uri, ld, err))
                if feats != []:
                    break
            # NOTE: if no loader listed any feature, `ld` is the last loader
            # tried and is the one that gets cached and returned
            self._cached_loaders[Id_original, requestorId] = ld(
                obj, mime_type=mime_type_passed, uri=uri, requestorId=requestorId), record["type"]
            return self._cached_loaders[Id_original, requestorId]
        # sometime types have subtypes (e.g 'file') let's look if we
        # understand a subtype since we can't figure it out from mime_type
        if record["type"] in self.loaders:  # ok not a generic loader let's use it
            for ld in self.loaders[record["type"]]:
                try:
                    feats = ld(obj, mime_type=mime_type_passed,
                               uri=uri, requestorId=requestorId)._list_features(use_cache=use_cache)
                except Exception:
                    # Something happened can't list features
                    feats = []
                if feats != []:
                    break
            # Same as above: the last loader tried is used if none listed features
            self._cached_loaders[Id_original, requestorId] = ld(
                obj, mime_type=mime_type_passed, uri=uri, requestorId=requestorId), record["type"]
            return self._cached_loaders[Id_original, requestorId]
        # No loader registered for either the mime_type or the record type
        return None, None

    @lock_strategies.lock_method
    def open(self, Id, loader=None, requestorId=None, *args, **kargs):
        """Load an object of the store from its Id and run the ``open``
        function of its loader.

        :param Id: unique id of object to open. Can also be a Sina record.
        :type Id: str
        :param loader: loader to use, defaults to None which means pick for me
        :type loader: KoshLoader
        :param requestorId: The id of the dataset requesting data
        :type requestorId: str
        :return: whatever the loader's ``open`` returns for the extra
                 positional and keyword arguments
        """
        from sina.model import Record
        # A Sina record is accepted in place of its id
        target_id = Id.id if isinstance(Id, Record) else Id

        if loader is not None:
            # Explicit loader class: instantiate it on the loaded object
            opener = loader(self._load(target_id), requestorId=requestorId)
        else:
            opener, _record_type = self._find_loader(target_id, requestorId=requestorId)
        return opener.open(*args, **kargs)

    @lock_strategies.lock_method
    def _load(self, Id):
        """Return the Kosh object wrapping the record with this id.

        Records of the store's sources type are wrapped in a KoshSinaFile,
        every other record in a KoshSinaObject.

        :param Id: unique id in store
        :type Id: str
        :return: loaded object
        """
        record = self.get_record(Id)
        kosh_type = record["type"]
        wrapper = KoshSinaFile if kosh_type == self._sources_type else KoshSinaObject
        return wrapper(Id,
                       kosh_type=kosh_type,
                       record_handler=self.__record_handler__,
                       store=self,
                       record=record)

    @lock_strategies.lock_method
    def get(self, Id, feature, format=None, loader=None,
            transformers=[], requestorId=None, *args, **kargs):
        """get returns an associated source's data

        :param Id: Id of object to retrieve
        :type Id: str
        :param feature: feature to retrieve
        :type feature: str
        :param format: preferred format, defaults to None means pick for me
        :type format: str, optional
        :param loader: loader to use, defaults to None means pick for me
        :param transformers: A list of transformers to use after the data is loaded
        :type transformers: kosh.operator.KoshTransformer
        :param requestorId: The id of the dataset requesting data
        :type requestorId: str
        :return: data in requested format
        """
        if loader is None:
            loader, _ = self._find_loader(Id, requestorId=requestorId)
        else:
            loader = loader(self._load(Id), requestorId=requestorId)

        # Forward the caller's transformers; passing a fresh empty list here
        # would silently ignore them
        return loader.get(feature, format, transformers=transformers, *args, **kargs)

    @lock_strategies.lock_method
    def search(self, *atts, **keys):
        """
        Deprecated alias of `find`: emits a DeprecationWarning and
        forwards every argument to `find`.
        """
        deprecation_message = (
            "The 'search' function is deprecated and now called `find`.\n"
            "Please update your code to use `find` as `search` might disappear in the future"
        )
        warnings.warn(deprecation_message, DeprecationWarning)
        return self.find(*atts, **keys)

    @lock_strategies.lock_method
    def find_ensembles(self, *atts, **keys):
        """Find ensembles matching some metadata in the store.

        Same as `find` with the record types restricted to the store's
        ensemble type:
        positional arguments are the metadata names we are looking for e.g
        find_ensembles("attr1", "attr2")
        you can further restrict by specifying exact value for a metadata
        via key=value
        you can return ids only by using: ids_only=True
        range can be specified via: sina.utils.DataRange(min, max)

        :return: generator of matching ensembles in store
        :rtype: generator
        """
        ensemble_type = self._ensembles_type
        return self.find(*atts, types=ensemble_type, **keys)

    @lock_strategies.lock_method
    def find(self, *atts, **keys):
        """Find objects matching some metadata in the store
        and its associated stores.

        Positional arguments are the metadata names we are looking for e.g
        find("attr1", "attr2")
        you can further restrict by specifying exact value for a metadata
        via key=value
        you can return ids only by using: ids_only=True
        range can be specified via: sina.utils.DataRange(min, max)

        Sina records, Kosh datasets and generators of those can also be
        passed as positional arguments, their ids are added to "id_pool".

        "file_uri" is a reserved key that will return all records being associated
                   with the given "uri", e.g store.find(file_uri=uri)
        "types" let you search over specific sina record types only.
        "id_pool" will search based on id of Sina record or Kosh dataset. Can be a list.
        "sort_by" / "sort_by_descending" sort the results on that attribute,
                   they default to the store's session values.

        :param load_type: How the dataset is returned ('dataset' for Kosh Dataset,
            'record' for Sina Record, 'dictionary' for Dictionary).
            Used for faster load times, defaults to 'dataset'
        :type load_type: str, optional
        :return: generator of matching objects in store
        :rtype: generator
        """

        from sina.model import Record

        # Seed the pool of ids with whatever the caller passed explicitly
        if 'id_pool' not in keys:
            pool = []
        elif isinstance(keys['id_pool'], str):
            pool = [keys['id_pool']]
        else:
            pool = list(keys['id_pool'])

        keys.setdefault('load_type', 'dataset')
        sort_by = keys.pop("sort_by", self.session_sort_by)
        sort_by_descending = keys.pop("sort_by_descending", self.session_sort_by_descending)

        # Objects passed positionally are turned into ids to search for
        consumed = []
        for candidate in atts:
            if isinstance(candidate, (Record, kosh.dataset.KoshDataset)):  # Sina record or Kosh dataset by itself
                pool.append(candidate.id)
                consumed.append(candidate)
            elif isinstance(candidate, types.GeneratorType):  # Multiple records from Sina.find() or Kosh.find()
                pool.extend(element.id for element in candidate)
                consumed.append(candidate)

        atts = tuple(item for item in atts if item not in consumed)
        if pool or 'id_pool' in keys:  # Create key if doesn't exist
            keys['id_pool'] = list(set(pool))

        def find_generator(*atts, **keys):
            # This store first
            yield from self._find(*atts, **keys)

            # Then the associated stores, keeping track of the stores
            # already visited
            visited = [self.db_uri]
            if hasattr(self, "searched_stores"):
                self.searched_stores += list(visited)
            else:
                self.searched_stores = list(visited)
            for associated in self._associated_stores_:
                if not hasattr(associated, "searched_stores"):
                    associated.searched_stores = list(visited)
                elif associated.db_uri in associated.searched_stores:
                    continue
                else:
                    associated.searched_stores += list(visited)
                visited += [associated.db_uri, ]
                yield from associated.find(*atts, **keys)
            # cleanup searched store uris
            for touched in self._associated_stores_ + [self, ]:
                for uri in list(visited):
                    if uri in touched.searched_stores:
                        touched.searched_stores.remove(uri)

        if sort_by is None:
            yield from find_generator(*atts, **keys)
        else:
            # Sorting needs every result at hand
            ordered = sorted(find_generator(*atts, **keys),
                             key=lambda result: getattr(result, sort_by, -1),
                             reverse=sort_by_descending)
            yield from ordered

    @lock_strategies.lock_method
    def _find(self, *atts, **keys):
        """Find objects matching some metadata in this store
        (associated stores are not searched here).

        Arguments are the metadata name we are looking for e.g
        find("attr1", "attr2")
        you can further restrict by specifying exact value for a metadata
        via key=value
        you can return ids only by using: ids_only=True
        range can be specified via: sina.utils.DataRange(min, max)

        "file_uri" is a reserved key that will return all records being associated
                   with the given "uri", e.g store.find(file_uri=uri)
        "types" let you search over specific sina record types only.

        While querying, a synchronous store is temporarily switched to
        asynchronous mode; its mode and record cache are restored before
        any result is yielded, even if the query fails.

        :param load_type: How the dataset is returned ('dataset' for Kosh Dataset,
            'record' for Sina Record, 'dictionary' for Dictionary).
            Used for faster load times, defaults to 'dataset'
        :type load_type: str, optional
        :raises ValueError: incompatible keys are mixed (`kosh_type`/`types`,
            `file`/`file_uri`, `id`/`id_pool`) or `types` is of a wrong type
        :return: generator of matching objects in store
        :rtype: generator
        """
        from sina.utils import Negation, not_, exists
        mode = self.__sync__
        if mode:
            # we will not update any rec in here, turning off sync
            # it makes things much faster
            backup = self.__sync__dict__
            self.__sync__dict__ = {}
            self.synchronous()
        try:
            sina_kargs = {}
            ids_only = keys.pop("ids_only", False)
            load_type = keys.pop("load_type", "dataset")
            # We only want to search sina for ids not records
            sina_kargs["ids_only"] = True

            if "kosh_type" in keys and "types" in keys:
                raise ValueError(
                    "`kosh_type` has been replaced with `types` you cannot use both at same time")
            if "kosh_type" in keys:
                warnings.warn(
                    "`kosh_type` is being deprecated in favor of `types` and will not work in a future version",
                    DeprecationWarning)
                record_types = keys.pop("kosh_type")
            else:
                record_types = keys.pop("types", (None, None))  # can't use just None

            if isinstance(record_types, six.string_types):
                record_types = [record_types, ]
            if record_types not in [None, (None, None)] and not isinstance(
                    record_types, (list, tuple, Negation)):
                raise ValueError("`types` must be None, str, list or sina.utils.Negation")

            if 'file_uri' in keys and 'file' in keys:
                raise ValueError(
                    "`file` has been deprecated for `file_uri` but you cannot use both at same time")
            if 'file' in keys:
                file_uri = keys.pop("file")
            else:
                file_uri = keys.pop("file_uri", None)

            # Ok now let's look if the user wants to search for an id
            if "id" in keys:
                if "id_pool" in keys:
                    raise ValueError("you cannot use id and id_pool together")
                warnings.warn("When searching by id use id_pool")
                sina_kargs["id_pool"] = keys["id"]
                del keys["id"]
            else:
                sina_kargs["id_pool"] = keys.pop("id_pool", None)

            sina_kargs["file_uri"] = file_uri
            sina_kargs["query_order"] = keys.pop(
                "query_order", ("data", "file_uri", "types"))

            if record_types == (None, None):
                # No type requested: exclude Kosh reserved types, unless we
                # search by id in which case any type is fine
                record_types = not_(self._kosh_reserved_record_types)
                if sina_kargs["id_pool"] is not None:
                    record_types = None

            sina_kargs["types"] = record_types
            # The data dict for sina
            sina_data = keys.pop("data", {})
            if not isinstance(sina_data, dict):
                # `data` is not the reserved dict here but a regular attribute
                keys["data"] = sina_data
                sina_data = {}
            elif len(sina_data) != 0 and (len(keys) != 0 or len(atts) != 0):
                warnings.warn(
                    "It is not recommended to use the find function by mixing keys and the reserved key `data`")

            # Maybe user is trying to get an attribute data
            for att in atts:
                sina_data[att] = exists()

            sina_data.update(keys)
            sina_kargs["data"] = sina_data
            if isinstance(sina_kargs["id_pool"], six.string_types):
                sina_kargs["id_pool"] = [sina_kargs["id_pool"], ]
            # is it a blank search, e.g get me everything?
            get_all = sina_kargs.get("data", {}) == {} and \
                sina_kargs.get("file_uri", None) is None and \
                sina_kargs.get("id_pool", None) is None and \
                sina_kargs.get("types", []) == []

            if get_all:
                match = set(self.__record_handler__.get_all(ids_only=True))
            else:
                match = set(self.__record_handler__.find(**sina_kargs))

            if not self.__sync__:
                # We need to check our in memory records as well
                if get_all:
                    match_mem = set(
                        self._added_unsync_mem_store.records.get_all(
                            ids_only=True))
                else:
                    match_mem = set(self._added_unsync_mem_store.records.find(**sina_kargs))
                match = match.union(match_mem)
        finally:
            if mode:
                # we need to restore sync mode, also when the query failed,
                # or the store would be left asynchronous with an empty cache
                self.__sync__dict__ = backup
                self.synchronous()

        for rec_id in match:
            if ids_only:
                yield rec_id
            else:
                if load_type == 'dataset':
                    try:
                        yield self.open(rec_id)
                    except Exception:
                        # No loader could open it, fall back to the raw Kosh object
                        yield self._load(rec_id)
                elif load_type == 'record':
                    yield self._load(rec_id)
                elif load_type == 'dictionary':
                    yield self.get_record(rec_id).__dict__['raw']

    @lock_strategies.lock_method
    def check_sync_conflicts(self, keys):
        """Checks if there will be sync conflicts.

        For each key the record in the db is compared with the local
        (unsynced) one. Three kinds of conflicts are reported:

        * "attribute": an attribute was changed both in the db and locally
        * "associated": an associated uri was changed both in the db and locally
        * "delete": the record was deleted on one side and modified (or
          read from the store) on the other side

        :param keys: keys of objects to syncs (id/type)
        :type keys: list
        :return: dictionary of objects ids and their failing attributes,
                 each entry also has the "last_check_from_db" and "type" keys
        :rtype: dict
        """
        timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
        conflicts = {}

        def last_read_timestamp(record):
            """Time stamp of the record's 'last_modified_date', -1 if it has none."""
            # Dataset created locally on unsynced store do not have
            # this attribute
            try:
                stamp = record["data"]['last_modified_date']['value']
            except KeyError:
                return -1
            return datetime.strptime(stamp, timestamp_format).timestamp()

        def register(key, conf, last_local, conflict_type):
            """Merge a conflict description into the conflicts of that key."""
            if key not in conflicts:
                conflicts[key] = conf
            else:
                conflicts[key].update(conf)
            conflicts[key]["last_check_from_db"] = last_local
            conflicts[key]["type"] = conflict_type

        # First pass to make sure we have no conflict
        for key in keys:
            try:
                db_record = self.__record_handler__.get(key)
                try:
                    local_record = self.__sync__dict__[key]
                    last_local = last_read_timestamp(local_record)
                    for att in db_record["user_defined"]['kosh_information']:
                        conflict = False
                        if att[-14:] != "_last_modified":
                            continue
                        last_db = db_record["user_defined"]['kosh_information'][att]
                        if last_db > last_local and att in local_record["user_defined"]['kosh_information']:
                            # Touched on both sides since we last read it
                            if att[-27:-14] == "___associated":
                                # ok dealing with associated data
                                uri = att[:-27]
                                # deleted locally
                                if uri not in local_record["files"]:
                                    if uri in db_record["files"]:
                                        conflict = True
                                else:
                                    if uri not in db_record["files"]:
                                        conflict = True
                                    elif db_record["files"][uri]["mimetype"] != local_record["files"][uri]["mimetype"]:
                                        conflict = True
                                if conflict:
                                    conf = {uri: (db_record["files"].get(uri, {"mimetype": "deleted"})["mimetype"],
                                                  last_db,
                                                  local_record["files"].get(uri, {"mimetype": "deleted"})["mimetype"],
                                                  local_record["user_defined"]['kosh_information'][att])}
                                    register(key, conf, last_local, "associated")
                            else:
                                name = att[:-14]
                                # deleted locally
                                if name not in local_record["data"]:
                                    if name in db_record["data"]:
                                        conflict = True
                                else:
                                    if name not in db_record["data"]:
                                        conflict = True
                                    elif db_record["data"][name]["value"] != local_record["data"][name]["value"]:
                                        conflict = True
                                if conflict:
                                    conf = {name: (db_record["data"].get(name, {"value": "deleted"})["value"],
                                                   last_db,
                                                   local_record["data"].get(name, {"value": "deleted"})["value"],
                                                   local_record["user_defined"]['kosh_information'][att])}
                                    register(key, conf, last_local, "attribute")
                except Exception:  # ok let's see if it was a deleted one
                    # Records deleted locally are kept in __sync__deleted__
                    local_record = self.__sync__deleted__[key]
                    last_local = last_read_timestamp(local_record)
                    for att in db_record["user_defined"]['kosh_information']:
                        if att[-14:] != "_last_modified":
                            continue
                        last_db = db_record["user_defined"]['kosh_information'][att]
                        if last_db > last_local:
                            # Strip the "_last_modified" suffix to report the attribute name
                            conf = {att[:-14]: (
                                "modified in db", "ds deleted here", "")}
                            register(key, conf, last_local, "delete")
            except Exception:  # It's a new record no conflict
                # It could be it was deleted in store while we touched it here
                try:
                    local_record = self.__sync__dict__[key]
                    last_local = last_read_timestamp(local_record)
                    if last_local != -1:  # yep we read it from store
                        conf = {
                            local_record["data"]["name"]["value"]: (
                                "deleted in store", "", "")}
                        register(key, conf, last_local, "delete")
                except Exception:  # deleted too so no issue
                    pass
        return conflicts

    @lock_strategies.lock_method
    def is_synchronous(self):
        """Tell whether the store is in synchronous mode.

        :return: synchronous or not
        :rtype: bool
        """
        current_mode = self.__sync__
        return current_mode

    @lock_strategies.lock_method
    def synchronous(self, mode=None):
        """Change sync mode for the store

        :param mode: True means synchronous mode, False means asynchronous,
                     None means switch to the other mode;
                     anything else is ignored and it simply returns the mode
        :type mode: bool
        :return: current synchronization mode
        :rtype: bool
        """

        if mode is None:
            # Plain toggle, nothing is synced here
            self.__sync__ = not self.__sync__
            return self.__sync__

        if mode in (True, False):
            leaving_async = mode and not self.__sync__
            if leaving_async:
                # Going to always sync mode: pending changes must be synced first
                self.sync()
            self.__sync__ = mode
        return self.__sync__

    @lock_strategies.lock_method
    def sync(self, keys=None):
        """Sync with db
        :param keys: keys of objects to sync (id/type)
        :type keys: list
        :return: None
        :rtype: None
        """
        if self.__sync__:
            return

        if not hasattr(self.__record_handler__, "insert"):
            raise RuntimeError("Kosh store is read_only, cannot sync with it")

        if keys is None:
            keys = list(self.__sync__dict__.keys()) + \
                list(self.__sync__deleted__.keys())
        if len(keys) == 0:
            return
        conflicts = self.check_sync_conflicts(keys)
        if len(conflicts) != 0:  # Conflicts, aborting
            msg = "Conflicts exist objects have been modified in db and locally"
            for key in conflicts:
                msg += "\nObject id:{}".format(key)
                msg += "\n\tLast read from db: {}".format(
                    conflicts[key]["last_check_from_db"])
                for k in conflicts[key]:
                    if k in ["last_check_from_db", "type"]:
                        continue
                    if conflicts[key]["type"] == "attribute":
                        st = "\n\t" + k + " modified to value '{}' at {} in db, modified locally to '{}' at {}"
                    elif conflicts[key]["type"] == "delete":
                        st = "\n\t" + k + "{} {} {}"
                    else:
                        st = "\n\tfile '" + k + \
                            "' mimetype modified to'{}' at {} in db, modified locally to '{}' at {}"
                    st = st.format(*conflicts[key][k])
                    msg += st
            raise RuntimeError(msg)
        # Ok no conflict we still need to sync
        update_records = []
        del_keys = []
        for key in keys:
            try:
                local = self.__sync__dict__[key]
            except Exception:
                # Ok it comes from the deleted datasets
                del_keys.append(key)
                continue
            try:
                db = self.__record_handler__.get(key)
                # db = self.get_record(key)
            except ValueError:
                # not in main store yet
                _update_record(local, self)
                # now we can retrieve it process for associated file not in storer yet
                db = self.__record_handler__.get(key)
                # db = self.get_record(key)
            for att in local["user_defined"]['kosh_information']:
                if att[-14:] == "_last_modified":  # We touched it
                    if att[-27:-14] == "___associated":
                        # ok it's an associated thing
                        uri = att[:-27]
                        if uri not in local["files"]:  # dissociated
                            del db["files"][uri]
                            continue
                        # Now let's see if it is in main store
                        try:
                            self.__record_handler__.get(local["files"][uri]["kosh_id"])
                            # self.get_record(local["files"][uri]["kosh_id"])
                        except ValueError:
                            # Ok it is not in the store itself
                            rec = self.get_record(local["files"][uri]["kosh_id"])
                            _update_record(rec, self)
                        if att not in db["user_defined"]['kosh_information']:  # newly associated
                            db["files"][uri] = local["files"][uri]
                            db["user_defined"]['kosh_information'][att] = \
                                local["user_defined"]['kosh_information'][att]
                        elif local["user_defined"]['kosh_information'][att] > \
                                db["user_defined"]['kosh_information'][att]:
                            # last changed locally
                            db["files"][uri] = local["files"][uri]
                            db["user_defined"]['kosh_information'][att] = \
                                local["user_defined"]['kosh_information'][att]
                    else:
                        name = att[:-14]
                        if name not in local["data"]:  # we deleted it
                            if name in db["data"]:
                                del db["data"][name]
                        elif local["user_defined"]['kosh_information'][att] > \
                                db["user_defined"]['kosh_information'][att]:
                            db["data"][name] = local["data"][name]
                            db["user_defined"]['kosh_information'][att] = \
                                local["user_defined"]['kosh_information'][att]
            if db is not None:
                update_records.append(db)
            else:  # db did not have that key and returned None (no error)
                update_records.append(local)
            del_keys.append(key)

        rels = []
        relationships = self.get_sina_store().relationships
        for id_ in update_records:
            rels += relationships.find(id_.id, None, None)
            rels += relationships.find(None, None, id_.id)
        if not self.__sync__:
            _update_record(del_keys, self, delete=True)
            _update_record(update_records, self)
            relationships.insert(rels)
        _update_record(update_records, self)
        for key in list(keys):
            try:
                _update_record(key, self, self._added_unsync_mem_store, delete=True)
            except Exception:
                pass
            try:
                del self.__sync__dict__[key]
            except Exception:
                # probably coming from del then
                del self.__sync__deleted__[key]

    @lock_strategies.lock_method
    def add_user(self, username, groups=[]):
        """Register a new user in the Kosh store and attach it to some groups

        The user record id is the md5 hex digest of the username.

        :param username: username to add
        :type username: str
        :param groups: kosh specific groups to add to this user
        :type groups: list
        :raises ValueError: if a user with that username is already in the store
        """
        from sina.model import Record
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        known_usernames = [
            user_rec["data"]["username"]["value"]
            for user_rec in self.__record_handler__.find_with_type(self._users_type)
        ]
        # Guard clause: refuse duplicates before building anything
        if username in known_usernames:
            raise ValueError("User {} already exists".format(username))

        new_user = Record(
            id=hashlib.md5(username.encode()).hexdigest(),
            type=self._users_type,
            user_defined={'kosh_information': {}})
        new_user.add_data("username", username)
        _update_record(new_user, self)
        # Group membership is delegated, unknown groups are handled there
        self.add_user_to_group(username, groups)

    @lock_strategies.lock_method
    def add_group(self, group):
        """Add a kosh specific group, its name cannot match an existing unix group on this system

        :param group: name of the group to add
        :type group: str
        :raises ValueError: if a kosh group with this name already exists
                            or if the name is the one of a unix group on this system
        """
        from sina.model import Record
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        existing_groups = self.__record_handler__.find_with_type(
            self._groups_type)
        groups_names = [rec["data"]["name"]["value"]
                        for rec in existing_groups]
        if group in groups_names:
            raise ValueError("group {} already exist".format(group))

        # now get unix groups (no unix groups are looked up on windows platforms)
        if not sys.platform.startswith("win"):
            unix_groups = [g[0] for g in grp.getgrall()]
        else:
            unix_groups = []
        if group in unix_groups:
            # `.format` used to sit inside the string literal,
            # so the message never showed the offending group name
            raise ValueError("{} is a unix group on this system".format(group))

        # Create group, groups get a random id
        uid = uuid.uuid4().hex
        group_rec = Record(id=uid, type=self._groups_type, user_defined={'kosh_information': {}})
        group_rec.add_data("name", group)
        _update_record(group_rec, self)

    @lock_strategies.lock_method
    def add_user_to_group(self, username, groups):
        """Add a user to some group(s)

        Groups that are not Kosh groups are skipped with a warning.

        :param username: username to add
        :type username: str
        :param groups: kosh specific groups to add to this user
        :type groups: list
        :raises ValueError: if no user record matches this username
        """
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        users_filter = self.__record_handler__.find_with_type(
            self._users_type, ids_only=True)
        names_filter = list(
            self.__record_handler__.find_with_data(
                username=username))
        user_ids = set(users_filter)
        # Only keep records that are both of the users type AND carry this username.
        # Picking names_filter[0] blindly could select a non-user record
        # that happens to have a matching `username` data entry.
        matching_users = [rec_id for rec_id in names_filter if rec_id in user_ids]
        if len(matching_users) == 0:
            raise ValueError("User {} does not exists".format(username))
        user = self.get_record(matching_users[0])
        # NOTE(review): groups are read under `self._groups_type` but written
        # under "groups" below -- confirm these are meant to be the same key
        user_groups = user["data"].get(
            self._groups_type, {
                "value": []})["value"]
        if user_groups is None:
            # A user without groups is stored with a None value (see below)
            user_groups = []

        existing_groups = self.__record_handler__.find_with_type(
            self._groups_type)
        groups_names = [rec["data"]["name"]["value"]
                        for rec in existing_groups]
        for group in groups:
            if group not in groups_names:
                warnings.warn(
                    "Group {} is not a Kosh group, skipping".format(group))
                continue
            user_groups.append(group)
        if len(user_groups) == 0:
            user.set_data("groups", None)
        else:
            # set() removes duplicates
            user.set_data("groups", list(set(user_groups)))
        _update_record(user, self)

    @lock_strategies.lock_method
    def export_dataset(self, datasets, file=None):
        """Export a dataset via its `export` method

        Note that the function returns while processing the first entry,
        so when a list/tuple/generator is passed only its first element is exported.
        Nothing is exported (and None is returned) for an empty sequence.

        :param datasets: dataset (or their ids) to export
        :type datasets: list or str
        :param file: optional file to dump dataset to
        :type file: None or str
        :return: whatever the `export` call of the first dataset returns
        """
        is_sequence = isinstance(datasets, (list, tuple, types.GeneratorType))
        candidates = datasets if is_sequence else [datasets]
        for candidate in candidates:
            # ids (strings) are opened first, dataset objects are used as is
            exportable = self.open(candidate) if isinstance(candidate, six.string_types) else candidate
            return exportable.export(file)

    @lock_strategies.lock_method
    def import_dataset(self, datasets, match_attributes=[
                       "name", ], merge_handler=None, merge_handler_kargs={}, skip_sina_record_sections=[],
                       ingest_funcs=None):
        """Import datasets and ensembles exported from another store, or load them from a file

        A pandas DataFrame is first converted with sina's `generate_record_from_pandas`,
        and an existing file path containing neither ".json" nor ".hdf5" is converted with
        sina's `generate_record_from_csv`. Each resulting entry is then handed over
        to `_import_dataset` with the options below.

        :param datasets: Dataset/Ensemble object exported by another store, a dataset/ensemble
                         or a json file containing these.
        :type datasets: json, csv, hdf5 file, json loaded object, kosh.KoshDataset, sina.Record, pandas.DataFrame
        :param match_attributes: attributes used to decide if a dataset is already in the store.
                                 In general 'id' cannot be used since it is randomly generated at creation:
                                 the "same" dataset created in two different stores
                                 (e.g running the same code twice but with different Kosh store)
                                 would be identical in both stores but with different ids.
                                 This helps you make sure you do not end up with duplicate entries.
                                 Warning, if this parameter is too loose too many datasets will match
                                 and the import will abort, if it's too tight duplicates will not be identified.
        :type match_attributes: list of str
        :param merge_handler: How to handle a found dataset whose attributes have different values
                              from the imported dataset. Accepted values are: None, "conservative",
                              "overwrite", "preserve", or a function.
                              The function declaration should be:
                                        foo(store_dataset,
                                            imported_dataset_attributes_dict,
                                            section,
                                            **merge_handler_kargs)
                              Where `store_dataset` is the destination kosh dataset or its non-data dictionary section
                                    `imported_dataset_attributes_dict` is a dictionary of attributes/values
                                                                       of the dataset being imported
                                    `section` is the section of the record being updated
                                    `merge_handler_kargs` is a dict of keywords passed to this function
                              And return a dictionary of attributes/values the target_dataset should have.
        :type merge_handler: None, str, func
        :param merge_handler_kargs: If a function is passed to merge_handler these keywords arguments
                                    will be passed in addition to this store dataset and the imported dataset.
        :type merge_handler_kargs: dict
        :param skip_sina_record_sections: When importing a sina record, skip over these sections
        :type skip_sina_record_sections: list
        :param ingest_funcs: A function or list of functions to
                             run against each Sina record before insertion.
                             They will be run in list order.
        :type ingest_funcs: callable or list of callables
        :return: the result of `_import_dataset` for a single input,
                 or a list with one such result per entry for a list/tuple/generator
        :rtype: list
        """
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])

        from pandas import DataFrame
        from sina.model import generate_record_from_csv, generate_record_from_pandas

        if isinstance(datasets, DataFrame):
            datasets = generate_record_from_pandas(datasets)
        elif isinstance(datasets, str):
            # An existing path that is neither json nor hdf5 is treated as csv
            if os.path.exists(datasets) and ".json" not in datasets and ".hdf5" not in datasets:
                datasets = generate_record_from_csv(datasets)

        # Options forwarded untouched to every `_import_dataset` call
        import_options = {
            "match_attributes": match_attributes,
            "merge_handler": merge_handler,
            "merge_handler_kargs": merge_handler_kargs,
            "skip_sina_record_sections": skip_sina_record_sections,
            "ingest_funcs": ingest_funcs,
        }

        if not isinstance(datasets, (list, tuple, types.GeneratorType)):
            return self._import_dataset(datasets, **import_options)

        return [self._import_dataset(entry, **import_options) for entry in datasets]

    @lock_strategies.lock_method
    def _import_dataset(self, datasets, match_attributes=[
            "name", ], merge_handler=None, merge_handler_kargs={}, skip_sina_record_sections=[], ingest_funcs=None):
        """import dataset that was exported from another store, or load them from a json file
        :param datasets: Dataset object exported by another store, a dataset or a json file containing the dataset
        :type datasets: json, csv, hdf5 file, json loaded object, kosh.KoshDataset, sina.Record, pandas.DataFrame
        :param match_attributes: parameters on a dataset to use if this it is already in the store
                                 in general we can't use 'id' since it is randomly generated at creation
                                 If the "same" dataset was created in two different stores
                                 (e.g running the same code twice but with different Kosh store)
                                 the dataset would be identical in both store but with different ids.
                                 This helps you make sure you do not end up with duplicate entries.
                                 Warning, if this parameter is too loose too many datasets will match
                                 and the import will abort, if it's too tight duplicates will not be identified.
        :type match_attributes: list of str
        :param merge_handler: If found dataset has attributes with different values from imported dataset
                                 how do we handle this? Accepted values are: None, "conservative", "overwrite",
                                 "preserve", or a function.
                                 A function is called as
                                 foo(store_dataset_or_record, imported_attributes, section, **merge_handler_kargs)
                                 and must return a dictionary of attributes/values.
        :type merge_handler: None, str, func
        :param merge_handler_kargs: If a function is passed to merge_handler these keywords arguments
                                    will be passed in addition to this store dataset and the imported dataset.
        :type merge_handler_kargs: dict
        :param skip_sina_record_sections: When importing a sina record, skip over these sections
        :type skip_sina_record_sections: list
        :param ingest_funcs: A function or list of functions to
                             run against each Sina record before insertion.
                             We queue them up to run here. They will be run in list order.
        :type ingest_funcs: callable or list of callables
        :return: list of datasets, one per imported record, opened from this store
        :rtype: list of KoshSinaDataset
        :raises ValueError: if `datasets` is not importable, `merge_handler` is invalid,
                            the import requires a newer kosh version, the match criteria
                            matches multiple datasets, or a source is already associated
                            with a different mime_type
        :raises RuntimeError: if sections or curve sets differ under the "conservative" method,
                              or if curve sets must be merged with a custom merge handler
        """
        from sina.model import Relationship, generate_record_from_json, Record
        from sina.utils import load_document, load_document_hdf5

        if isinstance(datasets, Record):
            # Turn the record into a plain dict via its json representation.
            # Bare NaN tokens are replaced by null before parsing.
            json_data = datasets.to_json()
            if isinstance(json_data, bytes):
                json_str = json_data.decode('utf-8')
            else:
                json_str = json_data
            json_str = re.sub(r'\bNaN\b', 'null', json_str, flags=re.IGNORECASE)
            parsed = orjson.loads(json_str)
            datasets = {"records": [parsed]}

        if isinstance(datasets, str):
            # First let's try using Sina's load
            try:
                try:
                    recs, relationships_in = load_document_hdf5(datasets)
                except:  # noqa: E722
                    recs, relationships_in = load_document(datasets)
                records_in = [rec.raw for rec in recs]
                # False means: records come from Sina's loaders, not from a Kosh export
                from_file = False
            except Exception:
                with open(datasets) as f:
                    from_file = orjson.loads(f.read())
                    if isinstance(from_file, list):
                        # NOTE(review): each entry's records/relationships are appended as
                        # whole lists (nested), and `from_file` stays a list while the code
                        # below calls `from_file.get` -- this branch likely needs a rework
                        records_in = []
                        relationships_in = []
                        for entry in from_file:
                            records_in.append(entry.get("records", []))
                            relationships_in.append(entry.get("relationships", []))
                    else:
                        records_in = from_file.get("records", [])
                        relationships_in = from_file.get("relationships", [])
        elif isinstance(datasets, dict):
            from_file = datasets
            records_in = from_file["records"]
            relationships_in = from_file.get("relationships", [])
        elif isinstance(datasets, (KoshDataset, KoshEnsemble)):
            from_file = datasets.export()
            records_in = from_file["records"]
            relationships_in = from_file.get("relationships", [])
        else:
            raise ValueError(
                "`datasets` must be a Kosh importable object or a file or dict containing json-ized datasets")

        if ingest_funcs is not None:
            # Records are loaded in a temporary store so the ingest functions
            # receive datasets; the results are exported back to records
            temp_store = connect(None)
            temp_store.get_sina_records().insert(
                [generate_record_from_json(record) for record in records_in])
            temp_datasets = list(temp_store.find())
            if isinstance(ingest_funcs, (list, tuple)):
                for ingest_func in ingest_funcs:
                    temp_datasets = [ingest_func(ds) for ds in temp_datasets]
            else:
                # single callable (this used to reference the undefined name `ingest_func`)
                temp_datasets = [ingest_funcs(ds) for ds in temp_datasets]
            records_in = [ds.export()["records"][0] for ds in temp_datasets]
        # setup merge handler
        ok_merge_handler_values = [
            None, "conservative", "preserve", "overwrite"]
        if merge_handler in ok_merge_handler_values:
            merge_handler_kargs = {"handling_method": merge_handler}
            merge_handler = merge_datasets_handler
        elif not (isfunction(merge_handler) or ismethod(merge_handler)):
            raise ValueError(
                "'merge_handler' must be one {} or a function/method".format(ok_merge_handler_values))

        matches = []
        remapped = {}
        for record in records_in:
            if "id" not in record:
                if "local_id" in record:
                    record["id"] = record["local_id"]
                else:  # Cannot import a rec w/o id
                    warnings.warn(f"Skipped record w/o id or local_id: {record}")
                    continue
            # The "data" section is optional (see the check further down),
            # make sure it exists before adding the default attributes
            record.setdefault("data", {})
            if "name" not in record["data"]:
                record["data"]["name"] = {"value": "Unnamed Dataset"}
            if "creator" not in record["data"]:
                record["data"]["creator"] = {"value": self.__user_id__}
            if "creation_date" not in record["data"]:
                record["data"]["creation_date"] = {"value": str(datetime.fromtimestamp(time.time()))}
            if 'user_defined' not in record.keys():
                record["user_defined"] = {}
            record["user_defined"]['kosh_information'] = {}
            for section in skip_sina_record_sections:
                record[section] = {}
            if "data" in record:
                data = record["data"]
            else:
                data = {}
            if from_file and record["type"] == from_file.get("sources_type", "file"):
                is_source = True
            else:
                is_source = False
            # Not 100% data.keys is guaranteed to come back the same twice in a
            # row
            keys = sorted(data.keys())
            atts = dict(zip(keys, [data[x]["value"] for x in keys]))
            min_ver = from_file.get("minimum_kosh_version", (0, 0, 0)) if from_file else None
            if min_ver is not None and kosh.version(comparable=True) < min_ver:
                raise ValueError("Cannot import dataset it requires min kosh version of {}, we are at: {}".format(
                    min_ver, kosh.version(comparable=True)))

            if not is_source:
                # Ok now we need to see if dataset already exist?
                match_dict = {}
                for attribute in match_attributes:
                    if attribute in atts:
                        match_dict[attribute] = atts[attribute]
                    elif attribute == "id":
                        match_dict["id_pool"] = record["id"]

                matching = list(self.find(**match_dict))
                if len(matching) > 1:
                    raise ValueError("dataset criteria: {} matches multiple ({}) "
                                     "datasets in store {}, try changing 'match_attributes' when calling"
                                     " this function".format(
                                         match_dict, len(matching), self.db_uri))
                elif len(matching) == 1:
                    # All right we do have a possible conflict here
                    match = matching[0]
                    merged_attributes = merge_handler(
                        match, atts, "data", **merge_handler_kargs)
                    # Ok at this point no conflict!
                    match.update(merged_attributes)
                    match_rec = match.get_record()
                    # Remember which store id the imported id now points to
                    remapped[record["id"]] = match_rec.id
                else:  # Non existent dataset
                    try:
                        self.get_record(record["id"])
                        # Ok this record already exists
                        # and we need a new unique one
                        record["id"] = uuid.uuid4().hex
                    except (KeyError, ValueError):
                        # Does not exists, let's keep the id
                        pass
                    match_rec = record
            else:  # ok it is a source
                # Let's find the source rec that match this uri
                current_sources = list(
                    self.find(
                        types=[self._sources_type, ],
                        uri=data["uri"]["value"],
                        ids_only=True))
                if len(current_sources) > 0:
                    match = self._load(current_sources[0])
                    if match.id != record["id"]:
                        # Darn this store already associated this uri
                        if match.mime_type != data["mime_type"]["value"]:
                            raise ValueError("trying to import an associated source {} with mime_type {} but "  # noqa
                                             "this store already associated"  # noqa
                                             " this source with mime_type {}".format(data["uri"]["value"],
                                                                                     data["mime_type"]["value"],
                                                                                     match.mime_type))
                    match_rec = match.get_record()
                else:
                    match_rec = record
            # update the record
            # But first make sure it is a record :)
            if isinstance(match_rec, dict):
                if 'id' not in match_rec:
                    match_rec['id'] = uuid.uuid4().hex
                if from_file:
                    match_rec = generate_record_from_json(match_rec)
                else:
                    # Loaded via Sina: reuse the record object Sina gave us
                    for rec in recs:
                        if rec["id"] == record["id"]:
                            match_rec = rec
                            break

            # User defined and files are preserved?
            for section in ["user_defined", "files", "library_data"]:
                if section in record:
                    if match_rec.raw[section] != record[section] and record[section] != {
                    }:
                        if merge_handler != merge_datasets_handler:
                            match_rec.raw[section].update(
                                merge_handler(match_rec, record[section],
                                              section, **merge_handler_kargs))
                        elif merge_handler_kargs["handling_method"] == "conservative":
                            raise RuntimeError(
                                "{} section do not match aborting under conservative merge option"
                                "\nImport: {}\nInstore: {}".format(section, record[section], match_rec.raw[section]))
                        elif merge_handler_kargs["handling_method"] == "overwrite":
                            match_rec.raw[section].update(record[section])
                        else:  # preserve
                            pass
            # Curves are preserved
            if "curve_sets" in record:
                for curve_set in record["curve_sets"]:
                    if curve_set not in match_rec.raw["curve_sets"]:
                        match_rec.raw["curve_sets"][curve_set] = record["curve_sets"][curve_set]
                    else:
                        if merge_handler != merge_datasets_handler:
                            raise RuntimeError(
                                "We do not know how to merge curves with custom merge handler")
                        if merge_handler_kargs["handling_method"] == "conservative":
                            if match_rec.raw["curve_sets"][curve_set] != record["curve_sets"][curve_set]:

                                raise RuntimeError(
                                    "curveset {} do not match, `conservative` method used, aborting".format(curve_set))
                        elif merge_handler_kargs["handling_method"] == "overwrite":
                            match_rec.raw["curve_sets"][curve_set]["independent"].update(
                                record["curve_sets"][curve_set]["independent"])
                            match_rec.raw["curve_sets"][curve_set]["dependent"].update(
                                record["curve_sets"][curve_set]["dependent"])
                        else:  # preserve
                            pass
            _update_record(match_rec, self)
            matches.append(match_rec["id"])

        for relationship in relationships_in:
            # Best effort: failures to insert a relationship are ignored
            try:
                rel = Relationship(subject_id=getattr(relationship, 'subject', relationship.subject_id),
                                   predicate=relationship.predicate,
                                   object_id=getattr(relationship, 'object', relationship.object_id))
                self.get_sina_store().relationships.insert(rel)
            except Exception:  # sqlalchemy.exc.IntegrityError
                pass

            # Ensembles
            if getattr(relationship, 'predicate', '') == 'is a member of ensemble':
                try:
                    self.create_ensemble(id=relationship.object_id)
                except Exception:  # ensemble already in store
                    pass

                try:
                    e = list(self.find(id=relationship.object_id))[0]
                    ds = list(self.find(id=relationship.subject_id))[0]
                    ds.join_ensemble(e)
                except Exception:  # dataset already in ensemble
                    pass

        # We need to make sure any merged (remapped) dataset is still properly
        # associated
        for id_ in matches:
            rec = self.get_record(id_)
            # `from_file` is False for documents loaded via Sina; these never
            # identify sources above, and calling `.get` on it would fail
            if from_file and rec["type"] == self._sources_type == from_file.get(
                    "sources_type", "file"):
                associated = rec["data"]["associated"]["value"]
                altered = False
                for rem in remapped:
                    if rem in associated:
                        index = associated.index(rem)
                        associated[index] = remapped[rem]
                        d = self.open(remapped[rem])
                        d.associate(
                            rec["data"]["uri"]["value"],
                            rec["data"]["mime_type"]["value"])
                        altered = True
                if altered:
                    _update_record(rec, self)

        return [self.open(x) for x in matches]

    @lock_strategies.lock_method
    def reassociate(self, target, source=None, absolute_path=True):
        """This function allows to re-associate data whose uri might have changed

        The source can be the original uri or sha and target is the new uri to use.
        If target is a directory, every file listed by an rsync dry-run of that
        directory is re-associated.
        :param target: New uri
        :type target: str
        :param source: uri or sha (long or short) of the source to reassociate
                       with target, if None then the fast sha computed from
                       each target will be used
        :type source: str or None
        :param absolute_path: if file exists should we store its absolute_path
        :type absolute_path: bool
        :return: None
        :rtype: None
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        # First let's convert to abs path if necessary
        if absolute_path:
            if os.path.exists(target):
                target = os.path.abspath(target)
            if source is not None and not re.search("[0-9a-f]{64}", source):  # full path doesn't apply to sha
                source = os.path.abspath(source)

        if os.path.isdir(target):
            # Use an rsync dry-run to list the content of the directory
            target = os.path.join(target, "")
            cmd = "rsync -v --dry-run -r" + " " + target + " ./"
            p, o, e = process_cmd(cmd, use_shell=True)
            rsync_dryrun_out_lines = o.decode().split("\n")
            # Listed entries sit between the first line and the 'sent ...' summary
            index = [idx for idx, s in enumerate(rsync_dryrun_out_lines) if 'sent ' in s][0]
            targets_in_dir = rsync_dryrun_out_lines[1:index-1]
            targets = [os.path.join(target, target_in_dir) for target_in_dir in targets_in_dir]
        else:
            targets = [target]

        for current_target in targets:
            # Now, did we pass a source for uri to replace?
            # The sha must be computed for EACH target: storing it back in `source`
            # would make every following target reuse the first target's sha
            if source is None:
                current_source = compute_fast_sha(current_target)
            else:
                current_source = source

            # Ok now let's get all associated uri that match
            # First assuming it's a fast_sha search all "kosh files" that match this
            matches = list(
                self.find(
                    types=[
                        self._sources_type,
                    ],
                    fast_sha=current_source,
                    ids_only=True))
            # Now it could be simply a uri
            matches += list(
                self.find(
                    types=[
                        self._sources_type,
                    ],
                    uri=current_source,
                    ids_only=True))
            # And it's quite possible it's a long_sha too
            matches += list(self.find(types=[self._sources_type, ],
                                      long_sha=current_source, ids_only=True))

            # And now let's do the work
            for match_id in matches:
                # Best effort: a failure on one match does not stop the others
                try:
                    match = self._load(match_id)
                    for associated_id in match.associated:
                        associated = self.open(associated_id)
                        associated_record = associated.get_record()
                        raw_associated_record = associated_record.raw
                        # Move the "files" entry from the old uri to the new one
                        raw_associated_record["files"][current_target] = raw_associated_record["files"][match.uri]
                        del raw_associated_record["files"][match.uri]
                        if self.sync:
                            associated._update_record(associated_record)
                        else:
                            associated._update_record(associated_record, self._added_unsync_mem_store)
                    match.uri = current_target
                except Exception:
                    pass

    @lock_strategies.lock_method
    def cleanup_files(self, dry_run=False, interactive=False, clean_fastsha=False,
                      **dataset_search_keys):
        """Remove references to dead files from every dataset of the store

        Each dataset returned by ``self.find()`` is asked to clean up its own
        files; whatever each call returns is concatenated into one list.
        Fast_shas can be refreshed at the same time (see ``clean_fastsha``).

        Extra key=value pairs are forwarded to every dataset to filter the
        associated objects that are considered, e.g ``mime_type=hdf5`` will only
        dissociate non-existing files associated with mime_type hdf5, and
        ``some_att=some_val`` only those having attribute 'some_att' set to
        'some_val'.

        :param dry_run: Only does a dry_run
        :type dry_run: bool
        :param interactive: interactive mode, ask before dissociating
        :type interactive: bool
        :param clean_fastsha: Do we want to update fast_sha if it changed?
        :type clean_fastsha: bool
        :returns: list of uris (to be) removed.
        :rtype: list
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        dead_uris = []
        for one_dataset in self.find():
            found = one_dataset.cleanup_files(dry_run=dry_run,
                                              interactive=interactive,
                                              clean_fastsha=clean_fastsha,
                                              **dataset_search_keys)
            dead_uris.extend(found)
        return dead_uris

    @lock_strategies.lock_method
    def check_integrity(self):
        """Sanity check of the store, without modifying it

        Implemented as a dry run of `cleanup_files` with fast_sha checking on:
        1- Are associated files reachable?
        2- Did fast_shas change since file was associated

        :returns: whatever `cleanup_files` reports for the dry run
        """
        report = self.cleanup_files(clean_fastsha=True, dry_run=True)
        return report

    @lock_strategies.lock_method
    def associate(self, store, reciprocal=False):
        """Associate another store with this one

        Associated stores are used for queries purposes. The uri of the
        associated store is recorded in this store's "__kosh_storeinfo__"
        record and the store object is kept in memory.

        WARNING: While associating stores will make them look like one big store,
                 ensembles' members MUST belong to the same store as the ensemble.

        :param store: The store to associate
        :type store: KoshStore

        :param reciprocal: By default, this is a one way relationship.
                           The associated store will NOT be aware of
                           this association, turning this on create
                           the association in both stores.
        :type reciprocal: bool
        :raises TypeError: if `store` is not a KoshStore
        """
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        if not isinstance(store, KoshStore):
            raise TypeError("store must be a KoshStore or path to one")

        info_record = list(self.get_sina_records().find_with_type("__kosh_storeinfo__"))[0]
        info_data = info_record["data"]
        if "associated_stores" not in info_data:
            # First association ever: create the entry holding the uris
            info_record.add_data("associated_stores", [])
        known_uris = info_record["data"]["associated_stores"]["value"]

        already_known = store.db_uri in known_uris
        if not already_known:
            known_uris.append(store.db_uri)
            info_record["data"]["associated_stores"]["value"] = known_uris
            _update_record(info_record, self)
            self._associated_stores_.append(store)

        if reciprocal:
            # One-way call on the other side, otherwise we would bounce back forever
            store.associate(self)

    @lock_strategies.lock_method
    def dissociate(self, store, reciprocal=False):
        """Dissociate another store

        The uri of the store is removed from this store's
        "__kosh_storeinfo__" record and the matching store object(s) are
        dropped from the in-memory list of associated stores.

        :param store: The store to dissociate, or the uri of an associated store
        :type store: KoshStore or six.string_types

        :param reciprocal: By default, this is a one way relationship.
                           The dissociated store will NOT be aware of
                           this action, turning this on creates
                           the dissociation in both stores.
        :type reciprocal: bool
        :raises TypeError: if `store` is neither a KoshStore nor a string
        :raises ValueError: if a uri is passed and no associated store matches it
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        if not isinstance(store, (six.string_types, KoshStore)):
            raise TypeError("store must be a KoshStore or path to one")

        sina_recs = self.get_sina_records()
        store_info = list(sina_recs.find_with_type("__kosh_storeinfo__"))[0]
        if "associated_stores" not in store_info["data"]:
            warnings.warn("No store is associated with this store: {}".format(self.db_uri))
            return
        # refresh value
        stores = store_info["data"]["associated_stores"]["value"]

        if isinstance(store, six.string_types):
            store_path = store
            try:
                store = self.get_associated_store(store_path)
            except Exception as err:
                # Keep the original failure attached for debugging
                raise ValueError("Could not open store at: {}".format(store_path)) from err

        if store.db_uri in stores:
            stores.remove(store.db_uri)
            store_info["data"]["associated_stores"]["value"] = stores
            _update_record(store_info, self)
            # Match on uri rather than on the object itself: the caller may hand us
            # a different KoshStore instance pointing at the same database, or the
            # store may never have been opened successfully in this session.
            self._associated_stores_[:] = [
                known for known in self._associated_stores_
                if known.db_uri != store.db_uri]
        else:
            warnings.warn("store {} does not seem to be associated with this store ({})".format(
                store.db_uri, self.db_uri))

        if reciprocal:
            store.dissociate(self)

    @lock_strategies.lock_method
    def get_associated_store(self, uri):
        """Returns the associated store based on its uri.

        :param uri: uri to the desired store
        :type uri: six.string_types
        :returns: Associated kosh store
        :rtype: KoshStore
        :raises TypeError: if `uri` is not a string
        :raises ValueError: if no associated store has this uri
        """

        if not isinstance(uri, six.string_types):
            raise TypeError("uri must be string")

        for store in self._associated_stores_:
            if store.db_uri == uri:
                return store
        # Report *this* store's uri: the loop variable is unbound when there are
        # no associated stores and otherwise points at an unrelated store.
        raise ValueError(
            "{} store does not seem to be associated with this store: {}".format(uri, self.db_uri))

    @lock_strategies.lock_method
    def get_associated_stores(self, uris=True):
        """Iterate over the stores associated with this store

        :param uris: When True yield the uri (`db_uri`) of each associated store,
                     otherwise yield the store objects themselves.
        :type uris: bool
        :returns: generator to stores
        :rtype: generator
        """
        if uris:
            for associated in self._associated_stores_:
                yield associated.db_uri
        else:
            yield from self._associated_stores_

    @lock_strategies.lock_method
    def _cli_list_creator(self, arg, var, cmmd, path=""):
        """Creates a list of arg and var pairs for the cmmd passed to kosh_command.py

        :param arg: The argparse argument to use
        :type arg: str
        :param var: The variable that goes along with the argparse argument
        :type var: str
        :param cmmd: The command list to append to
        :type cmmd: list
        :param path: Path that will be combined with var, defaults to ""
        :type path: str, optional
        :return: The appended command list
        :rtype: list
        """

        if isinstance(var, str):
            var = f"{arg} " + os.path.join(path, var)
        elif isinstance(var, list):
            if len(var) == 1:
                var = f"{arg} " + os.path.join(path, var[0])
            else:
                for i, v in enumerate(var):
                    var[i] = os.path.join(path, v)
                var = f"{arg} " + f" {arg} ".join(var)
        var = var.split()

        cmmd = cmmd + var

        return cmmd

    @lock_strategies.lock_method
    def _mv_cp(self, src, dst, mv_cp,
               stores, destination_stores, dataset_record_type,
               dataset_matching_attributes, version, merge_strategy, mk_dirs):
        """Creates the cmmd for mv and cp passed to kosh_command.py

        The command always starts with ``--stores <this store's db_uri>``; the
        other options are appended in a fixed order and the result is handed to
        ``KoshCmd._mv_cp_``. The `stores` and `destination_stores` arguments are
        left untouched (their uris are collected into new lists).

        :param src: The source of files or directories to mv or cp
        :type src: Union[str, list]
        :param dst: The destination of files or directories to mv or cp
        :type dst: str
        :param mv_cp: Move or copy files or directories
        :type mv_cp: str
        :param stores: Additional Kosh store(s) to associate the mv or cp;
                       each must expose a `db_uri` attribute
        :type stores: Union[KoshStore, list]
        :param destination_stores: Destination Kosh store(s) to associate the mv or cp;
                                   each must expose a `db_uri` attribute
        :type destination_stores: Union[KoshStore, list]
        :param dataset_record_type: Type used by sina db that Kosh will recognize as dataset
        :type dataset_record_type: str
        :param dataset_matching_attributes: List of attributes used to identify if two datasets are identical
        :type dataset_matching_attributes: list
        :param version: Print version and exit
        :type version: bool
        :param merge_strategy: When importing dataset, how do we handle conflict
        :type merge_strategy: str
        :param mk_dirs: Make destination directories if they don't exist
        :type mk_dirs: bool
        """

        # --stores
        cmmd = ["--stores",  self.db_uri]

        if stores:
            # Collect uris in a fresh list: overwriting the caller's list would
            # turn its stores into strings and break any later reuse.
            if isinstance(stores, list):
                store_uris = [store.db_uri for store in stores]
            else:
                store_uris = stores.db_uri
            cmmd = self._cli_list_creator("--stores", store_uris, cmmd, os.getcwd())

        # --destination_stores
        if destination_stores:
            if isinstance(destination_stores, list):
                destination_uris = [destination_store.db_uri
                                    for destination_store in destination_stores]
            else:
                destination_uris = destination_stores.db_uri
            cmmd = self._cli_list_creator("--destination_stores", destination_uris, cmmd, os.getcwd())

        # --sources
        cmmd = self._cli_list_creator("--sources", src, cmmd)

        # --dataset_record_type
        cmmd.extend(["--dataset_record_type", dataset_record_type])

        # --dataset_matching_attributes (passed as the string form of the list)
        cmmd.extend(["--dataset_matching_attributes", f"{dataset_matching_attributes}"])

        # --destination
        cmmd.extend(["--destination", dst])

        # --version
        if version:
            cmmd.extend(["--version"])

        # --merge_strategy
        cmmd.extend(["--merge_strategy", merge_strategy])

        # --mk_dirs
        if mk_dirs:
            cmmd.extend(["--mk_dirs"])

        KoshCmd._mv_cp_(self, mv_cp, store_args=cmmd)

    @lock_strategies.lock_method
    def mv(self, src, dst, stores=[],
           destination_stores=[], dataset_record_type="dataset", dataset_matching_attributes=['name', ],
           version=False, merge_strategy="conservative", mk_dirs=False):
        """Moves files or directories

        Requires a 'write' connection; the actual work is delegated to
        `_mv_cp` with the "mv" action.

        :param src: The source of files or directories to mv
        :type src: Union[str, list]
        :param dst: The destination of files or directories to mv
        :type dst: str
        :param stores: Kosh stores to associate the mv, defaults to []
        :type stores: Union[KoshStore, list], optional
        :param destination_stores: Destination Kosh stores to associate the mv, defaults to []
        :type destination_stores: Union[KoshStore, list], optional
        :param dataset_record_type: Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"
        :type dataset_record_type: str, optional
        :param dataset_matching_attributes: List of attributes used to identify if two datasets are identical,
            defaults to ["name", ]
        :type dataset_matching_attributes: list, optional
        :param version: Print version and exit, defaults to False
        :type version: bool, optional
        :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
        :type merge_strategy: str, optional
        :param mk_dirs: Make destination directories if they don't exist
        :type mk_dirs: bool, optional
        """
        __check_valid_connection_type__(self.__connection_type__, ['write'])
        # Options are forwarded untouched, in the order `_mv_cp` expects them
        forwarded = (stores, destination_stores, dataset_record_type,
                     dataset_matching_attributes, version, merge_strategy, mk_dirs)
        self._mv_cp(src, dst, "mv", *forwarded)

    @lock_strategies.lock_method
    def cp(self, src, dst, stores=[],
           destination_stores=[], dataset_record_type="dataset", dataset_matching_attributes=['name', ],
           version=False, merge_strategy="conservative", mk_dirs=False):
        """Copies files or directories

        Requires a 'write' or 'append' connection; the actual work is
        delegated to `_mv_cp` with the "cp" action.

        :param src: The source of files or directories to cp
        :type src: Union[str, list]
        :param dst: The destination of files or directories to cp
        :type dst: str
        :param stores: Kosh stores to associate the cp, defaults to []
        :type stores: Union[KoshStore, list], optional
        :param destination_stores: Destination Kosh stores to associate the cp, defaults to []
        :type destination_stores: Union[KoshStore, list], optional
        :param dataset_record_type: Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"
        :type dataset_record_type: str, optional
        :param dataset_matching_attributes: List of attributes used to identify if two datasets are identical,
            defaults to ["name", ]
        :type dataset_matching_attributes: list, optional
        :param version: Print version and exit, defaults to False
        :type version: bool, optional
        :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
        :type merge_strategy: str, optional
        :param mk_dirs: Make destination directories if they don't exist
        :type mk_dirs: bool, optional
        """
        __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
        # Options are forwarded untouched, in the order `_mv_cp` expects them
        forwarded = (stores, destination_stores, dataset_record_type,
                     dataset_matching_attributes, version, merge_strategy, mk_dirs)
        self._mv_cp(src, dst, "cp", *forwarded)

    @lock_strategies.lock_method
    def tar(self, tar_file, tar_opts, src="", tar_type="tar",
            stores=[], dataset_record_type="dataset", no_absolute_path=False,
            dataset_matching_attributes=["name", ], merge_strategy="conservative"):
        """Creates or extracts a tar file

        Builds the option list (``tar_opts`` split on whitespace, followed by
        ``src``) and the store arguments, then hands both to ``KoshCmd._tar``.
        The `stores` argument is left untouched (uris are collected into a new list).

        :param tar_file: The name of the tar file
        :type tar_file: str
        :param tar_opts: Extra arguments such as -c to create and -x to extract
        :type tar_opts: str
        :param src: File/directory or list of files or directories to tar
        :type src: Union[str, list], optional
        :param tar_type: Type of tar file including htar, defaults to "tar"
        :type tar_type: str, optional
        :param stores: Kosh store(s) to use, defaults to []
        :type stores: Union[KoshStore, list], optional
        :param dataset_record_type: Record type used by Kosh when adding
            datasets to Sina database, defaults to "dataset"
        :type dataset_record_type: str, optional
        :param no_absolute_path: Do not use absolute path when searching stores, defaults to False
        :type no_absolute_path: bool, optional
        :param dataset_matching_attributes: List of attributes used to identify if two datasets
            are identical, defaults to ["name", ]
        :type dataset_matching_attributes: list, optional
        :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
        :type merge_strategy: str, optional
        """

        # Options includes src
        opts = tar_opts.split()
        if isinstance(src, str):
            src = [src]
        opts.extend(src)

        # --stores
        cmmd = ["--stores",  self.db_uri]

        if stores:
            # Collect uris in a fresh list: overwriting the caller's list would
            # turn its stores into strings and break any later reuse.
            if isinstance(stores, list):
                store_uris = [store.db_uri for store in stores]
            else:
                store_uris = stores.db_uri
            cmmd = self._cli_list_creator("--stores", store_uris, cmmd, os.getcwd())

        # --dataset_record_type
        cmmd.extend(["--dataset_record_type", dataset_record_type])

        # --file
        cmmd.extend(["--file",  tar_file])

        # --no_absolute_path
        if no_absolute_path:
            cmmd.extend(["--no_absolute_path"])

        # --dataset_matching_attributes (passed as the string form of the list)
        cmmd.extend(["--dataset_matching_attributes", f"{dataset_matching_attributes}"])

        # --merge_strategy
        cmmd.extend(["--merge_strategy", merge_strategy])

        KoshCmd._tar(self, tar_type, store_args=cmmd, opts=opts)

    @lock_strategies.lock_method
    def to_dataframe(self, data_columns=[], *atts, **keys):
        """Return the result of a `find` call as a Pandas DataFrame.

        Positional and keyword arguments after `data_columns` are passed to
        the find method, e.g. metadata names
        find("attr1", "attr2")
        or exact values via key=value, ranges via sina.utils.DataRange(min, max).

        "file_uri" is a reserved key that will return all records being associated
                   with the given "uri", e.g store.find(file_uri=uri)
        "types" let you search over specific sina record types only.
        "id_pool" will search based on id of Sina record or Kosh dataset. Can be a list.

        Note that `load_type` and `ids_only` are always overridden here
        ('dictionary' and False respectively) since full records are needed.

        One row is produced per record found. Attributes missing on a record are
        filled with `pd.NA`; columns containing "_ENSEMBLE_TAG_" are dropped.

        :param data_columns: Columns to extract. These always come after the default columns
                             ['id', 'name', 'creator', 'creation_date', 'last_modified_date'].
                             If nothing is passed, will return all data.
        :type data_columns: Union(str, list), optional
        :return: Pandas DataFrame
        :rtype: Pandas DataFrame
        """
        import pandas as pd

        requested = [data_columns] if isinstance(data_columns, str) else data_columns

        keys['load_type'] = 'dictionary'
        keys['ids_only'] = False
        records = list(self.find(*atts, **keys))

        # Nothing requested: use every attribute name seen on any record
        if not requested:
            requested = sorted({name for record in records for name in record['data'].keys()})

        # Defaults go in front; ensemble tags are never exposed
        always_present = ['id', 'name', 'creator', 'creation_date', 'last_modified_date']
        columns = [name for name in always_present + requested if "_ENSEMBLE_TAG_" not in name]

        def cell(record, column):
            # The id lives on the record itself, everything else under 'data'
            if column == "id":
                return record['id']
            return record['data'].get(column, {}).get('value', pd.NA)

        table = {column: [cell(record, column) for record in records] for column in columns}
        return pd.DataFrame(table)

__init__(db_uri=None, username=os.environ.get('USER', 'default'), db=None, keyspace=None, sync=True, dataset_record_type='dataset', verbose=True, use_lock_file=False, kosh_reserved_record_types=[], read_only=False, allow_connection_pooling=False, ensemble_predicate=None, execution_options={}, connection_type='write', lock_strategy=None, session_sort_by=None, session_sort_by_descending=False, verbose_attributes=False)

`__init__` initializes a new Sina-based store.

Parameters:

Name Type Description Default
db (str, optional)

type of database, defaults to 'sql', can be 'cass'

None
username str

user name defaults to user id

get('USER', 'default')
db_uri (str | list, optional)

uri to sql file or list of cassandra node ips, defaults to None

None
keyspace (str, optional)

cassandra keyspace, defaults to None

None
sync bool

Does Kosh sync automatically to the db (True) or on demand (False)

True
dataset_record_type str

The Kosh element type is "dataset" by default; this parameter changes that default, which is useful when reading other Sina databases.

'dataset'
verbose bool

verbose message

True
use_lock_file bool

If you receive sqlite threads access error, turning this on might help

False
kosh_reserved_record_types list of strings

list of record types that are reserved for Kosh internal use, will be ignored when searching store

[]
read_only bool

Can we modify the database source?

False
allow_connection_pooling bool

Allow "pooling" behavior that recycles connections, which may prevent them from closing fully on .close(). Only used for the sql backend.

False
ensemble_predicate str

The predicate for the relationship to an ensemble

None
execution_options dict

execution options keyword to pass to sina store record_dao at creation time

{}
connection_type str

whether to create a write, append, or read only store

'write'
lock_strategy LockStrategy

The LockStrategy to apply to class methods

None
session_sort_by str

A default sort_by key for find operations for this store

None
session_sort_by_descending bool

A default sort_by_descending flag for this store

False
verbose_attributes bool

Should we print attribute values of over length 30?

False

Raises:

Type Description
ConnectionRefusedError

Could not connect to cassandra

SystemError

more than one user match.

Source code in kosh/store.py
def __init__(self, db_uri=None, username=os.environ.get("USER", "default"), db=None,
             keyspace=None, sync=True, dataset_record_type="dataset",
             verbose=True, use_lock_file=False, kosh_reserved_record_types=[],
             read_only=False, allow_connection_pooling=False, ensemble_predicate=None,
             execution_options={}, connection_type='write', lock_strategy=None,
             session_sort_by=None, session_sort_by_descending=False, verbose_attributes=False):
    """Initialize a new Sina-based store

    Connects to the underlying Sina store, reads the store info record to
    learn the record type names in use, opens the associated stores, resolves
    the user and registers the loaders.

    :param db_uri: uri to sql file or list of cassandra node ips, defaults to None
    :type db_uri: str or list, optional
    :param username: user name defaults to user id
    :type username: str
    :param db: type of database, defaults to 'sql', can be 'cass'
    :type db: str, optional
    :param keyspace: cassandra keyspace, defaults to None
    :type keyspace: str, optional
    :param sync: Does Kosh sync automatically to the db (True) or on demand (False).
                 Forced to False when the store is read only.
    :type sync: bool
    :param dataset_record_type: Kosh element type is "dataset" this can change the default
                                This is useful if reading in other sina db
    :type dataset_record_type: str
    :param verbose: verbose message
    :type verbose: bool
    :param use_lock_file: If you receive sqlite threads access error, turning this on might help.
                          Turned off (with a warning) when `db_uri` contains "://".
    :type use_lock_file: bool
    :param kosh_reserved_record_types: list of record types that are reserved for Kosh internal
                                       use, will be ignored when searching store
    :type kosh_reserved_record_types: list of strings
    :param read_only: Can we modify the database source?
                      Forced to True when `connection_type` is 'read'.
    :type read_only: bool
    :param allow_connection_pooling: Allow "pooling" behavior that recycles connections,
                                    which may prevent them from closing fully on .close().
                                    Only used for the sql backend.
    :type allow_connection_pooling: bool
    :param ensemble_predicate: The predicate for the relationship to an ensemble
    :type ensemble_predicate: str
    :param execution_options: execution options keyword to pass to sina store record_dao at creation time
    :type execution_options: dict
    :param connection_type: whether to create a write, append, or read only store
    :type connection_type: str
    :param lock_strategy: The LockStrategy to apply to class methods,
                          defaults to `lock_strategies.NoLocking()` when None
    :type lock_strategy: LockStrategy
    :param session_sort_by: A default sort_by key for find operations for this store
    :type session_sort_by: str
    :param session_sort_by_descending: A default sort_by_descending flag for this store
    :type session_sort_by_descending: bool
    :param verbose_attributes: Should we print attribute values of over length 30?
    :type verbose_attributes: bool
    :raises ValueError: `connection_type` is not one of 'write', 'append', 'read',
                        or no sql store could be found at `db_uri`
    :raises ConnectionRefusedError: Could not connect to cassandra
    :raises SystemError: more than one user match.
    """
    # Make sure self._clean_up runs when the interpreter exits
    atexit.register(self._clean_up)
    from sina import connect as sina_connect
    from sina.utils import Negation
    if lock_strategy is None:
        lock_strategy = lock_strategies.NoLocking()
    self.lock_strategy = lock_strategy
    self.session_sort_by = session_sort_by
    self.session_sort_by_descending = session_sort_by_descending
    self.verbose_attributes = verbose_attributes

    # The rest of the initialization runs inside the lock strategy's context
    with lock_strategy:
        if db_uri is not None and "://" in db_uri and use_lock_file:
            warnings.warn("You cannot use `lock_file` on non file-based db, turning it off", ResourceWarning)
            use_lock_file = False
        if connection_type not in ['write', 'append', 'read']:
            raise ValueError(
                    "`connection_type` must be one of ['write', 'append', 'read']")
        self.__connection_type__ = connection_type
        self.use_lock_file = use_lock_file
        # A 'read' connection implies read_only, and read_only disables syncing
        if connection_type == 'read':
            read_only = True
        if read_only:
            sync = False
        self.__read_only__ = read_only
        self.__sync__ = sync
        self.__sync__dict__ = {}
        self.__sync__deleted__ = {}

        if db is None:
            db = 'sql'
        self._dataset_record_type = dataset_record_type
        self.db_uri = db_uri
        # NOTE(review): only 'sql' and names starting with 'cass' are handled below;
        # any other `db` value leaves the sina store unset.
        if db == "sql":
            if db_uri is not None and not os.path.exists(db_uri):
                # Not a file on disk: only acceptable if it looks like a url
                if "://" in db_uri:
                    self.__sina_store = sina_connect(
                        db_uri, read_only=read_only, connection_type=connection_type)
                    self.__sina_store._record_dao.session.connection(execution_options=execution_options)
                else:
                    raise ValueError(
                        "Kosh store could not be found at: {}".format(db_uri))
            else:
                # Existing file (or no uri at all): connect between lock()/unlock()
                self.lock()
                if db_uri is not None:
                    db_pth = os.path.abspath(db_uri)
                else:
                    db_pth = None
                self.__sina_store = sina_connect(database=db_pth,
                                                 read_only=read_only,
                                                 database_type=db,
                                                 allow_connection_pooling=allow_connection_pooling,
                                                 connection_type=connection_type)
                self.__sina_store._record_dao.session.connection(execution_options=execution_options)
                self.unlock()
        elif db.lower().startswith('cass'):
            self.__sina_store = sina_connect(
                keyspace=keyspace, database=db_uri,
                database_type='cassandra', read_only=read_only,
                allow_connection_pooling=allow_connection_pooling,
                connection_type=connection_type)

        # Store info record: holds the record type names used by this store
        rec = update_store_and_get_info_record(self.__sina_store.records, ensemble_predicate)

        self._cached_features_ = rec["user_defined"]["cached_features"]
        self._sources_type = rec["data"]["sources_type"]["value"]
        self._users_type = rec["data"]["users_type"]["value"]
        self._groups_type = rec["data"]["groups_type"]["value"]
        self._loaders_type = rec["data"]["loaders_type"]["value"]
        self._ensembles_type = rec["data"]["ensembles_type"]["value"]
        self._ensemble_predicate = rec["data"]["ensemble_predicate"]["value"]
        # User supplied reserved types come first, followed by the ones recorded in the store
        self._kosh_reserved_record_types = kosh_reserved_record_types + \
            rec["data"]["reserved_types"]["value"]
        # Negation of every reserved type except the sources type (removed from a copy)
        kosh_reserved = list(self._kosh_reserved_record_types)
        kosh_reserved.remove(self._sources_type)
        self._kosh_datasets_and_sources = Negation(kosh_reserved)

        # Associated stores
        # Opened with the same read_only/sync/connection_type; failures only warn
        self._associated_stores_ = []
        if "associated_stores" in rec["data"]:
            for store in rec["data"]["associated_stores"]["value"]:
                try:
                    self._associated_stores_.append(kosh.connect(store, read_only=read_only, sync=sync,
                                                                 connection_type=connection_type))
                except Exception:  # most likely a sqlalchemy.exc.DatabaseError
                    warnings.warn("Could not open associated store: {}".format(store))

        self.lock()
        # Written through __dict__ directly rather than via attribute assignment
        self.__dict__["__record_handler__"] = self.__sina_store.records
        self.unlock()
        # A user matches when a record is both of the users type and carries this username
        users_filter = list(self.__record_handler__.find_with_type(
            self._users_type, ids_only=True))
        names_filter = list(
            self.__record_handler__.find_with_data(
                username=username))
        inter_recs = set(users_filter).intersection(set(names_filter))
        if len(inter_recs) == 0:
            # raise ConnectionRefusedError("Unknown user: {}".format(username))
            # For now just letting anyone log in as anonymous
            warnings.warn("Unknown user, you will be logged as anonymous user")
            # NOTE(review): this lookup result is not used later in this method
            names_filter = self.__record_handler__.find_with_data(
                username="anonymous")
            self.__user_id__ = "anonymous"
        elif len(inter_recs) > 1:
            raise SystemError("Internal error, more than one user match!")
        else:
            self.__user_id__ = list(inter_recs)[0]
        self.storeLoader = KoshSinaLoader
        self.loaders = {}
        self.add_loader(self.storeLoader)

        # self.storeLoader = KoshLoader
        # Loaders registered unconditionally
        self.add_loader(KoshFileLoader)
        self.add_loader(JSONLoader)
        self.add_loader(NpyLoader)
        self.add_loader(NumpyTxtLoader)
        self.add_loader(PandasLoader)
        # Loaders below are optional: a failure only produces a warning (when verbose)
        try:
            self.add_loader(HDF5Loader)
        except Exception:  # no h5py module?
            if verbose:
                warnings.warn("Could not add hdf5 loader, check if you have h5py installed."
                              " Pass verbose=False when creating the store to turn this message off")
        try:
            self.add_loader(PILLoader)
        except Exception:  # no PIL?
            if verbose:
                warnings.warn("Could not add pil loader, check if you have pillow installed."
                              " Pass verbose=False when creating the store to turn this message off")
        self.add_loader(PGMLoader)
        try:
            self.add_loader(UltraLoader)
        except Exception:  # no pydv?
            if verbose:
                warnings.warn("Could not add ultra files loader, check if you have pydv installed."
                              " Pass verbose=False when creating the store to turn this message off")
        try:
            self.add_loader(SidreMeshBlueprintFieldLoader)
        except Exception:  # no conduit?
            if verbose:
                warnings.warn("Could not add sidre blueprint meshfield loader, check if you have conduit installed."
                              " Pass verbose=False when creating the store to turn this message off")
        # Now let's add the loaders in the store
        # NOTE(review): loader code saved in the store is un-pickled here; only open trusted stores
        for rec_loader in self.__record_handler__.find_with_type("koshloader"):
            loader = kosh_pickler.loads(rec_loader.data["code"]["value"])
            self.add_loader(loader)
        # Separate sina store created with no database; presumably in-memory,
        # used for records not yet synced -- TODO confirm
        self._added_unsync_mem_store = sina_connect(None)
        self._cached_loaders = collections.OrderedDict()
        # self._cached_features = "cached_features"

        # Ok we need to map the KoshFileLoader back to whatever the source_type is
        # in this store
        ks = self.loaders["file"]
        for loader in ks:
            loader.types[self._sources_type] = loader.types["file"]
        self.loaders[self._sources_type] = self.loaders["file"]

add_group(group)

Add a kosh specific group, cannot match existing group on unix system

Parameters:

Name Type Description Default
group str

group to add

required
Source code in kosh/store.py
@lock_strategies.lock_method
def add_group(self, group):
    """Add a kosh specific group, cannot match existing group on unix system

    :param group: name of the group to add
    :type group: str
    :raises ValueError: if a Kosh group with this name already exists in the
                        store, or if the name matches a unix group known to
                        this system
    """
    from sina.model import Record
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    existing_groups = self.__record_handler__.find_with_type(
        self._groups_type)
    groups_names = [rec["data"]["name"]["value"]
                    for rec in existing_groups]
    if group in groups_names:
        raise ValueError("group {} already exist".format(group))

    # now get unix groups (the grp database is not queried on Windows)
    if not sys.platform.startswith("win"):
        unix_groups = [g[0] for g in grp.getgrall()]
    else:
        unix_groups = []
    if group in unix_groups:
        # The call to format() must be applied to the string, not be part of it
        raise ValueError("{} is a unix group on this system".format(group))

    # Create group
    uid = uuid.uuid4().hex
    group_rec = Record(id=uid, type=self._groups_type, user_defined={'kosh_information': {}})
    group_rec.add_data("name", group)
    _update_record(group_rec, self)

add_loader(loader, save=False)

Adds a loader to the store

Parameters:

Name Type Description Default
loader KoshLoader

The Kosh loader you want to add to the store

required
save bool

Do we also save it in store for later re-use

False

Returns:

Type Description
None

None

Source code in kosh/store.py
@lock_strategies.lock_method
def add_loader(self, loader, save=False):
    """Register a loader with the store

    The loader is appended to the list of loaders of every type listed in
    ``loader.types``; it is never registered twice for the same type.

    :param loader: The Kosh loader you want to add to the store
    :type loader: KoshLoader
    :param save: Do we also save it in store for later re-use
    :type save: bool

    :return: None
    :rtype: None
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    # The set of loaders is changing: start again from an empty loader cache
    self._cached_loaders = collections.OrderedDict()

    for handled_type in loader.types:
        if handled_type not in self.loaders:
            # First loader registered for this type
            self.loaders[handled_type] = [loader, ]
            continue
        registered = self.loaders[handled_type]
        if loader not in registered:
            registered.append(loader)

    if save:  # also persist the loader in the store
        self.save_loader(loader)

add_user(username, groups=[])

add_user adds a user to the Kosh store

Parameters:

Name Type Description Default
username str

username to add

required
groups list

kosh specific groups to add to this user

[]
Source code in kosh/store.py
@lock_strategies.lock_method
def add_user(self, username, groups=[]):
    """Register a new user in the Kosh store

    The user record id is the md5 hex digest of the username. Once the
    record is stored, the user is added to the requested groups.

    :param username: username to add
    :type username: str
    :param groups: kosh specific groups to add to this user
    :type groups: list
    :raises ValueError: if a user with this username is already in the store
    """
    from sina.model import Record
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    user_records = self.__record_handler__.find_with_type(
        self._users_type)
    known_usernames = [record["data"]["username"]["value"] for record in user_records]
    if username in known_usernames:
        raise ValueError("User {} already exists".format(username))

    # Unknown username: build and store its record, then handle the groups
    record_id = hashlib.md5(username.encode()).hexdigest()
    new_user = Record(id=record_id, type=self._users_type, user_defined={'kosh_information': {}})
    new_user.add_data("username", username)
    _update_record(new_user, self)
    self.add_user_to_group(username, groups)

add_user_to_group(username, groups)

Add a user to some group(s)

Parameters:

Name Type Description Default
username str

username to add

required
groups list

kosh specific groups to add to this user

required
Source code in kosh/store.py
@lock_strategies.lock_method
def add_user_to_group(self, username, groups):
    """Add a user to some group(s)

    Groups that are not Kosh groups of this store are skipped with a warning.

    :param username: username to add
    :type username: str
    :param groups: kosh specific groups to add to this user
    :type groups: list
    :raises ValueError: if no record of the users type carries this username
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    users_filter = set(self.__record_handler__.find_with_type(
        self._users_type, ids_only=True))
    names_filter = list(
        self.__record_handler__.find_with_data(
            username=username))
    # Keep only the ids that are BOTH of the users type and carry this
    # username: a record of another type may also have a 'username' data
    # entry, so the first id matching the name is not necessarily a user.
    matching_ids = [rec_id for rec_id in names_filter if rec_id in users_filter]
    if not matching_ids:
        raise ValueError("User {} does not exists".format(username))
    user = self.get_record(matching_ids[0])
    # NOTE(review): groups are read under the `self._groups_type` key but
    # written back under the literal "groups" key below - confirm that both
    # always designate the same data entry.
    stored_groups = user["data"].get(
        self._groups_type, {
            "value": []})["value"]
    # The stored value is None when the user was saved without any group
    user_groups = list(stored_groups or [])

    existing_groups = self.__record_handler__.find_with_type(
        self._groups_type)
    groups_names = [rec["data"]["name"]["value"]
                    for rec in existing_groups]
    for group in groups:
        if group not in groups_names:
            warnings.warn(
                "Group {} is not a Kosh group, skipping".format(group))
            continue
        user_groups.append(group)
    if len(user_groups) == 0:
        user.set_data("groups", None)
    else:
        # set() removes groups the user already belonged to
        user.set_data("groups", list(set(user_groups)))
    _update_record(user, self)

associate(store, reciprocal=False)

Associate another store

All associated stores will be used for queries purposes.

WARNING: While associating stores will make them look like one big store, ensembles' members MUST belong to the same store as the ensemble.

Parameters:

Name Type Description Default
store KoshStore

The store to associate

required
reciprocal bool

By default, this is a one-way relationship. The associated store will NOT be aware of this association; turning this on creates the association in both stores.

False
Source code in kosh/store.py
@lock_strategies.lock_method
def associate(self, store, reciprocal=False):
    """Associate another store with this one

    All associated stores will be used for queries purposes.

    WARNING: While associating stores will make them look like one big store,
             ensembles' members MUST belong to the same store as the ensemble.

    :param store: The store to associate
    :type store: KoshStore

    :param reciprocal: By default, this is a one way relationship.
                       The associated store will NOT be aware of
                       this association, turning this on creates
                       the association in both stores.
    :type reciprocal: bool
    :raises TypeError: if `store` is not a KoshStore
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    if not isinstance(store, KoshStore):
        raise TypeError("store must be a KoshStore or path to one")

    # The list of associated uris lives on the store info record
    info_records = self.get_sina_records().find_with_type("__kosh_storeinfo__")
    info = list(info_records)[0]
    if "associated_stores" not in info["data"]:
        info.add_data("associated_stores", [])
    known_uris = info["data"]["associated_stores"]["value"]
    already_associated = store.db_uri in known_uris
    if not already_associated:
        known_uris.append(store.db_uri)
        info["data"]["associated_stores"]["value"] = known_uris
        _update_record(info, self)
        self._associated_stores_.append(store)
    if reciprocal:
        # One-way call on the other side (reciprocal is left to its default)
        store.associate(self)

check_integrity()

Runs a sanity check on the store: 1- Are associated files reachable? 2- Did fast_shas change since file was associated

Source code in kosh/store.py
@lock_strategies.lock_method
def check_integrity(self):
    """Sanity check of the store, performed as a dry run of `cleanup_files`:
    1- Are associated files reachable?
    2- Did fast_shas change since file was associated

    :return: whatever `cleanup_files(dry_run=True, clean_fastsha=True)` returns
    """
    report = self.cleanup_files(dry_run=True, clean_fastsha=True)
    return report

check_sync_conflicts(keys)

Checks if there will be sync conflicts

Parameters:

Name Type Description Default
keys list

keys of objects to sync (id/type)

required

Returns:

Type Description
dict

dictionary of objects ids and their failing attributes

Source code in kosh/store.py
@lock_strategies.lock_method
def check_sync_conflicts(self, keys):
    """Checks if there will be sync conflicts

    For every key, the record in the db is compared with the local copy
    (modified or deleted locally). Three kinds of conflict are reported:
    "attribute" (a data entry differs), "associated" (an associated uri
    differs) and "delete" (record deleted on one side, touched on the other).

    :param keys: keys of objects to sync (id/type)
    :type keys: list
    :return: dictionary of objects ids and their failing attributes; each
             entry also carries a "last_check_from_db" and a "type" item
    :rtype: dict
    """
    stamp_format = "%Y-%m-%d %H:%M:%S.%f"
    conflicts = {}

    def _last_local_timestamp(record):
        """Timestamp of the record's 'last_modified_date', -1 if it has none."""
        # Dataset created locally on unsynced store do not have
        # this attribute
        try:
            return datetime.strptime(record["data"]['last_modified_date']['value'],
                                     stamp_format).timestamp()
        except KeyError:
            return -1

    def _register(key, conf, last_local, kind):
        """Merge `conf` into the conflicts recorded for `key`."""
        if key not in conflicts:
            conflicts[key] = conf
        else:
            conflicts[key].update(conf)
        conflicts[key]["last_check_from_db"] = last_local
        conflicts[key]["type"] = kind

    # First pass to make sure we have no conflict
    for key in keys:
        try:
            db_record = self.__record_handler__.get(key)
            try:
                local_record = self.__sync__dict__[key]
                last_local = _last_local_timestamp(local_record)
                for att in db_record["user_defined"]['kosh_information']:
                    conflict = False
                    # Only the "<something>_last_modified" entries matter here
                    if att[-14:] != "_last_modified":
                        continue
                    last_db = db_record["user_defined"]['kosh_information'][att]
                    if last_db > last_local and att in local_record["user_defined"]['kosh_information']:
                        # Conflict
                        if att[-27:-14] == "___associated":
                            # ok dealing with associated data:
                            # att is "<uri>___associated_last_modified"
                            uri = att[:-27]
                            # deleted locally
                            if uri not in local_record["files"]:
                                if uri in db_record["files"]:
                                    conflict = True
                            else:
                                if uri not in db_record["files"]:
                                    conflict = True
                                elif db_record["files"][uri]["mimetype"] != local_record["files"][uri]["mimetype"]:
                                    conflict = True
                            if conflict:
                                conf = {uri: (db_record["files"].get(uri, {"mimetype": "deleted"})["mimetype"],
                                              last_db,
                                              local_record["files"].get(uri, {"mimetype": "deleted"})["mimetype"],
                                              local_record["user_defined"]['kosh_information'][att])}
                                _register(key, conf, last_local, "associated")
                        else:
                            # att is "<attribute name>_last_modified"
                            name = att[:-14]
                            # deleted locally
                            if name not in local_record["data"]:
                                if name in db_record["data"]:
                                    conflict = True
                            else:
                                if name not in db_record["data"]:
                                    conflict = True
                                elif db_record["data"][name]["value"] != local_record["data"][name]["value"]:
                                    conflict = True
                            if conflict:
                                conf = {name: (db_record["data"].get(name, {"value": "deleted"})["value"],
                                               last_db,
                                               local_record["data"].get(name, {"value": "deleted"})["value"],
                                               local_record["user_defined"]['kosh_information'][att])}
                                _register(key, conf, last_local, "attribute")
            except Exception:  # ok let's see if it was a delete ones
                # Same attribute name as the one filled by `delete`; a name
                # without the trailing dunder is name-mangled inside the class
                # and never matches.
                local_record = self.__sync__deleted__[key]
                last_local = _last_local_timestamp(local_record)
                for att in db_record["user_defined"]['kosh_information']:
                    if att[-14:] != "_last_modified":
                        continue
                    last_db = db_record["user_defined"]['kosh_information'][att]
                    if last_db > last_local:
                        # Strip the "_last_modified" suffix to get the name
                        conf = {att[:-14]: (
                            "modified in db", "ds deleted here", "")}
                        _register(key, conf, last_local, "delete")
        except Exception:  # It's a new record no conflict
            # It could be it was deleted in store while we touched it here
            try:
                local_record = self.__sync__dict__[key]
                last_local = _last_local_timestamp(local_record)
                if last_local != -1:  # yep we read it from store
                    conf = {
                        local_record["data"]["name"]["value"]: (
                            "deleted in store", "", "")}
                    _register(key, conf, last_local, "delete")
            except Exception:  # deleted too so no issue
                pass
    return conflicts

cleanup_files(dry_run=False, interactive=False, clean_fastsha=False, **dataset_search_keys)

Cleanup the store from references to dead files Also updates the fast_shas if necessary You can filter associated objects for each dataset by passing key=values e.g mime_type=hdf5 will only dissociate non-existing files associated with mime_type hdf5 some_att=some_val will only dissociate non-existing files associated and having the attribute 'some_att' with value of 'some_val' returns list of uris to be removed.

Parameters:

Name Type Description Default
dry_run bool

Only does a dry_run

False
interactive bool

interactive mode, ask before dissociating

False
clean_fastsha bool

Do we want to update fast_sha if it changed?

False

Returns:

Type Description
list

list of uris (to be) removed.

Source code in kosh/store.py
@lock_strategies.lock_method
def cleanup_files(self, dry_run=False, interactive=False, clean_fastsha=False,
                  **dataset_search_keys):
    """Remove references to dead files from every dataset of the store

    Also updates the fast_shas if necessary.
    The work is delegated to each dataset's own `cleanup_files`; extra
    key=values are forwarded to it to filter the associated objects, e.g:
    mime_type=hdf5 will only dissociate non-existing files associated with mime_type hdf5
    some_att=some_val will only dissociate non-existing files associated and having the attribute
    'some_att' with value of 'some_val'

    :param dry_run: Only does a dry_run
    :type dry_run: bool
    :param interactive: interactive mode, ask before dissociating
    :type interactive: bool
    :param clean_fastsha: Do we want to update fast_sha if it changed?
    :type clean_fastsha: bool
    :returns: list of uris (to be) removed.
    :rtype: list
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    dead_uris = []
    for ds in self.find():
        found = ds.cleanup_files(dry_run=dry_run,
                                 interactive=interactive,
                                 clean_fastsha=clean_fastsha,
                                 **dataset_search_keys)
        dead_uris.extend(found)
    return dead_uris

close()

closes store and sina related things

Source code in kosh/store.py
@lock_strategies.lock_method
def close(self):
    """Save the cached features in the store info record, then close the
    underlying sina store and run the garbage collector"""
    try:
        # Merge the in-memory cached features into the ones already stored
        sina_records = self.get_sina_records()
        info_record = get_store_info_record(sina_records)
        features = get_store_info_record_attribute(sina_records, "cached_features")
        features.update(self._cached_features_)
        info_record["user_defined"]["cached_features"] = features
        _update_record(info_record, self)
    except Exception:  # store is likely closed already
        pass
    self.__sina_store.close()
    gc.collect()

cp(src, dst, stores=[], destination_stores=[], dataset_record_type='dataset', dataset_matching_attributes=['name'], version=False, merge_strategy='conservative', mk_dirs=False)

Copies files or directories

Parameters:

Name Type Description Default
src Union[str, list]

The source of files or directories to mv or cp

required
dst str

The destination of files or directories to mv or cp

required
stores (Union[KoshDataset, list], optional)

Kosh stores to associate the mv or cp, defaults to []

[]
destination_stores (Union[KoshDataset, list], optional)

Kosh stores to associate the mv or cp, defaults to []

[]
dataset_record_type (str, optional)

Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"

'dataset'
dataset_matching_attributes (list, optional)

List of attributes used to identify if two datasets are identical, defaults to ["name", ]

['name']
version (bool, optional)

Print version and exit, defaults to False

False
merge_strategy (str, optional)

When importing dataset, how do we handle conflict, defaults to "conservative"

'conservative'
mk_dirs (bool, optional)

Make destination directories if they don't exist

False
Source code in kosh/store.py
@lock_strategies.lock_method
def cp(self, src, dst, stores=[],
       destination_stores=[], dataset_record_type="dataset", dataset_matching_attributes=['name', ],
       version=False, merge_strategy="conservative", mk_dirs=False):
    """Copy files or directories (forwards to `_mv_cp` with the "cp" command)

    :param src: The source of files or directories to mv or cp
    :type src: Union[str, list]
    :param dst: The destination of files or directories to mv or cp
    :type dst: str
    :param stores: Kosh stores to associate the mv or cp, defaults to []
    :type stores: Union[kosh.dataset.KoshDataset, list], optional
    :param destination_stores: Kosh stores to associate the mv or cp, defaults to []
    :type destination_stores: Union[kosh.dataset.KoshDataset, list], optional
    :param dataset_record_type: Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"
    :type dataset_record_type: str, optional
    :param dataset_matching_attributes: List of attributes used to identify if two datasets are identical,
        defaults to ["name", ]
    :type dataset_matching_attributes: list, optional
    :param version: Print version and exit, defaults to False
    :type version: bool, optional
    :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
    :type merge_strategy: str, optional
    :param mk_dirs: Make destination directories if they don't exist
    :type mk_dirs: bool, optional
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    # Positional arguments, in the order `_mv_cp` is called with
    forwarded = (
        src,
        dst,
        "cp",
        stores,
        destination_stores,
        dataset_record_type,
        dataset_matching_attributes,
        version,
        merge_strategy,
        mk_dirs,
    )
    self._mv_cp(*forwarded)

create(name='Unnamed Dataset', id=None, metadata={}, schema=None, sina_type=None, **kargs)

create a new (possibly named) dataset

Parameters:

Name Type Description Default
name (str, optional)

name for the dataset, defaults to 'Unnamed Dataset'

'Unnamed Dataset'
id (str, optional)

unique Id, defaults to None which means use uuid4()

None
metadata (dict, optional)

dictionary of attribute/value pair for the dataset, defaults to {}

{}
schema KoshSchema

a KoshSchema object to validate datasets and when setting attributes

None
sina_type str

If you want to query the store for a specific sina record type, not just a dataset

None
alias_feature (dict, opt)

Dictionary of feature aliases

required
kargs dict

extra keyword arguments (ignored)

{}

Returns:

Type Description
KoshDataset

KoshDataset

Raises:

Type Description
RuntimeError

Dataset already exists

Source code in kosh/store.py
@lock_strategies.lock_method
def create(self, name="Unnamed Dataset", id=None,
           metadata={}, schema=None, sina_type=None, **kargs):
    """create a new (possibly named) dataset

    `metadata` is copied, never modified. A 'creation_date' entry is kept
    when it has the format produced by ``str(datetime)``, it is replaced by
    the current time otherwise. An 'alias_feature' entry (dictionary of
    feature aliases) is stored pickled.

    :param name: name for the dataset, only used if `metadata` has no 'name' entry,
                 defaults to "Unnamed Dataset"
    :type name: str, optional
    :param id: unique Id, defaults to None which means use uuid4()
    :type id: str, optional
    :param metadata: dictionary of attribute/value pair for the dataset, defaults to {}
    :type metadata: dict, optional
    :param schema: a KoshSchema object to validate datasets and when setting attributes
    :type schema: KoshSchema
    :param sina_type: sina record type to create, defaults to None which means
                      use the store's dataset record type
    :type sina_type: str
    :param kargs: extra keyword arguments (ignored, except for the deprecated 'datasetId')
    :type kargs: dict
    :raises RuntimeError: Dataset already exists
    :raises ValueError: both 'id' and the deprecated 'datasetId' are passed
    :return: KoshDataset (KoshEnsemble if `sina_type` is the ensembles type)
    :rtype: KoshDataset
    """
    from sina.model import Record
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    if "datasetId" in kargs:
        if id is None:
            warnings.warn(
                "'datasetId' has been deprecated in favor of 'id'",
                DeprecationWarning)
            id = kargs["datasetId"]
        else:
            raise ValueError(
                "'datasetId' is deprecated in favor of 'id' which you already set here")

    if sina_type is None:
        sina_type = self._dataset_record_type
    if id is None:
        Id = uuid.uuid4().hex
    else:
        if id in self.__record_handler__.find_with_type(
                sina_type, ids_only=True):
            raise RuntimeError(
                "Dataset id {} already exists".format(id))
        Id = id

    metadata = metadata.copy()
    metadata["creator"] = self.__user_id__
    if "creation_date" in metadata:  # if statement for self.import_dataset()
        # str(datetime) drops the fractional part when microsecond is 0,
        # so both spellings are valid dates written by this method.
        for date_format in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
            try:  # Correct format, don't do anything
                datetime.strptime(metadata["creation_date"], date_format)
                break
            except ValueError:
                continue
        else:  # no format matched
            metadata["creation_date"] = str(datetime.fromtimestamp(time.time()))
    else:
        metadata["creation_date"] = str(datetime.fromtimestamp(time.time()))

    if "name" not in metadata:
        metadata["name"] = name
    metadata["_associated_data_"] = None
    # Sina expects every data entry as {'value': ...}
    for k in metadata:
        if k == 'alias_feature':
            metadata[k] = {'value':  kosh_pickler.dumps(metadata[k])}
        else:
            metadata[k] = {'value': metadata[k]}
    rec = Record(id=Id, type=sina_type, data=metadata, user_defined={'kosh_information': {}})
    if self.__sync__:
        _update_record(rec, self)
    else:
        self.__sync__dict__[Id] = rec
        _update_record(rec, self, self._added_unsync_mem_store)
    try:
        if sina_type == self._ensembles_type:
            out = KoshEnsemble(Id, store=self, schema=schema, record=rec)
        else:
            out = KoshDataset(Id, store=self, schema=schema, record=rec)
    except Exception:  # probably schema validation error
        # Undo the record creation before letting the error through
        if self.__sync__:
            _update_record(Id, self, delete=True)
        else:
            del self.__sync__dict__[Id]
            _update_record(rec, self, self._added_unsync_mem_store, delete=True)
        raise
    return out

create_ensemble(name='Unnamed Ensemble', id=None, metadata={}, schema=None, **kargs)

Create a Kosh ensemble object

Parameters:

Name Type Description Default
name (str, optional)

name for the ensemble, defaults to 'Unnamed Ensemble'

'Unnamed Ensemble'
id (str, optional)

unique Id, defaults to None which means use uuid4()

None
metadata (dict, optional)

dictionary of attribute/value pair for the dataset, defaults to {}

{}
schema KoshSchema

a KoshSchema object to validate datasets and when setting attributes

None
kargs dict

extra keyword arguments (ignored)

{}

Returns:

Type Description
KoshEnsemble

KoshEnsemble

Raises:

Type Description
RuntimeError

Dataset already exists

Source code in kosh/store.py
@lock_strategies.lock_method
def create_ensemble(self, name="Unnamed Ensemble", id=None, metadata={}, schema=None, **kargs):
    """Create a Kosh ensemble object

    Same as `create`, called with the store's ensembles record type.

    :param name: name for the ensemble, defaults to "Unnamed Ensemble"
    :type name: str, optional
    :param id: unique Id, defaults to None which means use uuid4()
    :type id: str, optional
    :param metadata: dictionary of attribute/value pair for the ensemble, defaults to {}
    :type metadata: dict, optional
    :param schema: a KoshSchema object to validate datasets and when setting attributes
    :type schema: KoshSchema
    :param kargs: extra keyword arguments, passed to `create`
    :type kargs: dict
    :raises RuntimeError: Dataset already exists
    :return: KoshEnsemble
    :rtype: KoshEnsemble
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    ensemble = self.create(name=name,
                           id=id,
                           metadata=metadata,
                           schema=schema,
                           sina_type=self._ensembles_type,
                           **kargs)
    return ensemble

delete(Id)

remove a record from store. for datasets dissociate all associated data first.

Parameters:

Name Type Description Default
Id str

unique Id or kosh_obj

required
Source code in kosh/store.py
@lock_strategies.lock_method
def delete(self, Id):
    """Delete a record from the store.
    Records whose type is not a Kosh reserved type get all their
    associated data dissociated first.

    :param Id: unique Id or kosh_obj
    :type Id: str
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    if not isinstance(Id, six.string_types):
        # A Kosh object was passed, we only need its id
        Id = Id.id
    record = self.get_record(Id)
    if record.type not in self._kosh_reserved_record_types:
        kosh_obj = self.open(Id)
        # Let's dissociate to remove unused kosh objects as well
        # (iterate over a copy of the uris)
        for uri in list(record["files"].keys()):
            kosh_obj.dissociate(uri)

    if self.__sync__:
        _update_record(Id, self, delete=True)
        return

    # Unsynced store: remove from the in-memory store and remember the deletion
    _update_record(Id, self, self._added_unsync_mem_store, delete=True)
    if Id in self.__sync__dict__:
        del self.__sync__dict__[Id]
        self.__sync__deleted__[Id] = record
        record["user_defined"]['kosh_information']["deleted_time"] = time.time()

delete_all_contents(force='')

Delete EVERYTHING in a datastore; this cannot be undone.

Parameters:

Name Type Description Default
force str

This function is meant to raise a confirmation prompt. If you want to use it in an automated script (and you're sure of what you're doing), set this to "SKIP PROMPT".

''

Returns:

Type Description

whether the deletion happened.

Source code in kosh/store.py
@lock_strategies.lock_method
def delete_all_contents(self, force=""):
    """
    Wipe EVERYTHING from the datastore; this cannot be undone.

    After the wipe, the store info record and the Kosh users are set up
    again in the underlying sina store.

    :param force: This function is meant to raise a confirmation prompt. If you
                  want to use it in an automated script (and you're sure of
                  what you're doing), set this to "SKIP PROMPT".
    :type force: str
    :returns: whether the deletion happened.
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    sina_store = self.__sina_store
    deleted = sina_store.delete_all_contents(force=force)
    update_store_and_get_info_record(sina_store.records)
    create_kosh_users(sina_store.records)
    return deleted

delete_loader(loader, permanently=False)

Removes a loader from the store and possibly from its db

Parameters:

Name Type Description Default
loader KoshLoader

The Kosh loader you want to remove

required
permanently bool

Do we also remove it from the db if saved there?

False

Returns:

Type Description
None

None

Source code in kosh/store.py
@lock_strategies.lock_method
def delete_loader(self, loader, permanently=False):
    """Removes a loader from the store and possibly from its db

    The loader is removed from every type it is registered for, the cached
    features produced by cached instances of this loader are dropped and
    the loader cache is reset.

    :param loader: The Kosh loader you want to remove
    :type loader: KoshLoader
    :param permanently: Do we also remove it from the db if saved there?
    :type permanently: bool

    :return: None
    :rtype: None
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    # We deleted a loader we need to clear the cache
    existing_cached = self._cached_loaders.values()
    empty_types = []
    for k in loader.types:
        if k not in self.loaders or loader not in self.loaders[k]:
            continue
        self.loaders[k].remove(loader)
        if len(self.loaders[k]) == 0:
            # ok we took them all out
            empty_types.append(k)
        features_cache = self._cached_features_
        # Each stale id is listed once, even when several cached instances
        # of this loader match it: deleting the same key twice would raise
        # a KeyError.
        stale_ids = [feature_id for feature_id in features_cache.keys()
                     if any(isinstance(ld, loader) and feature_id.startswith(ld.signature)
                            for ld, _ in existing_cached)]
        for feature_id in stale_ids:
            del features_cache[feature_id]
        self._cached_features_ = features_cache
    for k in empty_types:
        del self.loaders[k]
    self._cached_loaders = collections.OrderedDict()

    if permanently:  # Remove it from saved in db as well
        pickled = kosh_pickler.dumps(loader)
        rec = next(self.find(types="koshloader", code=pickled, ids_only=True), None)
        if rec is not None:
            _update_record(rec, self, delete=True)

dissociate(store, reciprocal=False)

Dissociate another store

Parameters:

Name Type Description Default
store KoshStore | string_types

The store to dissociate

required
reciprocal bool

By default, this is a one-way relationship. The dissociated store will NOT be aware of this action; turning this on creates the dissociation in both stores.

False
Source code in kosh/store.py
@lock_strategies.lock_method
def dissociate(self, store, reciprocal=False):
    """Dissociate another store from this one

    The uri of `store` is removed from the "associated_stores" value of
    this store's "__kosh_storeinfo__" record and the store is dropped
    from the in-memory list of associated stores.
    A warning is issued (and nothing is changed) if no store is associated
    with this store or if `store` is not one of them.

    :param store: The store to dissociate, or the uri of an associated store
    :type store: KoshStore or six.string_types

    :param reciprocal: By default, this is a one way relationship.
                       The dissociated store will NOT be aware of
                       this action, turning this on creates
                       the dissociation in both stores.
    :type reciprocal: bool
    :raises TypeError: if `store` is neither a KoshStore nor a string
    :raises ValueError: if `store` is a uri that cannot be resolved
                        to one of the associated stores
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    if not isinstance(store, (six.string_types, KoshStore)):
        raise TypeError("store must be a KoshStore or path to one")

    sina_recs = self.get_sina_records()
    store_info = list(sina_recs.find_with_type("__kosh_storeinfo__"))[0]
    if "associated_stores" not in store_info["data"]:
        warnings.warn("No store is associated with this store: {}".format(self.db_uri))
        return
    # refresh value
    stores = store_info["data"]["associated_stores"]["value"]

    if isinstance(store, six.string_types):
        store_path = store
        try:
            store = self.get_associated_store(store_path)
        except Exception as err:
            # Chain the original failure so the real cause stays visible
            raise ValueError("Could not open store at: {}".format(store_path)) from err

    if store.db_uri in stores:
        stores.remove(store.db_uri)
        store_info["data"]["associated_stores"]["value"] = stores
        _update_record(store_info, self)
        # The association is recorded by uri in the db, so drop the in-memory
        # entries by uri as well: `list.remove(store)` would fail, after the
        # record was already updated, for another instance opened on the same uri.
        self._associated_stores_[:] = [
            associated for associated in self._associated_stores_
            if associated.db_uri != store.db_uri]
    else:
        warnings.warn("store {} does not seem to be associated with this store ({})".format(
            store.db_uri, self.db_uri))

    if reciprocal:
        store.dissociate(self)

export_dataset(datasets, file=None)

exports a dataset

Parameters:

Name Type Description Default
datasets list | str

dataset (or their ids) to export

required
file None | str

optional file to dump dataset to

None
Source code in kosh/store.py
@lock_strategies.lock_method
def export_dataset(self, datasets, file=None):
    """exports one or several datasets

    Every dataset passed is exported. Ids (strings) are first opened
    via `self.open`, anything else is expected to offer an `export` method.

    :param datasets: dataset (or their ids) to export
    :type datasets: list or str
    :param file: optional file to dump dataset to
    :type file: None or str
    :return: the result of the dataset's `export` when a single dataset (or id)
             is passed; a list with one such result per dataset when a
             list, tuple or generator is passed (same convention as `import_dataset`)
    """
    single = not isinstance(datasets, (list, tuple, types.GeneratorType))
    if single:
        datasets = [datasets, ]
    exported = []
    for dataset in datasets:
        if isinstance(dataset, str):
            dataset = self.open(dataset)
        # NOTE(review): every dataset receives the same `file`; confirm how
        # `export` behaves when several datasets target a single file.
        exported.append(dataset.export(file))
    if single:
        return exported[0]
    return exported

find(*atts, **keys)

Find objects matching some metadata in the store and its associated stores.

Arguments are the metadata name we are looking for e.g find("attr1", "attr2") you can further restrict by specifying exact value for a metadata via key=value you can return ids only by using: ids_only=True range can be specified via: sina.utils.DataRange(min, max)

"file_uri" is a reserved key that will return all records being associated with the given "uri", e.g store.find(file_uri=uri) "types" let you search over specific sina record types only. "id_pool" will search based on id of Sina record or Kosh dataset. Can be a list.

Parameters:

Name Type Description Default
load_type (str, optional)

How the dataset is returned ('dataset' for Kosh Dataset, 'record' for Sina Record, 'dictionary' for Dictionary). Used for faster load times, defaults to 'dataset'

required

Returns:

Type Description
generator

generator of matching objects in store

Source code in kosh/store.py
@lock_strategies.lock_method
def find(self, *atts, **keys):
    """Find objects matching some metadata in the store
    and its associated stores.

    Arguments are the metadata name we are looking for e.g
    find("attr1", "attr2")
    you can further restrict by specifying exact value for a metadata
    via key=value
    you can return ids only by using: ids_only=True
    range can be specified via: sina.utils.DataRange(min, max)

    "file_uri" is a reserved key that will return all records being associated
               with the given "uri", e.g store.find(file_uri=uri)
    "types" let you search over specific sina record types only.
    "id_pool" will search based on id of Sina record or Kosh dataset. Can be a list.
    "sort_by" is the name of the attribute used to sort the results,
              defaults to this store's `session_sort_by`. When it is not None
              all results are collected before any is yielded.
    "sort_by_descending" reverses the sort order,
              defaults to this store's `session_sort_by_descending`.

    Sina records, Kosh datasets or generators passed as positional arguments
    are not treated as attribute names, their ids are added to "id_pool".

    This is a generator function: nothing is executed until
    the first result is requested.

    :param load_type: How the dataset is returned ('dataset' for Kosh Dataset,
        'record' for Sina Record, 'dictionary' for Dictionary).
        Used for faster load times, defaults to 'dataset'
    :type load_type: str, optional
    :return: generator of matching objects in store
    :rtype: generator
    """

    from sina.model import Record
    # Normalize the optional "id_pool" key to a list of ids
    if 'id_pool' in keys:
        if isinstance(keys['id_pool'], str):
            ids_to_add = [keys['id_pool']]
        else:
            ids_to_add = [id for id in keys['id_pool']]
    else:
        ids_to_add = []

    keys['load_type'] = keys.get('load_type', 'dataset')
    # Sorting options are handled here, they are removed from the keys passed down to `_find`
    sort_by = keys.pop("sort_by", self.session_sort_by)
    sort_by_descending = keys.pop("sort_by_descending", self.session_sort_by_descending)

    # Positional arguments that are objects (not attribute names) are
    # converted to entries in the id pool
    atts_to_remove = []
    for attr in atts:
        if isinstance(attr, (Record, kosh.dataset.KoshDataset)):  # Sina record or Kosh dataset by itself
            ids_to_add.append(attr.id)
            atts_to_remove.append(attr)
        elif isinstance(attr, types.GeneratorType):  # Multiple records from Sina.find() or Kosh.find()
            # the generator is consumed here; its items are expected to expose an `id` attribute
            for at in attr:
                ids_to_add.append(at.id)
            atts_to_remove.append(attr)

    atts = tuple([item for item in atts if item not in atts_to_remove])
    if 'id_pool' in keys or ids_to_add:  # Create key if doesn't exist
        # going through a set removes duplicated ids (order is not preserved)
        keys['id_pool'] = [*set(ids_to_add)]

    def find_generator(*atts, **keys):
        """Yield the matches of this store, then the matches of each associated store.

        The uris of the stores already visited are recorded in a `searched_stores`
        attribute set on the stores themselves; an associated store whose own uri
        is already listed there is skipped.
        """
        for result in self._find(*atts, **keys):
            yield result

        # uris visited during this call, starting with this store
        searched_stores = [self.db_uri]
        if hasattr(self, "searched_stores"):
            self.searched_stores += list(searched_stores)
        else:
            self.searched_stores = list(searched_stores)
        for store in self._associated_stores_:
            if hasattr(store, "searched_stores"):
                if store.db_uri in store.searched_stores:
                    # this associated store is already marked as searched
                    continue
                else:
                    store.searched_stores += list(searched_stores)
            else:
                store.searched_stores = list(searched_stores)
            searched_stores += [store.db_uri, ]
            # the associated store runs its own `find`, which in turn
            # looks into the stores associated with it
            for result in store.find(*atts, **keys):
                yield result
        # cleanup searched store uris
        for store in self._associated_stores_ + [self, ]:
            for id_ in list(searched_stores):
                if id_ in store.searched_stores:
                    store.searched_stores.remove(id_)

    if sort_by is not None:
        # Sorting needs every result at hand, results lacking the
        # `sort_by` attribute are sorted using -1 as their value
        results = list(find_generator(*atts, **keys))
        results.sort(key=lambda result: getattr(result, sort_by, -1),
                     reverse=sort_by_descending)
        for result in results:
            yield result
    else:
        yield from find_generator(*atts, **keys)

find_ensembles(*atts, **keys)

Find ensembles matching some metadata in the store arguments are the metadata name we are looking for e.g find("attr1", "attr2") you can further restrict by specifying exact value for a metadata via key=value you can return ids only by using: ids_only=True range can be specified via: sina.utils.DataRange(min, max)

Returns:

Type Description
generator

generator of matching ensembles in store

Source code in kosh/store.py
@lock_strategies.lock_method
def find_ensembles(self, *atts, **keys):
    """Search the store for ensembles only

    Thin wrapper around `find`: the search is restricted to this store's
    ensemble record type, every other argument is forwarded untouched.
    Positional arguments are the names of the metadata to look for, e.g
    find_ensembles("attr1", "attr2")
    keyword arguments restrict the search to an exact value (key=value),
    ids_only=True returns ids only and ranges can be
    expressed with sina.utils.DataRange(min, max)

    :return: generator of matching ensembles in store
    :rtype: generator
    """
    ensembles_only = self._ensembles_type
    return self.find(*atts, types=ensembles_only, **keys)

get(Id, feature, format=None, loader=None, transformers=[], requestorId=None, *args, **kargs)

get returns an associated source's data

Parameters:

Name Type Description Default
Id str

Id of object to retrieve

required
feature str

feature to retrieve

required
format (str, optional)

preferred format, defaults to None means pick for me

None
loader

loader to use, defaults to None means pick for me

None
transformers KoshTransformer

A list of transformers to use after the data is loaded

[]
requestorId str

The id of the dataset requesting data

None

Returns:

Type Description

data in requested format

Source code in kosh/store.py
@lock_strategies.lock_method
def get(self, Id, feature, format=None, loader=None,
        transformers=[], requestorId=None, *args, **kargs):
    """get returns an associated source's data

    :param Id: Id of object to retrieve
    :type Id: str
    :param feature: feature to retrieve
    :type feature: str
    :param format: preferred format, defaults to None means pick for me
    :type format: str, optional
    :param loader: loader to use, defaults to None means pick for me.
                   When passed it is called with the loaded object and the requestorId.
    :param transformers: A list of transformers to use after the data is loaded
    :type transformers: kosh.operator.KoshTransformer
    :param requestorId: The id of the dataset requesting data
    :type requestorId: str
    :return: data in requested format
    """
    if loader is None:
        loader, _ = self._find_loader(Id, requestorId=requestorId)
    else:
        loader = loader(self._load(Id), requestorId=requestorId)

    if not transformers:
        # Never hand the shared default list (or None) to the loader
        transformers = []
    # The requested transformers are forwarded to the loader
    # (an empty list used to be sent no matter what the caller asked for)
    return loader.get(feature, format, *args, transformers=transformers, **kargs)

get_associated_store(uri)

Returns the associated store based on its uri.

Parameters:

Name Type Description Default
uri string_types

uri to the desired store

required

Returns:

Type Description
KoshStore

Associated kosh store

Source code in kosh/store.py
@lock_strategies.lock_method
def get_associated_store(self, uri):
    """Returns the associated store based on its uri.

    :param uri: uri to the desired store
    :type uri: six.string_types
    :returns: Associated kosh store
    :rtype: KoshStore
    :raises TypeError: if uri is not a string
    :raises ValueError: if none of the associated stores has this uri
    """

    if not isinstance(uri, str):
        raise TypeError("uri must be string")

    for store in self._associated_stores_:
        if store.db_uri == uri:
            return store
    # Report *this* store's uri: the loop variable is unbound when there is
    # no associated store and otherwise names the last store inspected.
    raise ValueError(
        "{} store does not seem to be associated with this store: {}".format(uri, self.db_uri))

get_associated_stores(uris=True)

Return the list of associated stores

Parameters:

Name Type Description Default
uris bool

Return the list of uri pointing to the store if True, or the actual stores otherwise.

True

Returns:

Type Description
generator

generator to stores

Source code in kosh/store.py
@lock_strategies.lock_method
def get_associated_stores(self, uris=True):
    """Iterate over the stores associated with this store
    :param uris: When True yield the uri of each associated store,
                 otherwise yield the store objects themselves.
    :type uris: bool
    :returns: generator over the associated stores (or their uris)
    :rtype: generator
    """
    for associated in self._associated_stores_:
        yield associated.db_uri if uris else associated

get_record(Id)

Gets the sina record tied to an id tags record with time of last access to db

Parameters:

Name Type Description Default
Id str

record id

required

Returns:

Type Description
sina.model.Record

sina record

Source code in kosh/store.py
@lock_strategies.lock_method
def get_record(self, Id):
    """Gets the sina record tied to an id

    When the store is not auto-syncing, a record that was already fetched
    is served from the local sync dictionary. Otherwise the record is read
    from the record handler and stored in the sync dictionary; when not
    auto-syncing, the entries of its "kosh_information" section whose name
    ends with "_last_modified" are dropped (the section is created empty
    if it is missing).
    :param Id: record id
    :type Id: str
    :return: sina record
    :rtype: sina.model.Record
    """
    autosync = self.__sync__
    if not autosync and Id in self.__sync__dict__:
        # already read once, serve the local copy
        return self.__sync__dict__[Id]

    record = self.__record_handler__.get(Id)
    self.__sync__dict__[Id] = record
    if autosync:
        return record

    # we are not autosyncing
    try:
        info_names = list(record["user_defined"]['kosh_information'].keys())
    except KeyError:
        record["user_defined"]['kosh_information'] = {}
        info_names = []
    for name in info_names:
        if name.endswith("_last_modified"):
            del record["user_defined"]['kosh_information'][name]
    return record

get_sina_records()

Returns sina store's records

Source code in kosh/store.py
@lock_strategies.lock_method
def get_sina_records(self):
    """Gives access to the `records` attribute of the underlying sina store"""
    sina_store = self.__sina_store
    return sina_store.records

get_sina_store()

Returns the sina store object

Source code in kosh/store.py
@lock_strategies.lock_method
def get_sina_store(self):
    """Gives access to the underlying sina store object"""
    sina_store = self.__sina_store
    return sina_store

import_dataset(datasets, match_attributes=['name'], merge_handler=None, merge_handler_kargs={}, skip_sina_record_sections=[], ingest_funcs=None)

import datasets and ensembles that were exported from another store, or load them from a json file

Parameters:

Name Type Description Default
datasets json, csv, hdf5 file, json loaded object, kosh.KoshDataset, sina.Record, pandas.DataFrame

Dataset/Ensemble object exported by another store, a dataset/ensemble or a json file containing these.

required
match_attributes list of str

Attributes used to decide whether a dataset is already in the store. In general we can't use 'id' since it is randomly generated at creation. If the "same" dataset was created in two different stores (e.g. running the same code twice but with different Kosh stores) the dataset would be identical in both stores but with different ids. This helps you make sure you do not end up with duplicate entries. Warning: if this parameter is too loose, too many datasets will match and the import will abort; if it's too tight, duplicates will not be identified.

['name']
merge_handler (None, str, func)

If the found dataset has attributes with different values from the imported dataset, how do we handle this? Accepted values are: None, "conservative", "overwrite", "preserve", or a function. The function declaration should be: foo(store_dataset, imported_dataset_attributes_dict, section, **merge_handler_kargs) where store_dataset is the destination kosh dataset or its non-data dictionary section, imported_dataset_attributes_dict is a dictionary of attributes/values of the dataset being imported, section is the section of the record being updated, and merge_handler_kargs is a dict of keyword arguments passed to this function. It must return a dictionary of attributes/values the target_dataset should have.

None
merge_handler_kargs dict

If a function is passed to merge_handler these keywords arguments will be passed in addition to this store dataset and the imported dataset.

{}
skip_sina_record_sections list

When importing a sina record, skip over these sections

[]
ingest_funcs callable | list of callables

A function or list of functions to run against each Sina record before insertion. We queue them up to run here. They will be run in list order.

None

Returns:

Type Description
list of KoshSinaDataset

list of datasets

Source code in kosh/store.py
@lock_strategies.lock_method
def import_dataset(self, datasets, match_attributes=["name", ],
                   merge_handler=None, merge_handler_kargs={},
                   skip_sina_record_sections=[], ingest_funcs=None):
    """Import datasets and ensembles exported from another store, or load them from a file

    A pandas DataFrame is first converted with sina's `generate_record_from_pandas`.
    A string pointing to an existing path whose name contains neither ".json"
    nor ".hdf5" is converted with sina's `generate_record_from_csv`.
    Every resulting entry is then handed to `_import_dataset` along with the options below.

    :param datasets: Dataset/Ensemble object exported by another store, a dataset/ensemble
                     or a json file containing these.
    :type datasets: json, csv, hdf5 file, json loaded object, kosh.KoshDataset, sina.Record, pandas.DataFrame
    :param match_attributes: attributes used to decide if a dataset is already in the store.
                             In general we can't use 'id' since it is randomly generated at creation.
                             If the "same" dataset was created in two different stores
                             (e.g running the same code twice but with different Kosh stores)
                             the dataset would be identical in both stores but with different ids.
                             This helps you make sure you do not end up with duplicate entries.
                             Warning, if this parameter is too loose too many datasets will match
                             and the import will abort, if it's too tight duplicates will not be identified.
    :type match_attributes: list of str
    :param merge_handler: If the found dataset has attributes with different values from the imported
                          dataset how do we handle this? Accepted values are: None, "conservative",
                          "overwrite", "preserve", or a function.
                          The function declaration should be:
                                    foo(store_dataset,
                                        imported_dataset_attributes_dict,
                                        section,
                                        **merge_handler_kargs)
                          Where `store_dataset` is the destination kosh dataset or its non-data dictionary section
                                `imported_dataset_attributes_dict` is a dictionary of attributes/values
                                                                   of the dataset being imported
                                `section` is the section of the record being updated
                                `merge_handler_kargs` is a dict of keywords passed to this function
                          And return a dictionary of attributes/values the target_dataset should have.
    :type merge_handler: None, str, func
    :param merge_handler_kargs: If a function is passed to merge_handler these keywords arguments
                                will be passed in addition to this store dataset and the imported dataset.
    :type merge_handler_kargs: dict
    :param skip_sina_record_sections: When importing a sina record, skip over these sections
    :type skip_sina_record_sections: list
    :param ingest_funcs: A function or list of functions to
                         run against each Sina record before insertion.
                         We queue them up to run here. They will be run in list order.
    :type ingest_funcs: callable or list of callables
    :return: list of imported datasets when a list, tuple or generator is processed,
             the single result of the import otherwise
    :rtype: list of KoshSinaDataset
    """
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])

    from pandas import DataFrame
    from sina.model import generate_record_from_csv, generate_record_from_pandas

    if isinstance(datasets, DataFrame):
        datasets = generate_record_from_pandas(datasets)
    elif (isinstance(datasets, str) and os.path.exists(datasets)
            and ".json" not in datasets and ".hdf5" not in datasets):
        # an existing file that is neither json nor hdf5 is read as csv
        datasets = generate_record_from_csv(datasets)

    # options shared by every single import
    import_options = dict(match_attributes=match_attributes,
                          merge_handler=merge_handler,
                          merge_handler_kargs=merge_handler_kargs,
                          skip_sina_record_sections=skip_sina_record_sections,
                          ingest_funcs=ingest_funcs)

    if isinstance(datasets, (list, tuple, types.GeneratorType)):
        return [self._import_dataset(entry, **import_options) for entry in datasets]
    return self._import_dataset(datasets, **import_options)

is_synchronous()

Tells whether the store is in synchronous mode

Returns:

Type Description
bool

synchronous or not

Source code in kosh/store.py
@lock_strategies.lock_method
def is_synchronous(self):
    """Tells whether the store is in synchronous mode

    :return: synchronous or not
    :rtype: bool
    """
    synchronous = self.__sync__
    return synchronous

lock()

Attempts to lock the store, helps when many concurrent requests are made to the store

Source code in kosh/store.py
@lock_strategies.lock_method
def lock(self):
    """Attempts to lock the store, helps when many concurrent requests are made to the store

    Nothing is done when lock files are turned off (`use_lock_file`) or when
    the store uri contains "://".
    Otherwise an exclusive, non-blocking `fcntl.lockf` lock is requested on
    the file "<db_uri>.handle", trying again every 0.1 second until it is granted.
    The locked file object is kept in `self.lock_file`.

    :raises OSError: if the lock file itself cannot be opened
    """
    if not self.use_lock_file or "://" in self.db_uri:
        return
    while True:
        # Opening is kept out of the retry: a lock file that cannot be created
        # (missing directory, no permission...) would otherwise spin forever.
        handle = open(self.db_uri + ".handle", "w")
        try:
            fcntl.lockf(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Lock is held elsewhere: release this handle before trying again
            handle.close()
            time.sleep(0.1)
        else:
            self.lock_file = handle
            return

mv(src, dst, stores=[], destination_stores=[], dataset_record_type='dataset', dataset_matching_attributes=['name'], version=False, merge_strategy='conservative', mk_dirs=False)

Moves files or directories

Parameters:

Name Type Description Default
src Union[str, list]

The source of files or directories to mv or cp

required
dst str

The destination of files or directories to mv or cp

required
stores (Union[KoshDataset, list], optional)

Kosh stores to associate the mv or cp, defaults to []

[]
destination_stores (Union[KoshDataset, list], optional)

Kosh stores to associate the mv or cp, defaults to []

[]
dataset_record_type (str, optional)

Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"

'dataset'
dataset_matching_attributes (list, optional)

List of attributes used to identify if two datasets are identical, defaults to ["name", ]

['name']
version (bool, optional)

Print version and exit, defaults to False

False
merge_strategy (str, optional)

When importing dataset, how do we handle conflict, defaults to "conservative"

'conservative'
mk_dirs (bool, optional)

Make destination directories if they don't exist

False
Source code in kosh/store.py
@lock_strategies.lock_method
def mv(self, src, dst, stores=[],
       destination_stores=[], dataset_record_type="dataset", dataset_matching_attributes=['name', ],
       version=False, merge_strategy="conservative", mk_dirs=False):
    """Moves files or directories

    Requires a store opened with a 'write' connection; the actual work is
    delegated to `_mv_cp` with the "mv" operation.

    :param src: The source of files or directories to mv or cp
    :type src: Union[str, list]
    :param dst: The destination of files or directories to mv or cp
    :type dst: str
    :param stores: Kosh stores to associate the mv or cp, defaults to []
    :type stores: Union[kosh.dataset.KoshDataset, list], optional
    :param destination_stores: Kosh stores to associate the mv or cp, defaults to []
    :type destination_stores: Union[kosh.dataset.KoshDataset, list], optional
    :param dataset_record_type: Type used by sina db that Kosh will recognize as dataset, defaults to "dataset"
    :type dataset_record_type: str, optional
    :param dataset_matching_attributes: List of attributes used to identify if two datasets are identical,
        defaults to ["name", ]
    :type dataset_matching_attributes: list, optional
    :param version: Print version and exit, defaults to False
    :type version: bool, optional
    :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
    :type merge_strategy: str, optional
    :param mk_dirs: Make destination directories if they don't exist
    :type mk_dirs: bool, optional
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    operation = "mv"
    self._mv_cp(
        src, dst, operation,
        stores, destination_stores,
        dataset_record_type, dataset_matching_attributes,
        version, merge_strategy, mk_dirs)

open(Id, loader=None, requestorId=None, *args, **kargs)

open loads an object in store based on its Id and run its open function

Parameters:

Name Type Description Default
Id str

unique id of object to open. Can also be a Sina record.

required
loader KoshLoader

loader to use, defaults to None which means pick for me

None
requestorId str

The id of the dataset requesting data

None

Returns:

Type Description
Source code in kosh/store.py
@lock_strategies.lock_method
def open(self, Id, loader=None, requestorId=None, *args, **kargs):
    """Load an object of the store from its Id and run its loader's `open` function

    :param Id: unique id of object to open. Can also be a Sina record,
               in which case the record's id is used.
    :type Id: str
    :param loader: loader to use, defaults to None which means pick for me.
                   When passed it is called with the loaded object and the requestorId.
    :type loader: KoshLoader
    :param requestorId: The id of the dataset requesting data
    :type requestorId: str
    :return: whatever the loader's `open` returns for the extra
             positional and keyword arguments
    """
    from sina.model import Record
    # A Sina record stands for its own id
    object_id = Id.id if isinstance(Id, Record) else Id

    if loader is not None:
        opener = loader(self._load(object_id), requestorId=requestorId)
    else:
        opener, _ = self._find_loader(object_id, requestorId=requestorId)
    return opener.open(*args, **kargs)

reassociate(target, source=None, absolute_path=True)

This function allows to re-associate data whose uri might have changed

The source can be the original uri or sha and target is the new uri to use.

Parameters:

Name Type Description Default
target str

New uri

required
source str | None

uri or sha (long or short of reassociate) to reassociate with target, if None then the short uri from target will be used

None
absolute_path bool

if file exists should we store its absolute_path

True

Returns:

Type Description
None

None

Source code in kosh/store.py
@lock_strategies.lock_method
def reassociate(self, target, source=None, absolute_path=True):
    """This function allows to re-associate data whose uri might have changed

    The source can be the original uri or sha and target is the new uri to use.
    When target is a directory, the files listed by an rsync dry run
    of that directory are each used as a target.
    Failures on a matching source are reported as warnings and do not
    stop the processing of the other sources.
    :param target: New uri
    :type target: str
    :param source: uri or sha (long or short) to reassociate
                   with target, if None then the fast sha computed from
                   each target will be used
    :type source: str or None
    :param absolute_path: if file exists should we store its absolute_path
    :type absolute_path: bool
    :return: None
    :rtype: None
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    # First let's convert to abs path if necessary
    if absolute_path:
        if os.path.exists(target):
            target = os.path.abspath(target)
        if source is not None and not re.search("[0-9a-f]{64}", source):  # full path doesn't apply to sha
            source = os.path.abspath(source)

    if os.path.isdir(target):
        target = os.path.join(target, "")
        # NOTE(review): target is inserted unquoted in a shell command,
        # paths with spaces or shell metacharacters will not survive this.
        cmd = "rsync -v --dry-run -r" + " " + target + " ./"
        _p, out, _e = process_cmd(cmd, use_shell=True)
        rsync_dryrun_out_lines = out.decode().split("\n")
        # entries are listed between the first line and the blank line preceding "sent ..."
        index = [idx for idx, s in enumerate(rsync_dryrun_out_lines) if 'sent ' in s][0]
        targets_in_dir = rsync_dryrun_out_lines[1:index-1]
        targets = [os.path.join(target, target_in_dir) for target_in_dir in targets_in_dir]
    else:
        targets = [target]

    for target in targets:
        # Now, did we pass a source for uri to replace?
        # Each target gets its own sha: reusing `source` here would make every
        # file of a directory look for the sha of the first file.
        target_source = compute_fast_sha(target) if source is None else source

        # Ok now let's get all associated uri that match
        # First assuming it's a fast_sha search all "kosh files" that match this
        matches = list(
            self.find(
                types=[
                    self._sources_type,
                ],
                fast_sha=target_source,
                ids_only=True))
        # Now it could be simply a uri
        matches += list(
            self.find(
                types=[
                    self._sources_type,
                ],
                uri=target_source,
                ids_only=True))
        # And it's quite possible it's a long_sha too
        matches += list(self.find(types=[self._sources_type, ],
                                  long_sha=target_source, ids_only=True))
        # The same id may come back from several of the searches above
        matches = list(dict.fromkeys(matches))

        # And now let's do the work
        for match_id in matches:
            try:
                match = self._load(match_id)
                for associated_id in match.associated:
                    associated = self.open(associated_id)
                    associated_record = associated.get_record()
                    raw_associated_record = associated_record.raw
                    files = raw_associated_record["files"]
                    # pop before inserting: the entry survives when uri and target are the same
                    files[target] = files.pop(match.uri)
                    # `self.sync` is a method (always true), the sync state is in `__sync__`
                    if self.__sync__:
                        associated._update_record(associated_record)
                    else:
                        associated._update_record(associated_record, self._added_unsync_mem_store)
                match.uri = target
            except Exception as err:
                # best effort: keep going with the other matches, but do not hide the failure
                warnings.warn("Could not reassociate {} with {}: {!r}".format(match_id, target, err))

remove_loader(loader)

Removes a loader from the store and its db

Parameters:

Name Type Description Default
loader KoshLoader

The Kosh loader you want to remove from the store

required

Returns:

Type Description
None

None

Source code in kosh/store.py
@lock_strategies.lock_method
def remove_loader(self, loader):
    """Removes a loader from the store and its db

    Same as calling `delete_loader` with `permanently=True`,
    the store must be opened with a 'write' connection.

    :param loader: The Kosh loader you want to remove from the store
    :type loader: KoshLoader

    :return: None
    :rtype: None
    """
    __check_valid_connection_type__(self.__connection_type__, ['write'])
    # permanent removal also takes the loader out of the db
    self.delete_loader(loader, permanently=True)

save_loader(loader)

Save a loader to the store Executed immediately even in async mode

Parameters:

Name Type Description Default
loader KoshLoader

Loader to save

required
Source code in kosh/store.py
@lock_strategies.lock_method
def save_loader(self, loader):
    """Save a loader to the store
    Executed immediately even in async mode

    The loader is pickled and stored in the "code" data of a new
    "koshloader" record, unless a record holding the same
    pickled code is already in the store.

    :param loader: Loader to save
    :type loader: KoshLoader
    """
    from sina.model import Record
    __check_valid_connection_type__(self.__connection_type__, ['write', 'append'])
    pickled = kosh_pickler.dumps(loader)
    already_saved = next(self.find(types="koshloader", code=pickled, ids_only=True), None)
    if already_saved is None:
        # not in store yet, create its record
        loader_record = Record(id=uuid.uuid4().hex, type="koshloader", user_defined={'kosh_information': {}})
        loader_record.add_data("code", pickled)
        _update_record(loader_record, self)

search(*atts, **keys)

Deprecated use find

Source code in kosh/store.py
@lock_strategies.lock_method
def search(self, *atts, **keys):
    """
    Deprecated alias of `find`

    Issues a DeprecationWarning then forwards every argument to `find`
    and returns its result.
    """
    deprecation_notice = (
        "The 'search' function is deprecated and now called `find`.\n"
        "Please update your code to use `find` as `search` might disappear in the future")
    warnings.warn(deprecation_notice, DeprecationWarning)
    return self.find(*atts, **keys)

sync(keys=None)

Sync with db

Parameters:

Name Type Description Default
keys list

keys of objects to sync (id/type)

None

Returns:

Type Description
None

None

Source code in kosh/store.py
@lock_strategies.lock_method
def sync(self, keys=None):
    """Sync with db

    Pushes locally held (asynchronous mode) modifications and deletions to the
    database. Does nothing when the store is already in synchronous mode or
    when there are no keys to process.

    :param keys: keys of objects to sync (id/type). When None, every key found in
                 the local modified (`__sync__dict__`) and deleted
                 (`__sync__deleted__`) caches is used.
    :type keys: list
    :raises RuntimeError: if the record handler has no `insert` attribute (read-only store)
                          or if `check_sync_conflicts` reports conflicts for the keys
    :return: None
    :rtype: None
    """
    # In synchronous mode there is nothing to push
    if self.__sync__:
        return

    # A record handler without `insert` is treated as a read-only store
    if not hasattr(self.__record_handler__, "insert"):
        raise RuntimeError("Kosh store is read_only, cannot sync with it")

    if keys is None:
        # Default: everything modified locally plus everything deleted locally
        keys = list(self.__sync__dict__.keys()) + \
            list(self.__sync__deleted__.keys())
    if len(keys) == 0:
        return
    conflicts = self.check_sync_conflicts(keys)
    if len(conflicts) != 0:  # Conflicts, aborting
        # Build one message listing every conflicting object, then abort
        msg = "Conflicts exist objects have been modified in db and locally"
        for key in conflicts:
            msg += "\nObject id:{}".format(key)
            msg += "\n\tLast read from db: {}".format(
                conflicts[key]["last_check_from_db"])
            for k in conflicts[key]:
                # These two entries describe the conflict itself, not a conflicting item
                if k in ["last_check_from_db", "type"]:
                    continue
                # The template depends on the conflict type; it is filled below
                # with the values stored under conflicts[key][k]
                if conflicts[key]["type"] == "attribute":
                    st = "\n\t" + k + " modified to value '{}' at {} in db, modified locally to '{}' at {}"
                elif conflicts[key]["type"] == "delete":
                    st = "\n\t" + k + "{} {} {}"
                else:
                    st = "\n\tfile '" + k + \
                        "' mimetype modified to'{}' at {} in db, modified locally to '{}' at {}"
                st = st.format(*conflicts[key][k])
                msg += st
        raise RuntimeError(msg)
    # Ok no conflict we still need to sync
    update_records = []  # records to write back to the db
    del_keys = []  # ids passed to `_update_record(..., delete=True)` further down
    for key in keys:
        try:
            local = self.__sync__dict__[key]
        except Exception:
            # Ok it comes from the deleted datasets
            del_keys.append(key)
            continue
        try:
            db = self.__record_handler__.get(key)
            # db = self.get_record(key)
        except ValueError:
            # not in main store yet
            _update_record(local, self)
            # now we can retrieve it process for associated file not in storer yet
            db = self.__record_handler__.get(key)
            # db = self.get_record(key)
        # Merge every locally touched item into the db copy of the record
        for att in local["user_defined"]['kosh_information']:
            if att[-14:] == "_last_modified":  # We touched it
                # Keys ending in "___associated_last_modified" track associated files
                if att[-27:-14] == "___associated":
                    # ok it's an associated thing
                    uri = att[:-27]
                    if uri not in local["files"]:  # dissociated
                        # NOTE(review): this raises KeyError if `uri` is absent from
                        # db["files"] as well -- confirm that cannot happen
                        del db["files"][uri]
                        continue
                    # Now let's see if it is in main store
                    try:
                        self.__record_handler__.get(local["files"][uri]["kosh_id"])
                        # self.get_record(local["files"][uri]["kosh_id"])
                    except ValueError:
                        # Ok it is not in the store itself
                        rec = self.get_record(local["files"][uri]["kosh_id"])
                        _update_record(rec, self)
                    if att not in db["user_defined"]['kosh_information']:  # newly associated
                        db["files"][uri] = local["files"][uri]
                        db["user_defined"]['kosh_information'][att] = \
                            local["user_defined"]['kosh_information'][att]
                    elif local["user_defined"]['kosh_information'][att] > \
                            db["user_defined"]['kosh_information'][att]:
                        # last changed locally
                        db["files"][uri] = local["files"][uri]
                        db["user_defined"]['kosh_information'][att] = \
                            local["user_defined"]['kosh_information'][att]
                else:
                    # Regular attribute: strip the "_last_modified" suffix to get its name
                    name = att[:-14]
                    if name not in local["data"]:  # we deleted it
                        if name in db["data"]:
                            del db["data"][name]
                    # NOTE(review): unlike the associated-file branch above, there is no
                    # check that `att` exists in the db record before comparing -- confirm
                    # a KeyError is not possible for attributes created locally
                    elif local["user_defined"]['kosh_information'][att] > \
                            db["user_defined"]['kosh_information'][att]:
                        # The local value is the most recent one: it wins
                        db["data"][name] = local["data"][name]
                        db["user_defined"]['kosh_information'][att] = \
                            local["user_defined"]['kosh_information'][att]
        if db is not None:
            update_records.append(db)
        else:  # db did not have that key and returned None (no error)
            update_records.append(local)
        del_keys.append(key)

    # Collect every relationship in which an updated record appears, looking it up
    # in both the first and the last position of `relationships.find`
    rels = []
    relationships = self.get_sina_store().relationships
    for id_ in update_records:
        rels += relationships.find(id_.id, None, None)
        rels += relationships.find(None, None, id_.id)
    # `self.__sync__` was falsy at the top of this method (otherwise we returned early)
    if not self.__sync__:
        # Delete, write the merged records, then re-insert the collected relationships
        _update_record(del_keys, self, delete=True)
        _update_record(update_records, self)
        relationships.insert(rels)
    # NOTE(review): the merged records are passed to `_update_record` a second time
    # here -- confirm whether this repeated write is intentional
    _update_record(update_records, self)
    # Finally drop the processed keys from the local caches
    for key in list(keys):
        try:
            _update_record(key, self, self._added_unsync_mem_store, delete=True)
        except Exception:
            pass
        try:
            del self.__sync__dict__[key]
        except Exception:
            # probably coming from del then
            del self.__sync__deleted__[key]

synchronous(mode=None)

Change sync mode for the store

Parameters:

Name Type Description Default
mode bool

The mode to set: True means synchronous mode, False means asynchronous mode, and None toggles the current mode. Any other value is ignored and the current mode is simply returned.

None

Returns:

Type Description
bool

current synchronization mode

Source code in kosh/store.py
@lock_strategies.lock_method
def synchronous(self, mode=None):
    """Query or change the synchronization mode of the store

    :param mode: True selects synchronous mode, False selects asynchronous mode and
                 None toggles the current mode. Any other value leaves the mode
                 untouched, so the call only reports it.
    :type mode: bool
    :return: synchronization mode in effect once the call completes
    :rtype: bool
    """
    if mode is None:
        # No explicit mode requested: flip whatever is currently set.
        # NOTE(review): unlike the explicit `True` case below, toggling into
        # synchronous mode this way does not call `sync()` first -- confirm intended
        self.__sync__ = not self.__sync__
        return self.__sync__

    if mode in [True, False]:
        entering_sync_mode = mode and not self.__sync__
        if entering_sync_mode:
            # `sync()` is called while the store is still flagged asynchronous,
            # before the always-sync flag is turned on
            self.sync()
        self.__sync__ = mode
    return self.__sync__

tar(tar_file, tar_opts, src='', tar_type='tar', stores=[], dataset_record_type='dataset', no_absolute_path=False, dataset_matching_attributes=['name'], merge_strategy='conservative')

Creates or extracts a tar file

Parameters:

Name Type Description Default
tar_file str

The name of the tar file

required
tar_opts str

Extra arguments such as -c to create and -x to extract

required
src (list, optional)

List of files or directories to tar

''
tar_type (str, optional)

Type of tar file including htar, defaults to "tar"

'tar'
stores (list, optional)

Kosh store(s) to use, defaults to []

[]
dataset_record_type (str, optional)

Record type used by Kosh when adding datasets to Sina database, defaults to "dataset"

'dataset'
no_absolute_path (bool, optional)

Do not use absolute path when searching stores, defaults to False

False
dataset_matching_attributes (list, optional)

List of attributes used to identify if two datasets are identical, defaults to ["name", ]

['name']
merge_strategy (str, optional)

When importing dataset, how do we handle conflict, defaults to "conservative"

'conservative'
Source code in kosh/store.py
@lock_strategies.lock_method
def tar(self, tar_file, tar_opts, src="", tar_type="tar",
        stores=[], dataset_record_type="dataset", no_absolute_path=False,
        dataset_matching_attributes=["name", ], merge_strategy="conservative"):
    """Creates or extracts a tar file

    Builds the command-line style argument lists and hands them to `KoshCmd._tar`.
    The `stores` argument is only read: the caller's list is left untouched.

    :param tar_file: The name of the tar file
    :type tar_file: str
    :param tar_opts: Extra arguments such as -c to create and -x to extract
    :type tar_opts: str
    :param src: List of files or directories to tar (a single string is also accepted)
    :type src: list, optional
    :param tar_type: Type of tar file including htar, defaults to "tar"
    :type tar_type: str, optional
    :param stores: Kosh store(s) to use in addition to this one, either a list of
        stores or a single store, defaults to []
    :type stores: list, optional
    :param dataset_record_type: Record type used by Kosh when adding
        datasets to Sina database, defaults to "dataset"
    :type dataset_record_type: str, optional
    :param no_absolute_path: Do not use absolute path when searching stores, defaults to False
    :type no_absolute_path: bool, optional
    :param dataset_matching_attributes: List of attributes used to identify if two datasets
        are identical, defaults to ["name", ]
    :type dataset_matching_attributes: list, optional
    :param merge_strategy: When importing dataset, how do we handle conflict, defaults to "conservative"
    :type merge_strategy: str, optional
    """

    # Options includes src
    opts = tar_opts.split()
    if isinstance(src, str):
        src = [src]
    opts.extend(src)

    # --stores (this store always comes first)
    cmmd = ["--stores",  self.db_uri]

    if stores:
        # Build the uris in a new object rather than overwriting the entries of the
        # caller's list: mutating it in place would leave the caller holding
        # strings instead of stores and break any later reuse of that list
        if isinstance(stores, list):
            store_uris = [store.db_uri for store in stores]
        else:
            store_uris = stores.db_uri
        cmmd = self._cli_list_creator("--stores", store_uris, cmmd, os.getcwd())

    # --dataset_record_type
    cmmd.extend(["--dataset_record_type", dataset_record_type])

    # --file
    cmmd.extend(["--file",  tar_file])

    # --no_absolute_path
    if no_absolute_path:
        cmmd.extend(["--no_absolute_path"])

    # --dataset_matching_attributes (passed as the string form of the list)
    cmmd.extend(["--dataset_matching_attributes", f"{dataset_matching_attributes}"])

    # --merge_strategy
    cmmd.extend(["--merge_strategy", merge_strategy])

    KoshCmd._tar(self, tar_type, store_args=cmmd, opts=opts)

to_dataframe(data_columns=[], *atts, **keys)

Return the find object as a Pandas DataFrame.

Pass in the same arguments and keyword arguments as the find method.

Arguments are the metadata names we are looking for, e.g. find("attr1", "attr2"). You can further restrict the search by specifying an exact value for a metadata via key=value. You can return ids only by using: ids_only=True. A range can be specified via: sina.utils.DataRange(min, max).

"file_uri" is a reserved key that will return all records associated with the given "uri", e.g. store.find(file_uri=uri). "types" lets you search over specific Sina record types only. "id_pool" will search based on the id of a Sina record or Kosh dataset; it can be a list.

Parameters:

Name Type Description Default
data_columns (Union(str, list), optional)

Columns to extract. By default this will include ['id', 'name', 'creator', 'creation_date', 'last_modified_date']. If nothing is passed, will return all data.

[]

Returns:

Type Description
Pandas DataFrame

Pandas DataFrame

Source code in kosh/store.py
@lock_strategies.lock_method
def to_dataframe(self, data_columns=[], *atts, **keys):
    """Return the find object as a Pandas DataFrame.

    Pass in the same arguments and keyword arguments as the find method.
    Note that `load_type` and `ids_only` are always overridden here
    ('dictionary' and False respectively) since full records are needed.

    Arguments are the metadata name we are looking for e.g
    find("attr1", "attr2")
    you can further restrict by specifying exact value for a metadata
    via key=value
    range can be specified via: sina.utils.DataRange(min, max)

    "file_uri" is a reserved key that will return all records being associated
               with the given "uri", e.g store.find(file_uri=uri)
    "types" let you search over specific sina record types only.
    "id_pool" will search based on id of Sina record or Kosh dataset. Can be a list.

    :param data_columns: Columns to extract. By default this will include ['id', 'name', 'creator',
                                                                           'creation_date', 'last_modified_date'].
                         If nothing is passed, will return all data.
    :type data_columns: Union(str, list), optional
    :return: Pandas DataFrame with one row per record found and one column per
             requested attribute; attributes missing from a record are `pd.NA`
    :rtype: Pandas DataFrame
    """
    import pandas as pd
    if isinstance(data_columns, str):
        data_columns = [data_columns]

    # Full records are required to read the attribute values
    keys['load_type'] = 'dictionary'
    keys['ids_only'] = False
    datasets = list(self.find(*atts, **keys))

    # Always have these by default
    defaults = ['id', 'name', 'creator', 'creation_date', 'last_modified_date']

    # Acquire all data if `data_columns` was not passed
    if not data_columns:
        data_columns = sorted({name for dataset in datasets for name in dataset['data']})

    # Defaults go in front; `dict.fromkeys` drops repeated names while keeping the
    # first occurrence, so each column is filled only once. Ensemble tags are removed.
    columns = [column for column in dict.fromkeys(defaults + list(data_columns))
               if "_ENSEMBLE_TAG_" not in column]

    attr_dict = {}
    for column in columns:
        if column == "id":
            # The id lives at the top level of the record, not under 'data'
            attr_dict[column] = [dataset['id'] for dataset in datasets]
        else:
            attr_dict[column] = [dataset['data'].get(column, {}).get('value', pd.NA)
                                 for dataset in datasets]

    return pd.DataFrame(attr_dict)

unlock()

Unlocks the store so others can access it

Source code in kosh/store.py
@lock_strategies.lock_method
def unlock(self):
    """Release the store's lock file so others can access the store

    Nothing is done when lock files are not in use or when the db uri
    contains "://". Otherwise the lock is released, the lock file is
    closed and its removal from disk is attempted.
    """
    if self.use_lock_file and "://" not in self.db_uri:
        handle = self.lock_file
        fcntl.lockf(handle, fcntl.LOCK_UN)
        handle.close()
        # Removal is best effort: with concurrent use by the same user
        # the file may already have been removed
        try:
            os.remove(handle.name)
        except Exception:
            pass

connect(database, keyspace=None, database_type=None, allow_connection_pooling=False, read_only=False, delete_all_contents=False, execution_options={}, connection_type='write', lock_strategy=None, verbose=False, **kargs)

Connect to a Sina store.

Given a uri/path (and, if required, the name of a keyspace), figures out which backend is required.

Parameters:

Name Type Description Default
database str

The URI of the store to connect to.

required
keyspace str

The keyspace to connect to (Cassandra only).

None
database_type str

Type of backend to connect to. If not provided, Sina will infer this from `<database>`. One of "sql" or "cassandra".

None
allow_connection_pooling bool

Allow "pooling" behavior that recycles connections, which may prevent them from closing fully on .close(). Only used for the sql backend.

False
read_only bool

whether to create a read-only store

False
execution_options dict

execution options keyword to pass to sina store record_dao at creation time

{}
connection_type str

whether to create a write, append, or read only store

'write'
kargs dict, key=value

Any extra arguments you wish to pass to the KoshStore function

{}
delete_all_contents bool

Deletes all data after opening the db

False
lock_strategy LockStrategy

A LockStrategy object to incorporate locking mechanisms

None

Returns:

Type Description
KoshStoreClass

a KoshStore object connected to the specified database

Source code in kosh/store.py
@lock_strategies.lock_function
def connect(database, keyspace=None, database_type=None,
            allow_connection_pooling=False, read_only=False,
            delete_all_contents=False, execution_options={},
            connection_type="write", lock_strategy=None,
            verbose=False, **kargs):
    """Connect to a Sina store.

    Given a uri/path (and, if required, the name of a keyspace),
    figures out which backend is required.

    A temporary Sina connection is opened first (and, for 'write' connections,
    used to optionally wipe the store and to set up the Kosh info record and users).
    It is always closed before the KoshStore is created, even on error.

    :param database: The URI of the store to connect to.
    :type database: str
    :param keyspace: The keyspace to connect to (Cassandra only).
    :type keyspace: str
    :param database_type: Type of backend to connect to. If not provided, Sina
                          will infer this from <database>. One of "sql" or
                          "cassandra".
    :type database_type: str
    :param allow_connection_pooling: Allow "pooling" behavior that recycles connections,
                                     which may prevent them from closing fully on .close().
                                     Only used for the sql backend.
    :type allow_connection_pooling: bool
    :param read_only: whether to create a read-only store
    :type read_only: bool
    :param delete_all_contents: Deletes all data after opening the db
                                (only for non read-only 'write' connections)
    :type delete_all_contents: bool
    :param execution_options: execution options keyword to pass to sina store record_dao at creation time
    :type execution_options: dict
    :param connection_type: whether to create a write, append, or read only store
    :type connection_type: str
    :param lock_strategy: A LockStrategy object to incorporate locking mechanisms,
                          defaults to `lock_strategies.NoLocking()`
    :type lock_strategy: LockStrategy
    :param verbose: passed through to the KoshStore function
    :type verbose: bool
    :param kargs: Any extra arguments you wish to pass to the KoshStore function.
                  `db` is accepted as an alias of `database_type`; `sync` (default True)
                  is forced to False for read-only stores.
    :type kargs: dict, key=value
    :raises ValueError: if `db` and `database_type` are both given with different values,
                        or if `connection_type` is not one of 'write', 'append', 'read'
    :return: a KoshStore object connected to the specified database
    :rtype: KoshStoreClass
    """
    from sina import connect as sina_connect
    from sqlalchemy.exc import ResourceClosedError
    if lock_strategy is None:
        lock_strategy = lock_strategies.NoLocking()

    with lock_strategy:
        # `db` is an alternate spelling of `database_type`; they must agree if both given
        db = kargs.pop("db", None)
        if db is not None:
            if database_type is not None and db != database_type:
                raise ValueError(
                    "You cannot specify `db` and `database_type` with different values")
            database_type = db
        if connection_type not in ['write', 'append', 'read']:
            raise ValueError(
                "`connection_type` must be one of ['write', 'append', 'read']")
        if connection_type == 'read':
            read_only = True
        sina_store = sina_connect(database=database,
                                  keyspace=keyspace,
                                  database_type=database_type,
                                  allow_connection_pooling=allow_connection_pooling,
                                  read_only=read_only, connection_type=connection_type)
        # sina_store._record_dao.session.connection(execution_options=execution_options)

        # Make sure the temporary connection is released even if the
        # bootstrap steps below raise
        try:
            if not read_only and connection_type == 'write':
                if delete_all_contents:
                    try:
                        sina_store.delete_all_contents(force="SKIP PROMPT")
                    except ResourceClosedError:
                        pass  # Store was already empty
                update_store_and_get_info_record(sina_store.records)
                create_kosh_users(sina_store.records)
        finally:
            sina_store.close()

        sync = kargs.pop("sync", True)
        if read_only:
            # A read-only store is never opened in synchronous mode
            sync = False
        store = KoshStore(database, sync=sync, keyspace=keyspace, read_only=read_only,
                          db=database_type,
                          allow_connection_pooling=allow_connection_pooling,
                          execution_options=execution_options, connection_type=connection_type,
                          lock_strategy=lock_strategy,
                          verbose=verbose,
                          **kargs)
        return store