@@ -405,6 +405,35 @@ Blaze provides a standard API for doing computations with various
in-memory and on-disk backends: NumPy, pandas, SQLAlchemy, MongoDB, PyTables,
PySpark.
+ `Cylon <https://cylondata.org/>`__
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Cylon is a fast, scalable, distributed-memory parallel runtime with a
+ pandas-like Python DataFrame API. "Core Cylon" is implemented in C++ and uses
+ the Apache Arrow format to represent data in memory. The Cylon DataFrame API
+ implements most of the core pandas operators, such as merge, filter, join,
+ concat, group-by, and drop_duplicates. These operators are designed to work
+ across thousands of cores to scale applications. Cylon can interoperate with
+ pandas by reading data from, or converting data to, pandas DataFrames, so
+ users can selectively scale parts of their pandas applications.
+
+ .. code:: python
+
+     from pycylon import read_csv, DataFrame, CylonEnv
+     from pycylon.net import MPIConfig
+
+     # Initialize the Cylon distributed environment
+     config: MPIConfig = MPIConfig()
+     env: CylonEnv = CylonEnv(config=config, distributed=True)
+
+     df1: DataFrame = read_csv('/tmp/csv1.csv')
+     df2: DataFrame = read_csv('/tmp/csv2.csv')
+
+     # Use thousands of cores across the cluster to compute the join
+     df3: DataFrame = df1.join(other=df2, on=[0], algorithm="hash", env=env)
+
+     print(df3)
+
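+ The pandas interoperability described above might look roughly like the
+ following minimal sketch; it assumes that pycylon's ``DataFrame`` constructor
+ accepts a pandas ``DataFrame`` and that a ``to_pandas()`` method is available:
+
+ .. code:: python
+
+     import pandas as pd
+
+     from pycylon import DataFrame
+
+     # Build a pandas DataFrame and hand it to Cylon (assumed constructor)
+     pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
+     cdf = DataFrame(pdf)
+
+     # ... apply scalable Cylon operators here ...
+
+     # Convert back to pandas for the pandas-only parts of the workflow
+     # (assumed to_pandas() method)
+     result = cdf.to_pandas()
+     print(result)
+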
`Dask <https://dask.readthedocs.io/en/latest/>`__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~