import json
import time
import pandas as pd
# Benchmark: parse every record once with json.loads and once with eval,
# printing the elapsed seconds for each approach (json.loads first).
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code; only load pickle files from a trusted source.
data_sets = pd.read_pickle("val_token_id.pandas_pickle")
# Turn every record into its string form so both parsers get text input.
data_sets = [str(i) for i in data_sets]

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short intervals; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()
[json.loads(i) for i in data_sets]
print(time.perf_counter() - start)

start = time.perf_counter()
# NOTE(review): eval executes arbitrary Python expressions. It is kept here
# only as the slower baseline being measured; never use it on untrusted data.
[eval(i) for i in data_sets]
print(time.perf_counter() - start)
import json
import time
from multiprocessing import Process, Manager, freeze_support
import pandas as pd
from tqdm import tqdm
def json_loads_data(return_list, one_data):
    """Parse each JSON string in ``one_data`` and add the results to ``return_list``.

    Args:
        return_list: Mutable list-like object extended in place with ``+=``.
            The caller in this file passes a ``Manager().list()`` proxy so
            worker processes can share their results.
        one_data: Iterable of JSON-encoded strings; iteration is wrapped in
            ``tqdm`` to show progress.

    Returns:
        None. Results are delivered through ``return_list``.
    """
    parsed_items = []
    for raw_text in tqdm(one_data):
        parsed_items.append(json.loads(raw_text))
    # Add everything in one step, after all records parsed successfully.
    return_list += parsed_items
def split_into_chunks(items, num_chunks):
    """Split ``items`` into at most ``num_chunks`` contiguous slices.

    Args:
        items: Sequence supporting ``len()`` and slicing.
        num_chunks: Upper bound on the number of slices; must be >= 1.

    Returns:
        A list of slices of ``items`` in original order. Every slice except
        possibly the last has the same length. An empty input gives ``[]``.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        raise ValueError("num_chunks must be at least 1")
    # Ceiling division keeps the slice count <= num_chunks; max(1, ...) avoids
    # a zero step in range() when there are fewer items than chunks.
    chunk_size = max(1, -(-len(items) // num_chunks))
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]


if __name__ == '__main__':
    # Needed so a frozen Windows executable can spawn worker processes.
    freeze_support()
    # NOTE(review): read_pickle unpickles the file, which can execute
    # arbitrary code; only load pickle files from a trusted source.
    data_sets = pd.read_pickle("val_token_id.pandas_pickle")
    # Turn every record into its string form so all parsers get text input.
    data_sets = [str(i) for i in data_sets]

    # 1) json.loads spread over several worker processes. The timer covers
    #    manager start-up, process creation and joining.
    start = time.perf_counter()
    num = 5
    # The context manager shuts the manager's server process down afterwards.
    with Manager() as manager:
        data = manager.list()
        p_list = []
        for chunk in split_into_chunks(data_sets, num):
            p = Process(target=json_loads_data, args=(data, chunk))
            p.start()
            p_list.append(p)
        for p in p_list:
            p.join()
        print("multi_json_loads", time.perf_counter() - start)

    # 2) Plain json.loads in a single process.
    start = time.perf_counter()
    [json.loads(i) for i in data_sets]
    print("json_loads", time.perf_counter() - start)

    # 3) json.loads applied through a pandas column.
    start = time.perf_counter()
    pd.DataFrame(data_sets)[0].apply(json.loads).values.tolist()
    print("dataFrame_apply", time.perf_counter() - start)

    # 4) One json.loads call over the whole list rendered as a single string.
    #    Removing the single quotes that repr() puts around each element turns
    #    the list of strings into one JSON array; this presumably relies on no
    #    element containing a quote character itself -- verify for other data.
    start = time.perf_counter()
    json.loads(str(data_sets).replace("'", ""))
    print("json_loads_str", time.perf_counter() - start)

    # 5) eval baseline.
    # NOTE(review): eval executes arbitrary Python expressions; it is kept
    # only as the baseline being measured. Never use it on untrusted data.
    start = time.perf_counter()
    [eval(i) for i in data_sets]
    print("eval", time.perf_counter() - start)
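# Source: https://blog.csdn.net/qq_35869630/article/details/105919104
# Python 在大数据处理下的优化(一)用json.loads比eval快10倍!
# (English: "Python optimization for big-data processing, part 1: json.loads is 10x faster than eval!")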