版本为2.3.1?
转为 png 的文档:PNG 2D depiction (png) — Open Babel openbabel-3-1-1 documentation
上述链接是所有的可选项,例如:
-p?<pixels>
image size, default 300
-w?<pixels>
image width (or from image size)
-h?<pixels>
image height (or from image size)
?需要注意的是,这些可选项,需要加 -x 在选项前才可用,例如 -xp 512
输出512*512的png,但是这种方式的话,输出的比较模糊
obabel -:"COC(=O)CCCCCCCC(=O)NC(CCC/N=C(\N)N[N+](=O)[O-])C(=O)NC(C=O)CC(C)C" -opng -O data/cys.png -xp 512
备注:data文件夹是需要提前自己创建好,否则错误
转为 SVG 的文档:SVG 2D depiction (svg) — Open Babel openbabel-3-1-1 documentation
obabel -:"COC(=O)CCCCCCCC(=O)NC(CCC/N=C(\N)N[N+](=O)[O-])C(=O)NC(C=O)CC(C)C" -osvg -O data/cys.svg -xC -xt
这一步是因为直接生成的png是比较模糊的,所以这里使用svg生成512的png
import cairosvg
from PIL import Image
cairosvg.svg2png(url="images/cys.svg", write_to="images/temp_cys.png", output_width=512, output_height=512)
# 打开转换后的 PNG 图像
original_image = Image.open("images/temp_cys.png")
# 创建一个白色背景的新图像
new_image = Image.new("RGBA", (512, 512), "white")
# 计算居中位置
x = (512 - original_image.width) // 2
y = (512 - original_image.height) // 2
# 将原始图像粘贴到新图像上
new_image.paste(original_image, (x, y), original_image)
# 保存最终图像
new_image.save("images/cys.png")
因为rdkit转成的图像有点问题:CCS(=O)(=O)[C@]12CC3CC(C1)[C@@H](NC(=O)C(C)(C)Oc1ccc(F)cc1Cl)C(C3)C2
例如上面这个smiles转换后就会有“重叠”,所以这里使用openbabel
from multiprocessing import Pool, cpu_count
import argparse
import os
import pandas as pd
from tqdm import tqdm
import cairosvg
def process_smiles(args_tuple):
opt_smiles, name, args, opt_img = args_tuple
try:
svg_filename = "{}.svg".format(name)
svg_img_save_path = os.path.join(opt_img, svg_filename)
png_filename = "{}.png".format(name)
png_img_save_path = os.path.join(opt_img, png_filename)
command = "obabel -:\"{}\" -osvg -O {} -xC -xt".format(opt_smiles, svg_img_save_path)
os.system(command)
cairosvg.svg2png(url=svg_img_save_path, write_to=png_img_save_path, output_width=args.outputPixel, output_height=args.outputPixel)
os.remove(svg_img_save_path)
except Exception as e:
print(e)
os.remove(svg_img_save_path)
print("The current error SMILES is :", opt_smiles)
def main():
parser = argparse.ArgumentParser(description='csv2image')
parser.add_argument('--dataroot', type=str, default="./", help='data root')
parser.add_argument('--csvname', type=str, default="top50.csv", help='The name of CSV')
parser.add_argument('--outputImageDir', type=str, default="../data/")
parser.add_argument('--theCsvFirstLineSMILES', type=str, default="smiles", help='CSV你要转换的列名')
parser.add_argument('--outputPixel', type=int, default="512", help='输出的像素')
args = parser.parse_args()
raw_file_path = os.path.join(args.dataroot, args.csvname)
opt_img = os.path.join(args.dataroot, args.outputImageDir)
if not os.path.exists(opt_img):
os.makedirs(opt_img)
df = pd.read_csv(raw_file_path)
opt_smiles = df[args.theCsvFirstLineSMILES].values
smiles_name = df['Molecule_ChEMBL_ID'].values
# Prepare data for multiprocessing
data_tuples = [(smiles, name, args, opt_img) for smiles, name in zip(opt_smiles, smiles_name)]
# Use all available CPUs
pool = Pool(cpu_count())
# Process data in parallel
for _ in tqdm(pool.imap_unordered(process_smiles, data_tuples), total=len(data_tuples)):
pass
pool.close()
pool.join()
if __name__ == '__main__':
main()
# python smiles2image_muti_kernel.py --csvname data/test.csv --outputImageDir data/images2/ --theCsvFirstLineSMILES canonical_smiles