Source code for chat.database

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# PEP 8 check with Pylint
"""Manage NLU database based on neo4j graph database.
管理基于neo4j图形数据库的自然语言理解数据库。

The 'py2neo' is a python package of neo4j graph database.
1.Support increase, delete, modify, query and other operations for nodes,
relationships, subgraph and graph;
2.Support batch processing;
3.Support command line;
4.Support read the data file, batch import and export.
"py2neo"是neo4j图形数据库的python接口包。
1.支持节点,关系,子图,全图的增、删、改、查;
2.支持批处理;
3.支持命令行;
4.支持读取数据文件批量导入及导出。

Available functions:
- All classes and functions: 所有类和函数
"""
import string
from optparse import OptionParser
from py2neo import Graph, Node, Relationship, NodeSelector
from .mytools import read_excel
from .semantic import get_tag


[docs]class Database(): """Manage Database. 管理数据库。 It support python command line parameter processing of relational database and graph database. You can view all the features by 'python xxx.py -h'. 支持关系数据库和图形数据库的python命令行参数处理。 可以通过'python xxx.py -h'查看所有功能。 Public attributes: - rdb: Relational database. 关系数据库。 - graph: Graph database. 图数据库。 """ def __init__(self, password=None, userid="userid"): self.rdb = None self.graph = Graph("http://localhost:7474/db/data", password=password) self.gconfig = self.graph.find_one("User", "userid", userid) self.selector = NodeSelector(self.graph) self.usage = "usage: python %prog [options] arg" self.version = "%prog 1.0" self.parser = OptionParser(usage=self.usage, version=self.version) self.parser.add_option("-v", "--verbose", action="store_true", dest="verbose") self.parser.add_option("-q", "--quiet", action="store_false", dest="verbose") self.parser.add_option("-b", "--batch", dest="batch", action="store_true", \ help="batch processing of graph database") self.parser.add_option("-f", "--file", dest="filename", \ help="read data from filename") self.parser.add_option("-p", "--path", dest="filepath", \ help="read data from filepath") self.parser.add_option("-a", "--add", dest="add", \ help="add subgraph to graph database") self.parser.add_option("-d", "--delete", dest="delete", \ help="delete subgraph of graph database") self.parser.add_option("-e", "--edit", dest="edit", \ help="edit subgraph of graph database") self.parser.add_option("-s", "--search", dest="search", \ help="search subgraph of graph database") (self.options, self.args) = self.parser.parse_args() # if len(self.args) == 0: # self.parser.error("incorrect number of arguments") if self.options.verbose: print("reading %s..." % self.options.filename) if self.options.delete: for name in self.args: self.delete(pattern=self.options.delete, name=name) def delete(self, pattern="n", name=None): """Batch delete data or subgraph in database. 在数据库中批量删除数据或者子图。 Args: pattern: Type of subgraph. 子图类型。 name: Name of subgraph. 子图名称。 """ if pattern == "all": self.graph.delete_all() elif pattern == "n": self.graph.run("MATCH(n:" + name + ") DETACH DELETE n") elif pattern == "r": self.graph.run("MATCH (n)-[r:" + name + "]-(m) DETACH DELETE r") elif pattern == "nr": self.graph.run("MATCH (n)<-[r:" + name + "]-(m) DETACH DELETE r DELETE n") elif pattern == "rm": self.graph.run("MATCH (n)-[r:" + name + "]->(m) DETACH DELETE r DELETE m") elif pattern == "nrm": self.graph.run("MATCH (n)-[r:" + name + "]-(m) DETACH DELETE r DELETE n DELETE m") def add_qa(self, nodeclass="NluCell", name=None, content=None, topic="", \ behavior="", parameter="", url="", tag="", keywords="", api="", txt="", \ img="", chart="", delimiter=None): """ Add qa node in graph. """ assert name is not None, "name must be string." assert content is not None, "content must be string." questions = name.split(delimiter) for question in questions: tag = get_tag(question, self.gconfig) node = Node(nodeclass, name=question, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, hot="0") self.graph.create(node) def handle_excel(self, filename=None, custom_sheets=None): """Processing data of excel. """ assert filename is not None, "filename can not be None" data = read_excel(filename) data_sheets = data.sheet_names() if custom_sheets: sheet_names = list(set(data_sheets).intersection(set(custom_sheets))) else: sheet_names = data_sheets for sheet_name in sheet_names: # 可自定义要导入的子表格 table = data.sheet_by_name(sheet_name) # 1.Select specified table # table = data.sheet_by_index(0) if data: # 2.Select specified column col_format = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M'] try: nrows = table.nrows # ncols = table.ncols str_upcase = [i for i in string.ascii_uppercase] i_upcase = range(len(str_upcase)) ncols_dir = dict(zip(str_upcase, i_upcase)) col_index = [ncols_dir.get(i) for i in col_format] # 前两行为表头 for i in range(2, nrows): name = table.cell(i, col_index[0]).value content = table.cell(i, col_index[1]).value topic = table.cell(i, col_index[2]).value behavior = table.cell(i, col_index[3]).value parameter = table.cell(i, col_index[4]).value url = table.cell(i, col_index[5]).value tag = table.cell(i, col_index[6]).value keywords = table.cell(i, col_index[7]).value api = table.cell(i, col_index[8]).value txt = table.cell(i, col_index[9]).value img = table.cell(i, col_index[10]).value chart = table.cell(i, col_index[11]).value # hot = 0 table.cell(i, col_index[12]).value # 3.Your processing function of excel data here self.add_qa(name=name, content=content, topic=topic, \ behavior=behavior, parameter=parameter, url=url, tag=tag, \ keywords=keywords, api=api, txt=txt, img=img, chart=chart, \ delimiter="|") except Exception as error: print('Error: %s' %error) return None else: print('Error! Data of %s is empty!' %sheet_name) return None def handle_txt(self, filename=None): """ Processing text file to generate subgraph. """ assert filename is not None, "filename can not be None" with open(filename, encoding="UTF-8") as file: question = file.readline().rstrip() while question: answer = file.readline().rstrip() print("question: " + question) print("answer: " + answer) self.add_qa(name=question, content=answer, delimiter="|") question = file.readline().rstrip()