Save
Saving
  • guluting

    @裸奔的de饺子 我们现在在使用的微服务框架Nameko+RabbitMQ消息队列,目前测试下来使用比较轻便也稳定😀

    posted in GuaiK实验室 read more
  • guluting

    simhash是locality sensitive hash(局部敏感哈希)的一种,最早由Moses Charikar在《similarity estimation techniques from rounding algorithms》一文中提出。Google就是基于此算法实现网页文件查重的。
    simhash将一个文档转换成一个64位(bit)的simhash值,判断重复时只需比较两个simhash值的海明距离是否<n(n为设定的阈值),即可判断两个文档是否相似。

    #-*- coding:UTF-8 -*-
    #
    # 文章内容查重模块
    
    import jieba
    from simhash import Simhash, SimhashIndex
    
    class CheckDup():
        """In-memory near-duplicate detector for article text.

        Texts are tokenised with ``jieba.cut_for_search``, hashed with
        ``Simhash`` and stored in a ``SimhashIndex``.  The index is created
        lazily by the first ``add_index`` call; until then ``find_near``
        returns an empty list and ``del_index`` / ``update_index`` do nothing.
        """

        def __init__(self, k=16):
            """Create an empty detector.

            :param k: value forwarded as the ``k`` argument of ``SimhashIndex``
                when the index is first built.  The default keeps the value
                this class has always used.
            """
            self.__k = k
            self.__index = None

        def _fingerprint(self, text):
            """Return the Simhash of ``text``.

            Only tokens longer than two characters are used as features.
            """
            return Simhash([tag for tag in jieba.cut_for_search(text) if len(tag) > 2])

        def add_index(self, key, value):
            """Index the text ``value`` under ``key``, building the index on first use."""
            s = self._fingerprint(value)
            if self.__index is None:
                print("init check dup...")
                self.__index = SimhashIndex([(key, s)], k=self.__k)
            else:
                self.__index.add(key, s)

        def del_index(self, key, value):
            """Remove the entry ``key`` whose indexed text was ``value``.

            Does nothing when no index has been built yet.
            """
            if self.__index is None:
                # Nothing to delete from; skip the (comparatively costly) tokenising.
                return
            self.__index.delete(key, self._fingerprint(value))

        def update_index(self, key, old_v, new_v):
            """Replace the text stored under ``key``: ``old_v`` out, ``new_v`` in.

            Does nothing when no index has been built yet (``new_v`` is NOT
            added in that case, matching the historical behaviour).
            """
            if self.__index is None:
                return
            self.__index.delete(key, self._fingerprint(old_v))
            self.__index.add(key, self._fingerprint(new_v))

        def find_near(self, value):
            """Return what ``SimhashIndex.get_near_dups`` reports for ``value``.

            Returns an empty list when nothing has been indexed yet.
            """
            if self.__index is None:
                return []
            return self.__index.get_near_dups(self._fingerprint(value))
    
    

    posted in GuaiK实验室 read more
  • guluting

    #-*- coding:UTF-8 -*-
    from numpy import *
    def loadDataSet():
        """Return the toy training corpus as ``(postingList, classVec)``.

        ``postingList`` is a list of tokenised posts and ``classVec`` the
        matching labels: 1 marks an abusive post, 0 a normal one.
        """
        # Each sample is kept next to its label: (label, tokens).
        labelledPosts = [
            (0, ["my","dog","has","flea","problems","help","please"]),
            (1, ["maybe","not","take","him","to","dog","park","stupid"]),
            (0, ["my","dalmation","is","so","cute","I","love","him"]),
            (1, ["stop","posting","stupid","worthless","garbage"]),
            (0, ["mr","licks","ate","my","steak","how","to","stop","him"]),
            (1, ["quit","buying","worthless","dog","food","stupid"]),
        ]
        postingList = [tokens for _, tokens in labelledPosts]
        classVec = [label for label, _ in labelledPosts]
        return postingList,classVec
    
    def createVocabList(dataSet):
        """Return a list of the distinct tokens found across all documents.

        The order of the returned list is whatever set iteration yields; it is
        not sorted.
        """
        # Union of every document's tokens in one call.
        uniqueTokens = set().union(*dataSet)
        return list(uniqueTokens)
    
    
    def setOfWords2Vec(vocabList,inputSet):
        """Convert a tokenised document into a count vector over ``vocabList``.

        :param vocabList: list of vocabulary words; position i of the result
            counts occurrences of ``vocabList[i]``.  If a word appears more
            than once in ``vocabList`` only its first position is counted.
        :param inputSet: iterable of tokens of the document.
        :return: list of ints with the same length as ``vocabList``.

        Tokens missing from the vocabulary are reported on stdout and skipped.
        """
        # Build the word -> first position lookup once, so each token costs one
        # dict lookup instead of two linear scans of the vocabulary.
        firstPosition = {}
        for position, vocabWord in enumerate(vocabList):
            firstPosition.setdefault(vocabWord, position)

        returnVec = [0]*len(vocabList)
        for word in inputSet:
            position = firstPosition.get(word)
            if position is None:
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("the word:%s is not in my Vocabulary!" % word)
            else:
                returnVec[position] += 1
        return returnVec
    
    
    def trainNBO(trainMatrix,trainCategory):
        """Estimate naive Bayes parameters from labelled word-count vectors.

        :param trainMatrix: sequence of per-document vectors, all as long as
            the first one (presumably aligned with the vocabulary; confirm
            against the caller).
        :param trainCategory: per-document labels; 1 means abusive, anything
            else is treated as the normal class.
        :return: ``(p0Vect, p1Vect, pAbusive)``: the log word probabilities for
            class 0 and class 1, and the fraction of documents labelled abusive.

        Word counts start at 1 and the per-class totals at 2.0 so that unseen
        words never produce a zero probability.
        """
        docTotal = len(trainMatrix)             # number of training documents
        vocabSize = len(trainMatrix[0])         # length of each document vector
        pAbusive = sum(trainCategory)/float(docTotal)

        # Index 0 holds the normal class, index 1 the abusive class.
        wordCounts = [ones(vocabSize), ones(vocabSize)]
        tokenTotals = [2.0, 2.0]
        for docIdx in range(docTotal):
            docVec = trainMatrix[docIdx]
            cls = 1 if trainCategory[docIdx] == 1 else 0
            wordCounts[cls] += docVec
            tokenTotals[cls] += sum(docVec)

        # Logs are taken so later products of probabilities become sums.
        p1Vect = log(wordCounts[1]/tokenTotals[1])
        p0Vect = log(wordCounts[0]/tokenTotals[0])
        return p0Vect,p1Vect,pAbusive
    
    
    def classifyNB(vec2Classify,p0Vec,p1Vec,pClass1):
        """Return 1 if the document scores higher for class 1, otherwise 0.

        :param vec2Classify: document vector to classify.
        :param p0Vec: log word probabilities for class 0.
        :param p1Vec: log word probabilities for class 1.
        :param pClass1: prior probability of class 1.

        A tie is resolved in favour of class 0.
        """
        # Log-space scores: weighted log-likelihood plus log prior, per class.
        logScores = (
            sum(vec2Classify * p0Vec) + log(1.0-pClass1),
            sum(vec2Classify * p1Vec) + log(pClass1),
        )
        return 1 if logScores[1] > logScores[0] else 0
    
    def testNB():
        """Train the classifier on the toy corpus and print the label of one sample post."""
        listOPosts,listClasses = loadDataSet()
        myVocabList = createVocabList(listOPosts)
        # One count vector per training post.
        trainMat = [setOfWords2Vec(myVocabList,postinDoc) for postinDoc in listOPosts]
        p0V,p1V,pAb = trainNBO(array(trainMat),array(listClasses))
        testEntry = ["quit","buying","worthless","dog","food","stupid"]
        thisDoc = array(setOfWords2Vec(myVocabList,testEntry))
        # A single pre-formatted string prints the same text on Python 2 and 3,
        # unlike the multi-argument print statement.
        print("%s classified as: %s" % (testEntry, classifyNB(thisDoc,p0V,p1V,pAb)))

    # Run the demo when the script is executed.
    testNB()
    

    posted in GuaiK实验室 read more
  • guluting

    Python集合处理常见的有添加,删除,交集与并集操作。
    以下是集合操作的常见符号:

    数学符号 Python符号 含义
    - 或 \ - 差集
    ∩ & 交集
    ∪ | 并集

    集合创建

    [输入]set("ghostds")
    [输出]set(['d', 'g', 'h', 'o', 's', 't'])
    

    交集

    [输入]a = set("abc")
    [输入]b = set("cdef")
    [输入]a & b
    [输出]set(['c'])
    

    并集

    [输入]a = set("abc")
    [输入]b = set("cdef")
    [输入]a | b
    [输出]set(['a', 'c', 'b', 'e', 'd', 'f'])
    

    差集

    [输入]a = set("abc")
    [输入]b = set("cdef")
    [输入]a - b
    [输出]set(['a', 'b'])
    

    posted in GuaiK实验室 read more
  • guluting

    openssl genrsa -out private_key.pem 1024
    
    openssl req -new -key private_key.pem -out rsaCertReq.csr
    
    openssl x509 -req -days 3650 -in rsaCertReq.csr -signkey private_key.pem -out rsaCert.crt
    
    openssl x509 -outform der -in rsaCert.crt -out public_key.der               # Create public_key.der For IOS
    
    openssl pkcs12 -export -out private_key.p12 -inkey private_key.pem -in rsaCert.crt  # Create private_key.p12 For IOS. 这一步,请记住你输入的密码,IOS代码里会用到
    
    openssl rsa -in private_key.pem -out rsa_public_key.pem -pubout             # Create rsa_public_key.pem For Java
     
    openssl pkcs8 -topk8 -in private_key.pem -out pkcs8_private_key.pem -nocrypt     # Create pkcs8_private_key.pem For Java
    
    pyrsa-priv2pub -i myprivatekey.pem -o mypublickey.pem
    

    posted in GuaiK实验室 read more
  • guluting

    以下代码先通过时间段检索数据,将检索出数据中的tags分解成多条数据,之后通过group对tags进行分组统计,然后使用sort对数据进行倒序排序,随后返回100条记录。

    db.doc.aggregate([
        {"$match":{"date":{"$gte":from_date,"$lt":to_date}}},
        {"$unwind": "$tags"},
        {"$group":{"_id":"$tags","tag_total":{"$sum":1}}},
        {"$sort": {"tag_total": -1}},
        {"$limit":100}
    ])
    

    posted in GuaiK实验室 read more
  • guluting

    alt

    运行如下安装指令

    yum install postgresql  
    yum install postgresql-server  
    yum install postgresql-contrib  
    yum update  
    postgresql-setup initdb  
    systemctl enable postgresql.service  
    systemctl start postgresql.service
    

    修改数据库登录密码

    sudo su - postgres  
    psql   
    alter user postgres with password 'xxxx';  
    

    允许远程访问【危险操作】

    vi /var/lib/pgsql/data/pg_hba.conf  
    host	all	all	0.0.0.0/0	trust 
    
    vi /var/lib/pgsql/data/postgresql.conf  
    listen_addresses = '*'
    
    systemctl restart postgresql.service
    

    配置防火墙

    firewall-cmd --permanent --zone=public --add-port=5432/tcp    # 增加规则  
    firewall-cmd --permanent --zone=public --remove-port=5432/tcp  # 删除规则(仅在需要关闭端口时执行)  
    firewall-cmd --reload
    

    posted in GuaiK机房 read more
  • guluting

    alt

    配置centos上网

    cd /etc/sysconfig/network-scripts/  
    vi ifcfg-enoxxxxxxxxx  
    将最后一行的ONBOOT=no改为ONBOOT=yes  
    service network restart  
    yum install net-tools  
    

    GCC安装
    yum install gcc-c++


    一键安装开发工具集合
    yum groupinstall "Development Tools"


    EPEL仓库安装
    yum install epel-release


    安装GDB

    yum install ncurses-devel  
    wget http://ftp.gnu.org/gnu/gdb/gdb-7.6.1.tar.gz  
    tar -zxvf gdb-7.6.1.tar.gz  
    cd gdb-7.6.1  
    ./configure  
    make  
    sudo make install  
    cp gdb/gdb /usr/bin/gdb   
    gdb -v  
    

    OPEN SSL 安装
    yum install openssl-devel


    iconv 下载安装

    wget http://ftp.gnu.org/pub/gnu/libiconv/libiconv-1.14.tar.gz  
    tar -zxvf libiconv-1.14.tar.gz   
    cd libiconv-1.14/srclib/  
    sed -i -e '/gets is a security/d' ./stdio.in.h  
    cd ..  
    ./configure --prefix=/usr/local  
    make  
    make install
    

    mysqlclient安装
    yum install mysql-devel


    CURL安装

    wget http://curl.haxx.se/download/curl-7.39.0.tar.gz
    tar -xzvf curl-7.39.0.tar.gz
    cd curl-7.39.0
    ./configure --with-ssl
    make
    make install
    

    BOOST安装

    yum install boost    
    yum install boost-devel    
    yum install boost-doc
    

    libevent安装
    yum install libevent-devel


    安装PIL图像处理模块
    yum install python-imaging

    posted in GuaiK机房 read more
  • guluting

    alt


    第一步 添加mongodb源
    vi /etc/yum.repos.d/mongodb-org-3.2.repo


    输入如下内容:

    [mongodb-org-3.2]
    name=MongoDB Repository
    baseurl=https://repo.mongodb.org/yum/redhat/$releasever/mongodb-org/3.2/x86_64/
    gpgcheck=1
    enabled=1
    gpgkey=https://www.mongodb.org/static/pgp/server-3.2.asc
    

    第二步 开始安装
    yum -y install mongodb-org

    posted in GuaiK机房 read more
  • guluting

    # -*- coding:utf-8 -*-
    import json
    import base64
    from Crypto.Signature import PKCS1_v1_5
    from Crypto.Hash import SHA
    from Crypto.PublicKey import RSA
    
    def sign_data(data):
        """Convert transaction parameters into the string that gets signed.

        Every dict-valued entry of ``data`` is replaced IN PLACE by its compact
        JSON form (keys sorted); callers may rely on that mutation, so it is
        kept.  The result is ``key=value`` pairs sorted by key and joined
        with ``&``.

        :param data: dict of request parameters; modified as described above.
        :return: the unsigned string.
        """
        # Snapshot the items so values can be reassigned while looping.
        for key, value in list(data.items()):
            if isinstance(value, dict):
                # Compact separators drop only the whitespace json.dumps inserts
                # between tokens; spaces inside string values are preserved
                # (a blanket .replace(" ", "") would corrupt them).
                data[key] = json.dumps(value, sort_keys=True, separators=(",", ":"))
        return "&".join("{}={}".format(k, v) for k, v in sorted(data.items()))
    
    def sha1_with_rsa(private_key, unsigned_string):
        """Sign ``unsigned_string`` with RSA (PKCS#1 v1.5) over SHA-1, as used for Alipay.

        :param private_key: base64 body of the private key, without the
            ``-----BEGIN/END PRIVATE KEY-----`` armour lines.
        :param unsigned_string: text to sign; it is UTF-8 encoded before hashing.
        :return: the signature as a base64 text string.
        """
        # Build the PEM explicitly so no source-code indentation leaks into the
        # key text (an indented triple-quoted template only works with a lenient
        # parser).
        pem = "-----BEGIN PRIVATE KEY-----\n{}\n-----END PRIVATE KEY-----".format(private_key)
        key = RSA.importKey(pem)
        signer = PKCS1_v1_5.new(key)
        # NOTE(review): SHA-1 is kept because the signature scheme is fixed by
        # the remote API, not chosen here - confirm before changing.
        signature = signer.sign(SHA.new(unsigned_string.encode("utf8")))
        # b64encode never emits line breaks, so no newline stripping is needed.
        return base64.b64encode(signature).decode("utf8")
    

    posted in GuaiK实验室 read more