Mysql 大数据测试

8-15 909 views

创建模拟大数据

表结构

DROP TABLE IF EXISTS `t_user`;
CREATE TABLE `t_user` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `id_time` bigint(20) unsigned NOT NULL,
  `id_uuid` varchar(36) NOT NULL,
  `sequence` int(11) unsigned NOT NULL,
  `account` varchar(20) NOT NULL,
  `password` varchar(32) DEFAULT NULL,
  `name` varchar(40) DEFAULT NULL,
  `email` varchar(32) NOT NULL,
  `phone` varchar(15) NOT NULL,
  `qq` varchar(15) DEFAULT NULL,
  `apiKey` varchar(40) DEFAULT NULL,
  `balance` float DEFAULT NULL,
  `birthday` date DEFAULT NULL,
  `createdAt` datetime DEFAULT NULL,
  `updatedAt` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

使用Python脚本创建1000W模拟数据,并每隔10W条数据打印一次耗时。

import MySQLdb
import random
import string
import hashlib
import uuid
from  datetime  import  *
import time

LETTER_LOWER = 'abcdefghijklmnopqrstuvwxyz'
LETTER_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
DIGIT = '0123456789'
DIGITx4 = DIGIT+DIGIT+DIGIT+DIGIT
EMAIL = ['@gmail.com', '@qq.com', '@163.com', '@fly3w.com', '@live.com', '@sina.cn', 
    '@sina.com', '@hotmail.com', '@foxmail.com', '@yunyu.com', '@rayvision.com', 
    '@love.com', '@aliyun.com', '@outlook.com', '@tom.com', '@126.com', '@198.com']
LETTERS = LETTER_LOWER + LETTER_UPPER
STRING = LETTERS + DIGIT

MD5 = hashlib.md5()

def random_string(i):
  return string.join(random.sample(STRING, i)).replace(' ','')

def random_digit(i):
  if i > 40 :
    i = 40
  return string.join(random.sample(DIGITx4, i)).replace(' ','')

def random_letter_lower(i):
  return string.join(random.sample(LETTER_LOWER, i)).replace(' ','')

def random_letter_upper(i):
  return string.join(random.sample(LETTER_UPPER, i)).replace(' ','')

def random_account():
  return random_string(random.randint(3,20))

def random_password():
  p = random_string(random.randint(6,20))
  MD5.update(p)
  return MD5.hexdigest() 

def random_email():
  return random_string(random.randint(4,15)) + string.join(random.sample(EMAIL, 1))

def random_name():
  return random_string(random.randint(4,10)) + '.' + random_string(random.randint(4,10))

def random_phone():
  return '1' + random_digit(10)

def random_qq():
  return random_digit(random.randint(6,13))

def random_key():
  return random_string(40)

def random_balance():
  return float(random.uniform(0.0, 999999))

def random_createdAt():
  return datetime.fromtimestamp(random.randint(1000000000, 1470987994))

def random_updatedAt():
  return datetime.fromtimestamp(random.randint(1000000000, 1470987994))

def random_birthday():
  return date.fromtimestamp(random.randint(100000000, 1000000000))

try:
  conn=MySQLdb.connect(host='localhost',user='root',passwd='',db='bigdata',port=3306)
  cur=conn.cursor()

  R1 = 100
  R2 = 100
  R3 = 1000
  TIME = time.time()

  for k in range(R1):
    time1 = time.clock()
    for j in range(R2):
      values=[]
      for i in range(R3):
        sequence = k*R2*R3 + j*R3 + i + 1
        id_time = TIME + sequence
        id_uuid = uuid.uuid1()
        account = random_account()
        password = random_password()
        name = random_name()
        email = random_email()
        phone = random_phone()
        qq = random_qq()
        apiKey = random_key()
        balance = random_balance()
        birthday = random_birthday()
        createdAt = random_createdAt()
        updatedAt = random_updatedAt()
        values.append((id_time, id_uuid, sequence, account, password, name, email, phone, qq, apiKey, balance, birthday, createdAt, updatedAt))
        # print i , j , k , sequence
      cur.executemany('insert into t_user (id_time, id_uuid, sequence, account, password, name, email, phone, qq, apiKey, balance, birthday, createdAt, updatedAt) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',values)
      cur.executemany('insert into t_user_opt (id_time, id_uuid, sequence, account, password, name, email, phone, qq, apiKey, balance, birthday, createdAt, updatedAt) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)',values)
      conn.commit()
    time2 = time.clock()
    # print "insert %d records cost %s seconds"%((i+1)*(j+1)*(k+1) ,time2 - time1)
    print "insert %d records cost %s seconds"%((i+1)*(j+1)*(k+1) ,time2 - time1)

  cur.close()
  conn.close()

except MySQLdb.Error,e:
  print "Mysql Error %d: %s" % (e.args[0], e.args[1])

注释掉插入提交语句,看看空跑需要的时间:

insert 1800000 records cost 13.9562696992 seconds
insert 1900000 records cost 13.4804223272 seconds
insert 2000000 records cost 13.3164563912 seconds
insert 2100000 records cost 13.2930246514 seconds
insert 2200000 records cost 13.3073816569 seconds
insert 2300000 records cost 14.0792722724 seconds
insert 2400000 records cost 13.3529577188 seconds
insert 2500000 records cost 13.2821370229 seconds
insert 2600000 records cost 13.4949583231 seconds
insert 2700000 records cost 13.1573338714 seconds
insert 2800000 records cost 13.168848377 seconds
insert 2900000 records cost 13.8299750973 seconds
insert 3000000 records cost 13.7217753637 seconds

完整插入提交数据所花费的时间:

..........
insert 1800000 records cost 36.0650906447 seconds
insert 1900000 records cost 35.2314557097 seconds
insert 2000000 records cost 35.5520021725 seconds
insert 2100000 records cost 35.7447798253 seconds
insert 2200000 records cost 36.0288146973 seconds
insert 2300000 records cost 36.7669691414 seconds
insert 2400000 records cost 35.8457380811 seconds
..........
insert 8800000 records cost 33.5214939874 seconds
insert 8900000 records cost 34.2245814086 seconds
insert 9000000 records cost 34.2755526372 seconds
insert 9100000 records cost 34.2524004673 seconds
insert 9200000 records cost 34.9790082697 seconds
insert 9300000 records cost 35.7389869879 seconds
insert 9400000 records cost 35.0853273719 seconds
insert 9500000 records cost 35.8309244529 seconds
..........

typescript 源码分析

https://www.cnblogs.com/xuld/p/12180913.html

阅读全文

flutter_gen config

const configDefaultYamlContent = ''' name: UNKNOWN flutter_gen: Optional output: lib/gen/ Optional line_length: 80 Optional parse_metadata: false ...

阅读全文

redis 函数的含义

您提供的函数列表是一个 Redis 客户端 API 的部分实现,通常用于与 Redis 数据库进行交互。以下是这些函数的简要解释,按类别进行分类以帮助理解其功能和用...

阅读全文

欢迎留言