大数据去重存储过程

分页调用存储过程

DELIMITER $$

USE `new_stat`$$

DROP PROCEDURE IF EXISTS `callRefUrl`$$

-- callRefUrl: paging driver for the stat_temprefurl de-duplication.
--
-- Counts the rows staged in stat_temprefurl, works out how many pages of
-- 10000 rows that is, returns the page count to the caller as a one-row
-- result set (column `pagecount`), and then calls new_stat.copyRefUrl()
-- once per page.
--
-- Takes no parameters.
CREATE DEFINER=`root`@`219.224.99.0/255.255.255.0` PROCEDURE `callRefUrl`()
BEGIN
    DECLARE rows_size INT DEFAULT 0;
    DECLARE pagecount INT DEFAULT 0;
    DECLARE pageSize INT DEFAULT 10000;   -- rows handled per copyRefUrl() call
    DECLARE i INT DEFAULT 0;

    -- The statement is static, so it is run directly instead of through
    -- PREPARE/EXECUTE; this also avoids leaving helper user variables in the
    -- session. COUNT(*) is used because paging is over rows, including rows
    -- whose urlhash is NULL.
    SELECT COUNT(*)
    INTO rows_size
    FROM stat_temprefurl;

    -- Integer ceiling division. DIV is used on purpose: `/` produces a
    -- DECIMAL that is rounded when stored in an INT, so for example
    -- 15000 / 10000 + 1 = 2.5 would become 3 pages instead of 2.
    -- GREATEST keeps the minimum of one page for an empty table.
    SET pagecount = GREATEST(1, (rows_size + pageSize - 1) DIV pageSize);

    -- Report the number of pages to the caller.
    SELECT pagecount;

    WHILE i < pagecount DO
        CALL new_stat.copyRefUrl();
        SET i = i + 1;
    END WHILE;
END$$

DELIMITER ;

 

数据去重存储过程

 

DELIMITER $$

USE `new_stat`$$

DROP PROCEDURE IF EXISTS `copyRefUrl`$$

-- copyRefUrl: move new URLs from the staging table into stat_refurl.
--
-- 1. Deletes every row of stat_temprefurl whose urlhash already exists in
--    stat_refurl.
-- 2. Inserts the remaining staged rows (url, urlhash) into stat_refurl.
-- 3. Empties stat_temprefurl.
--
-- The whole staging table is handled in a single call. Calling the procedure
-- again on the now-empty table changes nothing, so repeated (paged) calls
-- remain safe.
--
-- Takes no parameters and returns no result set.
CREATE DEFINER=`root`@`219.224.99.0/255.255.255.0` PROCEDURE `copyRefUrl`()
BEGIN
    -- Set-based duplicate removal. A cursor limited to the first 10000 rows
    -- would leave later rows unchecked, yet they would still be copied by the
    -- INSERT below. Rows with a NULL urlhash never match the join and are
    -- therefore kept, the same as with an `urlhash = value` comparison.
    DELETE t
    FROM stat_temprefurl AS t
    INNER JOIN stat_refurl AS r
        ON r.urlhash = t.urlhash;

    -- NOTE(review): no column list is given, so this relies on stat_refurl's
    -- column order matching (url, urlhash) - add an explicit column list once
    -- the table definition is confirmed.
    -- NOTE(review): duplicates inside stat_temprefurl itself are not removed
    -- here; confirm whether stat_refurl is expected to reject them.
    INSERT INTO stat_refurl
    SELECT
        url,
        urlhash
    FROM stat_temprefurl;

    -- Intentionally clears the whole staging table; TRUNCATE also causes an
    -- implicit commit.
    TRUNCATE TABLE stat_temprefurl;
END$$

DELIMITER ;

 

 

你可能感兴趣的:(存储过程)