删除无点击数据的offer:数据分析思路与操作方法

梳理思路:

1、  获取 7month  和 8month  fullreport 报表中 所有offer;输出结果:offerid, totalClickCount;

2、 分析数据:去除 7month totalClickCount=0 且 8month totalClickCount=0 的offer;

      result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)]    获取有效的offer数据data2;

3、 获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;

4、 分析数据  根据data2有效offer,获取到有效的offer对应的每个dataSourceid;

       result = pd.merge(data2, data3, on='offerId', how='left')    以有效offer数据data2为左表,关联data3中的dataSourceid;

5、 计算出删除后,dataSourceid应该保留的个数及id;

6、 计算出删除offer的个数及offer_id;

操作方法

一、  获取 7month  和 8month  fullreport 报表中 所有offer

         class getFullreportOffer_8month

         class getFullreportOffer_7month

class getFullreportOffer_8month:
    """Download the August 2023 full report and save each offer's click total.

    Every report page is requested through the module-level
    ``thread_pool_executor`` using the module-level ``header`` dict, and the
    combined rows are written to the CSV path held in ``filename8``. Those
    three names are expected to be defined elsewhere in the file.
    """

    # Reporting window sent to the endpoint as fromDate / toDate.
    FROM_DATE = "2023-08-01"
    TO_DATE = "2023-08-31"
    # Number of pages run() downloads when the caller does not pass a count.
    DEFAULT_PAGE_COUNT = 32
    # (connect, read) timeout in seconds. Without a timeout a single stalled
    # request would block run() forever while it waits on task.result().
    REQUEST_TIMEOUT = (10, 300)

    @classmethod
    def _build_url(cls, page):
        """Return the report URL for one page of the configured date window."""
        # NOTE(review): the endpoint prefix appears as "···" in this source,
        # presumably redacted - substitute the real base URL before running.
        query = (
            "page={page}&pageSize=100&fromDate={start}&toDate={end}"
            "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds="
            "&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes="
            "&accountTypes=&columns=offer_id"
        ).format(page=page, start=cls.FROM_DATE, end=cls.TO_DATE)
        return "···?" + query

    @staticmethod
    def _split_rows(rows):
        """Split report rows into parallel lists of offer ids and click totals."""
        offer_ids = [row['offerId'] for row in rows]
        click_counts = [row['totalClickCount'] for row in rows]
        return offer_ids, click_counts

    def _process(self, page):
        """Fetch one report page.

        Args:
            page: 1-based page number to request.

        Returns:
            A tuple ``(offer_ids, click_counts)`` of equal-length lists, one
            entry per row in the page's ``result.dataList``.
        """
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept because the original relied on it; confirm that this is
        # really required for the target host.
        response = requests.get(
            url=self._build_url(page),
            headers=header,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self._split_rows(response.json()['result']['dataList'])

    def _process_multithread(self, list_):
        """Fetch every page in ``list_`` concurrently.

        Returns:
            The per-page ``_process`` results, in the same order as ``list_``.
        """
        task_list = [thread_pool_executor.submit(self._process, page) for page in list_]
        return [task.result() for task in task_list]

    def run(self, total_pages=None):
        """Download pages ``1..total_pages`` and write them to ``filename8``.

        Args:
            total_pages: number of report pages to fetch; defaults to
                ``DEFAULT_PAGE_COUNT``.

        The CSV has the columns ``offerId`` and ``totalClickCount8``.
        """
        if total_pages is None:
            total_pages = self.DEFAULT_PAGE_COUNT

        offer_ids = []
        click_counts = []
        for page_offers, page_clicks in self._process_multithread(range(1, total_pages + 1)):
            offer_ids.extend(page_offers)
            click_counts.extend(page_clicks)

        result = pd.DataFrame({
            'offerId': offer_ids,
            'totalClickCount8': click_counts,
        })
        result.to_csv(filename8, index=False)
class getFullreportOffer_7month:
    """Download the July 2023 full report and save each offer's click total.

    Every report page is requested through the module-level
    ``thread_pool_executor`` using the module-level ``header`` dict, and the
    combined rows are written to the CSV path held in ``filename7``. Those
    three names are expected to be defined elsewhere in the file.
    """

    # Reporting window sent to the endpoint as fromDate / toDate.
    FROM_DATE = "2023-07-01"
    TO_DATE = "2023-07-31"
    # Number of pages run() downloads when the caller does not pass a count.
    DEFAULT_PAGE_COUNT = 33
    # (connect, read) timeout in seconds. Without a timeout a single stalled
    # request would block run() forever while it waits on task.result().
    REQUEST_TIMEOUT = (10, 300)

    @classmethod
    def _build_url(cls, page):
        """Return the report URL for one page of the configured date window."""
        # NOTE(review): the endpoint prefix appears as "···" in this source,
        # presumably redacted - substitute the real base URL before running.
        query = (
            "page={page}&pageSize=100&fromDate={start}&toDate={end}"
            "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds="
            "&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes="
            "&accountTypes=&columns=offer_id"
        ).format(page=page, start=cls.FROM_DATE, end=cls.TO_DATE)
        return "···?" + query

    @staticmethod
    def _split_rows(rows):
        """Split report rows into parallel lists of offer ids and click totals."""
        offer_ids = [row['offerId'] for row in rows]
        click_counts = [row['totalClickCount'] for row in rows]
        return offer_ids, click_counts

    def _process(self, page):
        """Fetch one report page.

        Args:
            page: 1-based page number to request.

        Returns:
            A tuple ``(offer_ids, click_counts)`` of equal-length lists, one
            entry per row in the page's ``result.dataList``.
        """
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept because the original relied on it; confirm that this is
        # really required for the target host.
        response = requests.get(
            url=self._build_url(page),
            headers=header,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self._split_rows(response.json()['result']['dataList'])

    def _process_multithread(self, list_):
        """Fetch every page in ``list_`` concurrently.

        Returns:
            The per-page ``_process`` results, in the same order as ``list_``.
        """
        task_list = [thread_pool_executor.submit(self._process, page) for page in list_]
        return [task.result() for task in task_list]

    def run(self, total_pages=None):
        """Download pages ``1..total_pages`` and write them to ``filename7``.

        Args:
            total_pages: number of report pages to fetch; defaults to
                ``DEFAULT_PAGE_COUNT``.

        The CSV has the columns ``offerId`` and ``totalClickCount7``.
        """
        if total_pages is None:
            total_pages = self.DEFAULT_PAGE_COUNT

        offer_ids = []
        click_counts = []
        for page_offers, page_clicks in self._process_multithread(range(1, total_pages + 1)):
            offer_ids.extend(page_offers)
            click_counts.extend(page_clicks)

        result = pd.DataFrame({
            'offerId': offer_ids,
            'totalClickCount7': click_counts,
        })
        result.to_csv(filename7, index=False)

二、分析数据:去除 7month totalClickCount=0 且 8month totalClickCount=0 的offer

      result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)]    获取有效的offer数据data2;

class dataAnalysis:
    """Combine the July and August click reports and keep offers with clicks."""

    def get_datas(self):
        """Write the offers that received clicks in July or in August.

        Reads the CSVs at ``filename7`` and ``filename8``, outer-joins them on
        ``offerId`` and saves the surviving rows to ``filename``. An offer
        that is missing from one month has NaN in that month's column, which
        does not count as a click.
        """
        july = pd.read_csv(filename7)
        august = pd.read_csv(filename8)
        combined = july.merge(august, how='outer', on='offerId')

        clicked_in_july = combined['totalClickCount7'].gt(0)
        clicked_in_august = combined['totalClickCount8'].gt(0)
        active_offers = combined[clicked_in_july | clicked_in_august]

        active_offers.to_csv(filename, index=False)

三、获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;

       

class get_click_offer_datasourceid:
    """Download the click-system offer list with each offer's data source ids.

    Every list page is requested through the module-level
    ``thread_pool_executor`` using the module-level ``header`` dict, and the
    combined rows are written to the CSV path held in ``filenameclick``.
    Those three names are expected to be defined elsewhere in the file.
    """

    # Number of pages run() downloads when the caller does not pass a count.
    DEFAULT_PAGE_COUNT = 457 - 1
    # (connect, read) timeout in seconds. Without a timeout a single stalled
    # request would block run() forever while it waits on task.result().
    REQUEST_TIMEOUT = (10, 300)

    @staticmethod
    def _extract_datasource_ids(record):
        """Return the data source ids referenced by one offer record.

        ``dataSourceId`` holds a single id and ``dataSourceIds`` a
        comma-separated list; the string "-1" in either field means "none".
        If a field is missing or has an unexpected type, the placeholder
        "NaN" is appended after whatever was collected so far.
        """
        ids = []
        try:
            # NOTE(review): both fields are compared with the *string* "-1";
            # an integer -1 would not match - confirm the API returns strings.
            if record['dataSourceId'] != "-1":
                ids.append(record['dataSourceId'])
            combined = record['dataSourceIds']
            if combined != "-1":
                if "," in combined:
                    ids.extend(combined.split(','))
                else:
                    ids.append(combined)
        except (KeyError, TypeError, AttributeError):
            # Malformed record only: a bare except would also swallow
            # KeyboardInterrupt / SystemExit and hide real failures.
            ids.append("NaN")
        return ids

    def process(self, page):
        """Fetch one page of the offer list.

        Args:
            page: 1-based page number to request.

        Returns:
            A tuple ``(offer_ids, datasource_id_lists)`` of equal-length
            lists, one entry per record in the page's ``result.records``.
        """
        # NOTE(review): the endpoint prefix appears as "····" in this source,
        # presumably redacted - substitute the real base URL before running.
        url = "····?pageNum=" + str(page) + "&pageSize=10"
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept because the original relied on it; confirm that this is
        # really required for the target host.
        response = requests.get(
            url=url,
            headers=header,
            verify=False,
            timeout=self.REQUEST_TIMEOUT,
        )
        offer_ids = []
        datasource_id_lists = []
        for record in response.json()['result']['records']:
            datasource_ids = self._extract_datasource_ids(record)
            offer_ids.append(record['offerId'])
            datasource_id_lists.append(datasource_ids)
        return offer_ids, datasource_id_lists

    def process_multithread(self, list_):
        """Fetch every page in ``list_`` concurrently.

        Returns:
            The per-page ``process`` results, in the same order as ``list_``.
        """
        task_list = [thread_pool_executor.submit(self.process, page) for page in list_]
        return [task.result() for task in task_list]

    def run(self, total_pages=None):
        """Download pages ``1..total_pages`` and write them to ``filenameclick``.

        Args:
            total_pages: number of list pages to fetch; defaults to
                ``DEFAULT_PAGE_COUNT``.

        The CSV has the columns ``offerId`` and ``dataSources``; the latter
        holds each offer's id list as written by pandas (its ``repr``).
        """
        if total_pages is None:
            total_pages = self.DEFAULT_PAGE_COUNT

        offer_ids = []
        datasource_id_lists = []
        for page_offers, page_sources in self.process_multithread(range(1, total_pages + 1)):
            offer_ids.extend(page_offers)
            datasource_id_lists.extend(page_sources)

        result = pd.DataFrame({
            'offerId': offer_ids,
            'dataSources': datasource_id_lists,
        })
        result.to_csv(filenameclick, index=False)

四、分析数据  根据data2有效offer,获取到有效的offer对应的每个dataSourceid

       result = pd.merge(data2, data3, on='offerId', how='left')    data2为有效offer,data3为点击系统offer列表(对应下方代码中的局部变量 data1、data2)

class effectiveOffer:
    """Attach data source ids to the offers that survived the click filter."""

    def get_datas(self):
        """Left-join the valid offers with the click-system offer list.

        Reads the CSVs at ``filename`` (valid offers) and ``filenameclick``
        (offer -> data sources), keeps every valid offer, and writes the
        joined table to ``filenameoffer``.
        """
        valid_offers = pd.read_csv(filename)
        click_offers = pd.read_csv(filenameclick)
        joined = valid_offers.merge(click_offers, how='left', on='offerId')
        joined.to_csv(filenameoffer, index=False)

五、计算出删除后,dataSourceid应该保留的个数及id

class offerdatasource:
    """Work out which data source ids are still referenced by valid offers."""

    @staticmethod
    def _unique_datasource_ids(cells):
        """Return the distinct data source ids found in ``cells``.

        Args:
            cells: iterable of strings, each the Python-literal form of a
                list of ids (the way the ``dataSources`` CSV column stores
                them).

        Returns:
            The distinct ids ordered by their string form. The "NaN"
            placeholder, which the click-list download in this file writes
            for records it could not parse, is not a real id and is left out.
        """
        unique_ids = set()
        for cell in cells:
            unique_ids.update(ast.literal_eval(cell))
        unique_ids.discard("NaN")
        # key=str keeps the sort well-defined even if ids have mixed types.
        return sorted(unique_ids, key=str)

    def get_datasource(self):
        """Print and return the data source ids to keep, and print their count.

        Reads the ``dataSources`` column of the CSV at ``filenameoffer``;
        offers with no matching click-list row (NaN after the left join) are
        skipped.
        """
        data = pd.read_csv(filenameoffer)
        cells = data['dataSources'].dropna(axis=0)
        kept_ids = self._unique_datasource_ids(cells.values)
        print(kept_ids)
        print(len(kept_ids))
        return kept_ids

六、计算出删除offer的个数及offer_id;

class deleteOfferid:
    """Work out which click-system offers should be deleted."""

    @staticmethod
    def _offers_to_delete(valid_offer_ids, click_offer_ids):
        """Return the ids in ``click_offer_ids`` that are not valid offers.

        Order and duplicates of ``click_offer_ids`` are preserved. The valid
        ids are put in a set once so each membership test is O(1) instead of
        a scan over the whole array.
        """
        valid = set(valid_offer_ids)
        return [offer_id for offer_id in click_offer_ids if offer_id not in valid]

    def get_delete_offerid(self):
        """Print the count and ids of offers to delete, and return the ids.

        An offer is deleted when it appears in the click-system list
        (``filenameclick``) but not among the valid offers (``filename``).
        """
        valid_offers = pd.read_csv(filename)        # author's note: 3547
        click_offers = pd.read_csv(filenameclick)   # author's note: 4544
        to_delete = self._offers_to_delete(
            valid_offers['offerId'].tolist(),
            click_offers['offerId'].tolist(),
        )
        print(len(to_delete))
        print(to_delete)
        return to_delete

你可能感兴趣的:(数据分析,数据分析)