大数据:随机生成10万个整数找出出现次数前一百的数

论坛 期权论坛 脚本     
已经匿名di用户   2022-2-7 16:33   2713   0

实现大数据处理的基本方法是分治法 + 堆排序(heapsort):先按个位数把数据拆分到 10 个小文件中,再分别统计每个小文件里各个数出现的次数,用堆排序取出各文件出现次数最多的 100 个数,最后合并这些结果。

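/**
 * Input: 100 000 integers in the range 0~32768.
 * (NOTE(review): Createdate() below actually writes 1 000 000 values --
 *  confirm which count is intended.)
 * 1. Output the maximum.
 * 2. Output the 100 numbers with the highest occurrence counts.
 */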

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <time.h>

/*
 * Fixed-size "top 100" result table stored as two parallel arrays:
 * entry k is the pair (num[k], times[k]).
 * MyHasefile() fills it from its sorted histogram, most frequent entry first.
 */
typedef struct
{
 int num[100];   /* the values */
 int times[100]; /* times[k] = occurrence count recorded for num[k] */
}Date;
/* One histogram entry: a value together with its occurrence count. */
typedef struct
{
    int num;   /* the value being counted */
    int times; /* occurrence count; this is the key the heap is ordered by */
}Time;

/**
 * Sift the element at index `cur` down a max-heap ordered by `.times`.
 *
 * Only arr[0 .. len-1] is treated as part of the heap; elements at index
 * `len` and beyond are never read or written, so the caller may keep
 * already-sorted elements stored behind the heap.
 *
 * The subtrees rooted at the children of `cur` must already be valid
 * max-heaps; on return the subtree rooted at `cur` is a valid max-heap.
 *
 * @param arr  heap storage
 * @param len  number of elements that belong to the heap
 * @param cur  index of the element to sift down
 */
void myheap(Time arr[],int len,int cur)
{
    if (!arr || cur < 0)
    {
        return;
    }

    int parent = cur;
    for (;;)
    {
        int child = parent*2+1;            /* left child */
        if (child >= len)
        {
            break;                         /* parent is a leaf */
        }

        /* Consider the right child only if it lies inside the heap.
         * Testing child+1 (not child) against len is what keeps this
         * from reading arr[len]. */
        if (child+1 < len && arr[child].times < arr[child+1].times)
        {
            child++;
        }

        if (arr[child].times <= arr[parent].times)
        {
            break;                         /* heap property already holds */
        }

        Time tmp = arr[parent];
        arr[parent] = arr[child];
        arr[child] = tmp;

        parent = child;
    }
}
/**
 * Debug helper: print the first len/100 entries of `arr` on one line,
 * each as "num(times)", followed by a newline.
 *
 * @param arr  entries to print
 * @param len  total number of entries; only the first len/100 are shown
 */
void show(Time arr[],int len)
{
    for(int i=0; i<len/100; i++)
    {
        /* A whole struct must not be passed for a %d conversion (undefined
         * behaviour), so the two int fields are printed explicitly. */
        printf("%3d(%d) ",arr[i].num,arr[i].times);
    }
    printf("\n");
}
/**
 * Sort `arr` in place into ascending order of `.times` using heap sort.
 *
 * NOTE(review): some BSD-derived C libraries declare their own heapsort()
 * in <stdlib.h>; confirm there is no name clash on the target platform.
 *
 * @param arr  entries to sort
 * @param len  number of entries in arr
 */
void heapsort(Time arr[],int len)
{
    /* Phase 1: build a max-heap by sifting down every candidate parent,
     * from the last one back to the root. */
    int root = (len-1)/2;
    while (root >= 0)
    {
        myheap(arr,len,root);
        root--;
    }

    /* Phase 2: move the current maximum behind the shrinking heap, then
     * restore the heap over the remaining prefix. */
    int last = len-1;
    while (last > 0)
    {
        Time top = arr[0];
        arr[0] = arr[last];
        arr[last] = top;

        myheap(arr,last,0);
        last--;
    }
}
/**
 * Create the input data set: write 1 000 000 pseudo-random ints to `path`
 * as raw native-endian binary `int` values.
 *
 * Every value is reduced to the range 0..32767. On platforms where
 * RAND_MAX is 32767 this changes nothing; on platforms with a larger
 * RAND_MAX it keeps the data inside the range the counting code
 * (MyHasefile's 10000-bucket table) can index.
 *
 * NOTE(review): the file header mentions 100 000 values, but the loop
 * has always written 1 000 000 -- confirm the intended count.
 *
 * Terminates the program with an error message if the file cannot be
 * created or written.
 *
 * @param path  file to create (overwritten if it exists)
 */
void Createdate(char* path)
{
    enum { VALUE_COUNT = 1000000, VALUE_LIMIT = 32768 };

    FILE *fw = fopen(path,"wb");
    if (fw == NULL)
    {
        perror(path);
        exit(EXIT_FAILURE);
    }

    srand((unsigned)time(NULL)); /* seed from the wall clock */
    for (int i = 0; i < VALUE_COUNT; i++)
    {
        int value = rand() % VALUE_LIMIT;
        if (fwrite(&value,sizeof value,1,fw) != 1)
        {
            perror(path);
            fclose(fw);
            exit(EXIT_FAILURE);
        }
    }

    /* fclose flushes buffered data, so a failure here means lost output. */
    if (fclose(fw) != 0)
    {
        perror(path);
        exit(EXIT_FAILURE);
    }
}
/**
 * Count how often each value occurs in one bucket file and report the
 * 100 most frequent values.
 *
 * The file is read as raw binary ints. All values in the file are expected
 * to share the same last decimal digit (Divdate() splits the data that
 * way), so value/10 is used as the histogram index and the shared digit is
 * added back when the value is reconstructed.
 *
 * Values outside 0..99999 cannot be indexed by the 10000-slot histogram
 * and are skipped instead of writing out of bounds.
 *
 * Terminates the program with an error message if the file cannot be
 * opened.
 *
 * @param path  bucket file to read
 * @param d     receives the top 100 (value, count) pairs, most frequent
 *              first
 */
void  MyHasefile(char *path,Date *d)
{
    enum { BUCKETS = 10000, TOP = 100 };

    FILE *fr = fopen(path,"rb");
    if (fr == NULL)
    {
        perror(path);
        exit(EXIT_FAILURE);
    }

    int counts[BUCKETS] = {0};
    Time entries[BUCKETS];
    int value;
    int digit = 0; /* shared last digit; stays 0 if the file is empty */

    while (fread(&value,sizeof value,1,fr) == 1)
    {
        if (value < 0 || value/10 >= BUCKETS)
        {
            continue; /* not representable in the histogram */
        }
        counts[value/10] += 1;
        digit = value%10;
    }
    fclose(fr);

    for (int i = 0; i < BUCKETS; i++)
    {
        entries[i].times = counts[i];
        entries[i].num = i*10+digit;
    }

    /* heapsort orders ascending by count, so the winners sit at the end. */
    heapsort(entries,BUCKETS);

    for (int i = BUCKETS-1, j = 0; j < TOP; i--, j++)
    {
        d->num[j] = entries[i].num;
        d->times[j] = entries[i].times;
    }
}
/**
 * Split the data file into 10 bucket files by last decimal digit, compute
 * each bucket's 100 most frequent values, merge them into the overall
 * top 100 and print the result.
 *
 * Steps:
 *  1. Read raw binary ints from `path` and append each one to the bucket
 *     file "<digit>.txt" in the current directory. Negative values are
 *     skipped because value%10 would be a negative index.
 *  2. Call MyHasefile() on every bucket file.
 *  3. Delete the bucket files.
 *  4. Select the 100 entries with the highest counts among all
 *     10 x 100 candidates and print them, most frequent first.
 *
 * Terminates the program with an error message on any file error.
 *
 * @param path  data file to analyse
 */
void Divdate(char* path)
{
    enum { NFILES = 10, TOP = 100 };
    char *mypath[NFILES] = {"0.txt","1.txt","2.txt","3.txt","4.txt","5.txt","6.txt","7.txt","8.txt","9.txt"};

    FILE *fr = fopen(path,"rb");
    if (fr == NULL)
    {
        perror(path);
        exit(EXIT_FAILURE);
    }

    FILE *fw[NFILES];
    for (int i = 0; i < NFILES; i++)
    {
        fw[i] = fopen(mypath[i],"wb");
        if (fw[i] == NULL)
        {
            perror(mypath[i]);
            exit(EXIT_FAILURE);
        }
    }

    /* Step 1: distribute the values over the bucket files. */
    int temp;
    while (fread(&temp,sizeof(int),1,fr) == 1)
    {
        if (temp < 0)
        {
            continue; /* temp%10 would be negative */
        }
        if (fwrite(&temp,sizeof(int),1,fw[temp%10]) != 1)
        {
            perror(mypath[temp%10]);
            exit(EXIT_FAILURE);
        }
    }
    fclose(fr);

    for (int i = 0; i < NFILES; i++)
    {
        /* fclose flushes; the buckets are re-read right below. */
        if (fclose(fw[i]) != 0)
        {
            perror(mypath[i]);
            exit(EXIT_FAILURE);
        }
    }

    /* Step 2: per-bucket top 100. */
    Date d[NFILES];
    for (int i = 0; i < NFILES; i++)
    {
        MyHasefile(mypath[i],&(d[i]));
    }

    /* Step 3: the bucket files are no longer needed. */
    for (int i = 0; i < NFILES; i++)
    {
        remove(mypath[i]);
    }

    /* Step 4: repeatedly take the unused candidate with the highest count.
     * Scanning all candidates makes no assumption about their order. */
    Date dmax;
    unsigned char used[NFILES][TOP] = {{0}};
    for (int k = 0; k < TOP; k++)
    {
        int bi = 0;
        int bj = -1; /* -1: nothing picked yet in this round */
        for (int i = 0; i < NFILES; i++)
        {
            for (int j = 0; j < TOP; j++)
            {
                if (!used[i][j] && (bj < 0 || d[i].times[j] > d[bi].times[bj]))
                {
                    bi = i;
                    bj = j;
                }
            }
        }
        used[bi][bj] = 1;
        dmax.num[k] = d[bi].num[bj];
        dmax.times[k] = d[bi].times[bj];
    }

    for (int i = 0; i < TOP; i++)
    {
        printf("num:%3d,times:%3d\n",dmax.num[i],dmax.times[i]);
    }
}

/**
 * Program entry point: generate the random data file, then split and
 * analyse it.
 *
 * The data file path may be given as the first command-line argument;
 * without one the original default "D://date.txt" is used.
 */
int main(int argc, char *argv[])
{
    char *path = (argc > 1) ? argv[1] : "D://date.txt";

    Createdate(path); /* create the data */
    Divdate(path);    /* split, count and report */

    return 0;
}


分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:81
帖子:4969
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP