C语言实现IP库的整合

在上一篇文章《C语言使用sscanf()解析字符串》中提到过,前段时间在底层同学那接了个作业,用 C 写一个 IP库匹配的程序,以巩固我的C 语言学习成果。最近终于完成了地域IP 与设备IP 的整合,过程也算几经波折,前文写的就是小波折的其中之一,而如今作业就先阶段性的做到这儿吧,我先阶段性的总结一下。毕竟这只是生产环境中用到的其中一环,而我要去做更酷的事情了,具体是什么事情,敬请期待。

作业

已知 IP库 如下

base_ip.csv

start ip end ip code
0.0.0.0 1.0.0.255 1000000000
1.0.1.0 1.0.3.255 1156350100
0.0.0.0 1.0.0.255 1000000000
224.0.0.0 255.255.255.255 1000000000

mobile_ip.csv

start ip end ip code
1.0.1.0 1.0.3.255 WIFI
1.0.8.0 1.0.15.255 WIFI
1.0.32.0 1.0.63.255 WIFI
223.255.252.0 223.255.253.255 WIFI

要求结果格式:

start_ip end_ip base_code mobile_code

附加题:考虑原始IP库文件为海量数据的情况。

几经波折

为什么选择整合地域IP库与设备IP库呢,其实还有个大学IP库,原因很简单:

那货已经给这俩排好序了。

经过程序测试证实,地域IP库是升序且所有网段都是连续的,设备IP库是升序的,但网段不连续。

IP的数据格式

说三个事情

  • 2^32 = 4294967296
  • 32位操作系统中,unsigned int 的范围:0~4294967295
  • 255^4 = 4228250625

嗯。然后呢。

1
2
#include <arpa/inet.h>
in_addr_t inet_addr(const char *);

在linux 网络编程基础api 中,inet_addr函数将用点分十进制字符串表示的IPv4 地址转化为用网络字节序整数表示的IPv4 地址。失败时返回INADDR_NONE。

字节序的问题

字节序分为大端字节序(big endian)和小端字节序(little endian)

现代CPU 的累加器一次都能装载至少4字节,即一个整数。那么这4字节在内存中排列的顺序将影响它被累加器装载成的整数的值。不同的CPU有不同的字节序类型,这些字节序是指整数在内存中保存的顺序,这个叫做主机序。通常有两种:

  • little endian:地址低位存储值的低位,地址高位存储值的高位
  • big endian:地址低位存储值的高位,地址高位存储值的低位

网络字节顺序是TCP/IP中规定好的一种数据表示格式,它与具体的CPU类型、操作系统等无关,从而可以保证数据在不同主机之间传输时能够被正确解释。网络字节顺序采用big endian排序方式。

下面代码可用于检查机器的字节序:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>

int main() {
union {
short value;
char union_bytes[sizeof(short)];
} test;
test.value = 0x0102;
if ((test.union_bytes[0] == 1) && (test.union_bytes[1] == 2)) {
printf("big endian\n");
} else if ((test.union_bytes[0] == 2) && (test.union_bytes[1] == 1)) {
printf("little endian\n");
} else {
printf("unkonwn...\n");
}
return 0;
}

经检测,我现在写这篇文章用的mac是little endian的。

所以 I need this:

1
2
3
4
unsigned long int htonl(unsigned long int hostlong); //Host to Network Long
unsigned short int htons(unsigned short int hostshort); //Host to Network Short
unsigned long int ntohl(unsigned long int netlong); //Network to Host Long
unsigned short int ntohs(unsigned short int netshort); //Network to Host Short

时间复杂度

为了尽快的完成作业,就先不管它什么性能空间之类的了,先解决问题,然后再优化,结果写了个*O(mn),然后里面只有一行if (condition1 > condition2)就运行了24秒,还是算了,好好想想O(n)**怎么实现吧。

毕竟俩库的IP都在0.0.0.0~255.255.255.255之间,好了,剩下的就是if else了。

实现

以下为源代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>

#define MAXLIMIT 64
#define IPCOUNT 165158
#define MOBILECOUNT 49664

struct row {
in_addr_t start_ip;
in_addr_t end_ip;
char code[16];
};

void resolve_file(FILE *fp, struct row all_data[]);
in_addr_t addr_str_to_int(char *cp);
void int_to_addr_str(in_addr_t start, in_addr_t end, char *base_code, char *mobile_code);

int main(int argc, char *argv[]) {
FILE *fp;
char *base_ip = "../base_ip.csv";
char *mobile_ip = "../mobile_ip.csv";
struct row struct_base_ip[IPCOUNT];
struct row struct_mobile_ip[MOBILECOUNT];

if ((fp = fopen(base_ip, "r")) == NULL) {
printf("cat: can't open %s\n", base_ip);
exit(1);
} else {
resolve_file(fp, struct_base_ip);
}
if ((fp = fopen(mobile_ip, "r")) == NULL) {
printf("cat: can't open %s\n", mobile_ip);
exit(1);
} else {
resolve_file(fp, struct_mobile_ip);
}

char *line = "start_ip,end_ip,base_code,mobile_code";
printf("%s\n", line);
in_addr_t start = 0, end = 0;
int i = 0, j = 0, is_mobile_out = 0;;
while (i < IPCOUNT) {
if (j < MOBILECOUNT) {
if (struct_mobile_ip[j].start_ip > struct_base_ip[i].start_ip) {
start = struct_base_ip[i].start_ip;
if (struct_mobile_ip[j].start_ip > struct_base_ip[i].end_ip) {
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, "NONE");
++i;
} else if (struct_mobile_ip[j].end_ip > struct_base_ip[i].end_ip) {
end = struct_mobile_ip[j].start_ip - 1;
int_to_addr_str(start, end, struct_base_ip[i].code, "NONE");
start = struct_mobile_ip[j].start_ip;
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
++i;
} else if (struct_mobile_ip[j].end_ip < struct_base_ip[i].end_ip){
end = struct_mobile_ip[j].start_ip - 1;
int_to_addr_str(start, end, struct_base_ip[i].code, "NONE");
start = struct_mobile_ip[j].start_ip;
end = struct_mobile_ip[j].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
struct_base_ip[i].start_ip = struct_mobile_ip[j].end_ip + 1;
++j;
} else if (struct_mobile_ip[j].end_ip > struct_base_ip[i].end_ip) {
end = struct_mobile_ip[j].start_ip - 1;
int_to_addr_str(start, end, struct_base_ip[i].code, "NONE");
start = struct_mobile_ip[j].start_ip;
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
++i;
++j;
}
} else if (struct_mobile_ip[j].start_ip < struct_base_ip[i].start_ip) {
if (struct_base_ip[i].start_ip > struct_mobile_ip[j].end_ip) {
start = struct_mobile_ip[j].start_ip;
end = struct_mobile_ip[j].end_ip;
int_to_addr_str(start, end, "XXX", struct_mobile_ip[j].code);
++j;
} else if (struct_base_ip[i].end_ip > struct_mobile_ip[j].end_ip) {
start = struct_base_ip[i].start_ip;
end = struct_mobile_ip[j].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
struct_base_ip[i].start_ip = struct_mobile_ip[j].end_ip + 1;
++j;
} else if (struct_base_ip[i].end_ip < struct_mobile_ip[j].end_ip) {
start = struct_base_ip[i].start_ip;
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
++i;
} else if (struct_base_ip[i].end_ip == struct_mobile_ip[j].end_ip) {
start = struct_base_ip[i].start_ip;
end = struct_mobile_ip[j].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
struct_base_ip[i].start_ip = struct_mobile_ip[j].end_ip + 1;
++j;
++i;
}
} else {
start = struct_base_ip[i].start_ip;
if (struct_base_ip[i].end_ip > struct_mobile_ip[j].end_ip) {
end = struct_mobile_ip[j].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
struct_base_ip[i].start_ip = struct_mobile_ip[j].end_ip + 1;
++j;
} else if (struct_base_ip[i].end_ip < struct_mobile_ip[j].end_ip) {
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
++i;
} else {
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, struct_mobile_ip[j].code);
++i;
++j;
}
}
} else {
if (!is_mobile_out) {
start = struct_mobile_ip[MOBILECOUNT - 1].end_ip + 1;
is_mobile_out = 1;
} else {
start = struct_base_ip[i].start_ip;
}
end = struct_base_ip[i].end_ip;
int_to_addr_str(start, end, struct_base_ip[i].code, "NONE");
++i;
}
}

exit(0);
}

void resolve_file(FILE *fp, struct row all_data[]) {
char line[MAXLIMIT], s_start_ip[16], s_end_ip[16];
char *lp;
int i = 0;
while ((lp = fgets(line, MAXLIMIT, fp)) != NULL) {
sscanf(lp, "%[^,],%[^,],%s",
s_start_ip,
s_end_ip,
(&all_data[i])->code);
(&all_data[i])->start_ip = addr_str_to_int(s_start_ip);
(&all_data[i])->end_ip = addr_str_to_int(s_end_ip);
i++;
}
fclose(fp);
}

in_addr_t addr_str_to_int(char *cp) {
return ntohl(inet_addr(cp));
}

void int_to_addr_str(in_addr_t start, in_addr_t end, char *base_code, char *mobile_code) {
start = htonl(start);
end = htonl(end);
char start_str[16], end_str[16];
unsigned char *start_bytes = (unsigned char *) &start;
unsigned char *end_bytes = (unsigned char *) &end;
snprintf (start_str, sizeof (start_str), "%d.%d.%d.%d",
start_bytes[0], start_bytes[1], start_bytes[2], start_bytes[3]);
snprintf (end_str, sizeof (start_str), "%d.%d.%d.%d",
end_bytes[0], end_bytes[1], end_bytes[2], end_bytes[3]);
printf("%s,%s,%s,%s\n", start_str, end_str, base_code, mobile_code);
}

至于附加题,就交给看到这儿的你吧:)

结语

写完这篇博文以后,终于我也可以幽幽地对别人说:

不管你懂多少闭包、异常处理,只要你不能解释为什么while(*s++ = *t++),那你就像一个医生看病的依据完全是因为医药销售代表说这种药有用。

完。


本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!