首页 > 学院 > 开发设计 > 正文

对网页数据进行提取与分析

2019-11-09 16:53:16
字体:
来源:转载
供稿:网友
首先请求数据信息
/**
 * Starts an asynchronous HTTP request on the shared NSURLSession.
 *
 * While at least one request started through this method is in flight, the
 * status-bar network activity indicator is shown. When the last in-flight
 * request finishes, the indicator is hidden and a @"RequestFinished"
 * notification is posted on the main queue.
 *
 * @param url         Absolute URL string of the resource to request.
 * @param method      HTTP method. When it equals "POST" (case-insensitive) and
 *                    `param` is non-empty, `param` is sent as the request body.
 * @param param       Key/value pairs joined as "key=value&key=value" (UTF-8).
 *                    Keys and values are sent as-is, without percent-encoding,
 *                    so callers must pre-encode anything containing '&' or '='.
 * @param handleBlock Optional completion callback. It is invoked from the
 *                    session's completion handler, not dispatched to the main
 *                    queue, with the request, the response body and the error.
 */
+ (void)requestWithURL:(NSString *)url
                method:(NSString *)method
             paramInfo:(NSDictionary *)param
                handle:(void (^)(NSURLRequest *request, NSData *responseData, NSError *error))handleBlock {
    // Guards `count`, which is touched both by the calling thread and by the
    // session's completion handler.
    static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
    // Number of requests that have been started but have not completed yet.
    static NSUInteger count = 0;

    // Build the request object.
    NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:[NSURL URLWithString:url]];
    [request setHTTPMethod:method];
    [request setTimeoutInterval:20.0];

    // Attach the POST body. An empty or nil dictionary produces no body at all;
    // trimming a trailing '&' from an empty string would underflow the unsigned
    // length and raise NSRangeException.
    if ([[method uppercaseString] isEqualToString:@"POST"] && param.count > 0) {
        NSMutableArray *pairs = [NSMutableArray arrayWithCapacity:param.count];
        for (NSString *key in param) {
            [pairs addObject:[NSString stringWithFormat:@"%@=%@", key, param[key]]];
        }
        NSString *bodyString = [pairs componentsJoinedByString:@"&"];
        [request setHTTPBody:[bodyString dataUsingEncoding:NSUTF8StringEncoding]];
    }

    // Create the session task.
    NSURLSessionDataTask *dataTask = [[NSURLSession sharedSession] dataTaskWithRequest:request completionHandler:
            ^(NSData * _Nullable data, NSURLResponse * _Nullable response, NSError * _Nullable error) {
                // Check whether every outstanding request has finished.
                pthread_mutex_lock(&mutex);
                if (--count == 0) {
                    // UIKit must be driven from the main thread. Enqueueing while
                    // the lock is held keeps the show/hide order on the main
                    // queue consistent with the order of counter updates.
                    dispatch_async(dispatch_get_main_queue(), ^{
                        [[UIApplication sharedApplication] setNetworkActivityIndicatorVisible:NO];
                        [[NSNotificationCenter defaultCenter] postNotificationName:@"RequestFinished" object:nil];
                    });
                }
                pthread_mutex_unlock(&mutex);

                // Hand the result back to the caller.
                if (handleBlock) {
                    handleBlock(request, data, error);
                }
            }];

    // Count the request BEFORE resuming the task and under the same lock the
    // completion handler uses; otherwise a fast completion could decrement the
    // unsigned counter first and wrap it around.
    pthread_mutex_lock(&mutex);
    ++count;
    dispatch_async(dispatch_get_main_queue(), ^{
        [[UIApplication sharedApplication] setNetworkActivityIndicatorVisible:YES];
    });
    pthread_mutex_unlock(&mutex);

    [dataTask resume];
}
对请求到的数据进行解析分析,用正则进行匹配。导入两个文件分别是RegexKitLite.m,RegexKitLite.h,由于文件比较久远,所以倒入后会报错,所以要在Build phases的Compile 
Sources 中为 RegexKitLite.m 添加编译标志 -fno-objc-arc(该文件不支持 ARC);另外,还要链接相应的类库 libicucore.tbd。
/**
 * Extracts list entries (title, link and date) from a downloaded HTML page.
 *
 * @param data Raw page bytes, decoded as GB 18030 by +_preprocessWithData:.
 * @return nil when the page contains no "<a ... </tr>" fragment at all;
 *         otherwise an array of mutable dictionaries with the keys
 *         @"title", @"href" and @"date". Fragments in which any of the three
 *         fields cannot be found are skipped.
 */
+ (NSArray *)parseListWithData:(NSData *)data {
    // Decode the page and strip tabs and line breaks so that '.' in the
    // patterns below can span what used to be several source lines.
    NSString *string = [self _preprocessWithData:data];

    // Each list row runs from its anchor tag to the end of the table row.
    NSArray *listMatchArray = [string componentsMatchedByRegex:@"<a.*?</tr>"];
    if (listMatchArray.count == 0) {
        return nil;
    }

    NSMutableArray *listDataArray = [NSMutableArray array];

    // NOTE(review): the final match is deliberately left out, exactly as in the
    // original loop bound; presumably it is not a real list row. Confirm
    // against the target page. `count` is at least 1 here, so `count - 1`
    // cannot underflow.
    for (NSUInteger index = 0; index != listMatchArray.count - 1; ++index) {
        NSString *listRawText = listMatchArray[index];

        // -firstObject yields nil instead of throwing when a pattern finds nothing.
        NSString *titleText = [[listRawText componentsMatchedByRegex:@"(?<=>).+?(?=</a>)"] firstObject];
        NSString *hrefText  = [[listRawText componentsMatchedByRegex:@"(?<=href=\").+?(?=\")"] firstObject];
        NSString *dateText  = [[listRawText componentsMatchedByRegex:@"(?<=\\[)\\d{4}-\\d{2}-\\d{2}(?=\\])"] firstObject];

        // A dictionary literal raises on nil values, so skip incomplete rows.
        if (titleText == nil || hrefText == nil || dateText == nil) {
            continue;
        }

        [listDataArray addObject:[@{@"title": [titleText stringByReplacingOccurrencesOfString:@" " withString:@""],
                                    @"href":  hrefText,
                                    @"date":  dateText,
                                    } mutableCopy]];
    }
    return listDataArray;
}

#pragma mark - Helper

/**
 * Decodes raw page bytes as GB 18030 text and removes every tab, carriage
 * return and line feed.
 *
 * @param data Raw page bytes.
 * @return The flattened page text. The result is nil when the bytes cannot be
 *         decoded, because the replacement is then sent to a nil string.
 */
+ (NSString *)_preprocessWithData:(NSData *)data {
    NSStringEncoding gbEncoding = CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingGB_18030_2000);
    NSString *rawStr = [[NSString alloc] initWithData:data encoding:gbEncoding];
    return [rawStr stringByReplacingOccurrencesOfRegex:@"[\t\r\n]" withString:@""];
}
根据自己的需求相应的得到网页中的具体数据信息
发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表