#include #include #include #include #include "mail_decoder_util.h" #include "mail_decoder_codec.h" #include "mail_decoder_mime.h" static void urldecode2(char *dst, const char *src) { char a, b; while (*src) { if ((*src == '%') && ((a = src[1]) && (b = src[2])) && (isxdigit(a) && isxdigit(b))) { if (a >= 'a') a -= 'a'-'A'; if (a >= 'A') a -= ('A' - 10); else a -= '0'; if (b >= 'a') b -= 'a'-'A'; if (b >= 'A') b -= ('A' - 10); else b -= '0'; *dst++ = 16*a+b; src+=3; } else if (*src == '+') { *dst++ = ' '; src++; } else { *dst++ = *src++; } } *dst++ = '\0'; } static int check_mem_size(int wantedlen, char **pmem, int *len) { if(wantedlen < *len) return 0; if(*pmem == NULL) *pmem = (char *)malloc(wantedlen); else *pmem = (char *)realloc(*pmem, wantedlen); *len = wantedlen; return 0; } #ifdef __cplusplus extern "C" { #endif int decode_mime_header(const char *in, int inl, char *out, int *outl, char* charset, int max_charset_len) { return mime_header_decode(in, inl, out, outl, charset, max_charset_len, NULL); } #ifdef __cplusplus } #endif int mime_header_decode(const char *in, int inl, char *out, int *outl, char* charset, int max_charset_len, void *log_handle) { const char *in_end=in+inl, *begin, *chset_end, *encode_end, *end; int outsize = *outl, total_len=0; int ret, charset_len; begin = (char *)mail_memmem(in, inl, "=?", 2); if(begin == NULL) goto out_cp; chset_end = (char *)memchr(begin+2, '?', in_end-begin-2); if(chset_end == NULL) goto out_cp; encode_end = (char *)memchr(chset_end+1, '?', in_end-chset_end-1); if(encode_end == NULL) goto out_cp; end = (char *)mail_memmem(encode_end+1, in_end-encode_end-1, "?=", 2); if(end == NULL) goto out_cp; charset_len = (chset_end-begin-2)>=max_charset_len?(max_charset_len-1):(chset_end-begin-2); memcpy(charset, begin+2, charset_len); charset[charset_len] = '\0'; if(*(encode_end-1) == 'B' || *(encode_end-1) == 'b') { if(begin - in > 0) { memcpy(out, in, begin-in); total_len = begin-in; } ret = Base64_DecodeBlock((const unsigned char *)(encode_end+1), end-encode_end-1, (unsigned char *)out+total_len, outsize-total_len); if(ret < 0) { char buf[128]={0}; int datalen = (end-encode_end-1)>=128?127:(end-encode_end-1); memcpy(buf, encode_end+1, datalen); buf[datalen] = '\0'; goto out_cp; } total_len += ret; } else if(*(encode_end-1) == 'Q' || *(encode_end-1) == 'q') { if(begin - in > 0) { memcpy(out, in, begin-in); total_len = begin-in; } ret = QP_DecodeBlock((const unsigned char *)(encode_end+1), end-encode_end-1, (unsigned char *)out+total_len); if(ret < 0) { char buf[128]={0}; int datalen = (end-encode_end-1)>=128?127:(end-encode_end-1); memcpy(buf, encode_end+1, datalen); buf[datalen] = '\0'; goto out_cp; } total_len += ret; } else goto out_cp; if(in_end - end - 2 > 0) { outsize -= total_len; mime_header_decode(end+2, in_end-end-2, out+total_len, &outsize, charset, max_charset_len, log_handle); total_len += outsize; } *outl = total_len; return 0; out_cp: memcpy(out, in, inl); *outl = inl; return 0; } int add_boundary(const char *src, int n, boundary_list **Head) { boundary_list *p; int len; if(src == NULL || n <= 0) return -1; p = (boundary_list *)calloc(1, sizeof(boundary_list)); if(*Head==NULL) { p->main_bound = BOUNDARY_TYPE_MAIN; } else { p->main_bound = BOUNDARY_TYPE_OTHER; } len = (n>=BOUNDARY_SIZE)?(BOUNDARY_SIZE-1):n; memcpy(p->str, src, len); p->str_len = len; p->next = *Head; *Head = p; return 0; } static boundary_list *get_boundary(char *src, int srclen, boundary_list *head) { boundary_list *p; for(p=head; p!=NULL; p=p->next) { if(srclen != p->str_len) continue; if (strncmp(src, p->str, srclen) == 0) { return p; } } return NULL; } static int search_boundary(char *src, int srclen, boundary_list *head) { boundary_list *p; for(p=head; p!=NULL; p=p->next) { if(srclen != p->str_len) continue; if (strncmp(src, p->str, srclen) == 0) { p->matched_count++; return p->main_bound; } } return 0; } /********************************************************************* 函数名称:boundary_free 功能简介:释放界线链表 输入参数:head:指向界限链表头的指针 输出参数:无 返回值: 无 *********************************************************************/ static void free_boundary( boundary_list *head) { boundary_list *p = head, *q; while(p != NULL) { q = p; p = p->next; free(q); } } static int get_value_th(char *temp, int len,char **ppval,int *val_len) { char *temp2=NULL; char *tp=NULL; char *tpend=NULL; tp=temp; tpend=tp+len; while((tp[0]==' ')&&(tpTransferEnc = MIME_TRANSENC_UNKNOWN; pmimeinfo->log_handle = log_handle; pmimeinfo->handle.length=0; //handle_init(); pmimeinfo->line_type = MIME_LINE_NULL; *mimeinfo = pmimeinfo; return 0; } void clear_mime_info(MimeParse_t *mimeinfo) { free_boundary(mimeinfo->b_header); if(mimeinfo->filename!=NULL) free(mimeinfo->filename); free(mimeinfo); } void reset_mime_info(MimeParse_t * mimeinfo,int PartEnd) { if(PartEnd==0) { free_boundary(mimeinfo->b_header); mimeinfo->b_header = NULL; mimeinfo->bound_comes = 0; } mimeinfo->src = NULL; mimeinfo->dst = NULL; mimeinfo->srcLen = 0; mimeinfo->dstSize = 0; mimeinfo->actLen = 0; mimeinfo->text_begin = 0; mimeinfo->filenameLen = 0; mimeinfo->is_attachment = 0; if(mimeinfo->filename!=NULL) { free(mimeinfo->filename); mimeinfo->filename = NULL; } mimeinfo->filename_charset[0] = 0; mimeinfo->charset[0] = 0; mimeinfo->TransferEnc = MIME_TRANSENC_UNKNOWN; mimeinfo->line_type = MIME_LINE_NULL; mimeinfo->handle.length=0; } static int detect_transenc(char * transenc, int len) { while(*transenc == ' ') { transenc++; len--; } if(strncmp_one_word_mesa("base64", "BASE64", 6, transenc, len)) return MIME_TRANSENC_BASE64; if(strncmp_one_word_mesa("quoted-printable", "QUOTED-PRINTABLE", 16, transenc, len)) return MIME_TRANSENC_QP; return MIME_TRANSENC_UNKNOWN; } static int mime_line_identify(MimeParse_t *pInfo) { if(pInfo->srcLen==0 || (pInfo->srcLen==2 && pInfo->src[0]=='\r' && pInfo->src[1]=='\n')||(pInfo->srcLen==1 && pInfo->src[0]=='\n')) { return MIME_LINE_NULL; } if(pInfo->srcLen>2 && strncmp(pInfo->src, "--", 2) == 0) { int len = pInfo->srcLen; while(len>2 && (*(pInfo->src+len-1)=='\r' || *(pInfo->src+len-1)=='\n')) len--; if(!search_boundary(pInfo->src + 2, len-2, pInfo->b_header)) { int type; if(len >=4 && pInfo->src[len-1] == '-' && pInfo->src[len-2] == '-' && \ (type=search_boundary(pInfo->src + 2, len-4, pInfo->b_header))>0) { if(type==BOUNDARY_TYPE_MAIN) { pInfo->bound_comes = 0; pInfo->line_type = MIME_LINE_COMMENT; } return MIME_LINE_BOUNARY_END; } if(pInfo->bound_comes==0 && pInfo->b_header != NULL) return MIME_LINE_COMMENT; else return MIME_LINE_CONT_BODY; } boundary_list *b = get_boundary(pInfo->src + 2, len-2, pInfo->b_header); if (b && b->matched_count == 0) { return MIME_LINE_BOUNARY_START; } return MIME_LINE_BOUNARY; } if(pInfo->line_type == MIME_LINE_CONT_BODY || pInfo->line_type == MIME_LINE_COMMENT) return pInfo->line_type; if(strncmp_one_word_mesa("content-type:", "CONTENT-TYPE:", PARTHEAD_CONTTYPE_LEN, pInfo->src, pInfo->srcLen)) return MIME_LINE_CONT_TYPE; if(strncmp_one_word_mesa("content-transfer-encoding:", "CONTENT-TRANSFER-ENCODING:", PARTHEAD_TRANSENC_LEN, pInfo->src, pInfo->srcLen)) return MIME_LINE_CONT_ENC; if(strncmp_one_word_mesa("content-disposition:", "CONTENT-DISPOSITION:", PARTHEAD_CONTENTDISPOSITION_LEN, pInfo->src, pInfo->srcLen)) return MIME_LINE_CONT_DISP; // if(check_str_begin(pInfo->src, pInfo->srcLen, "Content")) // return MIME_LINE_CONT_OTHR; if(pInfo->bound_comes==0 && pInfo->b_header != NULL) return MIME_LINE_COMMENT; if(pInfo->boundary_pending) return MIME_LINE_CONT_OTHR; return MIME_LINE_CONT_BODY; } int mime_parse_feed(MimeParse_t *pInfo, int decode) { int ret; char *pout = NULL, *pc; int outlen=0, pclen; int len; if(pInfo->dst==NULL) return -10; pInfo->actLen = 0; pInfo->line_type = mime_line_identify(pInfo); switch(pInfo->line_type) { case MIME_LINE_ERR: return -2; case MIME_LINE_NULL: pInfo->text_begin = 1; pInfo->boundary_pending = 0; if(pInfo->bound_comes==0 && pInfo->b_header != NULL) pInfo->line_type = MIME_LINE_COMMENT; else pInfo->line_type = MIME_LINE_CONT_BODY; //TODO: 正文中出现"\r\n---anystr",前面的空行被丢弃了 break; case MIME_LINE_BOUNARY_START: reset_mime_info(pInfo, 1); //set text_begin=0 pInfo->boundary_pending = 1; if(pInfo->bound_comes == 0) pInfo->bound_comes = 1; return MIME_LINE_BOUNARY_START; case MIME_LINE_BOUNARY: reset_mime_info(pInfo, 1); //set text_begin=0 pInfo->boundary_pending = 1; if(pInfo->bound_comes == 0) pInfo->bound_comes = 1; return MIME_LINE_BOUNARY; case MIME_LINE_BOUNARY_END: pInfo->text_begin = 0; //一般后续要么结束,要么是新的BOUNARY;为了IMAP结尾的')' break; case MIME_LINE_CONT_TYPE: pc = pInfo->src + PARTHEAD_CONTTYPE_LEN; pclen = pInfo->srcLen - PARTHEAD_CONTTYPE_LEN; if(check_name_th(pc, pclen, PARTHEAD_CHARSET, &pout, &outlen)==0) { len = (outlen>=MAIL_MAX_CHARSET_LEN)?(MAIL_MAX_CHARSET_LEN-1):outlen; memcpy(pInfo->charset, pout, len); pInfo->charset[len] = '\0'; } if(pInfo->filename != NULL) { free(pInfo->filename); pInfo->filename = NULL; pInfo->filenameLen = 0; } else if(check_name_th(pc, pclen, PARTHEAD_NAME, &pout, &outlen)==0) { //tmplen为0的情况,pInfo->filename!=NULL && pInfo->filenameLen==0, 调用者自行处理 if(check_mem_size(outlen+1, &pInfo->filename, &pInfo->filenameLen)) return -3; mime_header_decode(pout, outlen, pInfo->filename, &pInfo->filenameLen, pInfo->filename_charset, MAIL_MAX_CHARSET_LEN, pInfo->log_handle); pInfo->filename[pInfo->filenameLen] = '\0'; //ENDPART reset时会清理,释放 } if(check_name_th(pc, pclen, PARTHEAD_BOUNDARY, &pout, &outlen)==0) { if(search_boundary(pout, outlen, pInfo->b_header)==0) { if(add_boundary(pout, outlen, &pInfo->b_header)) return -4; } } break; case MIME_LINE_CONT_DISP: pInfo->is_attachment = 1; if (pInfo->filename != NULL) // && strlen(pInfo->filename)>0) { free(pInfo->filename); pInfo->filename = NULL; //避免重复调用业务层 pInfo->filenameLen = 0; break; } pc = pInfo->src + PARTHEAD_CONTENTDISPOSITION_LEN; pclen = pInfo->srcLen - PARTHEAD_CONTENTDISPOSITION_LEN; if (check_name_th(pc, pclen, PARTHEAD_FILENAME, &pout, &outlen) == 0) { //tmplen为0的情况,pInfo->filename!=NULL && pInfo->filenameLen==0, 调用者自行处理 if (check_mem_size(outlen + 1, &pInfo->filename, &pInfo->filenameLen)) return -5; mime_header_decode(pout, outlen, pInfo->filename, &pInfo->filenameLen, pInfo->filename_charset, MAIL_MAX_CHARSET_LEN, pInfo->log_handle); pInfo->filename[pInfo->filenameLen] = '\0'; //ENDPART reset时会清理,释放 } else if (check_name_th(pc, pclen, PARTHEAD_FILENAME_EXT, &pout, &outlen) == 0) { // support RFC 6266, ext value, like filename*=utf-8''%e2%82%ac%20rates const char *p_first_quote = NULL, *p_second_quote = NULL, *p_actual_value = NULL; int actual_value_len = 0; // define in RFC 2231.section 4, filename ext value format [charset'language'actual value] for (int i = 0; i < outlen; i++) { if (pout[i] == '\'') { if (p_first_quote == NULL) { p_first_quote = (pout + i); } else { p_second_quote = (pout + i); break; } } } // must include two single quote if (p_first_quote == NULL || p_second_quote == NULL) { p_actual_value = pout; actual_value_len = outlen; } else // copy charset if possible { p_actual_value = (p_second_quote + 1); actual_value_len = outlen - (p_actual_value - pout); if (p_first_quote != pout && (p_first_quote - pout) < MAIL_MAX_CHARSET_LEN) { memcpy(pInfo->filename_charset, pout, (p_first_quote - pout)); } } if (check_mem_size(actual_value_len + 1, &pInfo->filename, &pInfo->filenameLen)) return -3; memset(pInfo->filename, 0, actual_value_len + 1); memcpy(pInfo->filename, p_actual_value, actual_value_len); char *decode_buff = (char *)malloc(actual_value_len + 1); urldecode2(decode_buff, pInfo->filename); memset(pInfo->filename, 0, actual_value_len + 1); memcpy(pInfo->filename, decode_buff, strlen(decode_buff)); pInfo->filenameLen = strlen(decode_buff); free(decode_buff); } else { while (pclen > 0 && (*pc == ' ' || *pc == '\t')) { pc++; pclen--; } if (pclen > 0 && strncmp_one_word_mesa("attachment", "ATTACHMENT", 10, pc, pclen)) { pInfo->filename = (char *)malloc(1); pInfo->filenameLen = 0; pInfo->filename[0] = '\0'; } } break; case MIME_LINE_CONT_ENC: pInfo->TransferEnc = detect_transenc(pInfo->src + PARTHEAD_TRANSENC_LEN, pInfo->srcLen-PARTHEAD_TRANSENC_LEN); break; case MIME_LINE_CONT_OTHR: pInfo->dst = pInfo->src; pInfo->actLen = pInfo->srcLen; break; case MIME_LINE_COMMENT: break; case MIME_LINE_CONT_BODY: if((pInfo->TransferEnc==MIME_TRANSENC_BASE64 || pInfo->TransferEnc==MIME_TRANSENC_QP) && decode!=0) { if(pInfo->TransferEnc==MIME_TRANSENC_BASE64) { ret = Base64_DecodeFeed_r_n(&pInfo->handle, (unsigned char *)pInfo->src, pInfo->srcLen, (unsigned char *)pInfo->dst, pInfo->dstSize); if(ret < 0) { char buf[128]={0}; memcpy(buf, pInfo->src, pInfo->srcLen>=128?127:pInfo->srcLen); return ret; } } else { ret = QP_DecodeFeed(&pInfo->handle, (unsigned char *)pInfo->src, pInfo->srcLen, (unsigned char *)pInfo->dst); if(ret < 0) { return ret; } } pInfo->actLen = ret; } else { pInfo->dst = pInfo->src; pInfo->actLen = pInfo->srcLen; //pInfo->text_begin=1; } break; default: break; } return pInfo->line_type; } void body_update_text_begin(MimeParse_t *pInfo) { if(pInfo->text_begin==0 && pInfo->b_header==NULL) pInfo->text_begin = 1; }