diff options
| author | zhanghongqing <[email protected]> | 2020-08-31 19:07:02 +0800 |
|---|---|---|
| committer | zhanghongqing <[email protected]> | 2020-08-31 19:07:02 +0800 |
| commit | 2d8b45a0d1c4e4e4bfeafb24379f6a116674a04f (patch) | |
| tree | 8a9696196bb6a5d9247f5da788ae297590b88134 | |
| parent | 3bb737b2f8206640adee4bfc4fb078e85266aac8 (diff) | |
修改设备跟操作系统正则和处理方法
| -rw-r--r-- | src/main/java/com/mesalab/ua/DeviceParser.java | 24 | ||||
| -rw-r--r-- | src/main/java/com/mesalab/ua/OSParser.java | 199 | ||||
| -rw-r--r-- | src/main/java/com/mesalab/ua/Parser.java | 15 | ||||
| -rw-r--r-- | src/main/java/com/mesalab/ua/Utils.java | 65 | ||||
| -rw-r--r-- | src/main/resources/device.yaml | 1 | ||||
| -rw-r--r-- | src/main/resources/os.yaml | 168 | ||||
| -rw-r--r-- | src/main/resources/regexes.yaml | 176 | ||||
| -rw-r--r-- | src/test/java/com/mesalab/ua/Test.java | 49 | ||||
| -rw-r--r-- | src/test/java/com/mesalab/ua/UaTest.java | 24 |
9 files changed, 457 insertions, 264 deletions
diff --git a/src/main/java/com/mesalab/ua/DeviceParser.java b/src/main/java/com/mesalab/ua/DeviceParser.java index d21008e..4ef7ea0 100644 --- a/src/main/java/com/mesalab/ua/DeviceParser.java +++ b/src/main/java/com/mesalab/ua/DeviceParser.java @@ -18,8 +18,6 @@ package com.mesalab.ua; import com.alibaba.fastjson.JSON; import com.mesalab.ua.dto.Device; -import eu.bitwalker.useragentutils.Browser; -import eu.bitwalker.useragentutils.OperatingSystem; import org.apache.commons.lang3.ObjectUtils; import java.util.ArrayList; @@ -100,33 +98,31 @@ public class DeviceParser { } int groupCount = matcher.groupCount(); - name = getReplaceResult(model, matcher, groupCount,nameReplacement); + name = getReplaceResult(model, matcher, groupCount, nameReplacement); if (brandReplacement != null) { brand = brandReplacement; - } else if (groupCount >= 2) { - String group2 = matcher.group(2); - if (ObjectUtils.isNotEmpty(group2)) { - brand = group2; - } + } else { + brand = "Other"; } - model = getReplaceResult(model, matcher, groupCount,modelReplacement); + model = getReplaceResult(model, matcher, groupCount, modelReplacement); - if (typeReplacement != null) { - type = Utils.getReplacement(matcher, typeReplacement); - } else { + if (typeReplacement == null) { type = "Other"; + } else { + type = Utils.getReplacement(matcher, typeReplacement); } + System.out.println("pattern+: "+pattern); return name == null ? null : new Device(name, brand, model, type); } - private String getReplaceResult(String model, Matcher matcher, int groupCount,String replacementString) { + private String getReplaceResult(String model, Matcher matcher, int groupCount, String replacementString) { if (replacementString != null) { if (replacementString.contains("$")) { model = replacementString; for (String substitution : getSubstitutions(replacementString)) { - int i = Integer.valueOf(substitution.substring(1)); + int i = Integer.parseInt(substitution.substring(1)); String replacement = matcher.groupCount() >= i && matcher.group(i) != null ? Matcher.quoteReplacement(matcher.group(i)) : ""; model = model.replaceFirst("\\" + substitution, replacement); diff --git a/src/main/java/com/mesalab/ua/OSParser.java b/src/main/java/com/mesalab/ua/OSParser.java index 3813fa3..227f45e 100644 --- a/src/main/java/com/mesalab/ua/OSParser.java +++ b/src/main/java/com/mesalab/ua/OSParser.java @@ -1,12 +1,12 @@ /** * Copyright 2012 Twitter, Inc - * + * <p> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -30,124 +30,103 @@ import java.util.regex.Pattern; */ public class OSParser { - private final List<OSPattern> patterns; - - public OSParser(List<OSPattern> patterns) { - this.patterns = patterns; - } + private final List<OSPattern> patterns; - /** - * Constructs a thread-safe OSParser. - */ - public static OSParser fromList(List<Map<String,String>> configList) { - List<OSPattern> configPatterns = new ArrayList<OSPattern>(); - - for (Map<String,String> configMap : configList) { - configPatterns.add(OSParser.patternFromMap(configMap)); + public OSParser(List<OSPattern> patterns) { + this.patterns = patterns; } - return new OSParser(new CopyOnWriteArrayList<>(configPatterns)); - } - public OS parse(String agentString) { - if (agentString == null) { - return null; - } + /** + * Constructs a thread-safe OSParser. + */ + public static OSParser fromList(List<Map<String, String>> configList) { + List<OSPattern> configPatterns = new ArrayList<OSPattern>(); - OS os; - for (OSPattern p : patterns) { - if ((os = p.match(agentString)) != null) { - return os; - } + for (Map<String, String> configMap : configList) { + configPatterns.add(OSParser.patternFromMap(configMap)); + } + return new OSParser(new CopyOnWriteArrayList<>(configPatterns)); } - return new OS("Other", null, null); - } - protected static OSPattern patternFromMap(Map<String, String> configMap) { - String regex = configMap.get("regex"); - if (regex == null) { - throw new IllegalArgumentException("OS is missing regex"); + public OS parse(String agentString) { + if (agentString == null) { + return null; + } + + OS os; + for (OSPattern p : patterns) { + if ((os = p.match(agentString)) != null) { + return os; + } + } + return new OS("Other", null, null); } - return(new OSPattern(Pattern.compile(regex), - configMap.get("name_replacement"), - configMap.get("version_replacement"), - configMap.get("platform_replacement"))); - } - - protected static class OSPattern { - private final Pattern pattern; - private final String nameReplacement, versionReplacement, platformReplacement; - - public OSPattern(Pattern pattern, String nameReplacement, String versionReplacement, String platformReplacement) { - this.pattern = pattern; - this.nameReplacement = nameReplacement; - this.versionReplacement = versionReplacement; - this.platformReplacement = platformReplacement; - } + protected static OSPattern patternFromMap(Map<String, String> configMap) { + String regex = configMap.get("regex"); + if (regex == null) { + throw new IllegalArgumentException("OS is missing regex"); + } - public OS match(String agentString) { - String name = null, version = null, platform = null; - Matcher matcher = pattern.matcher(agentString); - - if (!matcher.find()) { - return null; - } - - int groupCount = matcher.groupCount(); - - if (nameReplacement != null) { - if (groupCount >= 1) { - name = Pattern.compile("(" + Pattern.quote("$1") + ")") - .matcher(nameReplacement) - .replaceAll(matcher.group(1)); - } else { - name = nameReplacement; - } - } else if (groupCount >= 1) { - name = matcher.group(1); - } else { - name = "Other"; - } - - if (versionReplacement != null) { - version = getReplacement(matcher, versionReplacement); - } else if (groupCount >= 2) { - version = matcher.group(2); - } else { - version = "Other"; - } - - if (platformReplacement != null) { - platform = getReplacement(matcher, platformReplacement); - } else { - platform = "Other"; - } - -// System.out.println("oa : "+pattern); - return name == null ? null : new OS(name, version, platform); + return (new OSPattern(Pattern.compile(regex), + configMap.get("name_replacement"), + configMap.get("version_replacement"), + configMap.get("platform_replacement"))); } - private String getReplacement(Matcher matcher, String replacement) { - if (isBackReference(replacement)) { - int group = getGroup(replacement); - return matcher.group(group); - } else { - return replacement; - } - } + protected static class OSPattern { + private final Pattern pattern; + private final String nameReplacement, versionReplacement, platformReplacement; + + public OSPattern(Pattern pattern, String nameReplacement, String versionReplacement, String platformReplacement) { + this.pattern = pattern; + this.nameReplacement = nameReplacement; + this.versionReplacement = versionReplacement; + this.platformReplacement = platformReplacement; + } + + public OS match(String agentString) { + String name = null, version = null, platform = null; + Matcher matcher = pattern.matcher(agentString); + + if (!matcher.find()) { + return null; + } + + int groupCount = matcher.groupCount(); + + if (nameReplacement != null) { + if (groupCount >= 1) { + name = Pattern.compile("(" + Pattern.quote("$1") + ")") + .matcher(nameReplacement) + .replaceAll(matcher.group(1)); + } else { + name = nameReplacement; + } + } else if (groupCount >= 1) { + name = matcher.group(1); + } else { + name = "Other"; + } + + if (versionReplacement != null) { + version = Utils.getReplacement(matcher, versionReplacement); + } else if (groupCount >= 2) { + version = matcher.group(2); + } else { + version = "Other"; + } + + if (platformReplacement != null) { + platform = Utils.getReplaceResult(platform, matcher, groupCount, nameReplacement); + } else if (groupCount >= 1) { + platform = name; + } else { + platform = "Other"; + } + return name == null ? null : new OS(name, version, platform); + } - /** - * Checks if the replacement is a backreference (i.e. $1, $2, $3, etc) to a capturing group in the regular expression. - */ - private boolean isBackReference(String replacement) { - return replacement.startsWith("$"); } - /** - * Extracts the group number from a backreference like $1, $2, $3, etc. - */ - private int getGroup(String backReference) { - return Integer.valueOf(backReference.substring(1)); - } - } } diff --git a/src/main/java/com/mesalab/ua/Parser.java b/src/main/java/com/mesalab/ua/Parser.java index e2bb93d..bc1ee10 100644 --- a/src/main/java/com/mesalab/ua/Parser.java +++ b/src/main/java/com/mesalab/ua/Parser.java @@ -42,6 +42,7 @@ public class Parser { /** * Creates a parser using the regular expression yaml file bundled in the jar. + * * @throws IOException if there's a problem reading the file from the classpath */ public Parser() throws IOException { @@ -53,6 +54,7 @@ public class Parser { /** * Creates a parser using the supplied regular expression yaml file. * It is the responsibility of the caller to close the InputStream after construction. + * * @param regexYaml the yaml file containing the regular expressions */ public Parser(InputStream regexYaml) { @@ -64,10 +66,19 @@ public class Parser { OS os = parseOS(agentString); Device device = deviceParser.parse(agentString); - os.setPlatform(device.name); String category = device.type; - return new UserAgent(client, os, device, category,System.currentTimeMillis() / 1000, agentString); + return new UserAgent(client, os, device, category, System.currentTimeMillis() / 1000, agentString); + } + + public UserAgent parse(String agentString, Long lastVisit) { + Client client = parseClient(agentString); + OS os = parseOS(agentString); + Device device = deviceParser.parse(agentString); + + String category = device.type; + + return new UserAgent(client, os, device, category, lastVisit, agentString); } public Client parseClient(String agentString) { diff --git a/src/main/java/com/mesalab/ua/Utils.java b/src/main/java/com/mesalab/ua/Utils.java index d0694fd..0a8f306 100644 --- a/src/main/java/com/mesalab/ua/Utils.java +++ b/src/main/java/com/mesalab/ua/Utils.java @@ -1,6 +1,9 @@ package com.mesalab.ua; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * @description: @@ -9,7 +12,9 @@ import java.util.regex.Matcher; **/ public class Utils { - public static String getReplacement(Matcher matcher, String replacement) { + private static final Pattern SUBSTITUTIONS_PATTERN = Pattern.compile("\\$\\d"); + + static String getReplacement(Matcher matcher, String replacement) { if (isBackReference(replacement)) { int group = getGroup(replacement); return matcher.group(group); @@ -18,16 +23,68 @@ public class Utils { } } + static String getReplaceResult(String model, Matcher matcher, int groupCount, String replacementString) { + if (replacementString != null) { + if (replacementString.contains("$")) { + model = replacementString; + for (String substitution : getSubstitutions(replacementString)) { + int i = Integer.parseInt(substitution.substring(1)); + String replacement = matcher.groupCount() >= i && matcher.group(i) != null + ? Matcher.quoteReplacement(matcher.group(i)) : ""; + model = model.replaceFirst("\\" + substitution, replacement); + } + model = model.trim(); + } else { + model = replacementString; + } + } else if (groupCount >= 1) { + model = matcher.group(1); + } + return model; + } + + + static List<String> getSubstitutions(String nameReplacement) { + Matcher matcher = SUBSTITUTIONS_PATTERN.matcher(nameReplacement); + List<String> substitutions = new ArrayList<String>(); + while (matcher.find()) { + substitutions.add(matcher.group()); + } + return substitutions; + } + /** * Checks if the replacement is a backreference (i.e. $1, $2, $3, etc) to a capturing group in the regular expression. */ - public static boolean isBackReference(String replacement) { + static boolean isBackReference(String replacement) { return replacement.startsWith("$"); } + /** * Extracts the group number from a backreference like $1, $2, $3, etc. */ - public static int getGroup(String backReference) { - return Integer.valueOf(backReference.substring(1)); + static int getGroup(String backReference) { + return Integer.parseInt(backReference.substring(1)); + } + + + + static boolean contains(String str, String[] strArr) { + if (strArr == null) + return false; + for (String arrStr : strArr) { + if (str.contains(arrStr)) { + return true; + } + } + return false; + } + static String[] toLowerCase(String[] strArr) { + if (strArr == null) return null; + String[] res = new String[strArr.length]; + for (int i=0; i<strArr.length; i++) { + res[i] = strArr[i].toLowerCase(); + } + return res; } } diff --git a/src/main/resources/device.yaml b/src/main/resources/device.yaml index d7f943d..2441d23 100644 --- a/src/main/resources/device.yaml +++ b/src/main/resources/device.yaml @@ -3467,6 +3467,7 @@ device_parsers: name_replacement: '$1$2,$3' brand_replacement: 'Apple' model_replacement: '$1$2,$3' + type_replacement: 'Desktop' # @note: newer desktop applications don't show device info # This is here so as to not have them recorded as iOS-Device - regex: 'CFNetwork/.* Darwin/\d+\.\d+\.\d+ \(x86_64\)' diff --git a/src/main/resources/os.yaml b/src/main/resources/os.yaml index 118e400..1b2f0a4 100644 --- a/src/main/resources/os.yaml +++ b/src/main/resources/os.yaml @@ -1,9 +1,9 @@ os_parsers: ########自定义 开始######## # android 系统 待验证 - - regex: '([a|A]ndroid)[^\d]*(\d)[_|.]([\d[_|.]]+\d)' - name_replacement: 'Android' - version_replacement: '$2' +# - regex: '([a|A]ndroid)[^\d]*(\d)[_|.]([\d[_|.]]+\d)' +# name_replacement: 'Android' +# version_replacement: '$2' @@ -96,34 +96,42 @@ os_parsers: # can actually detect rooted android os. do we care? ########## - regex: '(Android)[ \-/](\d+)(?:\.(\d+)|)(?:[.\-]([a-z0-9]+)|)' - + name_replacement: 'Android' + version_replacement: '$2' + platform_replacement: 'Android' - regex: '(Android) Donut' version_replacement: '1' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Eclair' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Froyo' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Gingerbread' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Honeycomb' version_replacement: '3' + platform_replacement: 'Android' # Android 9; Android 10; - regex: '(Android) (\d+);' + platform_replacement: 'Android' # UCWEB - regex: '^UCWEB.*; (Adr) (\d+)\.(\d+)(?:[.\-]([a-z0-9]+)|);' name_replacement: 'Android' + platform_replacement: 'Android' + - regex: '^UCWEB.*; (iPad|iPh|iPd) OS (\d+)_(\d+)(?:_(\d+)|);' name_replacement: 'iOS' + platform_replacement: 'iOS' + - regex: '^UCWEB.*; (wds) (\d+)\.(\d+)(?:\.(\d+)|);' name_replacement: 'Windows Phone' # JUC @@ -134,6 +142,7 @@ os_parsers: - regex: '(android)\s(?:mobile\/)(\d+)(?:\.(\d+)(?:\.(\d+)|)|)' name_replacement: 'Android' + ########## # Kindle Android ########## @@ -169,14 +178,16 @@ os_parsers: - regex: '(Windows 10)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows (?:NT 5\.2|NT 5\.1))' name_replacement: 'Windows' version_replacement: 'XP' - + platform_replacement: 'WinXP' - regex: '(Win(?:dows NT |32NT\/)6\.1)' name_replacement: 'Windows' version_replacement: '7' + platform_replacement: 'Windows 7' - regex: '(Win(?:dows NT |32NT\/)6\.0)' name_replacement: 'Windows' @@ -185,6 +196,7 @@ os_parsers: - regex: '(Win 9x 4\.90)' name_replacement: 'Windows' version_replacement: 'ME' + platform_replacement: 'Windows ME' - regex: '(Windows NT 6\.2; ARM;)' name_replacement: 'Windows' @@ -193,48 +205,57 @@ os_parsers: - regex: '(Win(?:dows NT |32NT\/)6\.2)' name_replacement: 'Windows' version_replacement: '8' + platform_replacement: 'Windows' - regex: '(Windows NT 6\.3; ARM;)' name_replacement: 'Windows' version_replacement: 'RT 8' - platform_replacement: + platform_replacement: 'Windows RT' - regex: '(Win(?:dows NT |32NT\/)6\.3)' name_replacement: 'Windows' version_replacement: '8' - platform_replacement: + platform_replacement: 'Windows 8' - regex: '(Win(?:dows NT |32NT\/)6\.4)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows NT 10\.0)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows NT 5\.0)' name_replacement: 'Windows' version_replacement: '2000' + platform_replacement: 'Windows 2000' - regex: '(WinNT4.0)' name_replacement: 'Windows' version_replacement: 'NT 4.0' + platform_replacement: 'Windows NT' - regex: '(Windows ?CE)' name_replacement: 'Windows' version_replacement: 'CE' + platform_replacement: 'Windows CE' - regex: 'Win(?:dows)? ?(95|98|3.1|NT|ME|2000|XP|Vista|7|CE)' name_replacement: 'Windows' version_replacement: '$1' + platform_replacement: 'Windows $1' - regex: 'Win16' name_replacement: 'Windows' version_replacement: '3.1' + platform_replacement: 'Win16' - regex: 'Win32' name_replacement: 'Windows' version_replacement: '95' + platform_replacement: 'Win32' # Box apps (Drive, Sync, Notes) on Windows https://www.box.com/resources/downloads - regex: '^Box.*Windows/([\d.]+);' @@ -257,37 +278,39 @@ os_parsers: - regex: '\w+\s+Mac OS X\s+\w+\s+(\d+).(\d+).(\d+).*' name_replacement: 'Mac OS X' version_replacement: '$1' + platform_replacement: 'Mac OS X' # Leopard - regex: ' (Dar)(win)/(9).(\d+).*\((?:i386|x86_64|Power Macintosh)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Snow Leopard - regex: ' (Dar)(win)/(10).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Lion - regex: ' (Dar)(win)/(11).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Mountain Lion - regex: ' (Dar)(win)/(12).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Mavericks - regex: ' (Dar)(win)/(13).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Yosemite is Darwin/14.x but patch versions are inconsistent in the Darwin string; # more accurately covered by CFNetwork regexes downstream # IE on Mac doesn't specify version number - regex: 'Mac_PowerPC' name_replacement: 'Mac OS' + platform_replacement: 'Mac OS' # builds before tiger don't seem to specify version? @@ -297,6 +320,7 @@ os_parsers: # Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin - regex: '^Box.*;(Darwin)/(10)\.(1\d)(?:\.(\d+)|)' name_replacement: 'Mac OS X' + platform_replacement: 'Mac OS X' ########## # iOS @@ -305,84 +329,101 @@ os_parsers: # keep this above generic iOS, since AppleTV UAs contain 'CPU OS' - regex: '(Apple\s?TV)(?:/(\d+)\.(\d+)|)' name_replacement: 'ATV OS X' + platform_replacement: 'ATV OS X' - regex: '(CPU[ +]OS|iPhone[ +]OS|CPU[ +]iPhone|CPU IPhone OS)[ +]+(\d+)[_\.](\d+)(?:[_\.](\d+)|)' name_replacement: 'iOS' + platform_replacement: 'iOS' # remaining cases are mostly only opera uas, so catch opera as to not catch iphone spoofs - regex: '(iPhone|iPad|iPod); Opera' name_replacement: 'iOS' + platform_replacement: 'iOS' # few more stragglers - regex: '(iPhone|iPad|iPod).*Mac OS X.*Version/(\d+)\.(\d+)' name_replacement: 'iOS' + platform_replacement: 'iOS' # CFNetwork/Darwin - The specific CFNetwork or Darwin version determines # whether the os maps to Mac OS, or iOS, or just Darwin. # See: http://user-agents.me/cfnetwork-version-list - regex: '(CFNetwork)/(5)48\.0\.3.* Darwin/11\.0\.0' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(5)48\.(0)\.4.* Darwin/(1)1\.0\.0' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(5)48\.(1)\.4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(4)85\.1(3)\.9' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(6)09\.(1)\.4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(6)(0)9' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(1)\.13' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(1)\.(1)4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CF)(Network)/6(7)(2)\.1\.15' name_replacement: 'iOS' version_replacement: '7' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(0)\.(?:2|8)' name_replacement: 'iOS' + platform_replacement: 'iOS' + - regex: '(CFNetwork)/709\.1' name_replacement: 'iOS' version_replacement: '8' - os_v2_replacement: '0.b5' + platform_replacement: 'iOS' - regex: '(CF)(Network)/711\.(\d)' name_replacement: 'iOS' version_replacement: '8' + platform_replacement: 'iOS' - regex: '(CF)(Network)/(720)\.(\d)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: '(CF)(Network)/(760)\.(\d)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/7.* Darwin/15\.4\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/15\.5\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/15\.6\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CF)(Network)/758\.(\d)' name_replacement: 'iOS' version_replacement: '9' + platform_replacement: 'iOS' - regex: 'CFNetwork/808\.3 Darwin/16\.3\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CF)(Network)/808\.(\d)' name_replacement: 'iOS' version_replacement: '10' + platform_replacement: 'iOS' ########## # CFNetwork macOS Apps (must be before CFNetwork iOS Apps @@ -391,15 +432,15 @@ os_parsers: - regex: 'CFNetwork/.* Darwin/17\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/.* Darwin/16\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/8.* Darwin/15\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' ########## # CFNetwork iOS Apps # @ref: https://en.wikipedia.org/wiki/Darwin_(operating_system)#Release_history @@ -407,39 +448,44 @@ os_parsers: - regex: 'CFNetwork/.* Darwin/(9)\.\d+' name_replacement: 'iOS' version_replacement: '1' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(10)\.\d+' name_replacement: 'iOS' version_replacement: '4' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(11)\.\d+' name_replacement: 'iOS' version_replacement: '5' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(13)\.\d+' name_replacement: 'iOS' version_replacement: '6' + platform_replacement: 'iOS' - regex: 'CFNetwork/6.* Darwin/(14)\.\d+' name_replacement: 'iOS' version_replacement: '7' + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/(14)\.\d+' name_replacement: 'iOS' version_replacement: '8' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/(15)\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.5\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.6\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.7\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/(16)\.\d+' name_replacement: 'iOS' @@ -447,50 +493,55 @@ os_parsers: - regex: 'CFNetwork/8.* Darwin/17\.0\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.2\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.3\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.4\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.5\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/17\.6\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/17\.7\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/(17)\.\d+' name_replacement: 'iOS' version_replacement: '11' + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/18\.0\.\d+' name_replacement: 'iOS' version_replacement: '12' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/(18)\.\d+' name_replacement: 'iOS' version_replacement: '12' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/' name_replacement: 'iOS' + platform_replacement: 'iOS' # iOS Apps - regex: '\b(iOS[ /]|iOS; |iPhone(?:/| v|[ _]OS[/,]|; | OS : |\d,\d/|\d,\d; )|iPad/)(\d{1,2})[_\.](\d{1,2})(?:[_\.](\d+)|)' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '\((iOS);' + platform_replacement: 'iOS' ########## # Apple Watch @@ -538,43 +589,59 @@ os_parsers: ########## - regex: '(Symbian[Oo][Ss])[/ ](\d+)\.(\d+)' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' - regex: '(Symbian/3).+NokiaBrowser/7\.3' name_replacement: 'Symbian^3 Anna' + platform_replacement: 'Symbian' - regex: '(Symbian/3).+NokiaBrowser/7\.4' name_replacement: 'Symbian^3 Belle' + platform_replacement: 'Symbian' - regex: '(Symbian/3)' name_replacement: 'Symbian^3' - regex: '\b(Series 60|SymbOS|S60Version|S60V\d|S60\b)' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' - regex: '(MeeGo)' - regex: 'Symbian [Oo][Ss]' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' + - regex: 'Series40;' name_replacement: 'Nokia Series 40' + platform_replacement: 'Symbian' - regex: 'Series30Plus;' name_replacement: 'Nokia Series 30 Plus' + platform_replacement: 'Symbian' ########## # BlackBerry devices ########## - regex: '(BB10);.+Version/(\d+)\.(\d+)\.(\d+)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry)[0-9a-z]+/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry).+Version/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(RIM Tablet OS) (\d+)\.(\d+)\.(\d+)' name_replacement: 'BlackBerry Tablet OS' + platform_replacement: 'BlackBerry' - regex: '(Play[Bb]ook)' name_replacement: 'BlackBerry Tablet OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' ########## # KaiOS ########## - regex: '(K[Aa][Ii]OS)\/(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'KaiOS' + platform_replacement: 'KaiOS' + ########## # Firefox OS @@ -582,42 +649,43 @@ os_parsers: - regex: '\((?:Mobile|Tablet);.+Gecko/18.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/18.1 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/26.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/28.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/30.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/32.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '2' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/34.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '2' - platform_replacement: + platform_replacement: 'Firefox OS' # Firefox OS Generic - regex: '\((?:Mobile|Tablet);.+Firefox/\d+\.\d+' name_replacement: 'Firefox OS' + platform_replacement: 'Firefox OS' ########## @@ -625,11 +693,15 @@ os_parsers: # yes, Brew is lower-cased for Brew MP ########## - regex: '(BREW)[ /](\d+)\.(\d+)\.(\d+)' + platform_replacement: 'BREW' - regex: '(BREW);' + platform_replacement: 'BREW' - regex: '(Brew MP|BMP)[ /](\d+)\.(\d+)\.(\d+)' name_replacement: 'Brew MP' + platform_replacement: 'BREW' - regex: 'BMP;' name_replacement: 'Brew MP' + platform_replacement: 'BREW' ########## # Google TV @@ -643,6 +715,7 @@ os_parsers: ########## - regex: '(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)' name_replacement: 'Chromecast' + platform_replacement: 'Chromecast OS' ########## # Misc mobile @@ -680,6 +753,7 @@ os_parsers: name_replacement: 'FreeBSD' - regex: 'linux' name_replacement: 'Linux' + platform_replacement: 'Linux' # Roku Digital-Video-Players https://www.roku.com/ - regex: '^(Roku)/DVP-(\d+)\.(\d+)' diff --git a/src/main/resources/regexes.yaml b/src/main/resources/regexes.yaml index bb8784f..4f0df09 100644 --- a/src/main/resources/regexes.yaml +++ b/src/main/resources/regexes.yaml @@ -1068,21 +1068,17 @@ client_parsers: name_replacement: 'ViaFree' version_replacement: '$2' - os_parsers: ########自定义 开始######## # android 系统 待验证 - - regex: '([a|A]ndroid)[^\d]*(\d)[_|.]([\d[_|.]]+\d)' - name_replacement: 'Android' - version_replacement: '$2' + # - regex: '([a|A]ndroid)[^\d]*(\d)[_|.]([\d[_|.]]+\d)' + # name_replacement: 'Android' + # version_replacement: '$2' ########自定义 结束######## - - - ########## # HbbTV vendors ########## @@ -1170,34 +1166,42 @@ os_parsers: # can actually detect rooted android os. do we care? ########## - regex: '(Android)[ \-/](\d+)(?:\.(\d+)|)(?:[.\-]([a-z0-9]+)|)' - + name_replacement: 'Android' + version_replacement: '$2' + platform_replacement: 'Android' - regex: '(Android) Donut' version_replacement: '1' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Eclair' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Froyo' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Gingerbread' version_replacement: '2' - platform_replacement: + platform_replacement: 'Android' - regex: '(Android) Honeycomb' version_replacement: '3' + platform_replacement: 'Android' # Android 9; Android 10; - regex: '(Android) (\d+);' + platform_replacement: 'Android' # UCWEB - regex: '^UCWEB.*; (Adr) (\d+)\.(\d+)(?:[.\-]([a-z0-9]+)|);' name_replacement: 'Android' + platform_replacement: 'Android' + - regex: '^UCWEB.*; (iPad|iPh|iPd) OS (\d+)_(\d+)(?:_(\d+)|);' name_replacement: 'iOS' + platform_replacement: 'iOS' + - regex: '^UCWEB.*; (wds) (\d+)\.(\d+)(?:\.(\d+)|);' name_replacement: 'Windows Phone' # JUC @@ -1208,6 +1212,7 @@ os_parsers: - regex: '(android)\s(?:mobile\/)(\d+)(?:\.(\d+)(?:\.(\d+)|)|)' name_replacement: 'Android' + ########## # Kindle Android ########## @@ -1243,14 +1248,16 @@ os_parsers: - regex: '(Windows 10)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows (?:NT 5\.2|NT 5\.1))' name_replacement: 'Windows' version_replacement: 'XP' - + platform_replacement: 'WinXP' - regex: '(Win(?:dows NT |32NT\/)6\.1)' name_replacement: 'Windows' version_replacement: '7' + platform_replacement: 'Windows 7' - regex: '(Win(?:dows NT |32NT\/)6\.0)' name_replacement: 'Windows' @@ -1259,6 +1266,7 @@ os_parsers: - regex: '(Win 9x 4\.90)' name_replacement: 'Windows' version_replacement: 'ME' + platform_replacement: 'Windows ME' - regex: '(Windows NT 6\.2; ARM;)' name_replacement: 'Windows' @@ -1267,48 +1275,57 @@ os_parsers: - regex: '(Win(?:dows NT |32NT\/)6\.2)' name_replacement: 'Windows' version_replacement: '8' + platform_replacement: 'Windows' - regex: '(Windows NT 6\.3; ARM;)' name_replacement: 'Windows' version_replacement: 'RT 8' - platform_replacement: + platform_replacement: 'Windows RT' - regex: '(Win(?:dows NT |32NT\/)6\.3)' name_replacement: 'Windows' version_replacement: '8' - platform_replacement: + platform_replacement: 'Windows 8' - regex: '(Win(?:dows NT |32NT\/)6\.4)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows NT 10\.0)' name_replacement: 'Windows' version_replacement: '10' + platform_replacement: 'Windows 10' - regex: '(Windows NT 5\.0)' name_replacement: 'Windows' version_replacement: '2000' + platform_replacement: 'Windows 2000' - regex: '(WinNT4.0)' name_replacement: 'Windows' version_replacement: 'NT 4.0' + platform_replacement: 'Windows NT' - regex: '(Windows ?CE)' name_replacement: 'Windows' version_replacement: 'CE' + platform_replacement: 'Windows CE' - regex: 'Win(?:dows)? ?(95|98|3.1|NT|ME|2000|XP|Vista|7|CE)' name_replacement: 'Windows' version_replacement: '$1' + platform_replacement: 'Windows $1' - regex: 'Win16' name_replacement: 'Windows' version_replacement: '3.1' + platform_replacement: 'Win16' - regex: 'Win32' name_replacement: 'Windows' version_replacement: '95' + platform_replacement: 'Win32' # Box apps (Drive, Sync, Notes) on Windows https://www.box.com/resources/downloads - regex: '^Box.*Windows/([\d.]+);' @@ -1331,39 +1348,39 @@ os_parsers: - regex: '\w+\s+Mac OS X\s+\w+\s+(\d+).(\d+).(\d+).*' name_replacement: 'Mac OS X' version_replacement: '$1' - os_v2_replacement: '$2' - os_v3_replacement: '$3' + platform_replacement: 'Mac OS X' # Leopard - regex: ' (Dar)(win)/(9).(\d+).*\((?:i386|x86_64|Power Macintosh)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Snow Leopard - regex: ' (Dar)(win)/(10).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Lion - regex: ' (Dar)(win)/(11).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Mountain Lion - regex: ' (Dar)(win)/(12).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Mavericks - regex: ' (Dar)(win)/(13).(\d+).*\((?:i386|x86_64)\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' # Yosemite is Darwin/14.x but patch versions are inconsistent in the Darwin string; # more accurately covered by CFNetwork regexes downstream # IE on Mac doesn't specify version number - regex: 'Mac_PowerPC' name_replacement: 'Mac OS' + platform_replacement: 'Mac OS' # builds before tiger don't seem to specify version? @@ -1373,6 +1390,7 @@ os_parsers: # Box Drive and Box Sync on Mac OS X use OSX version numbers, not Darwin - regex: '^Box.*;(Darwin)/(10)\.(1\d)(?:\.(\d+)|)' name_replacement: 'Mac OS X' + platform_replacement: 'Mac OS X' ########## # iOS @@ -1381,84 +1399,101 @@ os_parsers: # keep this above generic iOS, since AppleTV UAs contain 'CPU OS' - regex: '(Apple\s?TV)(?:/(\d+)\.(\d+)|)' name_replacement: 'ATV OS X' + platform_replacement: 'ATV OS X' - regex: '(CPU[ +]OS|iPhone[ +]OS|CPU[ +]iPhone|CPU IPhone OS)[ +]+(\d+)[_\.](\d+)(?:[_\.](\d+)|)' name_replacement: 'iOS' + platform_replacement: 'iOS' # remaining cases are mostly only opera uas, so catch opera as to not catch iphone spoofs - regex: '(iPhone|iPad|iPod); Opera' name_replacement: 'iOS' + platform_replacement: 'iOS' # few more stragglers - regex: '(iPhone|iPad|iPod).*Mac OS X.*Version/(\d+)\.(\d+)' name_replacement: 'iOS' + platform_replacement: 'iOS' # CFNetwork/Darwin - The specific CFNetwork or Darwin version determines # whether the os maps to Mac OS, or iOS, or just Darwin. # See: http://user-agents.me/cfnetwork-version-list - regex: '(CFNetwork)/(5)48\.0\.3.* Darwin/11\.0\.0' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(5)48\.(0)\.4.* Darwin/(1)1\.0\.0' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(5)48\.(1)\.4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(4)85\.1(3)\.9' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(6)09\.(1)\.4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/(6)(0)9' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(1)\.13' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(1)\.(1)4' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '(CF)(Network)/6(7)(2)\.1\.15' name_replacement: 'iOS' version_replacement: '7' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CFNetwork)/6(7)2\.(0)\.(?:2|8)' name_replacement: 'iOS' + platform_replacement: 'iOS' + - regex: '(CFNetwork)/709\.1' name_replacement: 'iOS' version_replacement: '8' - os_v2_replacement: '0.b5' + platform_replacement: 'iOS' - regex: '(CF)(Network)/711\.(\d)' name_replacement: 'iOS' version_replacement: '8' + platform_replacement: 'iOS' - regex: '(CF)(Network)/(720)\.(\d)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: '(CF)(Network)/(760)\.(\d)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/7.* Darwin/15\.4\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/15\.5\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/15\.6\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CF)(Network)/758\.(\d)' name_replacement: 'iOS' version_replacement: '9' + platform_replacement: 'iOS' - regex: 'CFNetwork/808\.3 Darwin/16\.3\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: '(CF)(Network)/808\.(\d)' name_replacement: 'iOS' version_replacement: '10' + platform_replacement: 'iOS' ########## # CFNetwork macOS Apps (must be before CFNetwork iOS Apps @@ -1467,15 +1502,15 @@ os_parsers: - regex: 'CFNetwork/.* Darwin/17\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/.* Darwin/16\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' - regex: 'CFNetwork/8.* Darwin/15\.\d+.*\(x86_64\)' name_replacement: 'Mac OS X' version_replacement: '10' - platform_replacement: + platform_replacement: 'Mac OS X' ########## # CFNetwork iOS Apps # @ref: https://en.wikipedia.org/wiki/Darwin_(operating_system)#Release_history @@ -1483,39 +1518,44 @@ os_parsers: - regex: 'CFNetwork/.* Darwin/(9)\.\d+' name_replacement: 'iOS' version_replacement: '1' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(10)\.\d+' name_replacement: 'iOS' version_replacement: '4' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(11)\.\d+' name_replacement: 'iOS' version_replacement: '5' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/(13)\.\d+' name_replacement: 'iOS' version_replacement: '6' + platform_replacement: 'iOS' - regex: 'CFNetwork/6.* Darwin/(14)\.\d+' name_replacement: 'iOS' version_replacement: '7' + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/(14)\.\d+' name_replacement: 'iOS' version_replacement: '8' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/7.* Darwin/(15)\.\d+' name_replacement: 'iOS' version_replacement: '9' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.5\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.6\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/16\.7\.\d+' name_replacement: 'iOS' version_replacement: '10' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/(16)\.\d+' name_replacement: 'iOS' @@ -1523,50 +1563,55 @@ os_parsers: - regex: 'CFNetwork/8.* Darwin/17\.0\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.2\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.3\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.4\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/17\.5\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/17\.6\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/17\.7\.\d+' name_replacement: 'iOS' version_replacement: '11' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/8.* Darwin/(17)\.\d+' name_replacement: 'iOS' version_replacement: '11' + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/18\.0\.\d+' name_replacement: 'iOS' version_replacement: '12' - platform_replacement: + platform_replacement: 'iOS' - regex: 'CFNetwork/9.* Darwin/(18)\.\d+' name_replacement: 'iOS' version_replacement: '12' + platform_replacement: 'iOS' - regex: 'CFNetwork/.* Darwin/' name_replacement: 'iOS' + platform_replacement: 'iOS' # iOS Apps - regex: '\b(iOS[ /]|iOS; |iPhone(?:/| v|[ _]OS[/,]|; | OS : |\d,\d/|\d,\d; )|iPad/)(\d{1,2})[_\.](\d{1,2})(?:[_\.](\d+)|)' name_replacement: 'iOS' + platform_replacement: 'iOS' - regex: '\((iOS);' + platform_replacement: 'iOS' ########## # Apple Watch @@ -1614,43 +1659,59 @@ os_parsers: ########## - regex: '(Symbian[Oo][Ss])[/ ](\d+)\.(\d+)' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' - regex: '(Symbian/3).+NokiaBrowser/7\.3' name_replacement: 'Symbian^3 Anna' + platform_replacement: 'Symbian' - regex: '(Symbian/3).+NokiaBrowser/7\.4' name_replacement: 'Symbian^3 Belle' + platform_replacement: 'Symbian' - regex: '(Symbian/3)' name_replacement: 'Symbian^3' - regex: '\b(Series 60|SymbOS|S60Version|S60V\d|S60\b)' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' - regex: '(MeeGo)' - regex: 'Symbian [Oo][Ss]' name_replacement: 'Symbian OS' + platform_replacement: 'Symbian' + - regex: 'Series40;' name_replacement: 'Nokia Series 40' + platform_replacement: 'Symbian' - regex: 'Series30Plus;' name_replacement: 'Nokia Series 30 Plus' + platform_replacement: 'Symbian' ########## # BlackBerry devices ########## - regex: '(BB10);.+Version/(\d+)\.(\d+)\.(\d+)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry)[0-9a-z]+/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry).+Version/(\d+)\.(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' - regex: '(RIM Tablet OS) (\d+)\.(\d+)\.(\d+)' name_replacement: 'BlackBerry Tablet OS' + platform_replacement: 'BlackBerry' - regex: '(Play[Bb]ook)' name_replacement: 'BlackBerry Tablet OS' + platform_replacement: 'BlackBerry' - regex: '(Black[Bb]erry)' name_replacement: 'BlackBerry OS' + platform_replacement: 'BlackBerry' ########## # KaiOS ########## - regex: '(K[Aa][Ii]OS)\/(\d+)\.(\d+)(?:\.(\d+)|)' name_replacement: 'KaiOS' + platform_replacement: 'KaiOS' + ########## # Firefox OS @@ -1658,42 +1719,43 @@ os_parsers: - regex: '\((?:Mobile|Tablet);.+Gecko/18.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/18.1 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/26.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/28.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/30.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '1' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/32.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '2' - platform_replacement: + platform_replacement: 'Firefox OS' - regex: '\((?:Mobile|Tablet);.+Gecko/34.0 Firefox/\d+\.\d+' name_replacement: 'Firefox OS' version_replacement: '2' - platform_replacement: + platform_replacement: 'Firefox OS' # Firefox OS Generic - regex: '\((?:Mobile|Tablet);.+Firefox/\d+\.\d+' name_replacement: 'Firefox OS' + platform_replacement: 'Firefox OS' ########## @@ -1701,11 +1763,15 @@ os_parsers: # yes, Brew is lower-cased for Brew MP ########## - regex: '(BREW)[ /](\d+)\.(\d+)\.(\d+)' + platform_replacement: 'BREW' - regex: '(BREW);' + platform_replacement: 'BREW' - regex: '(Brew MP|BMP)[ /](\d+)\.(\d+)\.(\d+)' name_replacement: 'Brew MP' + platform_replacement: 'BREW' - regex: 'BMP;' name_replacement: 'Brew MP' + platform_replacement: 'BREW' ########## # Google TV @@ -1719,6 +1785,7 @@ os_parsers: ########## - regex: '(CrKey)(?:[/](\d+)\.(\d+)(?:\.(\d+)|)|)' name_replacement: 'Chromecast' + platform_replacement: 'Chromecast OS' ########## # Misc mobile @@ -1756,6 +1823,7 @@ os_parsers: name_replacement: 'FreeBSD' - regex: 'linux' name_replacement: 'Linux' + platform_replacement: 'Linux' # Roku Digital-Video-Players https://www.roku.com/ - regex: '^(Roku)/DVP-(\d+)\.(\d+)' @@ -5230,6 +5298,7 @@ device_parsers: name_replacement: '$1$2,$3' brand_replacement: 'Apple' model_replacement: '$1$2,$3' + type_replacement: 'Desktop' # @note: newer desktop applications don't show device info # This is here so as to not have them recorded as iOS-Device - regex: 'CFNetwork/.* Darwin/\d+\.\d+\.\d+ \(x86_64\)' @@ -5737,3 +5806,4 @@ device_parsers: name_replacement: 'Mac' brand_replacement: 'Apple' model_replacement: 'Mac' + diff --git a/src/test/java/com/mesalab/ua/Test.java b/src/test/java/com/mesalab/ua/Test.java index 1775c00..fad7986 100644 --- a/src/test/java/com/mesalab/ua/Test.java +++ b/src/test/java/com/mesalab/ua/Test.java @@ -2,8 +2,11 @@ package com.mesalab.ua; import com.alibaba.fastjson.JSONObject; import com.mesalab.ua.dto.UserAgent; +import eu.bitwalker.useragentutils.OperatingSystem; +import nl.basjes.parse.useragent.UserAgentAnalyzer; import org.springframework.http.HttpEntity; import org.springframework.util.LinkedMultiValueMap; +import org.springframework.util.ObjectUtils; import org.springframework.util.StopWatch; import org.springframework.web.client.RestTemplate; @@ -21,14 +24,13 @@ public class Test { Parser uaParser = new Parser(); UserAgent c = uaParser.parse(uaString); - System.err.println(c); - System.out.println(c.client.name); // => "Mobile Safari" - System.out.println(c.client.version); // => "5" - System.out.println(c.client.type); // => "1" - - System.out.println(c.os.name); // => "iOS" - System.out.println(c.os.version); // => "5" - System.out.println(c.os.platform); // => "1" +// System.out.println(c.client.name); // => "Mobile Safari" +// System.out.println(c.client.version); // => "5" +// System.out.println(c.client.type); // => "1" +// +// System.out.println(c.os.name); // => "iOS" +// System.out.println(c.os.version); // => "5" +// System.out.println(c.os.platform); // => "1" System.out.println(c.device); // => "iPhone" } @@ -36,7 +38,6 @@ public class Test { String uaString = "Mozilla/6.0 (iPhone; CPU iPhone OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3"; // String usa = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"; Parser uaParser = new Parser(); - UserAgent parse1 = uaParser.parse(uaString); // System.err.println(JSON.toJSONString(parse, true)); //创建请求头 LinkedMultiValueMap requestMap = new LinkedMultiValueMap(); @@ -46,7 +47,7 @@ public class Test { //构建参数 Map<String, Object> deleteParamMap = getDeleteSource(); String url = "http://" + "192.168.40.193:8123"; - deleteParamMap.put("query", "SELECT DISTINCT (user_agent) FROM k18_ods.tbs_ods_ntc_http_log_local where notEmpty(user_agent) LIMIT 100000 FORMAT JSON;"); + deleteParamMap.put("query", "SELECT DISTINCT (user_agent) FROM k18_ods.tbs_ods_ntc_http_log_local where notEmpty(user_agent) LIMIT 10000 FORMAT JSON;"); // 发送请求解析结果 String result = restTemplate.postForObject(getUrlWithParams(url, deleteParamMap), httpEntity, String.class); @@ -59,11 +60,11 @@ public class Test { Collections.sort(uaList); String filePath = "C:\\Users\\Administrator\\Desktop\\data\\2.2.0\\uap.txt"; -// UserAgentAnalyzer uaa = UserAgentAnalyzer -// .newBuilder() -// .hideMatcherLoadStats() -// .withCache(10000) -// .build(); + UserAgentAnalyzer uaa = UserAgentAnalyzer + .newBuilder() + .hideMatcherLoadStats() + .withCache(10000) + .build(); //输出到文件 try(BufferedWriter fos = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filePath)))){ Parser parser = new Parser(); @@ -73,21 +74,17 @@ public class Test { sw.start(); System.err.println("***********开始解析***********"); for ( String ua : uaList) { - + OperatingSystem ooo = OperatingSystem.parseUserAgentString(ua); +// nl.basjes.parse.useragent.UserAgent.ImmutableUserAgent agent = uaa.parse(ua); UserAgent parse = parser.parse(ua); -// eu.bitwalker.useragentutils.UserAgent userAgent = eu.bitwalker.useragentutils.UserAgent.parseUserAgentString(ua); -// Browser browser = Browser.parseUserAgentString(ua); -// System.err.println(JSON.toJSONString(operatingSystem)); -// System.err.println(JSON.toJSONString(userAgent)); -// System.err.println(JSON.toJSONString(browser)); -// nl.basjes.parse.useragent.UserAgent.ImmutableUserAgent agent = uaa.parse(ua); -// System.err.println(ua); -// System.err.println("\n"+num+": "+"\n"+JSON.toJSONString( parse.os ,true)+ "\n"+"DeviceClass:"+agent); - if ("Other".equals(parse.os.name)||"Other".equals(parse.os.version)||"Other".equals(parse.os.platform)){ + +// System.err.println("\n"+num+": "+ua+"\n"+"DeviceClass:"+agent); + if (ObjectUtils.isEmpty(parse.os.platform)||ObjectUtils.isEmpty(parse.os.version)||ObjectUtils.isEmpty(parse.os.name)|| "Other".equals(parse.os.name)||"Other".equals(parse.os.version)||"Other".equals(parse.os.platform)){ num++; } - if ("Other".equals(parse.device.name)||"Other".equals(parse.device.type)||"Other".equals(parse.device.brand)||"Other".equals(parse.device.model)){ + if (ObjectUtils.isEmpty(parse.device.type)||ObjectUtils.isEmpty(parse.device.brand)||ObjectUtils.isEmpty(parse.device.model)||ObjectUtils.isEmpty(parse.device.name)||"Other".equals(parse.device.name)||"Other".equals(parse.device.type)||"Other".equals(parse.device.brand)||"Other".equals(parse.device.model)){ + System.out.println("ua "+ua+"\n"+parse.device+"\n"+"device: "+ooo.getDeviceType()); num2++; } // fos.write(JSON.toJSONString( parser.parse(ua))); diff --git a/src/test/java/com/mesalab/ua/UaTest.java b/src/test/java/com/mesalab/ua/UaTest.java index 7e2143d..669b1e2 100644 --- a/src/test/java/com/mesalab/ua/UaTest.java +++ b/src/test/java/com/mesalab/ua/UaTest.java @@ -1,12 +1,13 @@ package com.mesalab.ua; -import com.alibaba.fastjson.JSON; import com.mesalab.ua.dto.UserAgent; -import eu.bitwalker.useragentutils.Browser; import eu.bitwalker.useragentutils.OperatingSystem; import nl.basjes.parse.useragent.UserAgentAnalyzer; +import org.junit.Test; import java.io.IOException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public class UaTest { @@ -16,15 +17,14 @@ public class UaTest { try { Parser uaParser = new Parser(); UserAgent c = uaParser.parse(uaString); - System.err.println(c); +// System.err.println(c); System.out.println("============================================="); System.out.println("============================================="); - eu.bitwalker.useragentutils.UserAgent userAgent = eu.bitwalker.useragentutils.UserAgent.parseUserAgentString(uaString); +// eu.bitwalker.useragentutils.UserAgent userAgent = eu.bitwalker.useragentutils.UserAgent.parseUserAgentString(uaString); OperatingSystem operatingSystem = OperatingSystem.parseUserAgentString(uaString); - Browser browser = Browser.parseUserAgentString(uaString); - System.err.println(JSON.toJSONString(browser)); - System.err.println(JSON.toJSONString(userAgent)); +// System.err.println(JSON.toJSONString(browser)); + System.err.println(operatingSystem); System.out.println("============================================="); @@ -35,10 +35,18 @@ public class UaTest { .build(); nl.basjes.parse.useragent.UserAgent.ImmutableUserAgent agent = uaa.parse(uaString); - System.err.println(agent); } catch (IOException e) { e.printStackTrace(); } } + @Test + public void test(){ + String agentString = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.2; WOW32; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3)"; + Pattern pattern = Pattern.compile("Win(?:dows NT |32NT\\/)6\\.2"); + Matcher matcher = pattern.matcher(agentString); + System.out.println(matcher.find()); + + } + } |
